From 72a3528e556e99f8101889e8298c699802ae09a0 Mon Sep 17 00:00:00 2001
From: Matthias Koefferlein
Date: Wed, 20 Nov 2019 23:52:37 +0100
Subject: [PATCH] WIP: implementation of a fix (explicit text mode for XML
 reader and stream writer)

---
 src/lym/lym/lymMacro.cc            |  2 +-
 src/tl/tl/tlStream.cc              | 40 +++++++++++++++++++++++++++---
 src/tl/tl/tlStream.h               | 10 ++++++--
 src/tl/tl/tlXMLParser.cc           |  8 ++++--
 src/tl/unit_tests/tlStreamTests.cc | 31 ++++++++++++++++++++++++
 5 files changed, 82 insertions(+), 9 deletions(-)

diff --git a/src/lym/lym/lymMacro.cc b/src/lym/lym/lymMacro.cc
index 1fa6d5e54..ca7ced614 100644
--- a/src/lym/lym/lymMacro.cc
+++ b/src/lym/lym/lymMacro.cc
@@ -196,7 +196,7 @@ void Macro::save_to (const std::string &path)
     tl::log << "Saving macro to " << path;
   }
 
-  tl::OutputStream os (path, tl::OutputStream::OM_Plain);
+  tl::OutputStream os (path, tl::OutputStream::OM_Plain, true /*as text*/);
 
   if (m_format == MacroFormat) {
     xml_struct.write (os, *this);
diff --git a/src/tl/tl/tlStream.cc b/src/tl/tl/tlStream.cc
index 6b8be57b4..899dd5c5c 100644
--- a/src/tl/tl/tlStream.cc
+++ b/src/tl/tl/tlStream.cc
@@ -658,8 +658,8 @@ InputZLibFile::filename () const
 // ---------------------------------------------------------------
 //  OutputStream implementation
 
-OutputStream::OutputStream (OutputStreamBase &delegate)
-  : m_pos (0), mp_delegate (&delegate), m_owns_delegate (false)
+OutputStream::OutputStream (OutputStreamBase &delegate, bool as_text)
+  : m_pos (0), mp_delegate (&delegate), m_owns_delegate (false), m_as_text (as_text)
 {
   m_buffer_capacity = 16384;
   m_buffer_pos = 0;
@@ -690,8 +690,8 @@ OutputStreamBase *create_file_stream (const std::string &path, OutputStream::Out
   }
 }
 
-OutputStream::OutputStream (const std::string &abstract_path, OutputStreamMode om)
-  : m_pos (0), mp_delegate (0), m_owns_delegate (false)
+OutputStream::OutputStream (const std::string &abstract_path, OutputStreamMode om, bool as_text)
+  : m_pos (0), mp_delegate (0), m_owns_delegate (false), m_as_text (as_text)
 {
   //  Determine output mode
   om = output_mode_from_filename (abstract_path, om);
@@ -761,6 +761,38 @@ OutputStream::flush ()
 
 void
 OutputStream::put (const char *b, size_t n)
+{
+  if (m_as_text) {
+    //  skip CR and replace LF by the platform's line separator (CRLF on Windows, LF elsewhere)
+    while (n > 0) {
+      if (*b == '\r') {
+        ++b;
+        --n;
+      } else if (*b == '\n') {
+        //  NOTE: _WIN32 is predefined by MSVC as well as MinGW - MSVC does not define __WIN32
+#if defined(_WIN32)
+        put_raw ("\r\n", 2);
+#else
+        put_raw ("\n", 1);
+#endif
+        ++b;
+        --n;
+      } else {
+        const char *b0 = b;
+        while (n > 0 && *b != '\r' && *b != '\n') {
+          ++b;
+          --n;
+        }
+        put_raw (b0, b - b0);
+      }
+    }
+  } else {
+    put_raw (b, n);
+  }
+}
+
+void
+OutputStream::put_raw (const char *b, size_t n)
 {
   m_pos += n;
 
diff --git a/src/tl/tl/tlStream.h b/src/tl/tl/tlStream.h
index d9429a35d..75127728b 100644
--- a/src/tl/tl/tlStream.h
+++ b/src/tl/tl/tlStream.h
@@ -1016,14 +1016,16 @@ public:
    *
    *  This constructor takes a delegate object.
+   *  If "as_text" is true, CR characters are dropped and LF is written as the system's line separator.
    */
-  OutputStream (OutputStreamBase &delegate);
+  OutputStream (OutputStreamBase &delegate, bool as_text = false);
 
   /**
    *  @brief Open an output stream with the given path and stream mode
    *
    *  This will automatically create a delegate object and delete it later.
+   *  If "as_text" is true, the output will be formatted with the system's line separator.
    */
-  OutputStream (const std::string &abstract_path, OutputStreamMode om = OM_Auto);
+  OutputStream (const std::string &abstract_path, OutputStreamMode om = OM_Auto, bool as_text = false);
 
   /**
    *  @brief Destructor
@@ -1141,9 +1143,13 @@ private:
   size_t m_pos;
   OutputStreamBase *mp_delegate;
   bool m_owns_delegate;
+  bool m_as_text;
   char *mp_buffer;
   size_t m_buffer_capacity, m_buffer_pos;
 
+  //  Writes the bytes as they are (no line separator translation)
+  void put_raw (const char *b, size_t n);
+
   //  No copying currently
   OutputStream (const OutputStream &);
   OutputStream &operator= (const OutputStream &);
diff --git a/src/tl/tl/tlXMLParser.cc b/src/tl/tl/tlXMLParser.cc
index 935c3a997..a9f973f9d 100644
--- a/src/tl/tl/tlXMLParser.cc
+++ b/src/tl/tl/tlXMLParser.cc
@@ -639,8 +639,12 @@ public:
       }
 
       qint64 n0 = n;
-      for (const char *rd = 0; n > 0 && (rd = mp_stream->get (1)) != 0; --n) {
-        *data++ = *rd;
+      for (const char *rd = 0; n > 0 && (rd = mp_stream->get (1)) != 0; ) {
+        //  NOTE: we skip CR to compensate for Windows CRLF line terminators (issue #419).
+        if (*rd != '\r') {
+          *data++ = *rd;
+          --n;
+        }
       }
 
       if (n0 == n) {
diff --git a/src/tl/unit_tests/tlStreamTests.cc b/src/tl/unit_tests/tlStreamTests.cc
index e8d3dca15..1111c6608 100644
--- a/src/tl/unit_tests/tlStreamTests.cc
+++ b/src/tl/unit_tests/tlStreamTests.cc
@@ -42,3 +42,34 @@ TEST(InputPipe2)
   tl::info << "Process exit code: " << ret;
   EXPECT_NE (ret, 0);
 }
+
+TEST(TextOutputStream)
+{
+  std::string fn = tmp_file ("test.txt");
+
+  {
+    tl::OutputStream os (fn, tl::OutputStream::OM_Auto, false);
+    os << "Hello, world!\nWith another line\n\r\r\nseparated by a LFCR and CRLF.";
+  }
+
+  {
+    tl::InputStream is (fn);
+    std::string s = is.read_all ();
+    EXPECT_EQ (s, "Hello, world!\nWith another line\n\r\r\nseparated by a LFCR and CRLF.");
+  }
+
+  {
+    tl::OutputStream os (fn, tl::OutputStream::OM_Auto, true);
+    os << "Hello, world!\nWith another line\n\r\r\nseparated by a LFCR and CRLF.";
+  }
+
+  {
+    tl::InputStream is (fn);
+    std::string s = is.read_all ();
+#if defined(_WIN32)
+    EXPECT_EQ (s, "Hello, world!\r\nWith another line\r\n\r\nseparated by a LFCR and CRLF.");
+#else
+    EXPECT_EQ (s, "Hello, world!\nWith another line\n\nseparated by a LFCR and CRLF.");
+#endif
+  }
+}