From a430b4df831a80db813626ac5b564c83afdb8547 Mon Sep 17 00:00:00 2001
From: Matthias Koefferlein
Date: Sat, 7 Dec 2019 19:08:35 +0100
Subject: [PATCH 1/2] Supply 'read_all' capability of text stream with CRLF replacement.

---
 src/tl/tl/tlStream.cc              | 68 +++++++++++++++++++++---------
 src/tl/tl/tlStream.h               | 14 +++++-
 src/tl/unit_tests/tlStreamTests.cc | 38 +++++++++++++++++
 3 files changed, 98 insertions(+), 22 deletions(-)

diff --git a/src/tl/tl/tlStream.cc b/src/tl/tl/tlStream.cc
index 7b1724bf8..d12002dde 100644
--- a/src/tl/tl/tlStream.cc
+++ b/src/tl/tl/tlStream.cc
@@ -422,6 +422,30 @@ TextInputStream::TextInputStream (InputStream &stream)
   }
 }
 
+std::string
+TextInputStream::read_all ()
+{
+  return read_all (std::numeric_limits<size_t>::max ());
+}
+
+std::string
+TextInputStream::read_all (size_t max_count)
+{
+  std::string text;
+
+  while (! at_end () && max_count > 0) {
+    char c = get_char ();
+    if (c == 0) {
+      break;
+    } else {
+      --max_count;
+      text += c;
+    }
+  }
+
+  return text;
+}
+
 const std::string &
 TextInputStream::get_line ()
 {
@@ -430,9 +454,7 @@ TextInputStream::get_line ()
 
   while (! at_end ()) {
     char c = get_char ();
-    if (c == '\r') {
-      // simply skip CR
-    } else if (c == '\n' || c == 0) {
+    if (c == '\n' || c == 0) {
       break;
     } else {
       m_line_buffer += c;
@@ -445,31 +467,35 @@ TextInputStream::get_line ()
 char
 TextInputStream::get_char ()
 {
-  m_line = m_next_line;
-  const char *c = m_stream.get (1);
-  if (c == 0) {
-    m_at_end = true;
-    return 0;
-  } else {
-    if (*c == '\n') {
-      ++m_next_line;
+  while (true) {
+    m_line = m_next_line;
+    const char *c = m_stream.get (1);
+    if (c == 0) {
+      m_at_end = true;
+      return 0;
+    } else if (*c != '\r' && *c) {
+      if (*c == '\n') {
+        ++m_next_line;
+      }
+      return *c;
     }
-    return *c;
   }
 }
 
 char
 TextInputStream::peek_char ()
 {
-  m_line = m_next_line;
-  const char *c = m_stream.get (1);
-  if (c == 0) {
-    m_at_end = true;
-    return 0;
-  } else {
-    char cc = *c;
-    m_stream.unget (1);
-    return cc;
+  while (true) {
+    m_line = m_next_line;
+    const char *c = m_stream.get (1);
+    if (c == 0) {
+      m_at_end = true;
+      return 0;
+    } else if (*c != '\r' && *c) {
+      char cc = *c;
+      m_stream.unget (1);
+      return cc;
+    }
   }
 }
 
diff --git a/src/tl/tl/tlStream.h b/src/tl/tl/tlStream.h
index c300c551d..6752b03c4 100644
--- a/src/tl/tl/tlStream.h
+++ b/src/tl/tl/tlStream.h
@@ -564,7 +564,7 @@ private:
 // ---------------------------------------------------------------------------------
 
 /**
- *  @brief An ASCII input stream
+ *  @brief A text input stream (UTF8 encoded)
  *
  *  This class is put in front of a InputStream to format the input as text input stream.
  */
@@ -591,6 +591,18 @@ public:
    */
   const std::string &get_line ();
 
+  /**
+   *  @brief Reads all remaining bytes into the string
+   */
+  std::string read_all ();
+
+  /**
+   *  @brief Reads all remaining bytes into the string
+   *
+   *  This function reads the remaining text, stopping once max_count characters have been collected.
+   */
+  std::string read_all (size_t max_count);
+
   /**
    *  @brief Get a single character
    */
diff --git a/src/tl/unit_tests/tlStreamTests.cc b/src/tl/unit_tests/tlStreamTests.cc
index 4c398e206..01a6c6b71 100644
--- a/src/tl/unit_tests/tlStreamTests.cc
+++ b/src/tl/unit_tests/tlStreamTests.cc
@@ -107,3 +107,41 @@ TEST(TextOutputStream)
     throw;
   }
 }
+
+TEST(TextInputStream)
+{
+  std::string fn = tmp_file ("test.txt");
+
+  {
+    tl::OutputStream os (fn, tl::OutputStream::OM_Auto, false);
+    os << "Hello, world!\nWith another line\n\r\r\nseparated by a LFCR and CRLF.";
+  }
+
+  {
+    tl::InputStream is (fn);
+    tl::TextInputStream tis (is);
+    EXPECT_EQ (tis.get_line (), "Hello, world!");
+    EXPECT_EQ (tis.line_number (), 1);
+    EXPECT_EQ (tis.get_line (), "With another line");
+    EXPECT_EQ (tis.line_number (), 2);
+    EXPECT_EQ (tis.peek_char (), '\n');
+    EXPECT_EQ (tis.get_line (), "");
+    EXPECT_EQ (tis.line_number (), 3);
+    EXPECT_EQ (tis.peek_char (), 's');
+    EXPECT_EQ (tis.get_line (), "separated by a LFCR and CRLF.");
+    EXPECT_EQ (tis.line_number (), 4);
+    EXPECT_EQ (tis.at_end (), true);
+  }
+
+  {
+    tl::InputStream is (fn);
+    tl::TextInputStream tis (is);
+    EXPECT_EQ (tis.read_all (5), "Hello");
+  }
+
+  {
+    tl::InputStream is (fn);
+    tl::TextInputStream tis (is);
+    EXPECT_EQ (tis.read_all (), "Hello, world!\nWith another line\n\nseparated by a LFCR and CRLF.");
+  }
+}

From 9ef90aa13549034b72af13cbb7b91b8c2f098d3f Mon Sep 17 00:00:00 2001
From: Matthias Koefferlein
Date: Sat, 7 Dec 2019 19:14:21 +0100
Subject: [PATCH 2/2] Fixed #439 (CRLF issue with plain-text macros)

---
 src/lym/lym/lymMacro.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/lym/lym/lymMacro.cc b/src/lym/lym/lymMacro.cc
index c8a94b594..b82ce7b5a 100644
--- a/src/lym/lym/lymMacro.cc
+++ b/src/lym/lym/lymMacro.cc
@@ -240,7 +240,8 @@ void Macro::load_from (const std::string &fn)
     } else if (m_format == PlainTextFormat || m_format == PlainTextWithHashAnnotationsFormat) {
 
       tl::InputStream stream (path);
-      m_text = stream.read_all ();
+      tl::TextInputStream text_stream (stream);
+      m_text = text_stream.read_all ();
       sync_properties_with_text ();
 
     }