diff --git a/src/tl/tl/tlProtocolBuffer.cc b/src/tl/tl/tlProtocolBuffer.cc index 58dae9f76..0099fdf39 100644 --- a/src/tl/tl/tlProtocolBuffer.cc +++ b/src/tl/tl/tlProtocolBuffer.cc @@ -292,17 +292,78 @@ ProtocolBufferReader::error (const std::string &msg) } // ---------------------------------------------------------------------------------- +// Writer utilities (shared by ProtocolBufferWriter and ProtocolBufferDumper) -ProtocolBufferWriter::ProtocolBufferWriter (tl::OutputStream &stream) - : mp_stream (&stream), m_bytes_counted (0), m_debug (false), m_debug_pos (0) +inline uint32_t zigzag_encode (int32_t v) /* ZigZag mapping of a signed value to an unsigned one: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ... */ { - // .. nothing yet .. + if (v < 0) { + return ((uint32_t (-v) - 1) << 1) + 1; /* NOTE(review): "-v" is computed in signed arithmetic before the conversion, so v == INT32_MIN overflows where int is 32 bits wide - TODO confirm such values cannot reach this function */ + } else { + return uint32_t (v) << 1; + } } -void ProtocolBufferWriter::set_debug (bool f) +inline uint64_t zigzag_encode (int64_t v) /* 64 bit flavour of the ZigZag mapping above */ { - m_debug = f; - m_debug_pos = 0; + if (v < 0) { + return ((uint64_t (-v) - 1) << 1) + 1; /* NOTE(review): same signed negation concern for v == INT64_MIN - TODO confirm */ + } else { + return uint64_t (v) << 1; + } +} + +inline void little_endian_encode (char *b, uint32_t v) /* Stores v into b[0] .. b[sizeof (v) - 1], least significant byte first; b must provide sizeof (v) bytes */ +{ + for (unsigned int i = 0; i < sizeof (v); ++i) { + b[i] = (char) v; + v >>= 8; + } +} + +inline void little_endian_encode (char *b, uint64_t v) /* 64 bit flavour: stores sizeof (v) bytes, least significant byte first */ +{ + for (unsigned int i = 0; i < sizeof (v); ++i) { + b[i] = (char) v; + v >>= 8; + } +} + +inline size_t count_varint_bytes (pb_varint v) /* Returns the number of bytes varint_encode produces for v: one per started 7 bit group, at least one */ +{ + size_t n = 0; + while (true) { + ++n; + if (v < 0x80) { + break; + } + v >>= 7; + } + return n; +} + +const size_t max_varint_bytes = (sizeof (pb_varint) * 8) / 7 + 1; /* buffer size that is sufficient for varint_encode of any pb_varint value */ + +inline size_t varint_encode (char *b, pb_varint v) /* Writes v to b with 7 payload bits per byte, least significant group first, bit 0x80 set on every byte except the last; returns the byte count */ +{ + char *cp = b; + while (true) { + if (v < 0x80) { + *cp++ = char (v); + break; + } else { + *cp++ = (char (v) | 0x80); + } + v >>= 7; + } + return cp - b; +} + +// ---------------------------------------------------------------------------------- + +ProtocolBufferWriter::ProtocolBufferWriter (tl::OutputStream &stream) + : mp_stream (&stream), m_bytes_counted (0) +{ + // .. nothing yet .. 
} void ProtocolBufferWriter::write (int tag, float v) @@ -322,25 +383,11 @@ void ProtocolBufferWriter::write (int tag, uint32_t v, bool fixed) write_varint (pb_varint ((tag << 3) + PB_I32), true); if (is_counting ()) { - m_byte_counter_stack.back () += sizeof (v); - } else { - - auto vv = v; - char b[sizeof (v)]; - for (unsigned int i = 0; i < sizeof (v); ++i) { - b[i] = (char) v; - v >>= 8; - } - - if (m_debug) { - dump (b, sizeof (v), "I32", tl::to_string (vv)); - } - + little_endian_encode (b, v); mp_stream->put (b, sizeof (v)); - } } else { @@ -356,11 +403,7 @@ void ProtocolBufferWriter::write (int tag, int32_t v, bool fixed) if (fixed) { write (tag, uint32_t (v), true); } else { - if (v < 0) { - write (tag, ((uint32_t (-v) - 1) << 1) + 1, false); - } else { - write (tag, uint32_t (v) << 1, false); - } + write (tag, zigzag_encode (v), false); } } @@ -371,25 +414,11 @@ void ProtocolBufferWriter::write (int tag, uint64_t v, bool fixed) write_varint (pb_varint ((tag << 3) + PB_I64), true); if (is_counting ()) { - m_byte_counter_stack.back () += sizeof (v); - } else { - - auto vv = v; - char b[sizeof (v)]; - for (unsigned int i = 0; i < sizeof (v); ++i) { - b[i] = (char) v; - v >>= 8; - } - - if (m_debug) { - dump (b, sizeof (v), "I64", tl::to_string (vv)); - } - + little_endian_encode (b, v); mp_stream->put (b, sizeof (v)); - } } else { @@ -405,11 +434,7 @@ void ProtocolBufferWriter::write (int tag, int64_t v, bool fixed) if (fixed) { write (tag, uint64_t (v), true); } else { - if (v < 0) { - write (tag, ((uint64_t (-v) - 1) << 1) + 1, false); - } else { - write (tag, uint64_t (v) << 1, false); - } + write (tag, zigzag_encode (v), false); } } @@ -424,17 +449,9 @@ void ProtocolBufferWriter::write (int tag, const std::string &s) write_varint (s.size ()); if (is_counting ()) { - m_byte_counter_stack.back () += s.size (); - } else { - - if (m_debug) { - dump (s.c_str (), s.size (), "(string)", s); - } - mp_stream->put (s.c_str (), s.size ()); - } } @@ -477,68 +494,199 
@@ void ProtocolBufferWriter::end_seq () } } -void -ProtocolBufferWriter::write_varint (pb_varint v, bool id) +void ProtocolBufferWriter::write_varint (pb_varint v, bool /*id*/) /* the "id" flag is not used by the binary writer */ { if (is_counting ()) { - size_t n = 0; - while (true) { - ++n; - if (v < 0x80) { - break; - } - v >>= 7; - } - - m_byte_counter_stack.back () += n; + m_byte_counter_stack.back () += count_varint_bytes (v); } else { - auto vv = v; + char b [max_varint_bytes]; + size_t n = varint_encode (b, v); + mp_stream->put (b, n); - char b[16]; - char *cp = b; - while (true) { - if (v < 0x80) { - *cp++ = char (v); - break; - } else { - *cp++ = (char (v) | 0x80); - } - v >>= 7; + } +} + +// ---------------------------------------------------------------------------------- + +ProtocolBufferDumper::ProtocolBufferDumper () + : m_bytes_counted (0), m_debug_pos (0) +{ + // .. nothing yet .. +} + +void ProtocolBufferDumper::write (int tag, float v) +{ + write (tag, *reinterpret_cast (&v), true); /* forwards the reinterpreted value with "fixed" set. NOTE(review): the cast's target type is not visible in this copy of the patch - TODO restore from upstream and confirm */ +} + +void ProtocolBufferDumper::write (int tag, double v) +{ + write (tag, *reinterpret_cast (&v), true); /* NOTE(review): as above, the cast's target type is missing here - TODO confirm */ +} + +void ProtocolBufferDumper::write (int tag, uint32_t v, bool fixed) /* fixed: key with wire type PB_I32, then the value in little-endian byte order; otherwise: key with wire type PB_VARINT, then the varint. While counting, only byte sizes are accumulated. */ +{ + if (fixed) { + + write_varint (pb_varint ((tag << 3) + PB_I32), true); + + if (is_counting ()) { + m_byte_counter_stack.back () += sizeof (v); + } else { + char b[sizeof (v)]; + little_endian_encode (b, v); + dump (b, sizeof (v), "I32", tl::to_string (v)); } - if (m_debug) { - if (id) { - unsigned int wt = v & 7; - std::string wire_type; - if (wt == PB_EGROUP) { - wire_type = "EGROUP"; - } else if (wt == PB_SGROUP) { - wire_type = "SGROUP"; - } else if (wt == PB_VARINT) { - wire_type = "VARINT"; - } else if (wt == PB_I32) { - wire_type = "I32"; - } else if (wt == PB_I64) { - wire_type = "I64"; - } else if (wt == PB_LEN) { - wire_type = "LEN"; - } - dump (b, cp - b, "(id)", "#" + tl::to_string (vv >> 3) + " " + wire_type); - } else { - dump (b, cp - b, "VARINT", tl::to_string (vv)); - } + } else { + + write_varint (pb_varint ((tag << 3) + 
PB_VARINT), true); + write_varint (pb_varint (v)); + + } +} + +void ProtocolBufferDumper::write (int tag, int32_t v, bool fixed) /* signed values are passed on converted to uint32_t when fixed and zigzag-encoded otherwise */ +{ + if (fixed) { + write (tag, uint32_t (v), true); + } else { + write (tag, zigzag_encode (v), false); + } +} + +void ProtocolBufferDumper::write (int tag, uint64_t v, bool fixed) /* fixed: key with wire type PB_I64, then the value in little-endian byte order; otherwise: key with wire type PB_VARINT, then the varint */ +{ + if (fixed) { + + write_varint (pb_varint ((tag << 3) + PB_I64), true); + + if (is_counting ()) { + m_byte_counter_stack.back () += sizeof (v); + } else { + char b[sizeof (v)]; + little_endian_encode (b, v); + dump (b, sizeof (v), "I64", tl::to_string (v)); } - mp_stream->put (b, cp - b); + } else { + + write_varint (pb_varint ((tag << 3) + PB_VARINT), true); + write_varint (pb_varint (v)); + + } +} + +void ProtocolBufferDumper::write (int tag, int64_t v, bool fixed) /* signed values are passed on converted to uint64_t when fixed and zigzag-encoded otherwise */ +{ + if (fixed) { + write (tag, uint64_t (v), true); + } else { + write (tag, zigzag_encode (v), false); + } +} + +void ProtocolBufferDumper::write (int tag, bool b) /* booleans are passed on as the uint32_t value 1 or 0 */ +{ + write (tag, uint32_t (b ? 1 : 0)); +} + +void ProtocolBufferDumper::write (int tag, const std::string &s) /* LEN record: key, byte length as varint, then the string bytes (dumped, or only counted while counting) */ +{ + write_varint (pb_varint ((tag << 3) + PB_LEN), true); + write_varint (s.size ()); + + if (is_counting ()) { + + m_byte_counter_stack.back () += s.size (); + + } else { + + dump (s.c_str (), s.size (), "(string)", s); + + } +} + +bool ProtocolBufferDumper::is_counting () const +{ + return ! 
m_byte_counter_stack.empty (); +} + +void ProtocolBufferDumper::begin_seq (int tag, bool counting) /* counting pass: accounts for the key in the enclosing counter (if there is one) and opens a new byte counter; writing pass: emits the LEN key followed by the size stored by the last end_seq */ +{ + if (counting) { + + if (is_counting ()) { + write_varint (pb_varint ((tag << 3) + PB_LEN), true); + } + + m_byte_counter_stack.push_back (0); + + } else { + + write_varint (pb_varint ((tag << 3) + PB_LEN), true); + write_varint (m_bytes_counted); + + } +} + +void ProtocolBufferDumper::end_seq () +{ + if (is_counting ()) { + + m_bytes_counted = m_byte_counter_stack.back (); + m_byte_counter_stack.pop_back (); + + // nested counting: add the inner size and the size of its varint length prefix to the enclosing counter + if (is_counting ()) { + m_byte_counter_stack.back () += m_bytes_counted; + write_varint (m_bytes_counted); + } } } void -ProtocolBufferWriter::dump (const char *cp, size_t n, const std::string &type, const std::string &value) +ProtocolBufferDumper::write_varint (pb_varint v, bool id) /* with "id" set, v is a record key: the low 3 bits select the wire type name and v >> 3 is the tag number shown in the dump */ +{ + if (is_counting ()) { + + m_byte_counter_stack.back () += count_varint_bytes (v); + + } else { + + char b[max_varint_bytes]; + size_t n = varint_encode (b, v); + + if (id) { + tl::PBWireType wt = tl::PBWireType (v & 7); + std::string wire_type; + if (wt == PB_EGROUP) { + wire_type = "EGROUP"; + } else if (wt == PB_SGROUP) { + wire_type = "SGROUP"; + } else if (wt == PB_VARINT) { + wire_type = "VARINT"; + } else if (wt == PB_I32) { + wire_type = "I32"; + } else if (wt == PB_I64) { + wire_type = "I64"; + } else if (wt == PB_LEN) { + wire_type = "LEN"; + } + dump (b, n, "(id)", "#" + tl::to_string (v >> 3) + " " + wire_type); + } else { + dump (b, n, "VARINT", tl::to_string (v)); + } + + } +} + +void +ProtocolBufferDumper::dump (const char *cp, size_t n, const std::string &type, const std::string &value) { bool first = true; size_t nn = n; diff --git a/src/tl/tl/tlProtocolBuffer.h b/src/tl/tl/tlProtocolBuffer.h index 106401719..61acf4b04 100644 --- a/src/tl/tl/tlProtocolBuffer.h +++ b/src/tl/tl/tlProtocolBuffer.h @@ -67,27 +67,126 @@ private: * * This is a low-level decoder for ProtocolBuffer files. 
* - * The following LEN-type related concepts need to be implemented by the client code: - * - submessages - * - maps - * - packed repetitions - * - strings + * Use "read_tag" to read a new tag. Unknown tags must be skipped. + * Use "skip" to skip an entry. * - * Unknown tags need to be skipped with "skip". * - * Submessages: if a corresponding tag is encountered with "is_seq()" true, the - * reader code needs to call "open" to enter the sequence and read tags until - * "at_end" is true. Then, call "close" to leave the sequence. + */ +class TL_PUBLIC ProtocolBufferReaderBase +{ +public: + /** + * @brief Constructor + */ + ProtocolBufferReaderBase () + { + // .. nothing yet .. + } + + /** + * @brief Destructor + */ + virtual ~ProtocolBufferReaderBase () + { + // .. nothing yet .. + } + + /** + * @brief Reads a new tag + * This method will also set the current wire type. + * @returns The message ID + */ + virtual int read_tag () = 0; + + /** + * @brief Gets the current wire type + */ + virtual PBWireType type () const = 0; + + /** + * @brief Skips the current tag + */ + virtual void skip () = 0; + + /** + * @brief Reads a double-precision floating-point value from the current message + * Throws a reader error if the current tag's value is not compatible with a double value. + */ + virtual void read (double &d) = 0; + + /** + * @brief Reads a single-precision floating-point value from the current message + * Throws a reader error if the current tag's value is not compatible with a float value. + */ + virtual void read (float &f) = 0; + + /** + * @brief Reads a string from the current message + * Throws a reader error if the current tag's value is not compatible with a string. + */ + virtual void read (std::string &s) = 0; + + /** + * @brief Reads a uint32_t value from the current message + * Throws a reader error if the current tag's value is not compatible with a uint32_t. 
+ */ + virtual void read (uint32_t &ui32) = 0; + + /** + * @brief Reads an int32_t value from the current message + * Throws a reader error if the current tag's value is not compatible with an int32_t. + */ + virtual void read (int32_t &i32) = 0; + + /** + * @brief Reads a uint64_t value from the current message + * Throws a reader error if the current tag's value is not compatible with a uint64_t. + */ + virtual void read (uint64_t &ui64) = 0; + + /** + * @brief Reads an int64_t value from the current message + * Throws a reader error if the current tag's value is not compatible with an int64_t. + */ + virtual void read (int64_t &i64) = 0; + + /** + * @brief Reads a boolean value from the current message + * Throws a reader error if the current tag's value is not compatible with a bool. + */ + virtual void read (bool &b) = 0; + + /** + * @brief Opens a LEN sequence + * After calling "open", the parser will continue reading messages, but + * "at_end" will report true on the end of the sequence, not at the end of the + * file. + * This method will throw an exception if not in a message of LEN type. + */ + virtual void open () = 0; + + /** + * @brief Closes the LEN sequence + * This method will jump to the end of the sequence and continue reading + * messages from the previous block or file. + */ + virtual void close () = 0; + + /** + * @brief Returns true if at the end of the file or end of a block + */ + virtual bool at_end () const = 0; +}; + +/** + * @brief A reader for ProtocolBuffer files and streams * - * Packed repetitions: same a submessages, but single values are read - * with one of the "read" types. - * - * Maps are read like submessages with key/values as tags 1 and 2. - * - * Strings: if a corresponding tag is encountered, use "read(s)" to read - * the string. "is_seq()" is required to be true, i.e. wire type is LEN. 
+ * This is the reader implementation for binary files + * as described here: + * https://protobuf.dev/programming-guides/encoding/ */ class TL_PUBLIC ProtocolBufferReader + : public ProtocolBufferReaderBase { public: /** @@ -110,16 +209,6 @@ public: return m_type; } - /** - * @brief Returns true, if the current message is a LEN type sequence - * Such messages can be read into strings or "open" can be used on them to - * open a submessage, map or packed repetition. - */ - bool is_seq () const - { - return m_type == PB_LEN; - } - /** * @brief Skips the current tag */ @@ -224,7 +313,62 @@ private: * 4. if "is_counting()" is false, repeat steps 1 to 3 with * "counting" set to false on "begin_seq". */ +class TL_PUBLIC ProtocolBufferWriterBase +{ +public: + /** + * @brief Constructor + */ + ProtocolBufferWriterBase () + { + // .. nothing yet .. + } + + /** + * @brief Destructor + */ + virtual ~ProtocolBufferWriterBase () + { + // .. nothing yet .. + } + + /** + * @brief Writes a scalar tag with the given value + */ + virtual void write (int tag, float v) = 0; + virtual void write (int tag, double v) = 0; + virtual void write (int tag, uint32_t v, bool fixed = false) = 0; + virtual void write (int tag, int32_t v, bool fixed = false) = 0; + virtual void write (int tag, uint64_t v, bool fixed = false) = 0; + virtual void write (int tag, int64_t v, bool fixed = false) = 0; + virtual void write (int tag, bool b) = 0; + virtual void write (int tag, const std::string &s) = 0; + + /** + * @brief Returns true if the writer is in counting mode + */ + virtual bool is_counting () const = 0; + + /** + * @brief Initiates a new sequence. See class documentation for details. + */ + virtual void begin_seq (int tag, bool counting) = 0; + + /** + * @brief Ends a sequence. See class documentation for details. 
+ */ + virtual void end_seq () = 0; +}; + +/** + * @brief A writer for ProtocolBuffer files and streams + * + * This is the writer implementation for binary files + * as described here: + * https://protobuf.dev/programming-guides/encoding/ + */ class TL_PUBLIC ProtocolBufferWriter + : public ProtocolBufferWriterBase { public: /** @@ -259,20 +403,62 @@ public: */ void end_seq (); +private: + void write_varint (pb_varint v, bool id = false); + + tl::OutputStream *mp_stream; + size_t m_bytes_counted; + std::vector m_byte_counter_stack; +}; + +/** + * @brief A writer implementation that dumps the file content to tl::info + * + * This implementation does a halfway job of producing binary files, + * but only insofar as it is needed for dumping the binary data. + */ +class TL_PUBLIC ProtocolBufferDumper + : public ProtocolBufferWriterBase +{ +public: /** - * @brief Enables or disables debug mode - * In debug mode, the stream will be dumped in a human readable form + * @brief Creates the dumper */ - void set_debug (bool f); + ProtocolBufferDumper (); + + /** + * @brief Writes a scalar tag with the given value + */ + void write (int tag, float v); + void write (int tag, double v); + void write (int tag, uint32_t v, bool fixed = false); + void write (int tag, int32_t v, bool fixed = false); + void write (int tag, uint64_t v, bool fixed = false); + void write (int tag, int64_t v, bool fixed = false); + void write (int tag, bool b); + void write (int tag, const std::string &s); + + /** + * @brief Returns true if the writer is in counting mode + */ + bool is_counting () const; + + /** + * @brief Initiates a new sequence. See ProtocolBufferWriterBase for details. + */ + void begin_seq (int tag, bool counting); + + /** + * @brief Ends a sequence. See ProtocolBufferWriterBase for details. 
+ */ + void end_seq (); private: void write_varint (pb_varint v, bool id = false); void dump (const char *cp, size_t n, const std::string &type, const std::string &value); - tl::OutputStream *mp_stream; size_t m_bytes_counted; std::vector m_byte_counter_stack; - bool m_debug; size_t m_debug_pos; }; diff --git a/src/tl/tl/tlProtocolBufferStruct.cc b/src/tl/tl/tlProtocolBufferStruct.cc index a52bc879d..2df45d4ff 100644 --- a/src/tl/tl/tlProtocolBufferStruct.cc +++ b/src/tl/tl/tlProtocolBufferStruct.cc @@ -40,14 +40,14 @@ PBParser::~PBParser () } void -PBParser::parse (tl::ProtocolBufferReader &reader, const PBElementBase *root, PBReaderState *reader_state) +PBParser::parse (tl::ProtocolBufferReaderBase &reader, const PBElementBase *root, PBReaderState *reader_state) { mp_state = reader_state; parse_element (root, reader); } void -PBParser::parse_element (const PBElementBase *parent, tl::ProtocolBufferReader &reader) +PBParser::parse_element (const PBElementBase *parent, tl::ProtocolBufferReaderBase &reader) { while (! 
reader.at_end ()) { diff --git a/src/tl/tl/tlProtocolBufferStruct.h b/src/tl/tl/tlProtocolBufferStruct.h index 613bc3024..560cd89f9 100644 --- a/src/tl/tl/tlProtocolBufferStruct.h +++ b/src/tl/tl/tlProtocolBufferStruct.h @@ -296,8 +296,8 @@ public: PBParser (); ~PBParser (); - void parse (tl::ProtocolBufferReader &reader, const PBElementBase *root, PBReaderState *reader_state); - void parse_element (const PBElementBase *parent, tl::ProtocolBufferReader &reader); + void parse (tl::ProtocolBufferReaderBase &reader, const PBElementBase *root, PBReaderState *reader_state); + void parse_element (const PBElementBase *parent, tl::ProtocolBufferReaderBase &reader); PBReaderState &reader_state () { @@ -517,7 +517,7 @@ public: virtual PBElementBase *clone () const = 0; virtual void create (const PBElementBase *parent, PBReaderState &objs) const = 0; - virtual void parse (PBParser *, tl::ProtocolBufferReader &) const = 0; + virtual void parse (PBParser *, tl::ProtocolBufferReaderBase &) const = 0; virtual void finish (const PBElementBase *parent, PBReaderState &objs) const = 0; virtual void write (const PBElementBase *, tl::ProtocolBufferWriter &, PBWriterState &) const { } @@ -602,7 +602,7 @@ public: objs.pop (tag); } - virtual void parse (PBParser *parser, tl::ProtocolBufferReader &reader) const + virtual void parse (PBParser *parser, tl::ProtocolBufferReaderBase &reader) const { reader.open (); parser->parse_element (this, reader); @@ -767,7 +767,7 @@ public: // .. nothing yet .. 
} - virtual void parse (PBParser *parser, tl::ProtocolBufferReader &reader) const + virtual void parse (PBParser *parser, tl::ProtocolBufferReaderBase &reader) const { PBObjTag tag; PBObjTag parent_tag; @@ -865,17 +865,17 @@ private: } // read incarnations - void read (tl::ProtocolBufferReader &reader, float &v) const + void read (tl::ProtocolBufferReaderBase &reader, float &v) const { reader.read (v); } - void read (tl::ProtocolBufferReader &reader, double &v) const + void read (tl::ProtocolBufferReaderBase &reader, double &v) const { reader.read (v); } - void read (tl::ProtocolBufferReader &reader, uint8_t &v) const + void read (tl::ProtocolBufferReaderBase &reader, uint8_t &v) const { uint32_t vv = 0; reader.read (vv); @@ -883,7 +883,7 @@ private: v = vv; } - void read (tl::ProtocolBufferReader &reader, int8_t &v) const + void read (tl::ProtocolBufferReaderBase &reader, int8_t &v) const { int32_t vv = 0; reader.read (vv); @@ -891,7 +891,7 @@ private: v = vv; } - void read (tl::ProtocolBufferReader &reader, uint16_t &v) const + void read (tl::ProtocolBufferReaderBase &reader, uint16_t &v) const { uint32_t vv = 0; reader.read (vv); @@ -899,7 +899,7 @@ private: v = vv; } - void read (tl::ProtocolBufferReader &reader, int16_t &v) const + void read (tl::ProtocolBufferReaderBase &reader, int16_t &v) const { int32_t vv = 0; reader.read (vv); @@ -907,38 +907,38 @@ private: v = vv; } - void read (tl::ProtocolBufferReader &reader, uint32_t &v) const + void read (tl::ProtocolBufferReaderBase &reader, uint32_t &v) const { reader.read (v); } - void read (tl::ProtocolBufferReader &reader, int32_t &v) const + void read (tl::ProtocolBufferReaderBase &reader, int32_t &v) const { reader.read (v); } - void read (tl::ProtocolBufferReader &reader, uint64_t &v) const + void read (tl::ProtocolBufferReaderBase &reader, uint64_t &v) const { reader.read (v); } - void read (tl::ProtocolBufferReader &reader, int64_t &v) const + void read (tl::ProtocolBufferReaderBase &reader, int64_t &v) 
const { reader.read (v); } - void read (tl::ProtocolBufferReader &reader, bool &v) const + void read (tl::ProtocolBufferReaderBase &reader, bool &v) const { reader.read (v); } - void read (tl::ProtocolBufferReader &reader, std::string &v) const + void read (tl::ProtocolBufferReaderBase &reader, std::string &v) const { reader.read (v); } template - void read (tl::ProtocolBufferReader &reader, const T &v) const + void read (tl::ProtocolBufferReaderBase &reader, const T &v) const { std::string vv; reader.read (vv); @@ -994,7 +994,7 @@ public: } } - void parse (tl::ProtocolBufferReader &reader, Obj &root) const + void parse (tl::ProtocolBufferReaderBase &reader, Obj &root) const { PBObjTag tag; PBReaderState rs; @@ -1011,7 +1011,7 @@ private: // disable base class implementation } - virtual void parse (PBParser *, tl::ProtocolBufferReader &) const + virtual void parse (PBParser *, tl::ProtocolBufferReaderBase &) const { // disable base class implementation }