This commit is contained in:
Matthias Koefferlein 2024-08-11 15:27:32 +02:00
parent ee06ac2f7a
commit 0f067e1dd8
4 changed files with 487 additions and 153 deletions

View File

@ -292,17 +292,78 @@ ProtocolBufferReader::error (const std::string &msg)
}
// ----------------------------------------------------------------------------------
// Writer utilities
ProtocolBufferWriter::ProtocolBufferWriter (tl::OutputStream &stream)
: mp_stream (&stream), m_bytes_counted (0), m_debug (false), m_debug_pos (0)
/**
 *  @brief ZigZag-encodes a signed 32 bit value
 *
 *  Signed values are mapped to unsigned ones such that small magnitudes
 *  produce small codes: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 *  The computation is done on the unsigned representation only. Negating the
 *  value first would overflow for the most negative input.
 *
 *  @param v The signed value
 *  @return The ZigZag code of v
 */
inline uint32_t zigzag_encode (int32_t v)
{
  const uint32_t u = uint32_t (v);
  //  all bits set for negative values, no bits set otherwise
  const uint32_t sign_mask = (v < 0) ? ~uint32_t (0) : uint32_t (0);
  return uint32_t ((u << 1) ^ sign_mask);
}
void ProtocolBufferWriter::set_debug (bool f)
/**
 *  @brief ZigZag-encodes a signed 64 bit value
 *
 *  Signed values are mapped to unsigned ones such that small magnitudes
 *  produce small codes: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 *  The computation is done on the unsigned representation only. Negating the
 *  value first would overflow for the most negative input.
 *
 *  @param v The signed value
 *  @return The ZigZag code of v
 */
inline uint64_t zigzag_encode (int64_t v)
{
  const uint64_t u = uint64_t (v);
  //  all bits set for negative values, no bits set otherwise
  const uint64_t sign_mask = (v < 0) ? ~uint64_t (0) : uint64_t (0);
  return (u << 1) ^ sign_mask;
}
/**
 *  @brief Stores a 32 bit value in little-endian byte order
 *
 *  @param b The target buffer, must provide room for sizeof (uint32_t) bytes
 *  @param v The value to store (least significant byte goes first)
 */
inline void little_endian_encode (char *b, uint32_t v)
{
  char *out = b;
  char *const end = b + sizeof (v);
  while (out != end) {
    //  emit the lowest byte, then move the next one into place
    *out++ = (char) v;
    v >>= 8;
  }
}
/**
 *  @brief Stores a 64 bit value in little-endian byte order
 *
 *  @param b The target buffer, must provide room for sizeof (uint64_t) bytes
 *  @param v The value to store (least significant byte goes first)
 */
inline void little_endian_encode (char *b, uint64_t v)
{
  char *out = b;
  char *const end = b + sizeof (v);
  while (out != end) {
    //  emit the lowest byte, then move the next one into place
    *out++ = (char) v;
    v >>= 8;
  }
}
/**
 *  @brief Counts the bytes the varint encoding of a value takes
 *
 *  Every byte carries 7 bits of the value. At least one byte is
 *  always needed.
 *
 *  @param v The value to encode
 *  @return The number of bytes of the encoded form
 */
inline size_t count_varint_bytes (pb_varint v)
{
  size_t bytes = 1;
  //  one more byte for every 7 bit group beyond the first one
  for ( ; ! (v < 0x80); v >>= 7) {
    ++bytes;
  }
  return bytes;
}
//  Buffer size that is sufficient for the varint encoding of any pb_varint value:
//  every encoded byte carries 7 bits, the "+ 1" accounts for the partially
//  filled last byte.
const size_t max_varint_bytes = (sizeof (pb_varint) * 8) / 7 + 1;
/**
 *  @brief Encodes a value as varint
 *
 *  The value is emitted in groups of 7 bits, least significant group first.
 *  All bytes except the last one have the continuation bit (0x80) set.
 *
 *  @param b The target buffer (max_varint_bytes is a sufficient size)
 *  @param v The value to encode
 *  @return The number of bytes written to b
 */
inline size_t varint_encode (char *b, pb_varint v)
{
  size_t n = 0;
  //  all leading groups carry the continuation bit
  for ( ; ! (v < 0x80); v >>= 7) {
    b[n++] = (char (v) | 0x80);
  }
  //  final group: continuation bit is clear
  b[n++] = char (v);
  return n;
}
// ----------------------------------------------------------------------------------
//  Creates a writer that emits to the given stream.
//  Only the address of the stream is stored, the byte counter starts at zero.
//  NOTE(review): presumably the stream needs to outlive the writer - confirm with callers.
ProtocolBufferWriter::ProtocolBufferWriter (tl::OutputStream &stream)
: mp_stream (&stream), m_bytes_counted (0)
{
// .. nothing yet ..
}
void ProtocolBufferWriter::write (int tag, float v)
@ -322,25 +383,11 @@ void ProtocolBufferWriter::write (int tag, uint32_t v, bool fixed)
write_varint (pb_varint ((tag << 3) + PB_I32), true);
if (is_counting ()) {
m_byte_counter_stack.back () += sizeof (v);
} else {
auto vv = v;
char b[sizeof (v)];
for (unsigned int i = 0; i < sizeof (v); ++i) {
b[i] = (char) v;
v >>= 8;
}
if (m_debug) {
dump (b, sizeof (v), "I32", tl::to_string (vv));
}
little_endian_encode (b, v);
mp_stream->put (b, sizeof (v));
}
} else {
@ -356,11 +403,7 @@ void ProtocolBufferWriter::write (int tag, int32_t v, bool fixed)
if (fixed) {
write (tag, uint32_t (v), true);
} else {
if (v < 0) {
write (tag, ((uint32_t (-v) - 1) << 1) + 1, false);
} else {
write (tag, uint32_t (v) << 1, false);
}
write (tag, zigzag_encode (v), false);
}
}
@ -371,25 +414,11 @@ void ProtocolBufferWriter::write (int tag, uint64_t v, bool fixed)
write_varint (pb_varint ((tag << 3) + PB_I64), true);
if (is_counting ()) {
m_byte_counter_stack.back () += sizeof (v);
} else {
auto vv = v;
char b[sizeof (v)];
for (unsigned int i = 0; i < sizeof (v); ++i) {
b[i] = (char) v;
v >>= 8;
}
if (m_debug) {
dump (b, sizeof (v), "I64", tl::to_string (vv));
}
little_endian_encode (b, v);
mp_stream->put (b, sizeof (v));
}
} else {
@ -405,11 +434,7 @@ void ProtocolBufferWriter::write (int tag, int64_t v, bool fixed)
if (fixed) {
write (tag, uint64_t (v), true);
} else {
if (v < 0) {
write (tag, ((uint64_t (-v) - 1) << 1) + 1, false);
} else {
write (tag, uint64_t (v) << 1, false);
}
write (tag, zigzag_encode (v), false);
}
}
@ -424,17 +449,9 @@ void ProtocolBufferWriter::write (int tag, const std::string &s)
write_varint (s.size ());
if (is_counting ()) {
m_byte_counter_stack.back () += s.size ();
} else {
if (m_debug) {
dump (s.c_str (), s.size (), "(string)", s);
}
mp_stream->put (s.c_str (), s.size ());
}
}
@ -477,68 +494,199 @@ void ProtocolBufferWriter::end_seq ()
}
}
void
ProtocolBufferWriter::write_varint (pb_varint v, bool id)
void ProtocolBufferWriter::write_varint (pb_varint v, bool /*id*/)
{
if (is_counting ()) {
size_t n = 0;
while (true) {
++n;
if (v < 0x80) {
break;
}
v >>= 7;
}
m_byte_counter_stack.back () += n;
m_byte_counter_stack.back () += count_varint_bytes (v);
} else {
auto vv = v;
char b [max_varint_bytes];
size_t n = varint_encode (b, v);
mp_stream->put (b, n);
char b[16];
char *cp = b;
while (true) {
if (v < 0x80) {
*cp++ = char (v);
break;
} else {
*cp++ = (char (v) | 0x80);
}
v >>= 7;
}
}
// ----------------------------------------------------------------------------------
//  Creates a dumper. The byte counter and the dump position both start at zero.
ProtocolBufferDumper::ProtocolBufferDumper ()
: m_bytes_counted (0), m_debug_pos (0)
{
// .. nothing yet ..
}
/**
 *  @brief Writes a float value
 *
 *  The bit pattern of the value is passed on as a fixed 32 bit entry.
 *
 *  @param tag The message ID
 *  @param v The value to write
 */
void ProtocolBufferDumper::write (int tag, float v)
{
  static_assert (sizeof (float) == sizeof (uint32_t), "float is expected to be 32 bits wide");

  //  Copy the object representation byte by byte. Reading the float through
  //  a uint32_t lvalue is not covered by the aliasing rules, byte-wise access is.
  uint32_t bits = 0;
  const unsigned char *src = reinterpret_cast<const unsigned char *> (&v);
  unsigned char *dst = reinterpret_cast<unsigned char *> (&bits);
  for (size_t i = 0; i < sizeof (bits); ++i) {
    dst[i] = src[i];
  }

  write (tag, bits, true);
}
/**
 *  @brief Writes a double value
 *
 *  The bit pattern of the value is passed on as a fixed 64 bit entry.
 *
 *  @param tag The message ID
 *  @param v The value to write
 */
void ProtocolBufferDumper::write (int tag, double v)
{
  static_assert (sizeof (double) == sizeof (uint64_t), "double is expected to be 64 bits wide");

  //  Copy the object representation byte by byte. Reading the double through
  //  a uint64_t lvalue is not covered by the aliasing rules, byte-wise access is.
  uint64_t bits = 0;
  const unsigned char *src = reinterpret_cast<const unsigned char *> (&v);
  unsigned char *dst = reinterpret_cast<unsigned char *> (&bits);
  for (size_t i = 0; i < sizeof (bits); ++i) {
    dst[i] = src[i];
  }

  write (tag, bits, true);
}
void ProtocolBufferDumper::write (int tag, uint32_t v, bool fixed)
{
if (fixed) {
write_varint (pb_varint ((tag << 3) + PB_I32), true);
if (is_counting ()) {
m_byte_counter_stack.back () += sizeof (v);
} else {
char b[sizeof (v)];
little_endian_encode (b, v);
dump (b, sizeof (v), "I32", tl::to_string (v));
}
if (m_debug) {
if (id) {
unsigned int wt = v & 7;
std::string wire_type;
if (wt == PB_EGROUP) {
wire_type = "EGROUP";
} else if (wt == PB_SGROUP) {
wire_type = "SGROUP";
} else if (wt == PB_VARINT) {
wire_type = "VARINT";
} else if (wt == PB_I32) {
wire_type = "I32";
} else if (wt == PB_I64) {
wire_type = "I64";
} else if (wt == PB_LEN) {
wire_type = "LEN";
}
dump (b, cp - b, "(id)", "#" + tl::to_string (vv >> 3) + " " + wire_type);
} else {
dump (b, cp - b, "VARINT", tl::to_string (vv));
}
} else {
write_varint (pb_varint ((tag << 3) + PB_VARINT), true);
write_varint (pb_varint (v));
}
}
/**
 *  @brief Writes a signed 32 bit value
 *
 *  @param tag The message ID
 *  @param v The value to write
 *  @param fixed If true, the plain bit pattern is written as fixed-width entry,
 *               otherwise the value is ZigZag-encoded and written as varint
 */
void ProtocolBufferDumper::write (int tag, int32_t v, bool fixed)
{
  const uint32_t payload = fixed ? uint32_t (v) : zigzag_encode (v);
  write (tag, payload, fixed);
}
/**
 *  @brief Writes an unsigned 64 bit value
 *
 *  In counting mode, only the number of bytes is accumulated. Otherwise the
 *  encoded bytes are handed to "dump" together with a readable form of the value.
 *
 *  @param tag The message ID
 *  @param v The value to write
 *  @param fixed If true, the value is written with I64 wire type as 8 little-endian
 *               bytes, otherwise as varint
 */
void ProtocolBufferDumper::write (int tag, uint64_t v, bool fixed)
{
  if (fixed) {

    write_varint (pb_varint ((tag << 3) + PB_I64), true);

    if (is_counting ()) {
      m_byte_counter_stack.back () += sizeof (v);
    } else {
      char b[sizeof (v)];
      little_endian_encode (b, v);
      dump (b, sizeof (v), "I64", tl::to_string (v));
    }

  } else {

    write_varint (pb_varint ((tag << 3) + PB_VARINT), true);
    write_varint (pb_varint (v));

  }
}
/**
 *  @brief Writes a signed 64 bit value
 *
 *  @param tag The message ID
 *  @param v The value to write
 *  @param fixed If true, the plain bit pattern is written as fixed-width entry,
 *               otherwise the value is ZigZag-encoded and written as varint
 */
void ProtocolBufferDumper::write (int tag, int64_t v, bool fixed)
{
  const uint64_t payload = fixed ? uint64_t (v) : zigzag_encode (v);
  write (tag, payload, fixed);
}
/**
 *  @brief Writes a boolean value
 *
 *  The value is written like an unsigned 32 bit value of 1 (true) or 0 (false).
 *
 *  @param tag The message ID
 *  @param b The value to write
 */
void ProtocolBufferDumper::write (int tag, bool b)
{
  const uint32_t flag = b ? 1 : 0;
  write (tag, flag);
}
/**
 *  @brief Writes a string
 *
 *  The string is written as LEN entry: the ID, the length as varint and the
 *  bytes of the string. In counting mode, the string bytes are only counted.
 *
 *  @param tag The message ID
 *  @param s The string to write
 */
void ProtocolBufferDumper::write (int tag, const std::string &s)
{
  const size_t len = s.size ();

  write_varint (pb_varint ((tag << 3) + PB_LEN), true);
  write_varint (len);

  if (! is_counting ()) {
    dump (s.c_str (), len, "(string)", s);
  } else {
    m_byte_counter_stack.back () += len;
  }
}
/**
 *  @brief Returns true if the dumper is in counting mode
 *
 *  Counting mode is active as long as there is at least one entry on the
 *  byte counter stack.
 */
bool ProtocolBufferDumper::is_counting () const
{
  return m_byte_counter_stack.size () != 0;
}
void ProtocolBufferDumper::begin_seq (int tag, bool counting)
{
if (counting) {
if (is_counting ()) {
write_varint (pb_varint ((tag << 3) + PB_LEN), true);
}
m_byte_counter_stack.push_back (0);
} else {
write_varint (pb_varint ((tag << 3) + PB_LEN), true);
write_varint (m_bytes_counted);
}
}
/**
 *  @brief Ends a sequence
 *
 *  In counting mode, the innermost counter is popped and kept as the byte
 *  count of the sequence. If an enclosing counter exists, it is advanced by
 *  that count plus the size of the length field.
 *  Outside counting mode, nothing happens.
 */
void ProtocolBufferDumper::end_seq ()
{
  if (! is_counting ()) {
    return;
  }

  m_bytes_counted = m_byte_counter_stack.back ();
  m_byte_counter_stack.pop_back ();

  if (! is_counting ()) {
    return;
  }

  //  still counting: add the payload bytes and (through write_varint)
  //  the bytes of the length field to the enclosing counter
  m_byte_counter_stack.back () += m_bytes_counted;
  write_varint (m_bytes_counted);
}
void
ProtocolBufferWriter::dump (const char *cp, size_t n, const std::string &type, const std::string &value)
ProtocolBufferDumper::write_varint (pb_varint v, bool id)
{
  //  counting mode: only account for the encoded size
  if (is_counting ()) {
    m_byte_counter_stack.back () += count_varint_bytes (v);
    return;
  }

  char buffer[max_varint_bytes];
  const size_t len = varint_encode (buffer, v);

  //  plain values are dumped as number
  if (! id) {
    dump (buffer, len, "VARINT", tl::to_string (v));
    return;
  }

  //  IDs are split into the message ID (upper bits) and the wire type (lowest 3 bits)
  const tl::PBWireType wt = tl::PBWireType (v & 7);

  std::string wire_type;
  if (wt == PB_EGROUP) {
    wire_type = "EGROUP";
  } else if (wt == PB_SGROUP) {
    wire_type = "SGROUP";
  } else if (wt == PB_VARINT) {
    wire_type = "VARINT";
  } else if (wt == PB_I32) {
    wire_type = "I32";
  } else if (wt == PB_I64) {
    wire_type = "I64";
  } else if (wt == PB_LEN) {
    wire_type = "LEN";
  }

  const std::string id_text = "#" + tl::to_string (v >> 3) + " " + wire_type;
  dump (buffer, len, "(id)", id_text);
}
void
ProtocolBufferDumper::dump (const char *cp, size_t n, const std::string &type, const std::string &value)
{
bool first = true;
size_t nn = n;

View File

@ -67,27 +67,126 @@ private:
*
* This is a low-level decoder for ProtocolBuffer files.
*
* The following LEN-type related concepts need to be implemented by the client code:
* - submessages
* - maps
* - packed repetitions
* - strings
* Use "read_tag" to read a new tag. Unknown tags must be skipped.
* Use "skip" to skip an entry.
*
* Unknown tags need to be skipped with "skip".
*
* Submessages: if a corresponding tag is encountered with "is_seq()" true, the
* reader code needs to call "open" to enter the sequence and read tags until
* "at_end" is true. Then, call "close" to leave the sequence.
*/
class TL_PUBLIC ProtocolBufferReaderBase
{
public:
/**
* @brief Constructor
*/
ProtocolBufferReaderBase ()
{
// .. nothing yet ..
}
/**
* @brief Destructor
* The destructor is virtual as this class is the abstract interface of the readers.
*/
virtual ~ProtocolBufferReaderBase ()
{
// .. nothing yet ..
}
/**
* @brief Reads a new tag
* This method will also set the current wire type (see "type").
* @returns The message ID
*/
virtual int read_tag () = 0;
/**
* @brief Gets the current wire type
*/
virtual PBWireType type () const = 0;
/**
* @brief Skips the current tag
*/
virtual void skip () = 0;
/**
* @brief Reads a double-precision floating-point value from the current message
* Throws a reader error if the current tag's value is not compatible with a double value.
*/
virtual void read (double &d) = 0;
/**
* @brief Reads a single-precision floating-point value from the current message
* Throws a reader error if the current tag's value is not compatible with a float value.
*/
virtual void read (float &f) = 0;
/**
* @brief Reads a string from the current message
* Throws a reader error if the current tag's value is not compatible with a string.
*/
virtual void read (std::string &s) = 0;
/**
* @brief Reads a uint32_t value from the current message
* Throws a reader error if the current tag's value is not compatible with a uint32_t.
*/
virtual void read (uint32_t &ui32) = 0;
/**
* @brief Reads an int32_t value from the current message
* Throws a reader error if the current tag's value is not compatible with an int32_t.
*/
virtual void read (int32_t &i32) = 0;
/**
* @brief Reads a uint64_t value from the current message
* Throws a reader error if the current tag's value is not compatible with a uint64_t.
*/
virtual void read (uint64_t &ui64) = 0;
/**
* @brief Reads an int64_t value from the current message
* Throws a reader error if the current tag's value is not compatible with an int64_t.
*/
virtual void read (int64_t &i64) = 0;
/**
* @brief Reads a boolean value from the current message
* Throws a reader error if the current tag's value is not compatible with a bool.
*/
virtual void read (bool &b) = 0;
/**
* @brief Opens a LEN sequence
* After calling "open", the parser will continue reading messages, but
* "at_end" will report true at the end of the sequence, not at the end of the
* file.
* This method will throw an exception if not in a message of LEN type.
*/
virtual void open () = 0;
/**
* @brief Closes the LEN sequence
* This method will jump to the end of the sequence and continue reading
* messages from the previous block or file.
*/
virtual void close () = 0;
/**
* @brief Returns true if at the end of the file or at the end of a block
*/
virtual bool at_end () const = 0;
};
/**
* @brief A reader for ProtocolBuffer files and streams
*
* Packed repetitions: same as submessages, but single values are read
* with one of the "read" types.
*
* Maps are read like submessages with key/values as tags 1 and 2.
*
* Strings: if a corresponding tag is encountered, use "read(s)" to read
* the string. "is_seq()" is required to be true, i.e. wire type is LEN.
* This is the reader implementation for binary files
* as described here:
* https://protobuf.dev/programming-guides/encoding/
*/
class TL_PUBLIC ProtocolBufferReader
: public ProtocolBufferReaderBase
{
public:
/**
@ -110,16 +209,6 @@ public:
return m_type;
}
/**
* @brief Returns true, if the current message is a LEN type sequence
* Such messages can be read into strings or "open" can be used on them to
* open a submessage, map or packed repetition.
*/
bool is_seq () const
{
return m_type == PB_LEN;
}
/**
* @brief Skips the current tag
*/
@ -224,7 +313,62 @@ private:
* 4. if "is_counting()" is false, repeat steps 1 to 3 with
* "counting" set to false on "begin_seq".
*/
class TL_PUBLIC ProtocolBufferWriterBase
{
public:
/**
* @brief Constructor
*/
ProtocolBufferWriterBase ()
{
// .. nothing yet ..
}
/**
* @brief Destructor
* The destructor is virtual as this class is the abstract interface of the writers.
*/
virtual ~ProtocolBufferWriterBase ()
{
// .. nothing yet ..
}
/**
* @brief Writes a scalar tag with the given value
* "tag" is the message ID.
* For the integer types, "fixed" selects the fixed-width form instead of the
* varint form.
* NOTE(review): the default value of "fixed" is taken from the static type used
* for the call, so implementations should declare the same default.
*/
virtual void write (int tag, float v) = 0;
virtual void write (int tag, double v) = 0;
virtual void write (int tag, uint32_t v, bool fixed = false) = 0;
virtual void write (int tag, int32_t v, bool fixed = false) = 0;
virtual void write (int tag, uint64_t v, bool fixed = false) = 0;
virtual void write (int tag, int64_t v, bool fixed = false) = 0;
virtual void write (int tag, bool b) = 0;
virtual void write (int tag, const std::string &s) = 0;
/**
* @brief Returns true if the writer is in counting mode
*/
virtual bool is_counting () const = 0;
/**
* @brief Initiates a new sequence. See class documentation for details.
* "tag" is the message ID of the sequence, "counting" selects the counting pass.
*/
virtual void begin_seq (int tag, bool counting) = 0;
/**
* @brief Ends a sequence. See class documentation for details.
*/
virtual void end_seq () = 0;
};
/**
* @brief A writer for ProtocolBuffer files and streams
*
* This is the writer implementation for binary files
* as described here:
* https://protobuf.dev/programming-guides/encoding/
*/
class TL_PUBLIC ProtocolBufferWriter
: public ProtocolBufferWriterBase
{
public:
/**
@ -259,20 +403,62 @@ public:
*/
void end_seq ();
private:
void write_varint (pb_varint v, bool id = false);
tl::OutputStream *mp_stream;
size_t m_bytes_counted;
std::vector<size_t> m_byte_counter_stack;
};
/**
* @brief A writer implementation that dumps the file content to tl::info
*
* This implementation does a halfway job of producing binary files,
* but only insofar it is needed for dumping the binary data.
*/
class TL_PUBLIC ProtocolBufferDumper
: public ProtocolBufferWriterBase
{
public:
/**
* @brief Enables or disables debug mode
* In debug mode, the stream will be dumped in a human readable form
* @brief Creates the writer
*/
void set_debug (bool f);
ProtocolBufferDumper ();
/**
* @brief Writes a scalar tag with the given value
*/
void write (int tag, float v);
void write (int tag, double v);
void write (int tag, uint32_t v, bool fixed = false);
void write (int tag, int32_t v, bool fixed = false);
void write (int tag, uint64_t v, bool fixed = false);
void write (int tag, int64_t v, bool fixed = false);
void write (int tag, bool b);
void write (int tag, const std::string &s);
/**
* @brief Returns true if the writer is in counting mode
*/
bool is_counting () const;
/**
* @brief Initiates a new sequence. See class documentation for details.
*/
void begin_seq (int tag, bool counting);
/**
* @brief Ends a sequence. See class documentation for details.
*/
void end_seq ();
private:
void write_varint (pb_varint v, bool id = false);
void dump (const char *cp, size_t n, const std::string &type, const std::string &value);
tl::OutputStream *mp_stream;
size_t m_bytes_counted;
std::vector<size_t> m_byte_counter_stack;
bool m_debug;
size_t m_debug_pos;
};

View File

@ -40,14 +40,14 @@ PBParser::~PBParser ()
}
void
PBParser::parse (tl::ProtocolBufferReader &reader, const PBElementBase *root, PBReaderState *reader_state)
//  Parses the messages delivered by "reader" against the element tree below "root".
//  "reader_state" is stored in mp_state before parsing starts.
//  NOTE(review): presumably the element handlers reach the state through
//  reader_state () while parse_element runs - hence the assignment comes first.
PBParser::parse (tl::ProtocolBufferReaderBase &reader, const PBElementBase *root, PBReaderState *reader_state)
{
mp_state = reader_state;
parse_element (root, reader);
}
void
PBParser::parse_element (const PBElementBase *parent, tl::ProtocolBufferReader &reader)
PBParser::parse_element (const PBElementBase *parent, tl::ProtocolBufferReaderBase &reader)
{
while (! reader.at_end ()) {

View File

@ -296,8 +296,8 @@ public:
PBParser ();
~PBParser ();
void parse (tl::ProtocolBufferReader &reader, const PBElementBase *root, PBReaderState *reader_state);
void parse_element (const PBElementBase *parent, tl::ProtocolBufferReader &reader);
void parse (tl::ProtocolBufferReaderBase &reader, const PBElementBase *root, PBReaderState *reader_state);
void parse_element (const PBElementBase *parent, tl::ProtocolBufferReaderBase &reader);
PBReaderState &reader_state ()
{
@ -517,7 +517,7 @@ public:
virtual PBElementBase *clone () const = 0;
virtual void create (const PBElementBase *parent, PBReaderState &objs) const = 0;
virtual void parse (PBParser *, tl::ProtocolBufferReader &) const = 0;
virtual void parse (PBParser *, tl::ProtocolBufferReaderBase &) const = 0;
virtual void finish (const PBElementBase *parent, PBReaderState &objs) const = 0;
virtual void write (const PBElementBase *, tl::ProtocolBufferWriter &, PBWriterState &) const { }
@ -602,7 +602,7 @@ public:
objs.pop (tag);
}
virtual void parse (PBParser *parser, tl::ProtocolBufferReader &reader) const
virtual void parse (PBParser *parser, tl::ProtocolBufferReaderBase &reader) const
{
reader.open ();
parser->parse_element (this, reader);
@ -767,7 +767,7 @@ public:
// .. nothing yet ..
}
virtual void parse (PBParser *parser, tl::ProtocolBufferReader &reader) const
virtual void parse (PBParser *parser, tl::ProtocolBufferReaderBase &reader) const
{
PBObjTag<Value> tag;
PBObjTag<Parent> parent_tag;
@ -865,17 +865,17 @@ private:
}
// read incarnations
void read (tl::ProtocolBufferReader &reader, float &v) const
// Reads a float value: forwards to the reader's float overload
void read (tl::ProtocolBufferReaderBase &reader, float &v) const
{
reader.read (v);
}
void read (tl::ProtocolBufferReader &reader, double &v) const
// Reads a double value: forwards to the reader's double overload
void read (tl::ProtocolBufferReaderBase &reader, double &v) const
{
reader.read (v);
}
void read (tl::ProtocolBufferReader &reader, uint8_t &v) const
void read (tl::ProtocolBufferReaderBase &reader, uint8_t &v) const
{
uint32_t vv = 0;
reader.read (vv);
@ -883,7 +883,7 @@ private:
v = vv;
}
void read (tl::ProtocolBufferReader &reader, int8_t &v) const
void read (tl::ProtocolBufferReaderBase &reader, int8_t &v) const
{
int32_t vv = 0;
reader.read (vv);
@ -891,7 +891,7 @@ private:
v = vv;
}
void read (tl::ProtocolBufferReader &reader, uint16_t &v) const
void read (tl::ProtocolBufferReaderBase &reader, uint16_t &v) const
{
uint32_t vv = 0;
reader.read (vv);
@ -899,7 +899,7 @@ private:
v = vv;
}
void read (tl::ProtocolBufferReader &reader, int16_t &v) const
void read (tl::ProtocolBufferReaderBase &reader, int16_t &v) const
{
int32_t vv = 0;
reader.read (vv);
@ -907,38 +907,38 @@ private:
v = vv;
}
void read (tl::ProtocolBufferReader &reader, uint32_t &v) const
// Reads a uint32_t value: forwards to the reader's uint32_t overload
void read (tl::ProtocolBufferReaderBase &reader, uint32_t &v) const
{
reader.read (v);
}
void read (tl::ProtocolBufferReader &reader, int32_t &v) const
// Reads an int32_t value: forwards to the reader's int32_t overload
void read (tl::ProtocolBufferReaderBase &reader, int32_t &v) const
{
reader.read (v);
}
void read (tl::ProtocolBufferReader &reader, uint64_t &v) const
// Reads a uint64_t value: forwards to the reader's uint64_t overload
void read (tl::ProtocolBufferReaderBase &reader, uint64_t &v) const
{
reader.read (v);
}
void read (tl::ProtocolBufferReader &reader, int64_t &v) const
// Reads an int64_t value: forwards to the reader's int64_t overload
void read (tl::ProtocolBufferReaderBase &reader, int64_t &v) const
{
reader.read (v);
}
void read (tl::ProtocolBufferReader &reader, bool &v) const
// Reads a bool value: forwards to the reader's bool overload
void read (tl::ProtocolBufferReaderBase &reader, bool &v) const
{
reader.read (v);
}
void read (tl::ProtocolBufferReader &reader, std::string &v) const
// Reads a string value: forwards to the reader's std::string overload
void read (tl::ProtocolBufferReaderBase &reader, std::string &v) const
{
reader.read (v);
}
template <class T>
void read (tl::ProtocolBufferReader &reader, const T &v) const
void read (tl::ProtocolBufferReaderBase &reader, const T &v) const
{
std::string vv;
reader.read (vv);
@ -994,7 +994,7 @@ public:
}
}
void parse (tl::ProtocolBufferReader &reader, Obj &root) const
void parse (tl::ProtocolBufferReaderBase &reader, Obj &root) const
{
PBObjTag<Obj> tag;
PBReaderState rs;
@ -1011,7 +1011,7 @@ private:
// disable base class implementation
}
virtual void parse (PBParser *, tl::ProtocolBufferReader &) const
// Intentionally empty override: both arguments are ignored and nothing is read
virtual void parse (PBParser *, tl::ProtocolBufferReaderBase &) const
{
// disable base class implementation
}