Implemented a fix for #1794 (giant meta info produces invalid GDS)

This fix solves two problems:
* Too large meta info data
* Too many meta info entries

The first problem is fixed by splitting the strings that serialize
the meta info.

The second problem is fixed by introducing prefixed strings that
indicate the attribute index within the string, not inside
the PROPATTR record.

The solution is backward compatible, although old versions will
not read all meta info and skip entries that exceed the GDS
capacity.

Caveat: the produced GDS files may contain duplicate PROPATTR
keys. This is not strictly illegal, but some third-party processors
may drop such entries.
This commit is contained in:
Matthias Koefferlein 2024-07-20 22:35:54 +02:00
parent 4383802d80
commit 0dd6fca51e
4 changed files with 210 additions and 21 deletions

View File

@ -461,6 +461,7 @@ GDS2ReaderBase::read_context_info_cell ()
if (valid_hook) {
std::vector <std::string> &strings = m_context_info.insert (std::make_pair (cn, std::vector <std::string> ())).first->second;
std::map <size_t, std::vector<std::string> > strings_ex;
size_t attr = 0;
@ -474,16 +475,60 @@ GDS2ReaderBase::read_context_info_cell ()
attr = size_t (get_ushort ());
} else if (rec_id == sPROPVALUE) {
if (strings.size () <= attr) {
strings.resize (attr + 1, std::string ());
const char *str = get_string ();
// To embed long strings and more than 64k attributes, a separate notation is used:
// "#<n>,<p>:<string>"
// where <n> is a string index and <p> is the part index (zero-based).
// For such properties, the PROPATTR value is ignored. This means however, that the
// attribute numbers may not be unique.
// See issue #1794.
if (str[0] == '#') {
tl::Extractor ex (str + 1);
size_t n = 0, p = 0;
if (ex.try_read (n) && ex.test (",") && ex.try_read (p) && ex.test (":")) {
if (strings.size () <= n) {
strings.resize (n + 1, std::string ());
}
std::vector<std::string> &sv = strings_ex[n];
if (sv.size () <= p) {
sv.resize (p + 1, std::string ());
}
sv[p] = ex.get ();
}
} else {
if (strings.size () <= attr) {
strings.resize (attr + 1, std::string ());
}
strings [attr] = str;
}
strings [attr] = get_string ();
} else {
error (tl::to_string (tr ("ENDEL, PROPATTR or PROPVALUE record expected")));
}
}
}
// combine the multipart strings (#1794)
for (auto es = strings_ex.begin (); es != strings_ex.end (); ++es) {
if (es->first < strings.size ()) {
std::string &s = strings [es->first];
s.clear ();
size_t sz = 0;
for (auto i = es->second.begin (); i != es->second.end (); ++i) {
sz += i->size ();
}
s.reserve (sz);
for (auto i = es->second.begin (); i != es->second.end (); ++i) {
s += *i;
}
}
}
} else {
error (tl::to_string (tr ("Invalid record inside a context info cell")));

View File

@ -72,6 +72,54 @@ inline int scale (double sf, int value)
}
}
void
GDS2WriterBase::write_context_string (size_t n, const std::string &s)
{
// max. size for GDS strings used as payload carrier
size_t chunk_size = 32000;
short max_short = std::numeric_limits<short>::max ();
if (n > size_t (max_short) || s.size () > chunk_size) {
// Split strings and use a separate notation: "#<n>,<+p>:..." for the partial
// strings. n is the string index and p the part index (zero based).
// The property number is not defined in that case. There may be properties with
// the same number. See issue #1794.
size_t nchunks = (s.size () + (chunk_size - 1)) / chunk_size;
while (nchunks > 0) {
--nchunks;
std::string partial;
partial.reserve (chunk_size + 100); // approx.
partial += "#";
partial += tl::to_string (n);
partial += ",";
partial += tl::to_string (nchunks);
partial += ":";
size_t pos = nchunks * chunk_size;
partial += std::string (s, pos, std::min (s.size (), pos + chunk_size) - pos);
write_record_size (6);
write_record (sPROPATTR);
write_short (n <= size_t (max_short) ? short (n) : max_short);
write_string_record (sPROPVALUE, partial);
}
} else {
write_record_size (6);
write_record (sPROPATTR);
write_short (short (n));
write_string_record (sPROPVALUE, s);
}
}
void
GDS2WriterBase::write_context_cell (db::Layout &layout, const short *time_data, const std::vector<db::cell_index_type> &cells)
{
@ -112,15 +160,9 @@ GDS2WriterBase::write_context_cell (db::Layout &layout, const short *time_data,
// Hint: write in the reverse order since this way, the reader is more efficient (it knows how many strings
// will arrive)
for (std::vector <std::string>::const_iterator s = context_prop_strings.end (); s != context_prop_strings.begin (); ) {
--s;
write_record_size (6);
write_record (sPROPATTR);
write_short (short (std::distance (std::vector <std::string>::const_iterator (context_prop_strings.begin ()), s))); // = user string
write_string_record (sPROPVALUE, *s);
size_t n = std::distance (std::vector <std::string>::const_iterator (context_prop_strings.begin ()), s);
write_context_string (n, *s);
}
}
@ -151,15 +193,9 @@ GDS2WriterBase::write_context_cell (db::Layout &layout, const short *time_data,
// Hint: write in the reverse order since this way, the reader is more efficient (it knows how many strings
// will arrive)
for (std::vector <std::string>::const_iterator s = context_prop_strings.end (); s != context_prop_strings.begin (); ) {
--s;
write_record_size (6);
write_record (sPROPATTR);
write_short (short (std::distance (std::vector <std::string>::const_iterator (context_prop_strings.begin ()), s))); // = user string
write_string_record (sPROPVALUE, *s);
size_t n = std::distance (std::vector <std::string>::const_iterator (context_prop_strings.begin ()), s);
write_context_string (n, *s);
}
}

View File

@ -169,6 +169,7 @@ private:
void write_properties (const db::Layout &layout, db::properties_id_type prop_id);
void write_context_cell (db::Layout &layout, const short *time_data, const std::vector<cell_index_type> &cells);
void write_context_string (size_t n, const std::string &s);
};
} // namespace db

View File

@ -1196,7 +1196,7 @@ TEST(121)
}
// Meta info
TEST(130a)
TEST(130)
{
db::Layout layout_org;
@ -1312,6 +1312,113 @@ TEST(130a)
EXPECT_EQ (layout_read.meta_info ("b").value.to_string (), "nil");
}
// Giant meta info (issue #1794)
TEST(131)
{
db::Layout layout_org;
layout_org.add_cell ("U");
db::cell_index_type ci = layout_org.add_cell ("X");
std::vector<tl::Variant> ll1;
std::vector<tl::Variant> ll2;
for (unsigned int i = 0; i < 100000; ++i) {
ll1.push_back (tl::Variant (i));
ll2.push_back ("C" + tl::to_string (i));
}
layout_org.add_meta_info ("a", db::MetaInfo ("", ll1, true));
layout_org.add_meta_info ("b", db::MetaInfo ("", "value", true));
layout_org.add_meta_info (ci, "a", db::MetaInfo ("", ll2, true));
layout_org.add_meta_info (ci, "c", db::MetaInfo ("", -1, true));
std::string tmp_file = tl::TestBase::tmp_file ("tmp_GDS2Writer_131.gds");
{
tl::OutputStream out (tmp_file);
db::SaveLayoutOptions options;
db::Writer writer (options);
writer.write (layout_org, out);
}
db::Layout layout_read;
{
tl::InputStream in (tmp_file);
db::Reader reader (in);
reader.read (layout_read);
}
EXPECT_EQ (layout_read.has_meta_info ("x"), false);
EXPECT_EQ (layout_read.has_meta_info ("a"), true);
EXPECT_EQ (layout_read.meta_info ("x").value.to_string (), "nil");
EXPECT_EQ (layout_read.meta_info ("a").value == ll1, true);
EXPECT_EQ (layout_read.has_meta_info ("b"), true);
EXPECT_EQ (layout_read.meta_info ("b").value.to_string (), "value");
db::cell_index_type ci2 = layout_read.cell_by_name ("X").second;
EXPECT_EQ (layout_read.meta_info (ci2, "x").value.to_string (), "nil");
EXPECT_EQ (layout_read.meta_info (ci2, "a").value == ll2, true);
EXPECT_EQ (layout_read.meta_info (ci2, "c").value.to_string (), "-1");
}
// Many meta info (issue #1794)
TEST(132)
{
db::Layout layout_org;
layout_org.add_cell ("U");
db::cell_index_type ci = layout_org.add_cell ("X");
for (unsigned int i = 0; i < 100000; ++i) {
layout_org.add_meta_info ("a" + tl::to_string (i), db::MetaInfo ("", i, true));
}
layout_org.add_meta_info ("b", db::MetaInfo ("", "value", true));
for (unsigned int i = 0; i < 100000; ++i) {
layout_org.add_meta_info (ci, "a" + tl::to_string (i * 2), db::MetaInfo ("", i * 2, true));
}
layout_org.add_meta_info (ci, "c", db::MetaInfo ("", -1, true));
std::string tmp_file = tl::TestBase::tmp_file ("tmp_GDS2Writer_132.gds");
{
tl::OutputStream out (tmp_file);
db::SaveLayoutOptions options;
db::Writer writer (options);
writer.write (layout_org, out);
}
db::Layout layout_read;
{
tl::InputStream in (tmp_file);
db::Reader reader (in);
reader.read (layout_read);
}
EXPECT_EQ (layout_read.has_meta_info ("x"), false);
EXPECT_EQ (layout_read.meta_info ("x").value.to_string (), "nil");
for (unsigned int i = 0; i < 10; ++i) {
EXPECT_EQ (layout_read.has_meta_info ("a" + tl::to_string (i)), true);
EXPECT_EQ (layout_read.meta_info ("a" + tl::to_string (i)).value.to_string (), tl::Variant (i).to_string ());
}
EXPECT_EQ (layout_read.has_meta_info ("b"), true);
EXPECT_EQ (layout_read.meta_info ("b").value.to_string (), "value");
db::cell_index_type ci2 = layout_read.cell_by_name ("X").second;
EXPECT_EQ (layout_read.meta_info (ci2, "x").value.to_string (), "nil");
for (unsigned int i = 0; i < 10; ++i) {
EXPECT_EQ (layout_read.has_meta_info (ci2, "a" + tl::to_string (i * 2)), true);
EXPECT_EQ (layout_read.meta_info (ci2, "a" + tl::to_string (i * 2)).value.to_string (), tl::Variant (i * 2).to_string ());
}
EXPECT_EQ (layout_read.meta_info (ci2, "c").value.to_string (), "-1");
}
// Extreme fracturing by max. points
TEST(166)
{