From 679a6567e93cc953a0c0bdfc7ed68412e4498263 Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Sat, 5 Mar 2022 17:36:23 -0800 Subject: [PATCH 1/2] Fix string handling of escaped special characters. String literals may have escaped special characters in them. Make sure we are processing all the special characters that the standard supports, and also fix the handling of the assignment to strings. Note that the vvp input has string literals sanitized so that the parser can handle the various binary values. Desanitize the strings when pushing string literals into the string stack. This fixes string assignments, and other string operations. --- verinum.cc | 33 ++++++++++++++++++++++++++++++ vvp/vthread.cc | 54 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/verinum.cc b/verinum.cc index 2ffd58849..c150a2161 100644 --- a/verinum.cc +++ b/verinum.cc @@ -81,6 +81,18 @@ static string process_verilog_string_quotes(const string&str) res = res + '\t'; idx += 1; break; + case 'v': + res = res + '\v'; + idx += 1; + break; + case 'f': + res = res + '\f'; + idx += 1; + break; + case 'a': + res = res + '\a'; + idx += 1; + break; case '0': case '1': case '2': @@ -101,6 +113,27 @@ static string process_verilog_string_quotes(const string&str) res = res + byte_val; break; } + case 'x': { + char byte_val = 0; + int odx = 1; + while (odx < 3 && idx+odx < str_len) { + if (str[idx+odx] >= '0' && str[idx+odx] <= '9') { + byte_val = 16*byte_val + str[idx+odx]-'0'; + odx += 1; + } else if (str[idx+odx] >= 'a' && str[idx+odx] <= 'f') { + byte_val = 16*byte_val + str[idx+odx]-'a'+10; + odx += 1; + } else if (str[idx+odx] >= 'A' && str[idx+odx] <= 'F') { + byte_val = 16*byte_val + str[idx+odx]-'A'+10; + odx += 1; + } else { + break; + } + } + idx += odx; + res = res + byte_val; + break; + } default: res = res + str[idx]; idx += 1; diff --git a/vvp/vthread.cc b/vvp/vthread.cc index cb583a157..ec4c976c9 100644 --- 
a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -364,6 +364,56 @@ void vthread_s::debug_dump(ostream&fd, const char*label) fd << "**** Done ****" << endl; } +/* + * This function converts the text format of the string by interpreting + * any octal characters (\nnn) to their single byte value. We do this here + * because the text value in the vvp_code_t is stored as a C string. This + * converts it to a C++ string that can hold binary values. We only have + * to handle the octal escapes because the main compiler takes care of all + * the other string special characters and normalizes the strings to use + * only this format. + */ +static string filter_string(const char*text) +{ + vector<char> tmp (strlen(text)+1); + size_t dst = 0; + for (const char*ptr = text ; *ptr ; ptr += 1) { + // Not an escape? Move on. + if (*ptr != '\\') { + tmp[dst++] = *ptr; + continue; + } + + // Now we know that *ptr is pointing to a \ character and we + // have an octal sequence coming up. Advance the ptr and start + // processing octal digits. + ptr += 1; + if (*ptr == 0) + break; + + char byte = 0; + int cnt = 3; + while (*ptr && cnt > 0 && *ptr >= '0' && *ptr <= '7') { + byte *= 8; + byte += *ptr - '0'; + cnt -= 1; + ptr += 1; + } + tmp[dst++] = byte; + + // After the while loop above, the ptr points to the next character, + // but the for-loop condition is assuming that ptr points to the last + // character, since it has the ptr+=1. + ptr -= 1; + } + + // Put a nul byte at the end of the built up string, but really we are + // using the known length in the string constructor. 
+ tmp[dst] = 0; + string res (&tmp[0], dst); + return res; +} + static void do_join(vthread_t thr, vthread_t child); __vpiScope* vthread_scope(struct vthread_s*thr) @@ -2264,7 +2314,7 @@ bool of_CONCAT_STR(vthread_t thr, vvp_code_t) bool of_CONCATI_STR(vthread_t thr, vvp_code_t cp) { const char*text = cp->text; - thr->peek_str(0).append(text); + thr->peek_str(0).append(filter_string(text)); return true; } @@ -5012,7 +5062,7 @@ bool of_PUSHI_REAL(vthread_t thr, vvp_code_t cp) bool of_PUSHI_STR(vthread_t thr, vvp_code_t cp) { const char*text = cp->text; - thr->push_str(string(text)); + thr->push_str(filter_string(text)); return true; } From d2ac403415cdfb5de8fba805893cbc2b2a3dc96a Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Sat, 5 Mar 2022 17:37:01 -0800 Subject: [PATCH 2/2] Add sv_string7 and sv_string7b tests. These test string assignments of various escaped special characters. --- ivtest/ivltests/sv_string7.v | 45 ++++++++++++++++++++++++++++++++++ ivtest/ivltests/sv_string7b.v | 46 +++++++++++++++++++++++++++++++++++ ivtest/regress-sv.list | 2 ++ 3 files changed, 93 insertions(+) create mode 100644 ivtest/ivltests/sv_string7.v create mode 100644 ivtest/ivltests/sv_string7b.v diff --git a/ivtest/ivltests/sv_string7.v b/ivtest/ivltests/sv_string7.v new file mode 100644 index 000000000..eec33c63c --- /dev/null +++ b/ivtest/ivltests/sv_string7.v @@ -0,0 +1,45 @@ + +module main; + + string foo; + int error_count; + + task check_char(input int idx, input [7:0] val); + if (foo[idx] !== val) begin + $display("FAILED: foo[%0d]==%02h, expecting %02h", + idx, foo[idx], val); + error_count = error_count+1; + end + endtask // check_char + + initial begin + // These are the special characters in strings as defined by + // IEEE Std 1800-2017: 5.9.1 Special characters in strings. 
+ // The string assignment is governed by: + // IEEE Std 1800-2017: 6.16 String data type + foo = "abc\n\t\\\"\v\f\a\001\002\x03\x04"; + error_count = 0; + + check_char(0, 8'h61); // 'a' + check_char(1, 8'h62); // 'b' + check_char(2, 8'h63); // 'c' + check_char(3, 8'h0a); // '\n' + check_char(4, 8'h09); // '\t' + check_char(5, 8'h5c); // '\\' + check_char(6, 8'h22); // '\"' + check_char(7, 8'h0b); // '\v' + check_char(8, 8'h0c); // '\f' + check_char(9, 8'h07); // '\a' + check_char(10, 8'h01); // '\001' + check_char(11, 8'h02); // '\002' + check_char(12, 8'h03); // '\x03' + check_char(13, 8'h04); // '\x04' + + if (foo.len() !== 14) begin + $display("FAILED: foo.len() == %0d, should be 14", foo.len()); + error_count = error_count+1; + end + + if (error_count == 0) $display("PASSED"); + end +endmodule // main diff --git a/ivtest/ivltests/sv_string7b.v b/ivtest/ivltests/sv_string7b.v new file mode 100644 index 000000000..abb95128e --- /dev/null +++ b/ivtest/ivltests/sv_string7b.v @@ -0,0 +1,46 @@ + +module main; + + string foo; + int error_count; + + task check_char(input int idx, input [7:0] val); + if (foo[idx] !== val) begin + $display("FAILED: foo[%0d]==%02h, expecting %02h", + idx, foo[idx], val); + error_count = error_count+1; + end + endtask // check_char + + initial begin + // These are the special characters in strings as defined by + // IEEE Std 1800-2017: 5.9.1 Special characters in strings. 
+ // The string assignment is governed by: + // IEEE Std 1800-2017: 6.16 String data type + foo = "abc"; + foo = {foo, "\n\t\\\"\v\f\a\001\002\x03\x04"}; + error_count = 0; + + check_char(0, 8'h61); // 'a' + check_char(1, 8'h62); // 'b' + check_char(2, 8'h63); // 'c' + check_char(3, 8'h0a); // '\n' + check_char(4, 8'h09); // '\t' + check_char(5, 8'h5c); // '\\' + check_char(6, 8'h22); // '\"' + check_char(7, 8'h0b); // '\v' + check_char(8, 8'h0c); // '\f' + check_char(9, 8'h07); // '\a' + check_char(10, 8'h01); // '\001' + check_char(11, 8'h02); // '\002' + check_char(12, 8'h03); // '\x03' + check_char(13, 8'h04); // '\x04' + + if (foo.len() !== 14) begin + $display("FAILED: foo.len() == %0d, should be 14", foo.len()); + error_count = error_count+1; + end + + if (error_count == 0) $display("PASSED"); + end +endmodule // main diff --git a/ivtest/regress-sv.list b/ivtest/regress-sv.list index d6f190e08..601647f8c 100644 --- a/ivtest/regress-sv.list +++ b/ivtest/regress-sv.list @@ -530,6 +530,8 @@ sv_string3 normal,-g2009 ivltests sv_string4 normal,-g2009 ivltests sv_string5 normal,-g2009 ivltests sv_string6 normal,-g2009 ivltests +sv_string7 normal,-g2009 ivltests +sv_string7b normal,-g2009 ivltests sv_timeunit_prec1 normal,-g2005-sv ivltests sv_timeunit_prec2 normal,-g2009 ivltests sv_timeunit_prec3a normal,-g2005-sv ivltests gold=sv_timeunit_prec3a.gold