From 679a6567e93cc953a0c0bdfc7ed68412e4498263 Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Sat, 5 Mar 2022 17:36:23 -0800 Subject: [PATCH] Fix string handling of escaped special characters. String literals may have escaped special characters in them. Make sure we are processing all the special characters that the standard supports, and also fix the handling of the assignment to strings. Note that the vvp input has string literals sanitized so that the parser can handle the various binary values. Desanitize the strings when pushing string literals into the string stack. This fixes string assignments, and other string operations. --- verinum.cc | 33 ++++++++++++++++++++++++++++++ vvp/vthread.cc | 54 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/verinum.cc b/verinum.cc index 2ffd58849..c150a2161 100644 --- a/verinum.cc +++ b/verinum.cc @@ -81,6 +81,18 @@ static string process_verilog_string_quotes(const string&str) res = res + '\t'; idx += 1; break; + case 'v': + res = res + '\v'; + idx += 1; + break; + case 'f': + res = res + '\f'; + idx += 1; + break; + case 'a': + res = res + '\a'; + idx += 1; + break; case '0': case '1': case '2': @@ -101,6 +113,27 @@ static string process_verilog_string_quotes(const string&str) res = res + byte_val; break; } + case 'x': { + char byte_val = 0; + int odx = 1; + while (odx < 3 && idx+odx < str_len) { + if (str[idx+odx] >= '0' && str[idx+odx] <= '9') { + byte_val = 16*byte_val + str[idx+odx]-'0'; + odx += 1; + } else if (str[idx+odx] >= 'a' && str[idx+odx] <= 'f') { + byte_val = 16*byte_val + str[idx+odx]-'a'+10; + odx += 1; + } else if (str[idx+odx] >= 'A' && str[idx+odx] <= 'F') { + byte_val = 16*byte_val + str[idx+odx]-'A'+10; + odx += 1; + } else { + break; + } + } + idx += odx; + res = res + byte_val; + break; + } default: res = res + str[idx]; idx += 1; diff --git a/vvp/vthread.cc b/vvp/vthread.cc index cb583a157..ec4c976c9 100644 --- a/vvp/vthread.cc 
+++ b/vvp/vthread.cc
@@ -364,6 +364,56 @@ void vthread_s::debug_dump(ostream&fd, const char*label)
       fd << "**** Done ****" << endl;
 }
 
+/*
+ * This function converts the text format of the string by interpreting
+ * any octal characters (\nnn) to their single byte value. We do this here
+ * because the text value in the vvp_code_t is stored as a C string. This
+ * converts it to a C++ string that can hold binary values. We only have
+ * to handle the octal escapes because the main compiler takes care of all
+ * the other string special characters and normalizes the strings to use
+ * only this format.
+ */
+static string filter_string(const char*text)
+{
+      vector<char> tmp (strlen(text)+1);
+      size_t dst = 0;
+      for (const char*ptr = text ; *ptr ; ptr += 1) {
+            // Not an escape? Move on.
+            if (*ptr != '\\') {
+                  tmp[dst++] = *ptr;
+                  continue;
+            }
+
+            // Now we know that *ptr is pointing to a \ character and we
+            // have an octal sequence coming up. Advance the ptr and start
+            // processing octal digits.
+            ptr += 1;
+            if (*ptr == 0)
+                  break;
+
+            char byte = 0;
+            int cnt = 3;
+            while (*ptr && cnt > 0 && *ptr >= '0' && *ptr <= '7') {
+                  byte *= 8;
+                  byte += *ptr - '0';
+                  cnt -= 1;
+                  ptr += 1;
+            }
+            tmp[dst++] = byte;
+
+            // After the while loop above, the ptr points to the next character,
+            // but the for-loop condition is assuming that ptr points to the last
+            // character, since it has the ptr+=1.
+            ptr -= 1;
+      }
+
+      // Put a nul byte at the end of the built up string, but really we are
+      // using the known length in the string constructor.
+      tmp[dst] = 0;
+      string res (&tmp[0], dst);
+      return res;
+}
+
 static void do_join(vthread_t thr, vthread_t child);
 
 __vpiScope* vthread_scope(struct vthread_s*thr)
@@ -2264,7 +2314,7 @@ bool of_CONCAT_STR(vthread_t thr, vvp_code_t)
 bool of_CONCATI_STR(vthread_t thr, vvp_code_t cp)
 {
       const char*text = cp->text;
-      thr->peek_str(0).append(text);
+      thr->peek_str(0).append(filter_string(text));
       return true;
 }
 
@@ -5012,7 +5062,7 @@ bool of_PUSHI_REAL(vthread_t thr, vvp_code_t cp)
 bool of_PUSHI_STR(vthread_t thr, vvp_code_t cp)
 {
       const char*text = cp->text;
-      thr->push_str(string(text));
+      thr->push_str(filter_string(text));
       return true;
 }
 