Fix string handling of escaped special characters.

String literals may have escaped special characters in them. Make sure
we are processing all the special characters that the standard supports,
and also fix the handling of the assignment to strings. Note that the
vvp input has string literals sanitized so that the parser can handle
the various binary values. Desanitize the strings when pushing string
literals onto the string stack. This fixes string assignments and other
string operations.
This commit is contained in:
Stephen Williams 2022-03-05 17:36:23 -08:00
parent 98a87b49c8
commit 679a6567e9
2 changed files with 85 additions and 2 deletions

View File

@ -81,6 +81,18 @@ static string process_verilog_string_quotes(const string&str)
res = res + '\t';
idx += 1;
break;
case 'v':
res = res + '\v';
idx += 1;
break;
case 'f':
res = res + '\f';
idx += 1;
break;
case 'a':
res = res + '\a';
idx += 1;
break;
case '0':
case '1':
case '2':
@ -101,6 +113,27 @@ static string process_verilog_string_quotes(const string&str)
res = res + byte_val;
break;
}
case 'x': {
char byte_val = 0;
int odx = 1;
while (odx < 3 && idx+odx < str_len) {
if (str[idx+odx] >= '0' && str[idx+odx] <= '9') {
byte_val = 16*byte_val + str[idx+odx]-'0';
odx += 1;
} else if (str[idx+odx] >= 'a' && str[idx+odx] <= 'f') {
byte_val = 16*byte_val + str[idx+odx]-'a'+10;
odx += 1;
} else if (str[idx+odx] >= 'A' && str[idx+odx] <= 'F') {
byte_val = 16*byte_val + str[idx+odx]-'A'+10;
odx += 1;
} else {
break;
}
}
idx += odx;
res = res + byte_val;
break;
}
default:
res = res + str[idx];
idx += 1;

View File

@ -364,6 +364,56 @@ void vthread_s::debug_dump(ostream&fd, const char*label)
fd << "**** Done ****" << endl;
}
/*
 * Convert the text form of a string literal into the binary string it
 * represents. The text value in the vvp_code_t is stored as a C string,
 * so bytes that cannot appear there directly are written as octal
 * escapes (\nnn, one to three octal digits). This function replaces each
 * such escape with the single byte it names and returns a C++ string,
 * which can hold arbitrary binary values including embedded nul bytes.
 *
 * The main compiler is expected to normalize all special characters to
 * the octal form, so that is the only escape that is interpreted. For
 * robustness, a backslash followed by any other character yields that
 * character literally (instead of a stray nul byte), and a backslash
 * that ends the text is dropped.
 */
static std::string filter_string(const char*text)
{
      std::string res;
        // The result is never longer than the input text.
      res.reserve(strlen(text));

      const char*ptr = text;
      while (*ptr) {
              // Not an escape? Copy the character through unchanged.
            if (*ptr != '\\') {
                  res.push_back(*ptr);
                  ptr += 1;
                  continue;
            }

              // Step over the backslash. If it was the last character
              // of the text there is nothing to escape, so stop here.
            ptr += 1;
            if (*ptr == 0)
                  break;

              // Not an octal escape: take the escaped character as-is
              // rather than emitting a nul byte for an empty digit run.
            if (*ptr < '0' || *ptr > '7') {
                  res.push_back(*ptr);
                  ptr += 1;
                  continue;
            }

              // Collect up to three octal digits. Accumulate in an
              // unsigned value so that escapes above \177 do not rely
              // on signed char arithmetic; only the low 8 bits are kept.
            unsigned value = 0;
            for (int cnt = 0 ; cnt < 3 && *ptr >= '0' && *ptr <= '7' ; cnt += 1) {
                  value = 8*value + static_cast<unsigned>(*ptr - '0');
                  ptr += 1;
            }
            res.push_back(static_cast<char>(value & 0xff));
      }

      return res;
}
static void do_join(vthread_t thr, vthread_t child);
__vpiScope* vthread_scope(struct vthread_s*thr)
@ -2264,7 +2314,7 @@ bool of_CONCAT_STR(vthread_t thr, vvp_code_t)
/*
 * Append the immediate string literal carried by the instruction to the
 * string returned by thr->peek_str(0) (presumably the top of the thread's
 * string stack). The literal in cp->text is in its sanitized text form,
 * so it is converted with filter_string() first. It must be appended
 * exactly once: appending the raw text as well would duplicate the
 * literal and leave unconverted escape sequences in the result.
 */
bool of_CONCATI_STR(vthread_t thr, vvp_code_t cp)
{
      const char*text = cp->text;
      thr->peek_str(0).append(filter_string(text));
      return true;
}
@ -5012,7 +5062,7 @@ bool of_PUSHI_REAL(vthread_t thr, vvp_code_t cp)
/*
 * Push the immediate string literal carried by the instruction onto the
 * thread's string stack. The literal in cp->text is in its sanitized
 * text form, so it is converted with filter_string() before being
 * pushed. Exactly one string is pushed per instruction: pushing the raw
 * text as well would leave an extra, unconverted entry on the stack.
 */
bool of_PUSHI_STR(vthread_t thr, vvp_code_t cp)
{
      const char*text = cp->text;
      thr->push_str(filter_string(text));
      return true;
}