Update escaped-identifier extraction to conform to the IEEE Verilog/SystemVerilog standard (IEEE Std 1800) for proper handling of the leading backslash and the trailing whitespace

This commit is contained in:
Chaitanya Sharma 2025-09-07 16:19:12 -04:00
parent d67d3323ad
commit 430c6cdf3c
No known key found for this signature in database
GPG Key ID: 447BDA75F3F61022
2 changed files with 40 additions and 3 deletions

View File

@ -431,10 +431,17 @@ TU [munpf]
}
\\[^ \t\b\f\r\n]+ {
a\\[^ \t\b\f\r\n]+[ \t\b\f\r\n] {
assert(yylloc.lexical_pos != UINT_MAX);
yylloc.lexical_pos += 1;
yylval.text = strdupnew(yytext+1);
// Extract identifier name from escaped identifier according to IEEE Std 1800-2023
// Format: \ {any_printable_ASCII_character_except_white_space} white_space
// The identifier name excludes both the leading \ and trailing whitespace
size_t len = strlen(yytext);
char* escaped_name = new char[len - 1]; // len-2 chars + null terminator
strncpy(escaped_name, yytext + 1, len - 2); // Skip leading \ and trailing whitespace
escaped_name[len - 2] = '\0';
yylval.text = escaped_name;
if (gn_system_verilog()) {
if (PPackage*pkg = pform_test_package_identifier(yylval.text)) {
delete[]yylval.text;

View File

@ -66,7 +66,37 @@ static char* strdupnew(char const *str)
contents of the string without the enclosing quotes. */
\"([^\"\\]|\\.)*\" {
yytext[strlen(yytext)-1] = 0;
yylval.text = strdupnew(yytext+1);
char* raw_str = yytext+1;
// Unescape backslashes in the string
char* unescaped = new char[strlen(raw_str) + 1];
char* dst = unescaped;
char* src = raw_str;
while (*src) {
if (*src == '\\' && *(src+1)) {
// Handle escape sequences
src++; // skip the backslash
switch (*src) {
case '\\': *dst++ = '\\'; break; // \\ -> \
case '"': *dst++ = '"'; break; // \" -> "
case 'n': *dst++ = '\n'; break; // \n -> newline
case 't': *dst++ = '\t'; break; // \t -> tab
case 'r': *dst++ = '\r'; break; // \r -> carriage return
default:
// For any other escaped character, include both backslash and character
*dst++ = '\\';
*dst++ = *src;
break;
}
src++;
} else {
*dst++ = *src++;
}
}
*dst = '\0';
yylval.text = unescaped;
assert(yylval.text);
return T_STRING; }