diff --git a/lexor.lex b/lexor.lex
index ef291f9d6..3c6a6aff1 100644
--- a/lexor.lex
+++ b/lexor.lex
@@ -431,10 +431,15 @@ TU [munpf]
   }
 
 
 \\[^ \t\b\f\r\n]+         {
       assert(yylloc.lexical_pos != UINT_MAX);
       yylloc.lexical_pos += 1;
+	/* An escaped identifier (IEEE Std 1800-2023) is a backslash
+	   followed by non-whitespace characters and ended by white
+	   space. The pattern stops before that white space, so
+	   yytext+1 is exactly the name, and the terminator is left
+	   in the input for the rules that follow. */
       yylval.text = strdupnew(yytext+1);
       if (gn_system_verilog()) {
 	    if (PPackage*pkg = pform_test_package_identifier(yylval.text)) {
 		  delete[]yylval.text;
diff --git a/vvp/lexor.lex b/vvp/lexor.lex
index 83f07d12b..11546a73f 100644
--- a/vvp/lexor.lex
+++ b/vvp/lexor.lex
@@ -66,7 +66,37 @@ static char* strdupnew(char const *str)
      contents of the string without the enclosing quotes.
   */
 \"([^\"\\]|\\.)*\" { yytext[strlen(yytext)-1] = 0;
-      yylval.text = strdupnew(yytext+1);
+      char* raw_str = yytext+1;
+
+	/* Decode escapes. Output is never longer than the input. */
+      char* unescaped = new char[strlen(raw_str) + 1];
+      char* dst = unescaped;
+      char* src = raw_str;
+
+      while (*src) {
+	    if (*src == '\\' && *(src+1)) {
+		  /* Decode the character after the backslash. */
+		  src++; /* skip the backslash */
+		  switch (*src) {
+		      case '\\': *dst++ = '\\'; break; /* escaped backslash */
+		      case '"':  *dst++ = '"';  break; /* escaped double quote */
+		      case 'n':  *dst++ = '\n'; break; /* newline */
+		      case 't':  *dst++ = '\t'; break; /* tab */
+		      case 'r':  *dst++ = '\r'; break; /* carriage return */
+		      default:
+			/* Unknown escape: keep backslash and character. */
+			*dst++ = '\\';
+			*dst++ = *src;
+			break;
+		  }
+		  src++;
+	    } else {
+		  *dst++ = *src++;
+	    }
+      }
+      *dst = '\0';
+
+      yylval.text = unescaped;
       assert(yylval.text);
       return T_STRING;
   }