Update escaped-identifier extraction to conform to the IEEE Verilog/SystemVerilog standard for proper handling of leading backslashes and trailing whitespace
This commit is contained in:
parent
d67d3323ad
commit
430c6cdf3c
11
lexor.lex
11
lexor.lex
|
|
@ -431,10 +431,17 @@ TU [munpf]
|
|||
}
|
||||
|
||||
|
||||
\\[^ \t\b\f\r\n]+ {
|
||||
a\\[^ \t\b\f\r\n]+[ \t\b\f\r\n] {
|
||||
assert(yylloc.lexical_pos != UINT_MAX);
|
||||
yylloc.lexical_pos += 1;
|
||||
yylval.text = strdupnew(yytext+1);
|
||||
// Extract identifier name from escaped identifier according to IEEE Std 1800-2023
|
||||
// Format: \ {any_printable_ASCII_character_except_white_space} white_space
|
||||
// The identifier name excludes both the leading \ and trailing whitespace
|
||||
size_t len = strlen(yytext);
|
||||
char* escaped_name = new char[len - 1]; // len-2 chars + null terminator
|
||||
strncpy(escaped_name, yytext + 1, len - 2); // Skip leading \ and trailing whitespace
|
||||
escaped_name[len - 2] = '\0';
|
||||
yylval.text = escaped_name;
|
||||
if (gn_system_verilog()) {
|
||||
if (PPackage*pkg = pform_test_package_identifier(yylval.text)) {
|
||||
delete[]yylval.text;
|
||||
|
|
|
|||
|
|
@ -66,7 +66,37 @@ static char* strdupnew(char const *str)
|
|||
contents of the string without the enclosing quotes. */
|
||||
\"([^\"\\]|\\.)*\" {
|
||||
yytext[strlen(yytext)-1] = 0;
|
||||
yylval.text = strdupnew(yytext+1);
|
||||
char* raw_str = yytext+1;
|
||||
|
||||
// Unescape backslashes in the string
|
||||
char* unescaped = new char[strlen(raw_str) + 1];
|
||||
char* dst = unescaped;
|
||||
char* src = raw_str;
|
||||
|
||||
while (*src) {
|
||||
if (*src == '\\' && *(src+1)) {
|
||||
// Handle escape sequences
|
||||
src++; // skip the backslash
|
||||
switch (*src) {
|
||||
case '\\': *dst++ = '\\'; break; // \\ -> \
|
||||
case '"': *dst++ = '"'; break; // \" -> "
|
||||
case 'n': *dst++ = '\n'; break; // \n -> newline
|
||||
case 't': *dst++ = '\t'; break; // \t -> tab
|
||||
case 'r': *dst++ = '\r'; break; // \r -> carriage return
|
||||
default:
|
||||
// For any other escaped character, include both backslash and character
|
||||
*dst++ = '\\';
|
||||
*dst++ = *src;
|
||||
break;
|
||||
}
|
||||
src++;
|
||||
} else {
|
||||
*dst++ = *src++;
|
||||
}
|
||||
}
|
||||
*dst = '\0';
|
||||
|
||||
yylval.text = unescaped;
|
||||
assert(yylval.text);
|
||||
return T_STRING; }
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue