From abc738b6f1eb106b29ec87a89948d1d045c9f138 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Thu, 17 Sep 2009 22:23:18 -0400 Subject: [PATCH] Fix deep defines causing flex scanner overflows. --- Changes | 2 ++ src/V3PreLex.h | 16 ++++++++++++---- src/V3PreLex.l | 37 +++++++++++++++++++++++++---------- src/V3PreProc.cpp | 49 ++++++++++++++++++++++++++++++++++------------- 4 files changed, 77 insertions(+), 27 deletions(-) diff --git a/Changes b/Changes index acf1f8684..67dd093e6 100644 --- a/Changes +++ b/Changes @@ -13,6 +13,8 @@ indicates the contributor was also the author of the fix; Thanks! **** Don't require SYSTEMPERL_INCLUDE if SYSTEMPERL/src exists. [Gary Thomas] +**** Fix deep defines causing flex scanner overflows. [Brad Dobbie] + **** Fix preprocessing commas in deep parameterized macros. [Brad Dobbie] **** Fix tracing escaped dotted identifiers, bug107. diff --git a/src/V3PreLex.h b/src/V3PreLex.h index 3faae2a23..2170034e8 100644 --- a/src/V3PreLex.h +++ b/src/V3PreLex.h @@ -26,6 +26,8 @@ #ifndef _VPREPROCLEX_H_ // Guard #define _VPREPROCLEX_H_ 1 +#include <stack> + #include "V3Error.h" // Token codes @@ -105,7 +107,7 @@ class V3PreLex { // Parse state FILE* m_fp; // File state is for - YY_BUFFER_STATE m_yyState; // flex input state + stack<YY_BUFFER_STATE> m_bufferStack; // Stack of inserted text above current point // State to lexer static V3PreLex* s_currentLexp; // Current lexing point @@ -121,14 +123,18 @@ class V3PreLex { // CONSTRUCTORS V3PreLex(FILE* fp) { m_fp = fp; - m_yyState = yy_create_buffer (fp, YY_BUF_SIZE); m_keepComments = 0; m_pedantic = false; m_parenLevel = 0; m_pslParenLevel = 0; m_pslMoreNeeded = false; + m_bufferStack.push(yy_create_buffer (fp, YY_BUF_SIZE)); + yy_switch_to_buffer(m_bufferStack.top()); + } + ~V3PreLex() { + fclose(m_fp); + while (!m_bufferStack.empty()) { yy_delete_buffer(m_bufferStack.top()); m_bufferStack.pop(); } } - ~V3PreLex() { fclose(m_fp); yy_delete_buffer(m_yyState); } // Called by V3PreLex.l from lexer void 
appendDefValue(const char* text, int len); @@ -139,9 +145,11 @@ class V3PreLex { void pushStateDefForm(); void pushStateDefValue(); void pushStateIncFilename(); - void unputString(const char* textp); + void scanBytes(const string& strg); /// Called by VPreproc.cpp to get data from lexer + YY_BUFFER_STATE currentBuffer(); int currentStartState(); + void dumpStack(); }; #endif // Guard diff --git a/src/V3PreLex.l b/src/V3PreLex.l index 026e32f0f..1f397b202 100644 --- a/src/V3PreLex.l +++ b/src/V3PreLex.l @@ -1,4 +1,3 @@ -/* -*- C++ -*- */ /************************************************************************** * DESCRIPTION: Verilator: Flex verilog preprocessor * @@ -131,6 +130,7 @@ psl [p]sl /* Note '(' must IMMEDIATELY follow definition name */ [(] { appendDefValue("(",1); BEGIN(DEFFORM); } {crnl} { yy_pop_state(); unput('\n'); yyleng=0; return VP_DEFFORM; } /* DEFVAL will later grab the return */ +<<EOF>> { yy_pop_state(); return VP_DEFFORM; } /* empty formals */ . { yy_pop_state(); unput(yytext[yyleng-1]); yyleng=0; return VP_DEFFORM; } /* empty formals */ /* Reading definition formals */ @@ -204,7 +204,6 @@ psl [p]sl [{(] { pslParenLevelInc(); return (VP_TEXT); } [})] { pslParenLevelDec(); return (VP_TEXT); } [;] { if (!pslParenLevel()) {BEGIN PSLONEE; pslMoreNeeded(false);} return (VP_TEXT); } -<<EOF>> { yyerrorf("EOF in '/* ... */' psl comment\n"); yyleng=0; yyterminate(); } {crnl} { linenoInc(); yy_pop_state(); yytext=(char*)"\n"; yyleng=1; return(VP_WHITE); } /* Completed psl oneline comments */ @@ -224,16 +223,17 @@ psl [p]sl . { yymore(); } /* Psl C-style comments. */ + /* EOFs are normal because / * `foo(..) * / hits a unputString EOF */ .|{crnl} { yyless(0); BEGIN PSLMULM; return(VP_PSL); } "*/" { yy_pop_state(); return(VP_COMMENT); } "//"[^\n\r]* { return (VP_COMMENT); } /* Comments inside block comments get literal inclusion (later removal) */ -<<EOF>> { yyerrorf("EOF in '/* ... 
*/' psl comment\n"); yyleng=0; yyterminate(); } /* Define calls */ "`"{symb} { return (VP_DEFREF); } /* Generics */ {crnl} { linenoInc(); yytext=(char*)"\n"; yyleng=1; return(VP_WHITE); } +<<EOF>> { yyterminate(); } /* A "normal" EOF */ {symb} { return (VP_SYMBOL); } {wsn}+ { return (VP_WHITE); } {drop} { } @@ -268,13 +268,9 @@ void V3PreLex::pushStateIncFilename() { yymore(); } -void V3PreLex::unputString(const char* textp) { - // Add characters to input stream in back-to-front order - const char* cp; - for (cp = textp; *cp; cp++); - for (cp--; cp >= textp; cp--) { - unput(*cp); - } +void V3PreLex::scanBytes(const string& strg) { + yy_scan_bytes(strg.c_str(), strg.length()); + m_bufferStack.push(currentBuffer()); // yy_scan_bytes makes new buffer } void V3PreLex::appendDefValue(const char* textp, int len) { @@ -282,6 +278,10 @@ void V3PreLex::appendDefValue(const char* textp, int len) { m_defValue.append(textp,len); } +YY_BUFFER_STATE V3PreLex::currentBuffer() { + return YY_CURRENT_BUFFER; +} + int V3PreLex::currentStartState() { return YY_START; } @@ -291,3 +291,20 @@ void V3PreLex::lineDirective(const char* textp) { // Make sure we have a dependency on whatever file was specified V3File::addSrcDepend(m_curFilelinep->filename()); } + +void V3PreLex::dumpStack() { + // For debug use + stack<YY_BUFFER_STATE> tmpstack = m_bufferStack; + printf(" bufferStack[%p]:",this); + while (!tmpstack.empty()) { + printf(" %p",tmpstack.top()); + tmpstack.pop(); + } + printf("\n"); +} + +/*################################################################### + * Local Variables: + * mode: C++ + * End: + */ diff --git a/src/V3PreProc.cpp b/src/V3PreProc.cpp index 562ba3894..074efba45 100644 --- a/src/V3PreProc.cpp +++ b/src/V3PreProc.cpp @@ -160,6 +160,7 @@ private: bool commentTokenMatch(string& cmdr, const char* strg); string trimWhitespace(const string& strg); + void unputString(const string& strg); void parsingOn() { m_off--; @@ -386,6 +387,18 @@ const char* V3PreProcImp::tokenName(int tok) { } } 
+void V3PreProcImp::unputString(const string& strg) { + // We used to just m_lexp->unputString(strg.c_str()); + // However this can lead to "flex scanner push-back overflow" + // so instead we scan from a temporary buffer, then on EOF return. + // This is also faster than the old scheme, amazingly. + if (m_lexp->m_bufferStack.empty() || m_lexp->m_bufferStack.top()!=m_lexp->currentBuffer()) { + fileline()->v3fatalSrc("bufferStack missing current buffer; will return incorrectly"); + // Hard to debug lost text as won't know till much later + } + m_lexp->scanBytes(strg); +} + string V3PreProcImp::trimWhitespace(const string& strg) { string out = strg; while (out.length()>0 && isspace(out[0])) { @@ -535,7 +548,6 @@ void V3PreProcImp::openFile(FileLine* fl, const string& filename) { addLineComment(1); // Enter yy_flex_debug = (debug()>4)?1:0; - yy_switch_to_buffer(m_lexp->m_yyState); } void V3PreProcImp::insertUnreadbackAtBol(const string& text) { @@ -560,16 +572,27 @@ void V3PreProcImp::addLineComment(int enter_exit_level) { } void V3PreProcImp::eof() { - // Remove current lexer - UINFO(4,fileline()<<"EOF!\n"); - addLineComment(2); // Exit - delete m_lexp; m_lexp=NULL; - // Perhaps there's a parent file including us? - if (!m_includeStack.empty()) { - // Back to parent. - m_lexp = m_includeStack.top(); m_includeStack.pop(); - addLineComment(0); - yy_switch_to_buffer(m_lexp->m_yyState); + // Perhaps we're completing unputString + if (m_lexp->m_bufferStack.size()>1) { + UINFO(4,fileline()<<"EOS\n"); + // Switch to file or next unputString, but not a eof so don't delete lexer + yy_delete_buffer(m_lexp->currentBuffer()); + m_lexp->m_bufferStack.pop(); // Must work as size>1 + yy_switch_to_buffer(m_lexp->m_bufferStack.top()); + } else { + // Remove current lexer + UINFO(4,fileline()<<"EOF!\n"); + addLineComment(2); // Exit + // Destructor will call yy_delete_buffer + delete m_lexp; m_lexp=NULL; + // Perhaps there's a parent file including us? 
+ if (!m_includeStack.empty()) { + // Back to parent. + m_lexp = m_includeStack.top(); m_includeStack.pop(); + addLineComment(0); + if (m_lexp->m_bufferStack.empty()) fileline()->v3fatalSrc("No include buffer to return to"); + yy_switch_to_buffer(m_lexp->m_bufferStack.top()); // newest buffer in older lexer + } } } @@ -808,7 +831,7 @@ int V3PreProcImp::getToken() { // Similar code in non-parenthesized define (Search for END_OF_DEFARG) m_defRefs.pop(); if (m_defRefs.empty()) { - m_lexp->unputString(out.c_str()); + unputString(out.c_str()); m_state = ps_TOP; m_lexp->m_parenLevel = 0; } @@ -941,7 +964,7 @@ int V3PreProcImp::getToken() { // Similar code in parenthesized define (Search for END_OF_DEFARG) if (m_defRefs.empty()) { // Just output the substitution - m_lexp->unputString(out.c_str()); + unputString(out.c_str()); } else { // Inside another define. Can't subst now, or // `define a x,y