From Verilog-Perl: Fix parsing single files > 2GB.

This commit is contained in:
Wilson Snyder 2010-04-06 20:20:44 -04:00
parent 7ed18ac62d
commit f8eabbc100
10 changed files with 172 additions and 53 deletions

View File

@ -17,6 +17,8 @@ indicates the contributor was also the author of the fix; Thanks!
**** Fix trace files with empty modules crashing some viewers. **** Fix trace files with empty modules crashing some viewers.
**** Fix parsing single files > 2GB. [Jeffrey Short]
* Verilator 3.801 2010/03/17 * Verilator 3.801 2010/03/17
*** Support "break", "continue", "return". *** Support "break", "continue", "return".

View File

@ -44,6 +44,8 @@
#include "V3PreShell.h" #include "V3PreShell.h"
#include "V3Ast.h" #include "V3Ast.h"
// If you change this code, run a test with the below size set very small
//#define INFILTER_IPC_BUFSIZ 16
#define INFILTER_IPC_BUFSIZ 64*1024 // For debug, try this as a small number #define INFILTER_IPC_BUFSIZ 64*1024 // For debug, try this as a small number
#define INFILTER_CACHE_MAX 64*1024 // Maximum bytes to cache if same file read twice #define INFILTER_CACHE_MAX 64*1024 // Maximum bytes to cache if same file read twice
@ -271,6 +273,7 @@ void V3File::createMakeDir() {
class V3InFilterImp { class V3InFilterImp {
typedef map<string,string> FileContentsMap; typedef map<string,string> FileContentsMap;
typedef V3InFilter::StrList StrList;
FileContentsMap m_contentsMap; // Cache of file contents FileContentsMap m_contentsMap; // Cache of file contents
bool m_readEof; // Received EOF on read bool m_readEof; // Received EOF on read
@ -292,27 +295,27 @@ private:
return level; return level;
} }
bool readContents(const string& filename, string& out) { bool readContents(const string& filename, StrList& outl) {
if (m_pid) return readContentsFilter(filename,out); if (m_pid) return readContentsFilter(filename,outl);
else return readContentsFile(filename,out); else return readContentsFile(filename,outl);
} }
bool readContentsFile(const string& filename, string& out) { bool readContentsFile(const string& filename, StrList& outl) {
int fd = open (filename.c_str(), O_RDONLY); int fd = open (filename.c_str(), O_RDONLY);
if (!fd) return false; if (!fd) return false;
m_readEof = false; m_readEof = false;
out = readBlocks(fd, -1); readBlocks(fd, -1, outl);
close(fd); close(fd);
return true; return true;
} }
bool readContentsFilter(const string& filename, string& out) { bool readContentsFilter(const string& filename, StrList& outl) {
if (filename!="" || out!="") {} // Prevent unused if (filename!="" || outl.empty()) {} // Prevent unused
#ifdef INFILTER_PIPE #ifdef INFILTER_PIPE
writeFilter("read \""+filename+"\"\n"); writeFilter("read \""+filename+"\"\n");
string line = readFilterLine(); string line = readFilterLine();
if (line.find("Content-Length") != string::npos) { if (line.find("Content-Length") != string::npos) {
int len = 0; int len = 0;
sscanf(line.c_str(), "Content-Length: %d\n", &len); sscanf(line.c_str(), "Content-Length: %d\n", &len);
out = readBlocks(m_readFd, len); readBlocks(m_readFd, len, outl);
return true; return true;
} else { } else {
if (line!="") v3error("--pipe-filter protocol error, unexpected: "<<line); if (line!="") v3error("--pipe-filter protocol error, unexpected: "<<line);
@ -334,15 +337,19 @@ private:
#endif #endif
} }
string readBlocks(int fd, int size=-1) { string readBlocks(int fd, int size, StrList& outl) {
string out; string out;
char buf[INFILTER_IPC_BUFSIZ]; char buf[INFILTER_IPC_BUFSIZ];
while (!m_readEof && (size<0 || size>(int)out.length())) { ssize_t sizegot = 0;
int todo = INFILTER_IPC_BUFSIZ; while (!m_readEof && (size<0 || size>sizegot)) {
ssize_t todo = INFILTER_IPC_BUFSIZ;
if (size>0 && size<INFILTER_IPC_BUFSIZ) todo = size; if (size>0 && size<INFILTER_IPC_BUFSIZ) todo = size;
int got = read (fd, buf, todo); ssize_t got = read (fd, buf, todo);
//UINFO(9,"RD GOT g "<< got<<" e "<<errno<<" "<<strerror(errno)<<endl); usleep(50*1000); //UINFO(9,"RD GOT g "<< got<<" e "<<errno<<" "<<strerror(errno)<<endl); usleep(50*1000);
if (got>0) out.append(buf, got); if (got>0) {
outl.push_back(string(buf, got));
sizegot += got;
}
else if (errno == EINTR || errno == EAGAIN else if (errno == EINTR || errno == EAGAIN
#ifdef EWOULDBLOCK #ifdef EWOULDBLOCK
|| errno == EWOULDBLOCK || errno == EWOULDBLOCK
@ -358,9 +365,11 @@ private:
UINFO(9,"readFilterLine\n"); UINFO(9,"readFilterLine\n");
string line; string line;
while (!m_readEof) { while (!m_readEof) {
string c = readBlocks(m_readFd, 1); StrList outl;
line += c; readBlocks(m_readFd, 1, outl);
if (c == "\n") { string onechar = listString(outl);
line += onechar;
if (onechar == "\n") {
if (line == "\n") { line=""; continue; } if (line == "\n") { line=""; continue; }
else break; else break;
} }
@ -477,21 +486,35 @@ private:
protected: protected:
friend class V3InFilter; friend class V3InFilter;
// Read file contents and return it // Read file contents and return it
bool readWholefile(const string& filename, string& out) { bool readWholefile(const string& filename, StrList& outl) {
FileContentsMap::iterator it = m_contentsMap.find(filename); FileContentsMap::iterator it = m_contentsMap.find(filename);
if (it != m_contentsMap.end()) { if (it != m_contentsMap.end()) {
out = it->second; outl.push_back(it->second);
return true; return true;
} }
if (!readContents(filename, out)) return false; if (!readContents(filename, outl)) return false;
if (out.length() < INFILTER_CACHE_MAX) { if (listSize(outl) < INFILTER_CACHE_MAX) {
// Cache small files (only to save space) // Cache small files (only to save space)
// It's quite common to `include "timescale" thousands of times // It's quite common to `include "timescale" thousands of times
// This isn't so important if it's just a open(), but filtering can be slow // This isn't so important if it's just a open(), but filtering can be slow
m_contentsMap.insert(make_pair(filename,out)); m_contentsMap.insert(make_pair(filename,listString(outl)));
} }
return true; return true;
} }
size_t listSize(StrList& sl) {
    // Total number of bytes held by the list, i.e. the sum of the
    // lengths of every chunk string in it.
    size_t total = 0;
    StrList::iterator chunkIt = sl.begin();
    while (chunkIt != sl.end()) {
        total += chunkIt->length();
        ++chunkIt;
    }
    return total;
}
string listString(StrList& sl) {
    // Concatenate every chunk of the list, in list order, into one string.
    string joined;
    StrList::iterator chunkIt = sl.begin();
    while (chunkIt != sl.end()) {
        joined.append(*chunkIt);
        ++chunkIt;
    }
    return joined;
}
// CONSTRUCTORS // CONSTRUCTORS
V3InFilterImp(const string& command) { V3InFilterImp(const string& command) {
m_readEof = false; m_readEof = false;
@ -512,9 +535,9 @@ protected:
V3InFilter::V3InFilter(const string& command) { m_impp = new V3InFilterImp(command); } V3InFilter::V3InFilter(const string& command) { m_impp = new V3InFilterImp(command); }
V3InFilter::~V3InFilter() { if (m_impp) delete m_impp; m_impp=NULL; } V3InFilter::~V3InFilter() { if (m_impp) delete m_impp; m_impp=NULL; }
bool V3InFilter::readWholefile(const string& filename, string& out) { bool V3InFilter::readWholefile(const string& filename, V3InFilter::StrList& outl) {
if (!m_impp) v3fatalSrc("readWholefile on invalid filter"); if (!m_impp) v3fatalSrc("readWholefile on invalid filter");
return m_impp->readWholefile(filename, out); return m_impp->readWholefile(filename, outl);
} }
//###################################################################### //######################################################################

View File

@ -28,6 +28,7 @@
#include <cstdio> #include <cstdio>
#include <stack> #include <stack>
#include <set> #include <set>
#include <list>
#include <fstream> #include <fstream>
//============================================================================ //============================================================================
@ -79,9 +80,12 @@ class V3InFilterImp;
class V3InFilter { class V3InFilter {
V3InFilterImp* m_impp; V3InFilterImp* m_impp;
public: public:
// TYPES
typedef list<string> StrList;
// METHODS // METHODS
// Read file contents and return it. Return true on success. // Read file contents and return it. Return true on success.
bool readWholefile(const string& filename, string& out); bool readWholefile(const string& filename, StrList& outl);
// CONSTRUCTORS // CONSTRUCTORS
V3InFilter(const string& command); V3InFilter(const string& command);

View File

@ -66,12 +66,12 @@ V3ParseImp::~V3ParseImp() {
if (debug()>=9) { UINFO(0,"~V3ParseImp\n"); symp()->dump(cout, "-vpi: "); } if (debug()>=9) { UINFO(0,"~V3ParseImp\n"); symp()->dump(cout, "-vpi: "); }
} }
int V3ParseImp::ppInputToLex(char* buf, int max_size) { size_t V3ParseImp::ppInputToLex(char* buf, size_t max_size) {
int got = 0; size_t got = 0;
while (got < max_size // Haven't got enough while (got < max_size // Haven't got enough
&& !m_ppBuffers.empty()) { // And something buffered && !m_ppBuffers.empty()) { // And something buffered
string front = m_ppBuffers.front(); m_ppBuffers.pop_front(); string front = m_ppBuffers.front(); m_ppBuffers.pop_front();
int len = front.length(); size_t len = front.length();
if (len > (max_size-got)) { // Front string too big if (len > (max_size-got)) { // Front string too big
string remainder = front.substr(max_size-got); string remainder = front.substr(max_size-got);
front = front.substr(0, max_size-got); front = front.substr(0, max_size-got);

View File

@ -260,7 +260,7 @@ public:
static const char* tokenName(int tok); static const char* tokenName(int tok);
void ppPushText(const string& text) { m_ppBuffers.push_back(text); } void ppPushText(const string& text) { m_ppBuffers.push_back(text); }
int ppInputToLex(char* buf, int max_size); size_t ppInputToLex(char* buf, size_t max_size);
static V3ParseImp* parsep() { return s_parsep; } static V3ParseImp* parsep() { return s_parsep; }
@ -278,7 +278,7 @@ public:
m_stringps.push_back(strp); m_stringps.push_back(strp);
return strp; return strp;
} }
string* newString(const char* text, int length) { string* newString(const char* text, size_t length) {
string* strp = new string (text, length); string* strp = new string (text, length);
m_stringps.push_back(strp); m_stringps.push_back(strp);
return strp; return strp;
@ -307,7 +307,7 @@ public:
void statePushVlg(); // Parser -> lexer communication void statePushVlg(); // Parser -> lexer communication
void statePop(); // Parser -> lexer communication void statePop(); // Parser -> lexer communication
int stateVerilogRecent(); // Parser -> lexer communication int stateVerilogRecent(); // Parser -> lexer communication
int flexPpInputToLex(char* buf, int max_size) { return ppInputToLex(buf,max_size); } size_t flexPpInputToLex(char* buf, size_t max_size) { return ppInputToLex(buf,max_size); }
//==== Symbol tables //==== Symbol tables
V3ParseSym* symp() { return &m_sym; } V3ParseSym* symp() { return &m_sym; }

View File

@ -27,6 +27,7 @@
#ifndef _VPREPROCLEX_H_ // Guard #ifndef _VPREPROCLEX_H_ // Guard
#define _VPREPROCLEX_H_ 1 #define _VPREPROCLEX_H_ 1
#include <deque>
#include <stack> #include <stack>
#include "V3Error.h" #include "V3Error.h"
@ -112,7 +113,7 @@ void yy_delete_buffer( YY_BUFFER_STATE b );
#define KEEPCMT_SUB 2 #define KEEPCMT_SUB 2
//====================================================================== //======================================================================
// Class entry for each per-lexter state // Class entry for each per-lexer state
class V3PreLex { class V3PreLex {
public: // Used only by V3PreLex.cpp and V3PreProc.cpp public: // Used only by V3PreLex.cpp and V3PreProc.cpp
@ -120,6 +121,7 @@ class V3PreLex {
// Parse state // Parse state
stack<YY_BUFFER_STATE> m_bufferStack; // Stack of inserted text above current point stack<YY_BUFFER_STATE> m_bufferStack; // Stack of inserted text above current point
deque<string> m_buffers; ///< Buffer of characters to process
// State to lexer // State to lexer
static V3PreLex* s_currentLexp; // Current lexing point static V3PreLex* s_currentLexp; // Current lexing point
@ -143,13 +145,15 @@ class V3PreLex {
m_defCmtSlash = false; m_defCmtSlash = false;
m_pslParenLevel = 0; m_pslParenLevel = 0;
m_pslMoreNeeded = false; m_pslMoreNeeded = false;
initFirstBuffer();
} }
~V3PreLex() { ~V3PreLex() {
while (!m_bufferStack.empty()) { yy_delete_buffer(m_bufferStack.top()); m_bufferStack.pop(); } while (!m_bufferStack.empty()) { yy_delete_buffer(m_bufferStack.top()); m_bufferStack.pop(); }
} }
void initFirstBuffer();
// Called by V3PreLex.l from lexer // Called by V3PreLex.l from lexer
void appendDefValue(const char* text, int len); void appendDefValue(const char* text, size_t len);
void lineDirective(const char* text); void lineDirective(const char* text);
void incLineno() { m_curFilelinep->incLineno(); } void incLineno() { m_curFilelinep->incLineno(); }
// Called by V3PreProc.cpp to inform lexer // Called by V3PreProc.cpp to inform lexer
@ -157,10 +161,13 @@ class V3PreLex {
void pushStateDefForm(); void pushStateDefForm();
void pushStateDefValue(); void pushStateDefValue();
void pushStateIncFilename(); void pushStateIncFilename();
void scanBytes(const string& strg); void scanBytes(const char* strp, size_t len);
void scanBytesBack(const string& str);
size_t inputToLex(char* buf, size_t max_size);
/// Called by VPreproc.cpp to get data from lexer /// Called by VPreproc.cpp to get data from lexer
YY_BUFFER_STATE currentBuffer(); YY_BUFFER_STATE currentBuffer();
int currentStartState(); int currentStartState();
void dumpSummary();
void dumpStack(); void dumpStack();
}; };

View File

@ -33,6 +33,9 @@ V3PreLex* V3PreLex::s_currentLexp = NULL; // Current lexing point
#define LEXP V3PreLex::s_currentLexp #define LEXP V3PreLex::s_currentLexp
#define YY_INPUT(buf,result,max_size) \
result = LEXP->inputToLex(buf,max_size);
// Accessors, because flex keeps changing the type of yyleng // Accessors, because flex keeps changing the type of yyleng
char* yyourtext() { return yytext; } char* yyourtext() { return yytext; }
size_t yyourleng() { return yyleng; } size_t yyourleng() { return yyleng; }
@ -44,7 +47,7 @@ static bool optPsl() { return V3PreProc::optPsl(); }
static bool pedantic() { return LEXP->m_pedantic; } static bool pedantic() { return LEXP->m_pedantic; }
static void yyerror(char* msg) { LEXP->m_curFilelinep->v3error(msg); } static void yyerror(char* msg) { LEXP->m_curFilelinep->v3error(msg); }
static void yyerrorf(const char* msg) { LEXP->m_curFilelinep->v3error(msg); } static void yyerrorf(const char* msg) { LEXP->m_curFilelinep->v3error(msg); }
static void appendDefValue(const char* t,int l) { LEXP->appendDefValue(t,l); } static void appendDefValue(const char* t, size_t l) { LEXP->appendDefValue(t,l); }
static int pslParenLevel() { return LEXP->m_pslParenLevel; } static int pslParenLevel() { return LEXP->m_pslParenLevel; }
static void pslParenLevelInc() { LEXP->m_pslParenLevel++; } static void pslParenLevelInc() { LEXP->m_pslParenLevel++; }
static void pslParenLevelDec() { if (pslParenLevel()) LEXP->m_pslParenLevel--; } static void pslParenLevelDec() { if (pslParenLevel()) LEXP->m_pslParenLevel--; }
@ -78,6 +81,7 @@ crnl [\r]*[\n]
quote [\"] quote [\"]
backslash [\\] backslash [\\]
symb ([a-zA-Z_][a-zA-Z0-9_$]*|\\[^ \t\f\r\n]+) symb ([a-zA-Z_][a-zA-Z0-9_$]*|\\[^ \t\f\r\n]+)
word [a-zA-Z0-9_]+
drop [\032] drop [\032]
psl [p]sl psl [p]sl
@ -113,6 +117,7 @@ psl [p]sl
<INITIAL,PSLMULM,PSLONEM>{quote} { yy_push_state(STRMODE); yymore(); } <INITIAL,PSLMULM,PSLONEM>{quote} { yy_push_state(STRMODE); yymore(); }
<STRMODE><<EOF>> { linenoInc(); yyerrorf("EOF in unterminated string"); yyleng=0; yyterminate(); } <STRMODE><<EOF>> { linenoInc(); yyerrorf("EOF in unterminated string"); yyleng=0; yyterminate(); }
<STRMODE>{crnl} { linenoInc(); yyerrorf("Unterminated string"); BEGIN(INITIAL); } <STRMODE>{crnl} { linenoInc(); yyerrorf("Unterminated string"); BEGIN(INITIAL); }
<STRMODE>{word} { yymore(); }
<STRMODE>[^\"\\] { yymore(); } <STRMODE>[^\"\\] { yymore(); }
<STRMODE>{backslash}{crnl} { linenoInc(); yymore(); } <STRMODE>{backslash}{crnl} { linenoInc(); yymore(); }
<STRMODE>{backslash}. { yymore(); } <STRMODE>{backslash}. { yymore(); }
@ -167,6 +172,7 @@ psl [p]sl
<DEFVAL>[\\]{crnl} { linenoInc(); appendDefValue((char*)"\n",1); } /* Return, but not \ is part of define value */ <DEFVAL>[\\]{crnl} { linenoInc(); appendDefValue((char*)"\n",1); } /* Return, but not \ is part of define value */
<DEFVAL>[^\/\*\n\r\\]+ | <DEFVAL>[^\/\*\n\r\\]+ |
<DEFVAL>[\\][^\n\r] | <DEFVAL>[\\][^\n\r] |
<DEFVAL>{word} { appendDefValue(yytext,yyleng); }
<DEFVAL>. { appendDefValue(yytext,yyleng); } <DEFVAL>. { appendDefValue(yytext,yyleng); }
/* Comments inside define values - if embedded get added to define value per spec */ /* Comments inside define values - if embedded get added to define value per spec */
@ -177,6 +183,7 @@ psl [p]sl
appendDefValue(yytext,yyleng-2); appendDefValue((char*)"\n",1); } /* Return but not \ */ appendDefValue(yytext,yyleng-2); appendDefValue((char*)"\n",1); } /* Return but not \ */
<DEFCMT>{crnl} { linenoInc(); yymore(); if (LEXP->m_defCmtSlash) yyerrorf("One line of /* ... */ is missing \\ before newline"); <DEFCMT>{crnl} { linenoInc(); yymore(); if (LEXP->m_defCmtSlash) yyerrorf("One line of /* ... */ is missing \\ before newline");
BEGIN(CMTMODE); } BEGIN(CMTMODE); }
<DEFCMT>{word} { yymore(); }
<DEFCMT>. { yymore(); } <DEFCMT>. { yymore(); }
<DEFCMT><<EOF>> { yyerrorf("EOF in '/* ... */' block comment\n"); yyleng=0; yyterminate(); } <DEFCMT><<EOF>> { yyerrorf("EOF in '/* ... */' block comment\n"); yyleng=0; yyterminate(); }
@ -241,6 +248,7 @@ psl [p]sl
<CMTBEGM,CMTMODE>"*/" { yy_pop_state(); return(VP_COMMENT); } <CMTBEGM,CMTMODE>"*/" { yy_pop_state(); return(VP_COMMENT); }
<CMTBEGM,CMTMODE>{crnl} { linenoInc(); yymore(); } <CMTBEGM,CMTMODE>{crnl} { linenoInc(); yymore(); }
<CMTBEGM,CMTMODE><<EOF>> { yyerrorf("EOF in '/* ... */' block comment\n"); yyleng=0; yyterminate(); } <CMTBEGM,CMTMODE><<EOF>> { yyerrorf("EOF in '/* ... */' block comment\n"); yyleng=0; yyterminate(); }
<CMTMODE>{word} { yymore(); }
<CMTBEGM>. { BEGIN CMTMODE; yymore(); } /* Non 'psl' beginning in comment */ <CMTBEGM>. { BEGIN CMTMODE; yymore(); } /* Non 'psl' beginning in comment */
<CMTMODE>. { yymore(); } <CMTMODE>. { yymore(); }
@ -290,12 +298,57 @@ void V3PreLex::pushStateIncFilename() {
yymore(); yymore();
} }
void V3PreLex::scanBytes(const string& strg) { void V3PreLex::initFirstBuffer() {
yy_scan_bytes(strg.c_str(), strg.length()); // Called from constructor to make first buffer
// yy_create_buffer also sets yy_fill_buffer=1 so reads from YY_INPUT
yy_switch_to_buffer(yy_create_buffer(NULL, YY_BUF_SIZE));
m_bufferStack.push(currentBuffer());
yyrestart(NULL);
}
size_t V3PreLex::inputToLex(char* buf, size_t max_size) {
    // Fill buf with up to max_size bytes taken from the front of m_buffers,
    // and return how many bytes were written (0 when nothing is buffered).
    //
    // We need a custom YY_INPUT because we can't use flex buffers.
    // Flex buffers are limited to 2GB, and we can't chop into 2G pieces
    // because buffers can't end in the middle of tokens.
    // m_buffers only applies to the "base" buffer when there's no scanBytes outstanding
    // It won't be called on scan_buffers as they don't have yy_fill_buffer set.
    //
    //if (debug()) { cout<<"-  pp:inputToLex ITL s="<<max_size<<" bs="<<m_bufferStack.size()<<endl; dumpSummary(); }
    // For testing, use really small chunks
    //if (max_size > 13) max_size=13;
    size_t got = 0;
    while (got < max_size		// Haven't got enough
	   && !m_buffers.empty()) {	// And something buffered
	// Work on the front chunk in place; copying it out and pushing the
	// remainder back would duplicate the chunk on every call.
	string& front = m_buffers.front();
	const size_t room = max_size - got;
	if (front.length() > room) {	// Front string too big
	    // memcpy, not strncpy: the chunks are length-counted, and strncpy
	    // would stop at an embedded '\0' and zero-fill the rest.
	    memcpy(buf+got, front.data(), room);
	    front.erase(0, room);	// Leave remainder at the front for next time
	    got += room;
	} else {
	    const size_t len = front.length();
	    memcpy(buf+got, front.data(), len);
	    got += len;
	    m_buffers.pop_front();	// Chunk fully consumed
	}
    }
    //if (debug()) { cout<<"-  pp::inputToLex  got="<<got<<" '"<<string(buf,got)<<"'"<<endl; }
    return got;
}
void V3PreLex::scanBytes(const char* strp, size_t len) {
// Note buffers also appended in ::scanBytesBack
// Not "m_buffers.push_front(string(strp,len))" as we need a `define
// to take effect immediately, in the middle of the current buffer
yy_scan_bytes(strp, len);
m_bufferStack.push(currentBuffer()); // yy_scan_bytes makes new buffer m_bufferStack.push(currentBuffer()); // yy_scan_bytes makes new buffer
} }
void V3PreLex::appendDefValue(const char* textp, int len) { void V3PreLex::scanBytesBack(const string& str) {
// Initial creation, that will pull from YY_INPUT==inputToLex
// Note buffers also appended in ::scanBytes
m_buffers.push_back(str);
}
void V3PreLex::appendDefValue(const char* textp, size_t len) {
// Append given text to current definition value being formed // Append given text to current definition value being formed
m_defValue.append(textp,len); m_defValue.append(textp,len);
} }
@ -314,8 +367,15 @@ void V3PreLex::lineDirective(const char* textp) {
V3File::addSrcDepend(m_curFilelinep->filename()); V3File::addSrcDepend(m_curFilelinep->filename());
} }
void V3PreLex::dumpSummary() {
    // One-line debug report on cout: the pointer returned by currentBuffer(),
    // the depth of m_bufferStack, and that buffer's yy_fill_buffer flag.
    cout<<"-  pp::dumpSummary  curBuf="<<(void*)(currentBuffer());
    cout<<" nBuf="<<m_bufferStack.size();
    cout<<" yyfill="<<currentBuffer()->yy_fill_buffer<<endl;
}
void V3PreLex::dumpStack() { void V3PreLex::dumpStack() {
// For debug use // For debug use
dumpSummary();
stack<YY_BUFFER_STATE> tmpstack = m_bufferStack; stack<YY_BUFFER_STATE> tmpstack = m_bufferStack;
printf(" bufferStack[%p]:",this); printf(" bufferStack[%p]:",this);
while (!tmpstack.empty()) { while (!tmpstack.empty()) {

View File

@ -23,12 +23,14 @@
#include "config_build.h" #include "config_build.h"
#include "verilatedos.h" #include "verilatedos.h"
#include <cstdio> #include <cstdio>
#include <cstdlib>
#include <cstdarg> #include <cstdarg>
#include <unistd.h> #include <unistd.h>
#include <fstream> #include <fstream>
#include <stack> #include <stack>
#include <vector> #include <vector>
#include <map> #include <map>
#include <list>
#include "V3Error.h" #include "V3Error.h"
#include "V3Global.h" #include "V3Global.h"
@ -108,6 +110,7 @@ public:
struct V3PreProcImp : public V3PreProc { struct V3PreProcImp : public V3PreProc {
// TYPES // TYPES
typedef std::map<string,V3Define> DefinesMap; typedef std::map<string,V3Define> DefinesMap;
typedef V3InFilter::StrList StrList;
// debug() -> see V3PreShellImp::debug // debug() -> see V3PreShellImp::debug
@ -165,7 +168,7 @@ private:
bool commentTokenMatch(string& cmdr, const char* strg); bool commentTokenMatch(string& cmdr, const char* strg);
string trimWhitespace(const string& strg, bool trailing); string trimWhitespace(const string& strg, bool trailing);
void unputString(const string& strg, bool first=false); void unputString(const string& strg);
void parsingOn() { void parsingOn() {
m_off--; m_off--;
@ -414,18 +417,19 @@ const char* V3PreProcImp::tokenName(int tok) {
} }
} }
void V3PreProcImp::unputString(const string& strg, bool first) { void V3PreProcImp::unputString(const string& strg) {
// Note: The preliminary call in ::openFile bypasses this function
// We used to just m_lexp->unputString(strg.c_str()); // We used to just m_lexp->unputString(strg.c_str());
// However this can lead to "flex scanner push-back overflow" // However this can lead to "flex scanner push-back overflow"
// so instead we scan from a temporary buffer, then on EOF return. // so instead we scan from a temporary buffer, then on EOF return.
// This is also faster than the old scheme, amazingly. // This is also faster than the old scheme, amazingly.
if (!first) { // Else the initial creation if (1) {
if (m_lexp->m_bufferStack.empty() || m_lexp->m_bufferStack.top()!=m_lexp->currentBuffer()) { if (m_lexp->m_bufferStack.empty() || m_lexp->m_bufferStack.top()!=m_lexp->currentBuffer()) {
fileline()->v3fatalSrc("bufferStack missing current buffer; will return incorrectly"); fileline()->v3fatalSrc("bufferStack missing current buffer; will return incorrectly");
// Hard to debug lost text as won't know till much later // Hard to debug lost text as won't know till much later
} }
} }
m_lexp->scanBytes(strg); m_lexp->scanBytes(strg.c_str(), strg.length());
} }
string V3PreProcImp::trimWhitespace(const string& strg, bool trailing) { string V3PreProcImp::trimWhitespace(const string& strg, bool trailing) {
@ -600,22 +604,14 @@ void V3PreProcImp::openFile(FileLine* fl, V3InFilter* filterp, const string& fil
V3File::addSrcDepend(filename); V3File::addSrcDepend(filename);
string wholefile; // Read a list<string> with the whole file.
StrList wholefile;
bool ok = filterp->readWholefile(filename, wholefile/*ref*/); bool ok = filterp->readWholefile(filename, wholefile/*ref*/);
if (!ok) { if (!ok) {
fileline()->v3error("File not found: "+filename+"\n"); fileline()->v3error("File not found: "+filename+"\n");
return; return;
} }
// Filter all DOS CR's en-mass. This avoids bugs with lexing CRs in the wrong places.
// This will also strip them from strings, but strings aren't supposed to be multi-line without a "\"
string wholefilecr;
size_t wholesize = wholefile.length();
for (size_t i=0; i<wholesize; i++) { // Not a c_str(), as we keep '\0's for now.
if (wholefile[i] != '\r' && wholefile[i] != '\0') wholefilecr += wholefile[i];
}
wholefile.resize(0); // free memory
if (m_lexp) { if (m_lexp) {
// We allow the same include file twice, because occasionally it pops // We allow the same include file twice, because occasionally it pops
// up, with guards preventing a real recursion. // up, with guards preventing a real recursion.
@ -636,7 +632,30 @@ void V3PreProcImp::openFile(FileLine* fl, V3InFilter* filterp, const string& fil
addLineComment(1); // Enter addLineComment(1); // Enter
yy_flex_debug = (debug()>4)?1:0; yy_flex_debug = (debug()>4)?1:0;
unputString(wholefilecr,true);
// Filter all DOS CR's en-mass. This avoids bugs with lexing CRs in the wrong places.
// This will also strip them from strings, but strings aren't supposed to be multi-line without a "\"
for (StrList::iterator it=wholefile.begin(); it!=wholefile.end(); ++it) {
// We don't test for \0 as we allow and strip mid-string '\0's (for now).
// We also edit in place. This is nasty to other users of the string, but
// there aren't any, and it avoids needing 2x the memory on very large files.
const char* sp = it->data();
const char* ep = sp + it->length();
char* cp = (char*) sp;
for (; sp<ep; sp++) {
if (*sp != '\r' && *sp != '\0') {
*cp++ = *sp;
}
}
size_t len = cp - it->data();
// Truncate old string
it->erase(len);
// Push the data to an internal buffer.
m_lexp->scanBytesBack(*it);
// Reclaim memory; the push saved the string contents for us
*it = "";
}
} }
void V3PreProcImp::insertUnreadbackAtBol(const string& text) { void V3PreProcImp::insertUnreadbackAtBol(const string& text) {
@ -771,7 +790,7 @@ int V3PreProcImp::getToken() {
// We're off or processed the comment specially. If there are newlines // We're off or processed the comment specially. If there are newlines
// in it, we also return the newlines as TEXT so that the linenumber // in it, we also return the newlines as TEXT so that the linenumber
// count is maintained for downstream tools // count is maintained for downstream tools
for (int len=0; len<yyourleng(); len++) { if (yyourtext()[len]=='\n') m_lineAdd++; } for (size_t len=0; len<yyourleng(); len++) { if (yyourtext()[len]=='\n') m_lineAdd++; }
goto next_tok; goto next_tok;
} }
if (tok==VP_LINE) { if (tok==VP_LINE) {

View File

@ -29,6 +29,7 @@
#include <string> #include <string>
#include <map> #include <map>
#include <list>
#include <iostream> #include <iostream>
class V3InFilter; class V3InFilter;

View File

@ -144,6 +144,7 @@ crnl [\r]*[\n]
id [a-zA-Z_][a-zA-Z0-9_$]* id [a-zA-Z_][a-zA-Z0-9_$]*
/* escaped identifier */ /* escaped identifier */
escid \\[^ \t\f\r\n]+ escid \\[^ \t\f\r\n]+
word [a-zA-Z0-9_]+
%% %%
@ -832,12 +833,14 @@ escid \\[^ \t\f\r\n]+
<STRING>\" { yy_pop_state(); <STRING>\" { yy_pop_state();
FL; yylval.strp = PARSEP->newString(yytext+1,yyleng-2); FL; yylval.strp = PARSEP->newString(yytext+1,yyleng-2);
return yaSTRING; } return yaSTRING; }
<STRING>{word} { yymore(); }
<STRING>. { yymore(); } <STRING>. { yymore(); }
/************************************************************************/ /************************************************************************/
/* Attributes */ /* Attributes */
<ATTRMODE>{crnl} { yymore(); NEXTLINE(); } <ATTRMODE>{crnl} { yymore(); NEXTLINE(); }
<ATTRMODE>"*)" { yy_pop_state(); } <ATTRMODE>"*)" { yy_pop_state(); }
<ATTRMODE>{word} { yymore(); }
<ATTRMODE>. { yymore(); } <ATTRMODE>. { yymore(); }
<ATTRMODE><<EOF>> { yyerrorf("EOF in (*"); <ATTRMODE><<EOF>> { yyerrorf("EOF in (*");
yyleng = 0; yy_pop_state(); } yyleng = 0; yy_pop_state(); }