diff --git a/Changes b/Changes index 3877b1e8b..4468c2f76 100644 --- a/Changes +++ b/Changes @@ -11,7 +11,7 @@ indicates the contributor was also the author of the fix; Thanks! **** Fix large shift error on large shift constants. [David Welch] -**** Fix $display mangling on GCC 4.7 and speed up, msg927, bug373. [R Diez] +**** Fix $display mangling on GCC 4.7 and speed up, msg927, bug373, bug574. [R Diez] **** Fix array of struct references giving false error, bug566. [Julius Baxter] diff --git a/bin/verilator b/bin/verilator index 6e2dcf474..d5c796e51 100755 --- a/bin/verilator +++ b/bin/verilator @@ -366,8 +366,10 @@ and/or "--x-initial-edge" may be desirable. =item --autoflush After every $display or $fdisplay, flush the output stream. This insures -that messages will appear immediately but may reduce performance. Defaults -off, which will buffer output as provided by the normal C stdio calls. +that messages will appear immediately but may reduce performance; for best +performance call "fflush(stdout)" occasionally in the main C loop. +Defaults off, which will buffer output as provided by the normal C stdio +calls. =item --bbox-sys @@ -1236,8 +1238,10 @@ C++ compiler and size of your CPU's caches. By default, the lib/verilated.mk file has optimization turned off. This is for the benefit of new users, as it improves compile times at the cost of runtimes. To add optimization as the default, set one of three variables, -OPT, OPT_FAST, or OPT_SLOW in lib/verilated.mk. Or, just for one run, pass -them on the command line to make: +OPT, OPT_FAST, or OPT_SLOW in lib/verilated.mk. Or, use the -CFLAGS and/or +-LDFLAGS option on the verilator command line to pass the flags directly to +the compiler or linker. Or, just for one run, pass them on the command +line to make: make OPT_FAST="-O2" -f Vour.mk Vour__ALL.a @@ -1248,7 +1252,9 @@ rarely, yet take a long time to compile with optimization on. 
OPT specifies overall optimization and affects all compiles, including those OPT_FAST and OPT_SLOW affect. For best results, use OPT="-O2", and link with "-static". Nearly the same results can be had with much better -compile times with OPT_FAST="-O1 -fstrict-aliasing". +compile times with OPT_FAST="-O1 -fstrict-aliasing". Higher optimization +such as "-O3" may help, but gcc compile times may be excessive under O3 on +even medium sized designs. Unfortunately, using the optimizer with SystemC files can result in compiles taking several minutes. (The SystemC libraries have many little @@ -1262,6 +1268,11 @@ If you will be running many simulations on a single compile, investigate feedback driven compilation. With GCC, using -fprofile-arcs, then -fbranch-probabilities will yield another 15% or so. +Modern compilers also support link-time optimization (LTO), which can help +especially if you link in DPI code. To enable LTO on GCC, pass "-flto" in +both compilation and link. Note LTO may cause excessive compile times on +large designs. + You may uncover further tuning possibilities by profiling the Verilog code. Use Verilator's --profile-cfuncs, then GCC's -g -pg. You can then run either oprofile or gprof to see where in the C++ code the time is spent. @@ -3645,9 +3656,11 @@ Major concepts by Paul Wasson and Duane Galbi. =head1 SEE ALSO -L, L, L, L +L, L, L, L, -And internals.txt in the distribution. +L which is the source for this document, + +and internals.txt in the distribution. =cut diff --git a/include/verilated.cpp b/include/verilated.cpp index 5e53f1679..cb0c6a00b 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -757,9 +757,10 @@ void VL_FCLOSE_I(IData fdi) { } void VL_SFORMAT_X(int obits, void* destp, const char* formatp, ...) 
{ + VL_STATIC_OR_THREAD string output; // static only for speed + output = ""; va_list ap; va_start(ap,formatp); - string output; _vl_vsformat(output, formatp, ap); va_end(ap); @@ -767,9 +768,10 @@ void VL_SFORMAT_X(int obits, void* destp, const char* formatp, ...) { } string VL_SFORMATF_NX(const char* formatp, ...) { + VL_STATIC_OR_THREAD string output; // static only for speed + output = ""; va_list ap; va_start(ap,formatp); - string output; _vl_vsformat(output, formatp, ap); va_end(ap); @@ -777,9 +779,10 @@ string VL_SFORMATF_NX(const char* formatp, ...) { } void VL_WRITEF(const char* formatp, ...) { + VL_STATIC_OR_THREAD string output; // static only for speed + output = ""; va_list ap; va_start(ap,formatp); - string output; _vl_vsformat(output, formatp, ap); va_end(ap); @@ -788,12 +791,13 @@ void VL_WRITEF(const char* formatp, ...) { } void VL_FWRITEF(IData fpi, const char* formatp, ...) { + VL_STATIC_OR_THREAD string output; // static only for speed + output = ""; FILE* fp = VL_CVT_I_FP(fpi); if (VL_UNLIKELY(!fp)) return; va_list ap; va_start(ap,formatp); - string output; _vl_vsformat(output, formatp, ap); va_end(ap); @@ -956,13 +960,13 @@ IData VL_SYSTEM_IW(int lhswords, WDataInP filenamep) { } IData VL_TESTPLUSARGS_I(const char* formatp) { - string match = VerilatedImp::argPlusMatch(formatp); + const string& match = VerilatedImp::argPlusMatch(formatp); if (match == "") return 0; else return 1; } IData VL_VALUEPLUSARGS_IW(int rbits, const char* prefixp, char fmt, WDataOutP rwp) { - string match = VerilatedImp::argPlusMatch(prefixp); + const string& match = VerilatedImp::argPlusMatch(prefixp); const char* dp = match.c_str() + 1 /*leading + */ + strlen(prefixp); if (match == "") return 0; VL_ZERO_RESET_W(rbits, rwp); @@ -998,7 +1002,7 @@ IData VL_VALUEPLUSARGS_IW(int rbits, const char* prefixp, char fmt, WDataOutP rw } const char* vl_mc_scan_plusargs(const char* prefixp) { - string match = VerilatedImp::argPlusMatch(prefixp); + const string& match = 
VerilatedImp::argPlusMatch(prefixp); static VL_THREAD char outstr[VL_VALUE_STRING_MAX_WIDTH]; if (match == "") return NULL; strncpy(outstr, match.c_str()+strlen(prefixp)+1, // +1 to skip the "+" diff --git a/include/verilated_vpi.h b/include/verilated_vpi.h index 620d87485..70901c3fa 100644 --- a/include/verilated_vpi.h +++ b/include/verilated_vpi.h @@ -182,7 +182,7 @@ public: vluint32_t entSize() const { return m_entSize; } virtual const char* name() { return m_varp->name(); } virtual const char* fullname() { - static VL_THREAD string out; + VL_STATIC_OR_THREAD string out; out = string(m_scopep->name())+"."+name(); return out.c_str(); } @@ -207,7 +207,7 @@ public: virtual ~VerilatedVpioVarIndex() {} static inline VerilatedVpioVarIndex* castp(vpiHandle h) { return dynamic_cast((VerilatedVpio*)h); } virtual const char* fullname() { - static VL_THREAD string out; + VL_STATIC_OR_THREAD string out; char num[20]; sprintf(num,"%d",m_index); out = string(scopep()->name())+"."+name()+"["+num+"]"; return out.c_str(); diff --git a/include/verilatedos.h b/include/verilatedos.h index 00c140932..08801fdfe 100644 --- a/include/verilatedos.h +++ b/include/verilatedos.h @@ -78,8 +78,12 @@ # else # error "Unsupported compiler for VL_THREADED: No thread-local declarator" # endif +# define VL_STATIC_OR_THREAD ///< Static if unthreaded, as some strings can be faster +// ///< if non-dynamic and can't do "static VL_THREAD string" #else # define VL_THREAD ///< Storage class for thread-local storage +# define VL_STATIC_OR_THREAD static ///< Static if unthreaded, as some strings can be faster +// ///< if non-dynamic and can't do "static VL_THREAD string" #endif #ifdef _MSC_VER