diff --git a/Changes b/Changes index ae6bd77aa..eca429ade 100644 --- a/Changes +++ b/Changes @@ -5,6 +5,8 @@ indicates the contributor was also the author of the fix; Thanks! * Verilator 3.6** +*** Add --output-split-cfuncs for accelerating GCC. [Eugene Weber] + **** Fix $signed mis-extending when input has a WIDTH violation. [Eugene Weber] * Verilator 3.622 10/17/2006 Stable diff --git a/bin/verilator b/bin/verilator index 60a91e7be..83dcea274 100755 --- a/bin/verilator +++ b/bin/verilator @@ -377,9 +377,18 @@ C++ file exceeds the specified number of operations, a new file will be created. In addition, any slow routines will be placed into __Slow files. This accelerates compilation by as optimization can be disabled on the slow routines, and the remaining files can be compiled on parallel machines. +Using --output-split should have only a trivial impact on performance. With GCC 3.3 on a 2GHz Opteron, --output-split 20000 will result in splitting into approximately one-minute-compile chunks. +=item --output-split-cfuncs I<statements> + +Enables splitting functions in the output .cpp/.sp files into multiple +functions. When a generated function exceeds the specified number of +operations, a new function will be created. With --output-split, this will +enable GCC to compile faster, at a small loss in performance that increases +with smaller statement values. 
+ =item --pins64 Specifies SystemC outputs of 33-64 bits wide should use uint64_t instead of diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 6e89bd439..cc9aad580 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -348,6 +348,10 @@ void V3Options::parseOptsList(FileLine* fl, int argc, char** argv) { shift; m_outputSplit = atoi(argv[i]); } + else if ( !strcmp (sw, "-output-split-cfuncs") ) { + shift; + m_outputSplitCFuncs = atoi(argv[i]); + } else if ( !strcmp (sw, "-unroll-count") ) { // Undocumented optimization tweak shift; m_unrollCount = atoi(argv[i]); @@ -581,6 +585,7 @@ V3Options::V3Options() { m_inlineMult = 2000; m_outputSplit = 0; + m_outputSplitCFuncs = 0; m_unrollCount = 64; m_unrollStmts = 20; diff --git a/src/V3Options.h b/src/V3Options.h index 29fb5d86d..676e30881 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -70,6 +70,7 @@ class V3Options { int m_inlineMult; // main switch: --inline-mult int m_outputSplit; // main switch: --output-split + int m_outputSplitCFuncs;// main switch: --output-split-cfuncs int m_unrollCount; // main switch: --unroll-count int m_unrollStmts; // main switch: --unroll-stmts @@ -152,6 +153,7 @@ class V3Options { int inlineMult() const { return m_inlineMult; } int outputSplit() const { return m_outputSplit; } + int outputSplitCFuncs() const { return m_outputSplitCFuncs; } int unrollCount() const { return m_unrollCount; } int unrollStmts() const { return m_unrollStmts; } diff --git a/src/V3Order.cpp b/src/V3Order.cpp index bd338bc1c..8c96e6d17 100644 --- a/src/V3Order.cpp +++ b/src/V3Order.cpp @@ -271,6 +271,7 @@ private: vector m_pmlLoopEndps; // processInsLoop: End vertex for each color vector m_pomLoopMoveps;// processMoveLoop: Loops next nodes are under AstCFunc* m_pomNewFuncp; // Current function being created + int m_pomNewStmts; // Statements in function being created V3Graph m_pomGraph; // Graph of logic elements to move V3List m_pomWaiting; // List of nodes needing inputs to become ready protected: 
@@ -688,6 +689,7 @@ public: m_settleVxp = NULL; m_inputsVxp = NULL; m_loopIdMax = LOOPID_FIRST; + m_pomNewStmts = 0; if (debug()) m_graph.debug(5); // 3 is default if global debug; we want acyc debugging } virtual ~OrderVisitor() { @@ -1374,8 +1376,10 @@ void OrderVisitor::processMoveOne(OrderMoveVertex* vertexp, OrderMoveDomScope* d } else { // Normal logic // Make or borrow a CFunc to contain the new statements - if (v3Global.opt.profileCFuncs()) { - // Put every statement into a unique function to ease profiling + if (v3Global.opt.profileCFuncs() + || (v3Global.opt.outputSplitCFuncs() + && v3Global.opt.outputSplitCFuncs() < m_pomNewStmts)) { + // Put every statement into a unique function to ease profiling or reduce function size m_pomNewFuncp = NULL; } if (!m_pomNewFuncp && domainp != m_deleteDomainp) { @@ -1383,6 +1387,7 @@ void OrderVisitor::processMoveOne(OrderMoveVertex* vertexp, OrderMoveDomScope* d m_pomNewFuncp = new AstCFunc(nodep->fileline(), name, scopep); m_pomNewFuncp->argTypes(EmitCBaseVisitor::symClassVar()); m_pomNewFuncp->symProlog(true); + m_pomNewStmts = 0; if (domainp->hasInitial() || domainp->hasSettle()) m_pomNewFuncp->slow(true); scopep->addActivep(m_pomNewFuncp); // Where will we be adding the call? @@ -1402,6 +1407,11 @@ void OrderVisitor::processMoveOne(OrderMoveVertex* vertexp, OrderMoveDomScope* d pushDeletep(nodep); nodep=NULL; } else { m_pomNewFuncp->addStmtsp(nodep); + if (v3Global.opt.outputSplitCFuncs()) { + // Add in the number of nodes we're adding + EmitCBaseCounterVisitor visitor(nodep); + m_pomNewStmts += visitor.count(); + } } } processMoveDoneOne (vertexp);