Add --output-split-cfuncs for accelerating GCC.

git-svn-id: file://localhost/svn/verilator/trunk/verilator@829 77ca24e4-aefa-0310-84f0-b9a241c72d87
This commit is contained in:
Wilson Snyder 2006-10-26 01:08:44 +00:00
parent 9026118a7c
commit 86c8e7b3e6
5 changed files with 30 additions and 2 deletions

View File

@ -5,6 +5,8 @@ indicates the contributor was also the author of the fix; Thanks!
* Verilator 3.6** * Verilator 3.6**
*** Add --output-split-cfuncs for accelerating GCC. [Eugene Weber]
**** Fix $signed mis-extending when input has a WIDTH violation. [Eugene Weber] **** Fix $signed mis-extending when input has a WIDTH violation. [Eugene Weber]
* Verilator 3.622 10/17/2006 Stable * Verilator 3.622 10/17/2006 Stable

View File

@ -377,9 +377,18 @@ C++ file exceeds the specified number of operations, a new file will be
created. In addition, any slow routines will be placed into __Slow files. created. In addition, any slow routines will be placed into __Slow files.
This accelerates compilation, as optimization can be disabled on the slow This accelerates compilation, as optimization can be disabled on the slow
routines, and the remaining files can be compiled on parallel machines. routines, and the remaining files can be compiled on parallel machines.
Using --output-split should have only a trivial impact on performance.
With GCC 3.3 on a 2GHz Opteron, --output-split 20000 will result in With GCC 3.3 on a 2GHz Opteron, --output-split 20000 will result in
splitting into approximately one-minute-compile chunks. splitting into approximately one-minute-compile chunks.
=item --output-split-cfuncs I<statements>
Enables splitting functions in the output .cpp/.sp files into multiple
functions. When a generated function exceeds the specified number of
operations, a new function will be created. Combined with --output-split,
this enables GCC to compile faster, at a small loss in performance that
grows as the statement limit is made smaller.
=item --pins64 =item --pins64
Specifies SystemC outputs of 33-64 bits wide should use uint64_t instead of Specifies SystemC outputs of 33-64 bits wide should use uint64_t instead of

View File

@ -348,6 +348,10 @@ void V3Options::parseOptsList(FileLine* fl, int argc, char** argv) {
shift; shift;
m_outputSplit = atoi(argv[i]); m_outputSplit = atoi(argv[i]);
} }
else if ( !strcmp (sw, "-output-split-cfuncs") ) {
shift;
m_outputSplitCFuncs = atoi(argv[i]);
}
else if ( !strcmp (sw, "-unroll-count") ) { // Undocumented optimization tweak else if ( !strcmp (sw, "-unroll-count") ) { // Undocumented optimization tweak
shift; shift;
m_unrollCount = atoi(argv[i]); m_unrollCount = atoi(argv[i]);
@ -581,6 +585,7 @@ V3Options::V3Options() {
m_inlineMult = 2000; m_inlineMult = 2000;
m_outputSplit = 0; m_outputSplit = 0;
m_outputSplitCFuncs = 0;
m_unrollCount = 64; m_unrollCount = 64;
m_unrollStmts = 20; m_unrollStmts = 20;

View File

@ -70,6 +70,7 @@ class V3Options {
int m_inlineMult; // main switch: --inline-mult int m_inlineMult; // main switch: --inline-mult
int m_outputSplit; // main switch: --output-split int m_outputSplit; // main switch: --output-split
int m_outputSplitCFuncs;// main switch: --output-split-cfuncs
int m_unrollCount; // main switch: --unroll-count int m_unrollCount; // main switch: --unroll-count
int m_unrollStmts; // main switch: --unroll-stmts int m_unrollStmts; // main switch: --unroll-stmts
@ -152,6 +153,7 @@ class V3Options {
int inlineMult() const { return m_inlineMult; } int inlineMult() const { return m_inlineMult; }
int outputSplit() const { return m_outputSplit; } int outputSplit() const { return m_outputSplit; }
int outputSplitCFuncs() const { return m_outputSplitCFuncs; }
int unrollCount() const { return m_unrollCount; } int unrollCount() const { return m_unrollCount; }
int unrollStmts() const { return m_unrollStmts; } int unrollStmts() const { return m_unrollStmts; }

View File

@ -271,6 +271,7 @@ private:
vector<OrderLoopEndVertex*> m_pmlLoopEndps; // processInsLoop: End vertex for each color vector<OrderLoopEndVertex*> m_pmlLoopEndps; // processInsLoop: End vertex for each color
vector<OrderLoopBeginVertex*> m_pomLoopMoveps;// processMoveLoop: Loops next nodes are under vector<OrderLoopBeginVertex*> m_pomLoopMoveps;// processMoveLoop: Loops next nodes are under
AstCFunc* m_pomNewFuncp; // Current function being created AstCFunc* m_pomNewFuncp; // Current function being created
int m_pomNewStmts; // Statements in function being created
V3Graph m_pomGraph; // Graph of logic elements to move V3Graph m_pomGraph; // Graph of logic elements to move
V3List<OrderMoveVertex*> m_pomWaiting; // List of nodes needing inputs to become ready V3List<OrderMoveVertex*> m_pomWaiting; // List of nodes needing inputs to become ready
protected: protected:
@ -688,6 +689,7 @@ public:
m_settleVxp = NULL; m_settleVxp = NULL;
m_inputsVxp = NULL; m_inputsVxp = NULL;
m_loopIdMax = LOOPID_FIRST; m_loopIdMax = LOOPID_FIRST;
m_pomNewStmts = 0;
if (debug()) m_graph.debug(5); // 3 is default if global debug; we want acyc debugging if (debug()) m_graph.debug(5); // 3 is default if global debug; we want acyc debugging
} }
virtual ~OrderVisitor() { virtual ~OrderVisitor() {
@ -1374,8 +1376,10 @@ void OrderVisitor::processMoveOne(OrderMoveVertex* vertexp, OrderMoveDomScope* d
} }
else { // Normal logic else { // Normal logic
// Make or borrow a CFunc to contain the new statements // Make or borrow a CFunc to contain the new statements
if (v3Global.opt.profileCFuncs()) { if (v3Global.opt.profileCFuncs()
// Put every statement into a unique function to ease profiling || (v3Global.opt.outputSplitCFuncs()
&& v3Global.opt.outputSplitCFuncs() < m_pomNewStmts)) {
// Put every statement into a unique function to ease profiling or reduce function size
m_pomNewFuncp = NULL; m_pomNewFuncp = NULL;
} }
if (!m_pomNewFuncp && domainp != m_deleteDomainp) { if (!m_pomNewFuncp && domainp != m_deleteDomainp) {
@ -1383,6 +1387,7 @@ void OrderVisitor::processMoveOne(OrderMoveVertex* vertexp, OrderMoveDomScope* d
m_pomNewFuncp = new AstCFunc(nodep->fileline(), name, scopep); m_pomNewFuncp = new AstCFunc(nodep->fileline(), name, scopep);
m_pomNewFuncp->argTypes(EmitCBaseVisitor::symClassVar()); m_pomNewFuncp->argTypes(EmitCBaseVisitor::symClassVar());
m_pomNewFuncp->symProlog(true); m_pomNewFuncp->symProlog(true);
m_pomNewStmts = 0;
if (domainp->hasInitial() || domainp->hasSettle()) m_pomNewFuncp->slow(true); if (domainp->hasInitial() || domainp->hasSettle()) m_pomNewFuncp->slow(true);
scopep->addActivep(m_pomNewFuncp); scopep->addActivep(m_pomNewFuncp);
// Where will we be adding the call? // Where will we be adding the call?
@ -1402,6 +1407,11 @@ void OrderVisitor::processMoveOne(OrderMoveVertex* vertexp, OrderMoveDomScope* d
pushDeletep(nodep); nodep=NULL; pushDeletep(nodep); nodep=NULL;
} else { } else {
m_pomNewFuncp->addStmtsp(nodep); m_pomNewFuncp->addStmtsp(nodep);
if (v3Global.opt.outputSplitCFuncs()) {
// Add in the number of nodes we're adding
EmitCBaseCounterVisitor visitor(nodep);
m_pomNewStmts += visitor.count();
}
} }
} }
processMoveDoneOne (vertexp); processMoveDoneOne (vertexp);