Construct AstExecGraph implementation outside of V3EmitC. (#3022)
The goal of this patch is to move functionality related to constructing the thread entry points and then invoking them out of V3EmitC (and into V3Partition). The long-term goal is to enable V3EmitC to emit functions partitioned based on header dependencies. Having V3EmitC deal only with AstCFunc instances and no other magic will facilitate this. In this patch: - We construct AstCFuncs for each thread entry point in V3Partition::finalize and move AstMTaskBody nodes under these functions. - Add the invocation of the threads as text statements within the AstExecGraph, so they are still invoked where the exec graph is located (the entry point functions are still referenced via AstCCall or AstAddrOfCFunc, so lazy declarations of referenced functions are created automatically). - Explicitly handle MTask state variables (VlMTaskVertex in verilated_threads.h) within Verilator, so there is no need to text bash a lot of these any more (some text refs still remain, but they are all created next to each other within V3Partition.cpp). The effect of all this on the emitted code should be nothing but some identifier/ordering changes. No functional change intended.
This commit is contained in:
parent
65bfb4e5ff
commit
a8f83d5758
23
src/V3Ast.h
23
src/V3Ast.h
|
|
@ -457,6 +457,7 @@ public:
|
||||||
// Internal types for mid-steps
|
// Internal types for mid-steps
|
||||||
SCOPEPTR,
|
SCOPEPTR,
|
||||||
CHARPTR,
|
CHARPTR,
|
||||||
|
MTASKSTATE,
|
||||||
// Unsigned and two state; fundamental types
|
// Unsigned and two state; fundamental types
|
||||||
UINT32,
|
UINT32,
|
||||||
UINT64,
|
UINT64,
|
||||||
|
|
@ -467,18 +468,19 @@ public:
|
||||||
};
|
};
|
||||||
enum en m_e;
|
enum en m_e;
|
||||||
const char* ascii() const {
|
const char* ascii() const {
|
||||||
static const char* const names[] = {
|
static const char* const names[]
|
||||||
"%E-unk", "bit", "byte", "chandle", "event", "int", "integer",
|
= {"%E-unk", "bit", "byte", "chandle", "event",
|
||||||
"logic", "longint", "real", "shortint", "time", "string", "VerilatedScope*",
|
"int", "integer", "logic", "longint", "real",
|
||||||
"char*", "IData", "QData", "LOGIC_IMPLICIT", " MAX"};
|
"shortint", "time", "string", "VerilatedScope*", "char*",
|
||||||
|
"VlMTaskState", "IData", "QData", "LOGIC_IMPLICIT", " MAX"};
|
||||||
return names[m_e];
|
return names[m_e];
|
||||||
}
|
}
|
||||||
const char* dpiType() const {
|
const char* dpiType() const {
|
||||||
static const char* const names[]
|
static const char* const names[]
|
||||||
= {"%E-unk", "svBit", "char", "void*", "char", "int",
|
= {"%E-unk", "svBit", "char", "void*", "char",
|
||||||
"%E-integer", "svLogic", "long long", "double", "short", "%E-time",
|
"int", "%E-integer", "svLogic", "long long", "double",
|
||||||
"const char*", "dpiScope", "const char*", "IData", "QData", "%E-logic-implicit",
|
"short", "%E-time", "const char*", "dpiScope", "const char*",
|
||||||
" MAX"};
|
"%E-mtaskstate", "IData", "QData", "%E-logic-implct", " MAX"};
|
||||||
return names[m_e];
|
return names[m_e];
|
||||||
}
|
}
|
||||||
static void selfTest() {
|
static void selfTest() {
|
||||||
|
|
@ -511,6 +513,7 @@ public:
|
||||||
case STRING: return 64; // opaque // Just the pointer, for today
|
case STRING: return 64; // opaque // Just the pointer, for today
|
||||||
case SCOPEPTR: return 0; // opaque
|
case SCOPEPTR: return 0; // opaque
|
||||||
case CHARPTR: return 0; // opaque
|
case CHARPTR: return 0; // opaque
|
||||||
|
case MTASKSTATE: return 0; // opaque
|
||||||
case UINT32: return 32;
|
case UINT32: return 32;
|
||||||
case UINT64: return 64;
|
case UINT64: return 64;
|
||||||
default: return 0;
|
default: return 0;
|
||||||
|
|
@ -549,11 +552,13 @@ public:
|
||||||
|| m_e == DOUBLE || m_e == SHORTINT || m_e == UINT32 || m_e == UINT64);
|
|| m_e == DOUBLE || m_e == SHORTINT || m_e == UINT32 || m_e == UINT64);
|
||||||
}
|
}
|
||||||
bool isOpaque() const { // IE not a simple number we can bit optimize
|
bool isOpaque() const { // IE not a simple number we can bit optimize
|
||||||
return (m_e == STRING || m_e == SCOPEPTR || m_e == CHARPTR || m_e == DOUBLE);
|
return (m_e == STRING || m_e == SCOPEPTR || m_e == CHARPTR || m_e == MTASKSTATE
|
||||||
|
|| m_e == DOUBLE);
|
||||||
}
|
}
|
||||||
bool isDouble() const { return m_e == DOUBLE; }
|
bool isDouble() const { return m_e == DOUBLE; }
|
||||||
bool isEventValue() const { return m_e == EVENTVALUE; }
|
bool isEventValue() const { return m_e == EVENTVALUE; }
|
||||||
bool isString() const { return m_e == STRING; }
|
bool isString() const { return m_e == STRING; }
|
||||||
|
bool isMTaskState() const { return m_e == MTASKSTATE; }
|
||||||
};
|
};
|
||||||
inline bool operator==(const AstBasicDTypeKwd& lhs, const AstBasicDTypeKwd& rhs) {
|
inline bool operator==(const AstBasicDTypeKwd& lhs, const AstBasicDTypeKwd& rhs) {
|
||||||
return lhs.m_e == rhs.m_e;
|
return lhs.m_e == rhs.m_e;
|
||||||
|
|
|
||||||
|
|
@ -238,26 +238,11 @@ AstNodeBiop* AstEqWild::newTyped(FileLine* fl, AstNode* lhsp, AstNode* rhsp) {
|
||||||
}
|
}
|
||||||
|
|
||||||
AstExecGraph::AstExecGraph(FileLine* fileline)
|
AstExecGraph::AstExecGraph(FileLine* fileline)
|
||||||
: ASTGEN_SUPER_ExecGraph(fileline) {
|
: ASTGEN_SUPER_ExecGraph(fileline)
|
||||||
m_depGraphp = new V3Graph;
|
, m_depGraphp{new V3Graph} {}
|
||||||
}
|
|
||||||
AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); }
|
AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); }
|
||||||
|
|
||||||
std::vector<const ExecMTask*> AstExecGraph::rootMTasks() {
|
|
||||||
// Build the list of initial mtasks to start
|
|
||||||
std::vector<const ExecMTask*> execMTasks;
|
|
||||||
|
|
||||||
for (const V3GraphVertex* vxp = depGraphp()->verticesBeginp(); vxp;
|
|
||||||
vxp = vxp->verticesNextp()) {
|
|
||||||
const ExecMTask* etp = dynamic_cast<const ExecMTask*>(vxp);
|
|
||||||
if (etp->threadRoot()) execMTasks.push_back(etp);
|
|
||||||
}
|
|
||||||
UASSERT_OBJ(execMTasks.size() <= static_cast<unsigned>(v3Global.opt.threads()), this,
|
|
||||||
"More root mtasks than available threads");
|
|
||||||
|
|
||||||
return execMTasks;
|
|
||||||
}
|
|
||||||
|
|
||||||
AstNode* AstInsideRange::newAndFromInside(AstNode* exprp, AstNode* lhsp, AstNode* rhsp) {
|
AstNode* AstInsideRange::newAndFromInside(AstNode* exprp, AstNode* lhsp, AstNode* rhsp) {
|
||||||
AstNode* ap = new AstGte(fileline(), exprp->cloneTree(true), lhsp);
|
AstNode* ap = new AstGte(fileline(), exprp->cloneTree(true), lhsp);
|
||||||
AstNode* bp = new AstLte(fileline(), exprp->cloneTree(true), rhsp);
|
AstNode* bp = new AstLte(fileline(), exprp->cloneTree(true), rhsp);
|
||||||
|
|
@ -717,10 +702,12 @@ AstNodeDType::CTypeRecursed AstNodeDType::cTypeRecurse(bool compound) const {
|
||||||
info.m_type = "const char*";
|
info.m_type = "const char*";
|
||||||
} else if (bdtypep->keyword() == AstBasicDTypeKwd::SCOPEPTR) {
|
} else if (bdtypep->keyword() == AstBasicDTypeKwd::SCOPEPTR) {
|
||||||
info.m_type = "const VerilatedScope*";
|
info.m_type = "const VerilatedScope*";
|
||||||
} else if (bdtypep->keyword() == AstBasicDTypeKwd::DOUBLE) {
|
} else if (bdtypep->keyword().isDouble()) {
|
||||||
info.m_type = "double";
|
info.m_type = "double";
|
||||||
} else if (bdtypep->keyword() == AstBasicDTypeKwd::STRING) {
|
} else if (bdtypep->keyword().isString()) {
|
||||||
info.m_type = "std::string";
|
info.m_type = "std::string";
|
||||||
|
} else if (bdtypep->keyword().isMTaskState()) {
|
||||||
|
info.m_type = "VlMTaskVertex";
|
||||||
} else if (dtypep->widthMin() <= 8) { // Handle unpacked arrays; not bdtypep->width
|
} else if (dtypep->widthMin() <= 8) { // Handle unpacked arrays; not bdtypep->width
|
||||||
info.m_type = "CData" + bitvec;
|
info.m_type = "CData" + bitvec;
|
||||||
} else if (dtypep->widthMin() <= 16) {
|
} else if (dtypep->widthMin() <= 16) {
|
||||||
|
|
|
||||||
|
|
@ -2070,7 +2070,8 @@ public:
|
||||||
// (Slow) recurse down to find basic data type (Note don't need virtual -
|
// (Slow) recurse down to find basic data type (Note don't need virtual -
|
||||||
// AstVar isn't a NodeDType)
|
// AstVar isn't a NodeDType)
|
||||||
AstBasicDType* basicp() const { return subDTypep()->basicp(); }
|
AstBasicDType* basicp() const { return subDTypep()->basicp(); }
|
||||||
// op3 = Initial value that never changes (static const)
|
// op3 = Initial value that never changes (static const), or constructor argument for
|
||||||
|
// MTASKSTATE variables
|
||||||
AstNode* valuep() const { return op3p(); }
|
AstNode* valuep() const { return op3p(); }
|
||||||
// It's valuep(), not constp(), as may be more complicated than an AstConst
|
// It's valuep(), not constp(), as may be more complicated than an AstConst
|
||||||
void valuep(AstNode* nodep) { setOp3p(nodep); }
|
void valuep(AstNode* nodep) { setOp3p(nodep); }
|
||||||
|
|
@ -9045,7 +9046,8 @@ class AstExecGraph final : public AstNode {
|
||||||
// them without traversing the graph (it's not always needed to
|
// them without traversing the graph (it's not always needed to
|
||||||
// traverse the graph.)
|
// traverse the graph.)
|
||||||
private:
|
private:
|
||||||
V3Graph* m_depGraphp; // contains ExecMTask's
|
V3Graph* const m_depGraphp; // contains ExecMTask's
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit AstExecGraph(FileLine* fl);
|
explicit AstExecGraph(FileLine* fl);
|
||||||
ASTNODE_NODE_FUNCS_NO_DTOR(ExecGraph)
|
ASTNODE_NODE_FUNCS_NO_DTOR(ExecGraph)
|
||||||
|
|
@ -9057,7 +9059,7 @@ public:
|
||||||
const V3Graph* depGraphp() const { return m_depGraphp; }
|
const V3Graph* depGraphp() const { return m_depGraphp; }
|
||||||
V3Graph* mutableDepGraphp() { return m_depGraphp; }
|
V3Graph* mutableDepGraphp() { return m_depGraphp; }
|
||||||
void addMTaskBody(AstMTaskBody* bodyp) { addOp1p(bodyp); }
|
void addMTaskBody(AstMTaskBody* bodyp) { addOp1p(bodyp); }
|
||||||
std::vector<const ExecMTask*> rootMTasks();
|
void addStmtsp(AstNode* stmtp) { addOp2p(stmtp); }
|
||||||
};
|
};
|
||||||
|
|
||||||
class AstSplitPlaceholder final : public AstNode {
|
class AstSplitPlaceholder final : public AstNode {
|
||||||
|
|
|
||||||
226
src/V3EmitC.cpp
226
src/V3EmitC.cpp
|
|
@ -1429,18 +1429,6 @@ class EmitCLazyDecls final : public AstNVisitor {
|
||||||
lazyDeclare(nodep->funcp());
|
lazyDeclare(nodep->funcp());
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void visit(AstExecGraph* nodep) override {
|
|
||||||
if (nodep->user2SetOnce()) return; // Already declared
|
|
||||||
// Build the list of initial mtasks to start
|
|
||||||
for (const ExecMTask* mtp : nodep->rootMTasks()) {
|
|
||||||
m_emitter.puts("void ");
|
|
||||||
m_emitter.puts(EmitCBaseVisitor::topClassName() + "__"
|
|
||||||
+ EmitCBaseVisitor::protect(mtp->cFuncName()));
|
|
||||||
m_emitter.puts("(void* voidSelf, bool even_cycle);\n");
|
|
||||||
m_needsBlankLine = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void visit(AstVarRef* nodep) override {
|
virtual void visit(AstVarRef* nodep) override {
|
||||||
AstVar* const varp = nodep->varp();
|
AstVar* const varp = nodep->varp();
|
||||||
// Only constant pool symbols are lazy declared for now ...
|
// Only constant pool symbols are lazy declared for now ...
|
||||||
|
|
@ -1574,93 +1562,6 @@ class EmitCImp final : EmitCStmts {
|
||||||
return ofp;
|
return ofp;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the number of cross-thread dependencies into mtaskp.
|
|
||||||
// If >0, mtaskp must test whether its prereqs are done before starting,
|
|
||||||
// and may need to block.
|
|
||||||
static uint32_t packedMTaskMayBlock(const ExecMTask* mtaskp) {
|
|
||||||
uint32_t result = 0;
|
|
||||||
for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) {
|
|
||||||
const ExecMTask* prevp = dynamic_cast<ExecMTask*>(edgep->fromp());
|
|
||||||
if (prevp->thread() != mtaskp->thread()) ++result;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void emitMTaskBody(AstMTaskBody* nodep) {
|
|
||||||
ExecMTask* curExecMTaskp = nodep->execMTaskp();
|
|
||||||
if (packedMTaskMayBlock(curExecMTaskp)) {
|
|
||||||
puts("vlSelf->__Vm_mt_" + cvtToStr(curExecMTaskp->id())
|
|
||||||
+ ".waitUntilUpstreamDone(even_cycle);\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
string recName;
|
|
||||||
if (v3Global.opt.profThreads()) {
|
|
||||||
recName = "__Vprfthr_" + cvtToStr(curExecMTaskp->id());
|
|
||||||
puts("VlProfileRec* " + recName + " = nullptr;\n");
|
|
||||||
// Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
|
|
||||||
puts("if (VL_UNLIKELY(vlSelf->__Vm_profile_cycle_start)) {\n");
|
|
||||||
puts(recName + " = vlSelf->__Vm_threadPoolp->profileAppend();\n");
|
|
||||||
puts(recName + "->startRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start,");
|
|
||||||
puts(" " + cvtToStr(curExecMTaskp->id()) + ",");
|
|
||||||
puts(" " + cvtToStr(curExecMTaskp->cost()) + ");\n");
|
|
||||||
puts("}\n");
|
|
||||||
}
|
|
||||||
puts("Verilated::mtaskId(" + cvtToStr(curExecMTaskp->id()) + ");\n");
|
|
||||||
|
|
||||||
// The actual body of calls to leaf functions
|
|
||||||
iterateAndNextNull(nodep->stmtsp());
|
|
||||||
|
|
||||||
if (v3Global.opt.profThreads()) {
|
|
||||||
// Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
|
|
||||||
puts("if (VL_UNLIKELY(" + recName + ")) {\n");
|
|
||||||
puts(recName + "->endRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start);\n");
|
|
||||||
puts("}\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush message queue
|
|
||||||
puts("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n");
|
|
||||||
|
|
||||||
// For any downstream mtask that's on another thread, bump its
|
|
||||||
// counter and maybe notify it.
|
|
||||||
for (V3GraphEdge* edgep = curExecMTaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
|
|
||||||
const ExecMTask* nextp = dynamic_cast<ExecMTask*>(edgep->top());
|
|
||||||
if (nextp->thread() != curExecMTaskp->thread()) {
|
|
||||||
puts("vlSelf->__Vm_mt_" + cvtToStr(nextp->id())
|
|
||||||
+ ".signalUpstreamDone(even_cycle);\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run the next mtask inline
|
|
||||||
const ExecMTask* nextp = curExecMTaskp->packNextp();
|
|
||||||
if (nextp) {
|
|
||||||
emitMTaskBody(nextp->bodyp());
|
|
||||||
} else {
|
|
||||||
// Unblock the fake "final" mtask
|
|
||||||
puts("vlSelf->__Vm_mt_final.signalUpstreamDone(even_cycle);\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void visit(AstMTaskBody* nodep) override {
|
|
||||||
VL_RESTORER(m_useSelfForThis);
|
|
||||||
maybeSplit();
|
|
||||||
splitSizeInc(10);
|
|
||||||
|
|
||||||
puts("\n");
|
|
||||||
for (const ExecMTask* mtp = nodep->execMTaskp(); mtp; mtp = mtp->packNextp()) {
|
|
||||||
m_lazyDecls.emit(mtp->bodyp());
|
|
||||||
}
|
|
||||||
puts("void ");
|
|
||||||
puts(topClassName() + "__" + protect(nodep->execMTaskp()->cFuncName()));
|
|
||||||
puts("(void* voidSelf, bool even_cycle) {\n");
|
|
||||||
puts(topClassName() + "* const vlSelf = static_cast<" + topClassName()
|
|
||||||
+ "*>(voidSelf);\n");
|
|
||||||
m_useSelfForThis = true;
|
|
||||||
puts(symClassAssign());
|
|
||||||
emitMTaskBody(nodep);
|
|
||||||
ensureNewLine();
|
|
||||||
puts("}\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
//---------------------------------------
|
//---------------------------------------
|
||||||
// VISITORS
|
// VISITORS
|
||||||
using EmitCStmts::visit; // Suppress hidden overloaded virtual function warning
|
using EmitCStmts::visit; // Suppress hidden overloaded virtual function warning
|
||||||
|
|
@ -1734,6 +1635,12 @@ class EmitCImp final : EmitCStmts {
|
||||||
if (nodep->ifdef() != "") puts("#endif // " + nodep->ifdef() + "\n");
|
if (nodep->ifdef() != "") puts("#endif // " + nodep->ifdef() + "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual void visit(AstMTaskBody* nodep) override {
|
||||||
|
VL_RESTORER(m_useSelfForThis);
|
||||||
|
m_useSelfForThis = true;
|
||||||
|
iterateChildrenConst(nodep);
|
||||||
|
}
|
||||||
|
|
||||||
void emitChangeDet() {
|
void emitChangeDet() {
|
||||||
putsDecoration("// Change detection\n");
|
putsDecoration("// Change detection\n");
|
||||||
puts("QData __req = false; // Logically a bool\n"); // But not because it results in
|
puts("QData __req = false; // Logically a bool\n"); // But not because it results in
|
||||||
|
|
@ -1833,34 +1740,8 @@ class EmitCImp final : EmitCStmts {
|
||||||
"ExecGraph should be a singleton!");
|
"ExecGraph should be a singleton!");
|
||||||
// The location of the AstExecGraph within the containing _eval()
|
// The location of the AstExecGraph within the containing _eval()
|
||||||
// function is where we want to invoke the graph and wait for it to
|
// function is where we want to invoke the graph and wait for it to
|
||||||
// complete. Do that now.
|
// complete. Emitting the children does just that.
|
||||||
//
|
iterateChildrenConst(nodep);
|
||||||
// Don't recurse to children -- this isn't the place to emit
|
|
||||||
// function definitions for the nested CFuncs. We'll do that at the
|
|
||||||
// end.
|
|
||||||
puts("vlSelf->__Vm_even_cycle = !vlSelf->__Vm_even_cycle;\n");
|
|
||||||
|
|
||||||
// Build the list of initial mtasks to start
|
|
||||||
std::vector<const ExecMTask*> execMTasks = nodep->rootMTasks();
|
|
||||||
|
|
||||||
if (!execMTasks.empty()) {
|
|
||||||
for (uint32_t i = 0; i < execMTasks.size(); ++i) {
|
|
||||||
const bool runInline = (i == execMTasks.size() - 1);
|
|
||||||
const string protName
|
|
||||||
= topClassName() + "__" + protect(execMTasks[i]->cFuncName());
|
|
||||||
if (runInline) {
|
|
||||||
// The thread calling eval() will run this mtask inline,
|
|
||||||
// along with its packed successors.
|
|
||||||
puts(protName + "(vlSelf, vlSelf->__Vm_even_cycle);\n");
|
|
||||||
puts("Verilated::mtaskId(0);\n");
|
|
||||||
} else {
|
|
||||||
// The other N-1 go to the thread pool.
|
|
||||||
puts("vlSelf->__Vm_threadPoolp->workerp(" + cvtToStr(i) + ")->addTask("
|
|
||||||
+ protName + ", vlSelf, vlSelf->__Vm_even_cycle);\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
puts("vlSelf->__Vm_mt_final.waitUntilUpstreamDone(vlSelf->__Vm_even_cycle);\n");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------
|
//---------------------------------------
|
||||||
|
|
@ -2015,8 +1896,8 @@ class EmitCImp final : EmitCStmts {
|
||||||
void emitSettleLoop(bool initial);
|
void emitSettleLoop(bool initial);
|
||||||
void emitWrapEval();
|
void emitWrapEval();
|
||||||
void emitWrapFast();
|
void emitWrapFast();
|
||||||
void emitMTaskState();
|
void emitThreadingState();
|
||||||
void emitMTaskVertexCtors(bool* firstp);
|
void emitThreadingCtors(bool* firstp);
|
||||||
void emitIntTop(const AstNodeModule* modp);
|
void emitIntTop(const AstNodeModule* modp);
|
||||||
void emitInt(AstNodeModule* modp);
|
void emitInt(AstNodeModule* modp);
|
||||||
void maybeSplit();
|
void maybeSplit();
|
||||||
|
|
@ -2085,6 +1966,7 @@ void EmitCStmts::emitVarDecl(const AstVar* nodep, const string& prefixIfImp) {
|
||||||
if (nodep->isWide()) puts("," + cvtToStr(nodep->widthWords()));
|
if (nodep->isWide()) puts("," + cvtToStr(nodep->widthWords()));
|
||||||
puts(");\n");
|
puts(");\n");
|
||||||
} else {
|
} else {
|
||||||
|
if (basicp && basicp->keyword().isMTaskState()) { m_ctorVarsVec.push_back(nodep); }
|
||||||
// strings and other fundamental c types
|
// strings and other fundamental c types
|
||||||
if (nodep->isFuncLocal() && nodep->isString()) {
|
if (nodep->isFuncLocal() && nodep->isString()) {
|
||||||
const string name = nodep->name();
|
const string name = nodep->name();
|
||||||
|
|
@ -2114,13 +1996,19 @@ void EmitCStmts::emitCtorSep(bool* firstp) {
|
||||||
void EmitCStmts::emitVarCtors(bool* firstp) {
|
void EmitCStmts::emitVarCtors(bool* firstp) {
|
||||||
if (!m_ctorVarsVec.empty()) {
|
if (!m_ctorVarsVec.empty()) {
|
||||||
ofp()->indentInc();
|
ofp()->indentInc();
|
||||||
puts("\n");
|
if (*firstp) puts("\n");
|
||||||
for (const AstVar* varp : m_ctorVarsVec) {
|
for (const AstVar* varp : m_ctorVarsVec) {
|
||||||
bool isArray = !VN_CAST(varp->dtypeSkipRefp(), BasicDType);
|
const AstBasicDType* const dtypep = VN_CAST(varp->dtypeSkipRefp(), BasicDType);
|
||||||
if (isArray) {
|
if (!dtypep) {
|
||||||
puts("// Skipping array: ");
|
puts("// Skipping array: ");
|
||||||
puts(varp->nameProtect());
|
puts(varp->nameProtect());
|
||||||
puts("\n");
|
puts("\n");
|
||||||
|
} else if (dtypep->keyword().isMTaskState()) {
|
||||||
|
emitCtorSep(firstp);
|
||||||
|
puts(varp->nameProtect());
|
||||||
|
puts("(");
|
||||||
|
iterate(varp->valuep());
|
||||||
|
puts(")");
|
||||||
} else {
|
} else {
|
||||||
emitCtorSep(firstp);
|
emitCtorSep(firstp);
|
||||||
puts(varp->nameProtect());
|
puts(varp->nameProtect());
|
||||||
|
|
@ -2555,36 +2443,17 @@ void EmitCImp::emitCoverageDecl(AstNodeModule*) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCImp::emitMTaskVertexCtors(bool* firstp) {
|
void EmitCImp::emitThreadingCtors(bool* firstp) {
|
||||||
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
|
ofp()->indentInc();
|
||||||
UASSERT_OBJ(execGraphp, v3Global.rootp(), "Root should have an execGraphp");
|
|
||||||
const V3Graph* depGraphp = execGraphp->depGraphp();
|
|
||||||
|
|
||||||
unsigned finalEdgesInCt = 0;
|
|
||||||
for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
|
||||||
const ExecMTask* mtp = dynamic_cast<const ExecMTask*>(vxp);
|
|
||||||
unsigned edgesInCt = packedMTaskMayBlock(mtp);
|
|
||||||
if (packedMTaskMayBlock(mtp) > 0) {
|
|
||||||
emitCtorSep(firstp);
|
|
||||||
puts("__Vm_mt_" + cvtToStr(mtp->id()) + "(" + cvtToStr(edgesInCt) + ")");
|
|
||||||
}
|
|
||||||
// Each mtask with no packed successor will become a dependency
|
|
||||||
// for the final node:
|
|
||||||
if (!mtp->packNextp()) ++finalEdgesInCt;
|
|
||||||
}
|
|
||||||
|
|
||||||
emitCtorSep(firstp);
|
|
||||||
puts("__Vm_mt_final(" + cvtToStr(finalEdgesInCt) + ")");
|
|
||||||
|
|
||||||
// This will flip to 'true' before the start of the 0th cycle.
|
|
||||||
emitCtorSep(firstp);
|
emitCtorSep(firstp);
|
||||||
puts("__Vm_threadPoolp(nullptr)");
|
puts("__Vm_threadPoolp(nullptr)");
|
||||||
|
emitCtorSep(firstp);
|
||||||
|
puts("__Vm_even_cycle(false)");
|
||||||
if (v3Global.opt.profThreads()) {
|
if (v3Global.opt.profThreads()) {
|
||||||
emitCtorSep(firstp);
|
emitCtorSep(firstp);
|
||||||
puts("__Vm_profile_cycle_start(0)");
|
puts("__Vm_profile_cycle_start(0)");
|
||||||
}
|
}
|
||||||
emitCtorSep(firstp);
|
ofp()->indentDec();
|
||||||
puts("__Vm_even_cycle(false)");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCImp::emitCtorImp(AstNodeModule* modp) {
|
void EmitCImp::emitCtorImp(AstNodeModule* modp) {
|
||||||
|
|
@ -2612,9 +2481,10 @@ void EmitCImp::emitCtorImp(AstNodeModule* modp) {
|
||||||
} else {
|
} else {
|
||||||
puts(modName + "::" + modName + "(const char* _vcname__)\n");
|
puts(modName + "::" + modName + "(const char* _vcname__)\n");
|
||||||
puts(" : VerilatedModule(_vcname__)\n");
|
puts(" : VerilatedModule(_vcname__)\n");
|
||||||
|
first = false; // printed the first ':'
|
||||||
}
|
}
|
||||||
emitVarCtors(&first);
|
emitVarCtors(&first);
|
||||||
if (modp->isTop() && v3Global.opt.mtasks()) emitMTaskVertexCtors(&first);
|
if (modp->isTop() && v3Global.opt.mtasks()) emitThreadingCtors(&first);
|
||||||
|
|
||||||
puts(" {\n");
|
puts(" {\n");
|
||||||
emitCellCtors(modp);
|
emitCellCtors(modp);
|
||||||
|
|
@ -2804,9 +2674,11 @@ void EmitCImp::emitSavableImp(AstNodeModule* modp) {
|
||||||
puts("; ++" + ivar + ") {\n");
|
puts("; ++" + ivar + ") {\n");
|
||||||
elementp = arrayp->subDTypep()->skipRefp();
|
elementp = arrayp->subDTypep()->skipRefp();
|
||||||
}
|
}
|
||||||
|
const AstBasicDType* const basicp = elementp->basicp();
|
||||||
|
// Do not save MTask state, only matters within an evaluation
|
||||||
|
if (basicp && basicp->keyword().isMTaskState()) continue;
|
||||||
// Want to detect types that are represented as arrays
|
// Want to detect types that are represented as arrays
|
||||||
// (i.e. packed types of more than 64 bits).
|
// (i.e. packed types of more than 64 bits).
|
||||||
AstBasicDType* basicp = elementp->basicp();
|
|
||||||
if (elementp->isWide()
|
if (elementp->isWide()
|
||||||
&& !(basicp && basicp->keyword() == AstBasicDTypeKwd::STRING)) {
|
&& !(basicp && basicp->keyword() == AstBasicDTypeKwd::STRING)) {
|
||||||
int vecnum = vects++;
|
int vecnum = vects++;
|
||||||
|
|
@ -3249,26 +3121,13 @@ void EmitCStmts::emitSortedVarList(const VarVec& anons, const VarVec& nonanons,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCImp::emitMTaskState() {
|
void EmitCImp::emitThreadingState() {
|
||||||
ofp()->putsPrivate(false); // Accessed from loose function
|
ofp()->putsPrivate(false); // Accessed from loose function
|
||||||
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
|
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
|
||||||
UASSERT_OBJ(execGraphp, v3Global.rootp(), "Root should have an execGraphp");
|
UASSERT_OBJ(execGraphp, v3Global.rootp(), "Root should have an execGraphp");
|
||||||
|
|
||||||
const V3Graph* depGraphp = execGraphp->depGraphp();
|
|
||||||
for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
|
||||||
const ExecMTask* mtp = dynamic_cast<const ExecMTask*>(vxp);
|
|
||||||
if (packedMTaskMayBlock(mtp) > 0) {
|
|
||||||
puts("VlMTaskVertex __Vm_mt_" + cvtToStr(mtp->id()) + ";\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// This fake mtask depends on all the real ones. We use it to block
|
|
||||||
// eval() until all mtasks are done.
|
|
||||||
//
|
|
||||||
// In the future we might allow _eval() to return before the graph is
|
|
||||||
// fully done executing, for "half wave" scheduling. For now we wait
|
|
||||||
// for all mtasks though.
|
|
||||||
puts("VlMTaskVertex __Vm_mt_final;\n");
|
|
||||||
puts("VlThreadPool* __Vm_threadPoolp;\n");
|
puts("VlThreadPool* __Vm_threadPoolp;\n");
|
||||||
|
puts("bool __Vm_even_cycle;\n");
|
||||||
|
|
||||||
if (v3Global.opt.profThreads()) {
|
if (v3Global.opt.profThreads()) {
|
||||||
// rdtsc() at current cycle start
|
// rdtsc() at current cycle start
|
||||||
|
|
@ -3278,8 +3137,6 @@ void EmitCImp::emitMTaskState() {
|
||||||
// Track our position in the cache warmup and actual profile window
|
// Track our position in the cache warmup and actual profile window
|
||||||
puts("vluint32_t __Vm_profile_window_ct;\n");
|
puts("vluint32_t __Vm_profile_window_ct;\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
puts("bool __Vm_even_cycle;\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitCImp::emitIntTop(const AstNodeModule* modp) {
|
void EmitCImp::emitIntTop(const AstNodeModule* modp) {
|
||||||
|
|
@ -3380,7 +3237,7 @@ void EmitCImp::emitInt(AstNodeModule* modp) {
|
||||||
if (v3Global.opt.inhibitSim()) {
|
if (v3Global.opt.inhibitSim()) {
|
||||||
puts("bool __Vm_inhibitSim; ///< Set true to disable evaluation of module\n");
|
puts("bool __Vm_inhibitSim; ///< Set true to disable evaluation of module\n");
|
||||||
}
|
}
|
||||||
if (v3Global.opt.mtasks()) emitMTaskState();
|
if (v3Global.opt.mtasks()) emitThreadingState();
|
||||||
}
|
}
|
||||||
emitCoverageDecl(modp); // may flip public/private
|
emitCoverageDecl(modp); // may flip public/private
|
||||||
|
|
||||||
|
|
@ -3634,23 +3491,6 @@ void EmitCImp::mainImp(AstNodeModule* modp, bool slow) {
|
||||||
m_modp = modp;
|
m_modp = modp;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_fast && modp->isTop() && v3Global.opt.mtasks()) {
|
|
||||||
// Make a final pass and emit function definitions for the mtasks
|
|
||||||
// in the ExecGraph
|
|
||||||
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
|
|
||||||
const V3Graph* depGraphp = execGraphp->depGraphp();
|
|
||||||
for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp;
|
|
||||||
vxp = vxp->verticesNextp()) {
|
|
||||||
const ExecMTask* mtaskp = dynamic_cast<const ExecMTask*>(vxp);
|
|
||||||
if (mtaskp->threadRoot()) {
|
|
||||||
// Only define one function for all the mtasks packed on
|
|
||||||
// a given thread. We'll name this function after the
|
|
||||||
// root mtask though it contains multiple mtasks' worth
|
|
||||||
// of logic.
|
|
||||||
iterate(mtaskp->bodyp());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
VL_DO_CLEAR(delete m_ofp, m_ofp = nullptr);
|
VL_DO_CLEAR(delete m_ofp, m_ofp = nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,10 @@ public:
|
||||||
return VIdProtect::protectWordsIf(name, doIt);
|
return VIdProtect::protectWordsIf(name, doIt);
|
||||||
}
|
}
|
||||||
static string ifNoProtect(const string& in) { return v3Global.opt.protectIds() ? "" : in; }
|
static string ifNoProtect(const string& in) { return v3Global.opt.protectIds() ? "" : in; }
|
||||||
|
static string voidSelfAssign() {
|
||||||
|
return topClassName() + "* const __restrict vlSelf VL_ATTR_UNUSED = static_cast<"
|
||||||
|
+ topClassName() + "*>(voidSelf);\n";
|
||||||
|
}
|
||||||
static string symClassName() { return v3Global.opt.prefix() + "_" + protect("_Syms"); }
|
static string symClassName() { return v3Global.opt.prefix() + "_" + protect("_Syms"); }
|
||||||
static string symClassVar() { return symClassName() + "* __restrict vlSymsp"; }
|
static string symClassVar() { return symClassName() + "* __restrict vlSymsp"; }
|
||||||
static string symClassAssign() {
|
static string symClassAssign() {
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@
|
||||||
#include "config_build.h"
|
#include "config_build.h"
|
||||||
#include "verilatedos.h"
|
#include "verilatedos.h"
|
||||||
|
|
||||||
|
#include "V3EmitCBase.h"
|
||||||
#include "V3Os.h"
|
#include "V3Os.h"
|
||||||
#include "V3File.h"
|
#include "V3File.h"
|
||||||
#include "V3GraphAlg.h"
|
#include "V3GraphAlg.h"
|
||||||
|
|
@ -1997,6 +1998,48 @@ private:
|
||||||
VL_DEBUG_FUNC;
|
VL_DEBUG_FUNC;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//######################################################################
|
||||||
|
// ThreadSchedule
|
||||||
|
|
||||||
|
class PartPackMTasks;
|
||||||
|
|
||||||
|
// The thread schedule, containing all information needed later. Note that this is a simple
|
||||||
|
// aggregate data type and the only way to get hold of an instance of it is via
|
||||||
|
// PartPackMTasks::pack, which is moved from there and is const, which means we can only acquire a
|
||||||
|
// const reference to it and no further modifications are allowed, so all members are public
|
||||||
|
// (attributes).
|
||||||
|
class ThreadSchedule final {
|
||||||
|
public:
|
||||||
|
// Allocation of sequence of MTasks to threads. Can be considered a map from thread ID to
|
||||||
|
// the sequence of MTasks to be executed by that thread.
|
||||||
|
std::vector<std::vector<const ExecMTask*>> threads;
|
||||||
|
|
||||||
|
// Map from MTask to ID of thread it is assigned to.
|
||||||
|
std::unordered_map<const ExecMTask*, uint32_t> threadId;
|
||||||
|
|
||||||
|
private:
|
||||||
|
friend class PartPackMTasks;
|
||||||
|
|
||||||
|
explicit ThreadSchedule(uint32_t nThreads)
|
||||||
|
: threads{nThreads} {}
|
||||||
|
VL_UNCOPYABLE(ThreadSchedule); // But movable
|
||||||
|
ThreadSchedule(ThreadSchedule&&) = default;
|
||||||
|
ThreadSchedule& operator=(ThreadSchedule&&) = default;
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must
|
||||||
|
// test whether its dependencies are ready before starting, and therefore may need to block.
|
||||||
|
uint32_t crossThreadDependencies(const ExecMTask* mtaskp) const {
|
||||||
|
const uint32_t thisThreadId = threadId.at(mtaskp);
|
||||||
|
uint32_t result = 0;
|
||||||
|
for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) {
|
||||||
|
const ExecMTask* const prevp = dynamic_cast<ExecMTask*>(edgep->fromp());
|
||||||
|
if (threadId.at(prevp) != thisThreadId) ++result;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
//######################################################################
|
//######################################################################
|
||||||
// PartPackMTasks
|
// PartPackMTasks
|
||||||
|
|
||||||
|
|
@ -2016,49 +2059,45 @@ private:
|
||||||
// thread A checks the end time of an mtask running on thread B. This extra
|
// thread A checks the end time of an mtask running on thread B. This extra
|
||||||
// "padding" avoids tight "layovers" at cross-thread dependencies.
|
// "padding" avoids tight "layovers" at cross-thread dependencies.
|
||||||
class PartPackMTasks final {
|
class PartPackMTasks final {
|
||||||
private:
|
// CONSTANTS
|
||||||
|
static constexpr uint32_t UNASSIGNED = 0xffffffff;
|
||||||
|
|
||||||
// TYPES
|
// TYPES
|
||||||
struct MTaskState {
|
struct MTaskState {
|
||||||
uint32_t completionTime; // Estimated time this mtask will complete
|
uint32_t completionTime = 0; // Estimated time this mtask will complete
|
||||||
|
uint32_t threadId = UNASSIGNED; // Thread id this MTask is assigned to
|
||||||
|
const ExecMTask* nextp = nullptr; // Next MTask on same thread after this
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MTaskCmp {
|
struct MTaskCmp {
|
||||||
bool operator()(const ExecMTask* ap, ExecMTask* bp) const { return ap->id() < bp->id(); }
|
bool operator()(const ExecMTask* ap, const ExecMTask* bp) const {
|
||||||
|
return ap->id() < bp->id();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// MEMBERS
|
// MEMBERS
|
||||||
V3Graph* m_mtasksp; // Mtask graph
|
const uint32_t m_nThreads; // Number of threads
|
||||||
uint32_t m_nThreads; // Number of threads
|
const uint32_t m_sandbagNumerator; // Numerator padding for est runtime
|
||||||
uint32_t m_sandbagNumerator; // Numerator padding for est runtime
|
const uint32_t m_sandbagDenom; // Denominator padding for est runtime
|
||||||
uint32_t m_sandbagDenom; // Denomerator padding for est runtime
|
|
||||||
|
|
||||||
using MTaskStateMap = std::unordered_map<const ExecMTask*, MTaskState>;
|
std::unordered_map<const ExecMTask*, MTaskState> m_mtaskState; // State for each mtask.
|
||||||
MTaskStateMap m_mtaskState; // State for each mtask.
|
|
||||||
|
|
||||||
MTaskCmp m_mtaskCmp; // Comparison functor
|
|
||||||
using ReadyMTasks = std::set<ExecMTask*, MTaskCmp&>;
|
|
||||||
ReadyMTasks m_ready; // MTasks ready to be assigned next; all their
|
|
||||||
// // dependencies are already assigned.
|
|
||||||
|
|
||||||
std::vector<ExecMTask*> m_prevMTask; // Previous mtask scheduled to each thread.
|
|
||||||
std::vector<uint32_t> m_busyUntil; // Time each thread is occupied until
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// CONSTRUCTORS
|
// CONSTRUCTORS
|
||||||
explicit PartPackMTasks(V3Graph* mtasksp, uint32_t nThreads = v3Global.opt.threads(),
|
explicit PartPackMTasks(uint32_t nThreads = v3Global.opt.threads(),
|
||||||
unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100)
|
unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100)
|
||||||
: m_mtasksp{mtasksp}
|
: m_nThreads{nThreads}
|
||||||
, m_nThreads{nThreads}
|
|
||||||
, m_sandbagNumerator{sandbagNumerator}
|
, m_sandbagNumerator{sandbagNumerator}
|
||||||
, m_sandbagDenom{sandbagDenom}
|
, m_sandbagDenom{sandbagDenom} {}
|
||||||
, m_ready{m_mtaskCmp} {}
|
|
||||||
~PartPackMTasks() = default;
|
~PartPackMTasks() = default;
|
||||||
|
|
||||||
|
private:
|
||||||
// METHODS
|
// METHODS
|
||||||
uint32_t completionTime(const ExecMTask* mtaskp, uint32_t thread) {
|
uint32_t completionTime(const ExecMTask* mtaskp, uint32_t threadId) {
|
||||||
const MTaskState& state = m_mtaskState[mtaskp];
|
const MTaskState& state = m_mtaskState[mtaskp];
|
||||||
UASSERT(mtaskp->thread() != 0xffffffff, "Mtask should have assigned thread");
|
UASSERT(state.threadId != UNASSIGNED, "Mtask should have assigned thread");
|
||||||
if (thread == mtaskp->thread()) {
|
if (threadId == state.threadId) {
|
||||||
// No overhead on native thread
|
// No overhead on same thread
|
||||||
return state.completionTime;
|
return state.completionTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2068,157 +2107,162 @@ public:
|
||||||
= state.completionTime + (m_sandbagNumerator * mtaskp->cost()) / m_sandbagDenom;
|
= state.completionTime + (m_sandbagNumerator * mtaskp->cost()) / m_sandbagDenom;
|
||||||
|
|
||||||
// If task B is packed after task A on thread 0, don't let thread 1
|
// If task B is packed after task A on thread 0, don't let thread 1
|
||||||
// think that A finishes later than thread 0 thinks that B
|
// think that A finishes earlier than thread 0 thinks that B
|
||||||
// finishes, otherwise we get priority inversions and fail the self
|
// finishes, otherwise we get priority inversions and fail the self
|
||||||
// test.
|
// test.
|
||||||
if (mtaskp->packNextp()) {
|
if (state.nextp) {
|
||||||
uint32_t successorEndTime = completionTime(mtaskp->packNextp(), mtaskp->thread());
|
const uint32_t successorEndTime = completionTime(state.nextp, state.threadId);
|
||||||
if ((sandbaggedEndTime >= successorEndTime) && (successorEndTime > 1)) {
|
if ((sandbaggedEndTime >= successorEndTime) && (successorEndTime > 1)) {
|
||||||
sandbaggedEndTime = successorEndTime - 1;
|
sandbaggedEndTime = successorEndTime - 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
UINFO(6, "Sandbagged end time for " << mtaskp->name() << " on th " << thread << " = "
|
UINFO(6, "Sandbagged end time for " << mtaskp->name() << " on th " << threadId << " = "
|
||||||
<< sandbaggedEndTime << endl);
|
<< sandbaggedEndTime << endl);
|
||||||
return sandbaggedEndTime;
|
return sandbaggedEndTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
void setCompletionTime(ExecMTask* mtaskp, uint32_t time) {
|
bool isReady(const ExecMTask* mtaskp) {
|
||||||
MTaskState& state = m_mtaskState[mtaskp];
|
for (V3GraphEdge* edgeInp = mtaskp->inBeginp(); edgeInp; edgeInp = edgeInp->inNextp()) {
|
||||||
state.completionTime = time;
|
const ExecMTask* const prevp = dynamic_cast<ExecMTask*>(edgeInp->fromp());
|
||||||
|
if (m_mtaskState[prevp].threadId == UNASSIGNED) {
|
||||||
|
// This predecessor is not assigned yet
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void go() {
|
public:
|
||||||
|
// Pack all MTasks from the given graph into m_nThreads threads, return the schedule.
|
||||||
|
const ThreadSchedule pack(const V3Graph& mtaskGraph) {
|
||||||
|
// The result
|
||||||
|
ThreadSchedule schedule(m_nThreads);
|
||||||
|
|
||||||
|
// Time each thread is occupied until
|
||||||
|
std::vector<uint32_t> busyUntil(m_nThreads, 0);
|
||||||
|
|
||||||
|
// MTasks ready to be assigned next. All their dependencies are already assigned.
|
||||||
|
std::set<const ExecMTask*, MTaskCmp> readyMTasks;
|
||||||
|
|
||||||
// Build initial ready list
|
// Build initial ready list
|
||||||
for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
for (V3GraphVertex* vxp = mtaskGraph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
|
||||||
ExecMTask* mtaskp = dynamic_cast<ExecMTask*>(vxp);
|
const ExecMTask* const mtaskp = dynamic_cast<ExecMTask*>(vxp);
|
||||||
if (vxp->inEmpty()) m_ready.insert(mtaskp);
|
if (isReady(mtaskp)) readyMTasks.insert(mtaskp);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_prevMTask.clear();
|
// Clear algorithm state
|
||||||
m_prevMTask.resize(m_nThreads);
|
m_mtaskState.clear();
|
||||||
m_busyUntil.clear();
|
|
||||||
m_busyUntil.resize(m_nThreads);
|
|
||||||
|
|
||||||
while (!m_ready.empty()) {
|
while (!readyMTasks.empty()) {
|
||||||
// For each task in the ready set, compute when it might start
|
// For each task in the ready set, compute when it might start
|
||||||
// on each thread (in that thread's local time frame.)
|
// on each thread (in that thread's local time frame.)
|
||||||
uint32_t bestTime = 0xffffffff;
|
uint32_t bestTime = 0xffffffff;
|
||||||
uint32_t bestTh = 0;
|
uint32_t bestThreadId = 0;
|
||||||
ExecMTask* bestMtaskp = nullptr;
|
const ExecMTask* bestMtaskp = nullptr; // Todo: const ExecMTask*
|
||||||
for (uint32_t th = 0; th < m_nThreads; ++th) {
|
for (uint32_t threadId = 0; threadId < m_nThreads; ++threadId) {
|
||||||
for (ReadyMTasks::iterator taskIt = m_ready.begin(); taskIt != m_ready.end();
|
for (const ExecMTask* const mtaskp : readyMTasks) {
|
||||||
++taskIt) {
|
uint32_t timeBegin = busyUntil[threadId];
|
||||||
uint32_t timeBegin = m_busyUntil[th];
|
|
||||||
if (timeBegin > bestTime) {
|
if (timeBegin > bestTime) {
|
||||||
UINFO(6, "th " << th << " busy until " << timeBegin
|
UINFO(6, "th " << threadId << " busy until " << timeBegin
|
||||||
<< ", later than bestTime " << bestTime
|
<< ", later than bestTime " << bestTime
|
||||||
<< ", skipping thread.\n");
|
<< ", skipping thread.\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ExecMTask* taskp = *taskIt;
|
for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep;
|
||||||
for (V3GraphEdge* edgep = taskp->inBeginp(); edgep; edgep = edgep->inNextp()) {
|
edgep = edgep->inNextp()) {
|
||||||
ExecMTask* priorp = dynamic_cast<ExecMTask*>(edgep->fromp());
|
const ExecMTask* const priorp = dynamic_cast<ExecMTask*>(edgep->fromp());
|
||||||
uint32_t priorEndTime = completionTime(priorp, th);
|
const uint32_t priorEndTime = completionTime(priorp, threadId);
|
||||||
if (priorEndTime > timeBegin) timeBegin = priorEndTime;
|
if (priorEndTime > timeBegin) timeBegin = priorEndTime;
|
||||||
}
|
}
|
||||||
UINFO(6, "Task " << taskp->name() << " start at " << timeBegin << " on thread "
|
UINFO(6, "Task " << mtaskp->name() << " start at " << timeBegin
|
||||||
<< th << endl);
|
<< " on thread " << threadId << endl);
|
||||||
if ((timeBegin < bestTime)
|
if ((timeBegin < bestTime)
|
||||||
|| ((timeBegin == bestTime)
|
|| ((timeBegin == bestTime)
|
||||||
&& bestMtaskp // Redundant, but appeases static analysis tools
|
&& bestMtaskp // Redundant, but appeases static analysis tools
|
||||||
&& (taskp->priority() > bestMtaskp->priority()))) {
|
&& (mtaskp->priority() > bestMtaskp->priority()))) {
|
||||||
bestTime = timeBegin;
|
bestTime = timeBegin;
|
||||||
bestTh = th;
|
bestThreadId = threadId;
|
||||||
bestMtaskp = taskp;
|
bestMtaskp = mtaskp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!bestMtaskp) v3fatalSrc("Should have found some task");
|
UASSERT(bestMtaskp, "Should have found some task");
|
||||||
UINFO(6, "Will schedule " << bestMtaskp->name() << " onto thread " << bestTh << endl);
|
UINFO(6, "Will schedule " << bestMtaskp->name() << " onto thread " << bestThreadId
|
||||||
uint32_t bestEndTime = bestTime + bestMtaskp->cost();
|
<< endl);
|
||||||
setCompletionTime(bestMtaskp, bestEndTime);
|
|
||||||
|
// Reference to thread in schedule we are assigning this MTask to.
|
||||||
|
std::vector<const ExecMTask*>& bestThread = schedule.threads[bestThreadId];
|
||||||
|
|
||||||
|
// Update algorithm state
|
||||||
|
const uint32_t bestEndTime = bestTime + bestMtaskp->cost();
|
||||||
|
m_mtaskState[bestMtaskp].completionTime = bestEndTime;
|
||||||
|
m_mtaskState[bestMtaskp].threadId = bestThreadId;
|
||||||
|
if (!bestThread.empty()) { m_mtaskState[bestThread.back()].nextp = bestMtaskp; }
|
||||||
|
busyUntil[bestThreadId] = bestEndTime;
|
||||||
|
|
||||||
|
// Add the MTask to the schedule
|
||||||
|
bestThread.push_back(bestMtaskp);
|
||||||
|
schedule.threadId[bestMtaskp] = bestThreadId;
|
||||||
|
|
||||||
// Update the ready list
|
// Update the ready list
|
||||||
size_t erased = m_ready.erase(bestMtaskp);
|
const size_t erased = readyMTasks.erase(bestMtaskp);
|
||||||
UASSERT_OBJ(erased > 0, bestMtaskp, "Should have erased something?");
|
UASSERT_OBJ(erased > 0, bestMtaskp, "Should have erased something?");
|
||||||
for (V3GraphEdge* edgeOutp = bestMtaskp->outBeginp(); edgeOutp;
|
for (V3GraphEdge* edgeOutp = bestMtaskp->outBeginp(); edgeOutp;
|
||||||
edgeOutp = edgeOutp->outNextp()) {
|
edgeOutp = edgeOutp->outNextp()) {
|
||||||
ExecMTask* nextp = dynamic_cast<ExecMTask*>(edgeOutp->top());
|
const ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgeOutp->top());
|
||||||
|
// Dependent MTask should not yet be assigned to a thread
|
||||||
UASSERT(nextp->thread() == 0xffffffff,
|
UASSERT(m_mtaskState[nextp].threadId == UNASSIGNED,
|
||||||
"Tasks after one being assigned should not be assigned yet");
|
"Tasks after one being assigned should not be assigned yet");
|
||||||
// They also should not be ready yet, since they only now
|
// Dependent MTask should not be ready yet, since dependency is just being assigned
|
||||||
// may become ready
|
UASSERT_OBJ(readyMTasks.find(nextp) == readyMTasks.end(), nextp,
|
||||||
UASSERT_OBJ(m_ready.find(nextp) == m_ready.end(), nextp,
|
|
||||||
"Tasks after one being assigned should not be ready");
|
"Tasks after one being assigned should not be ready");
|
||||||
bool isReady = true;
|
if (isReady(nextp)) {
|
||||||
for (V3GraphEdge* edgeInp = nextp->inBeginp(); edgeInp;
|
readyMTasks.insert(nextp);
|
||||||
edgeInp = edgeInp->inNextp()) {
|
|
||||||
ExecMTask* priorp = dynamic_cast<ExecMTask*>(edgeInp->fromp());
|
|
||||||
if (priorp == bestMtaskp) continue;
|
|
||||||
if (priorp->thread() == 0xffffffff) {
|
|
||||||
// This prior is not assigned yet
|
|
||||||
isReady = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (isReady) {
|
|
||||||
m_ready.insert(nextp);
|
|
||||||
UINFO(6, "Inserted " << nextp->name() << " into ready\n");
|
UINFO(6, "Inserted " << nextp->name() << " into ready\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the ExecMTask itself
|
|
||||||
if (m_prevMTask[bestTh]) {
|
|
||||||
m_prevMTask[bestTh]->packNextp(bestMtaskp);
|
|
||||||
UINFO(6, "Packing " << bestMtaskp->name() << " after "
|
|
||||||
<< m_prevMTask[bestTh]->name() << endl);
|
|
||||||
} else {
|
|
||||||
UINFO(6, "Marking " << bestMtaskp->name() << " as thread root\n");
|
|
||||||
bestMtaskp->threadRoot(true);
|
|
||||||
}
|
|
||||||
bestMtaskp->thread(bestTh);
|
|
||||||
|
|
||||||
// Update the thread state
|
|
||||||
m_prevMTask[bestTh] = bestMtaskp;
|
|
||||||
m_busyUntil[bestTh] = bestEndTime;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return schedule;
|
||||||
}
|
}
|
||||||
|
|
||||||
// SELF TEST
|
// SELF TEST
|
||||||
static void selfTest() {
|
static void selfTest() {
|
||||||
V3Graph graph;
|
V3Graph graph;
|
||||||
ExecMTask* t0 = new ExecMTask(&graph, nullptr, 0);
|
ExecMTask* const t0 = new ExecMTask(&graph, nullptr, 0);
|
||||||
t0->cost(1000);
|
t0->cost(1000);
|
||||||
t0->priority(1100);
|
t0->priority(1100);
|
||||||
ExecMTask* t1 = new ExecMTask(&graph, nullptr, 1);
|
ExecMTask* const t1 = new ExecMTask(&graph, nullptr, 1);
|
||||||
t1->cost(100);
|
t1->cost(100);
|
||||||
t1->priority(100);
|
t1->priority(100);
|
||||||
ExecMTask* t2 = new ExecMTask(&graph, nullptr, 2);
|
ExecMTask* const t2 = new ExecMTask(&graph, nullptr, 2);
|
||||||
t2->cost(100);
|
t2->cost(100);
|
||||||
t2->priority(100);
|
t2->priority(100);
|
||||||
|
|
||||||
new V3GraphEdge(&graph, t0, t1, 1);
|
new V3GraphEdge(&graph, t0, t1, 1);
|
||||||
new V3GraphEdge(&graph, t0, t2, 1);
|
new V3GraphEdge(&graph, t0, t2, 1);
|
||||||
|
|
||||||
PartPackMTasks packer(&graph,
|
PartPackMTasks packer(2, // Threads
|
||||||
2, // Threads
|
|
||||||
3, // Sandbag numerator
|
3, // Sandbag numerator
|
||||||
10); // Sandbag denom
|
10); // Sandbag denom
|
||||||
packer.go();
|
const ThreadSchedule& schedule = packer.pack(graph);
|
||||||
|
|
||||||
UASSERT_SELFTEST(bool, t0->threadRoot(), true);
|
UASSERT_SELFTEST(size_t, schedule.threads.size(), 2);
|
||||||
UASSERT_SELFTEST(uint32_t, t0->thread(), 0);
|
|
||||||
UASSERT_SELFTEST(const void*, t0->packNextp(), t1);
|
|
||||||
|
|
||||||
UASSERT_SELFTEST(uint32_t, t1->thread(), 0);
|
UASSERT_SELFTEST(size_t, schedule.threads[0].size(), 2);
|
||||||
UASSERT_SELFTEST(bool, t1->threadRoot(), false);
|
UASSERT_SELFTEST(size_t, schedule.threads[1].size(), 1);
|
||||||
UASSERT_SELFTEST(const void*, t1->packNextp(), nullptr);
|
|
||||||
|
|
||||||
UASSERT_SELFTEST(uint32_t, t2->thread(), 1);
|
UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][0], t0);
|
||||||
UASSERT_SELFTEST(bool, t2->threadRoot(), true);
|
UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][1], t1);
|
||||||
UASSERT_SELFTEST(const void*, t2->packNextp(), nullptr);
|
UASSERT_SELFTEST(const ExecMTask*, schedule.threads[1][0], t2);
|
||||||
|
|
||||||
|
UASSERT_SELFTEST(size_t, schedule.threadId.size(), 3);
|
||||||
|
|
||||||
|
UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t0), 0);
|
||||||
|
UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t1), 0);
|
||||||
|
UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t2), 1);
|
||||||
|
|
||||||
// On its native thread, we see the actual end time for t0:
|
// On its native thread, we see the actual end time for t0:
|
||||||
UASSERT_SELFTEST(uint32_t, packer.completionTime(t0, 0), 1000);
|
UASSERT_SELFTEST(uint32_t, packer.completionTime(t0, 0), 1000);
|
||||||
|
|
@ -2478,7 +2522,7 @@ void V3Partition::go(V3Graph* mtasksp) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void V3Partition::finalizeCosts(V3Graph* execMTaskGraphp) {
|
static void finalizeCosts(V3Graph* execMTaskGraphp) {
|
||||||
GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE);
|
GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE);
|
||||||
|
|
||||||
while (const V3GraphVertex* vxp = ser.nextp()) {
|
while (const V3GraphVertex* vxp = ser.nextp()) {
|
||||||
|
|
@ -2544,9 +2588,170 @@ void V3Partition::finalizeCosts(V3Graph* execMTaskGraphp) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId,
|
||||||
|
AstCFunc* funcp, const ExecMTask* mtaskp) {
|
||||||
|
AstNodeModule* const modp = v3Global.rootp()->topModulep();
|
||||||
|
FileLine* const fl = modp->fileline();
|
||||||
|
|
||||||
|
// Helper function to make the code a bit more legible
|
||||||
|
const auto addStrStmt = [=](const string& stmt) -> void { //
|
||||||
|
funcp->addStmtsp(new AstCStmt(fl, stmt));
|
||||||
|
};
|
||||||
|
|
||||||
|
if (const uint32_t nDependencies = schedule.crossThreadDependencies(mtaskp)) {
|
||||||
|
// This mtask has dependencies executed on another thread, so it may block. Create the task
|
||||||
|
// state variable and wait to be notified.
|
||||||
|
const string name = "__Vm_mtaskstate_" + cvtToStr(mtaskp->id());
|
||||||
|
AstBasicDType* const mtaskStateDtypep
|
||||||
|
= v3Global.rootp()->typeTablep()->findBasicDType(fl, AstBasicDTypeKwd::MTASKSTATE);
|
||||||
|
AstVar* const varp = new AstVar(fl, AstVarType::MODULETEMP, name, mtaskStateDtypep);
|
||||||
|
varp->valuep(new AstConst(fl, nDependencies));
|
||||||
|
varp->protect(false); // Do not protect as we still have references in AstText
|
||||||
|
modp->addStmtp(varp);
|
||||||
|
// For now, reference is still via text bashing
|
||||||
|
addStrStmt("vlSelf->" + name + +".waitUntilUpstreamDone(even_cycle);\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
string recName;
|
||||||
|
if (v3Global.opt.profThreads()) {
|
||||||
|
recName = "__Vprfthr_" + cvtToStr(mtaskp->id());
|
||||||
|
addStrStmt("VlProfileRec* " + recName + " = nullptr;\n");
|
||||||
|
// Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
|
||||||
|
addStrStmt("if (VL_UNLIKELY(vlSelf->__Vm_profile_cycle_start)) {\n" + //
|
||||||
|
recName + " = vlSelf->__Vm_threadPoolp->profileAppend();\n" + //
|
||||||
|
recName + "->startRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start," + //
|
||||||
|
" " + cvtToStr(mtaskp->id()) + "," + //
|
||||||
|
" " + cvtToStr(mtaskp->cost()) + ");\n" + //
|
||||||
|
"}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
addStrStmt("Verilated::mtaskId(" + cvtToStr(mtaskp->id()) + ");\n");
|
||||||
|
|
||||||
|
// Move the actual body of calls to leaf functions into this function
|
||||||
|
funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack());
|
||||||
|
|
||||||
|
if (v3Global.opt.profThreads()) {
|
||||||
|
// Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
|
||||||
|
addStrStmt("if (VL_UNLIKELY(" + recName + ")) {\n" + //
|
||||||
|
recName + "->endRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start);\n"
|
||||||
|
+ "}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush message queue
|
||||||
|
addStrStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n");
|
||||||
|
|
||||||
|
// For any dependent mtask that's on another thread, signal one dependency completion.
|
||||||
|
for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
|
||||||
|
const ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgep->top());
|
||||||
|
if (schedule.threadId.at(nextp) != threadId) {
|
||||||
|
addStrStmt("vlSelf->__Vm_mtaskstate_" + cvtToStr(nextp->id())
|
||||||
|
+ ".signalUpstreamDone(even_cycle);\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule) {
|
||||||
|
AstNodeModule* const modp = v3Global.rootp()->topModulep();
|
||||||
|
FileLine* const fl = modp->fileline();
|
||||||
|
|
||||||
|
std::vector<AstCFunc*> funcps;
|
||||||
|
|
||||||
|
// For each thread, create a function representing its entry point
|
||||||
|
for (const std::vector<const ExecMTask*>& thread : schedule.threads) {
|
||||||
|
if (thread.empty()) continue;
|
||||||
|
const uint32_t threadId = schedule.threadId.at(thread.front());
|
||||||
|
string name = "__Vthread_";
|
||||||
|
name += cvtToStr(threadId);
|
||||||
|
AstCFunc* const funcp = new AstCFunc(fl, name, nullptr, "void");
|
||||||
|
modp->addStmtp(funcp);
|
||||||
|
funcps.push_back(funcp);
|
||||||
|
funcp->isStatic(true); // Uses void self pointer, so static and hand rolled
|
||||||
|
funcp->isLoose(true);
|
||||||
|
funcp->entryPoint(true);
|
||||||
|
funcp->argTypes("void* voidSelf, bool even_cycle");
|
||||||
|
|
||||||
|
// Setup vlSelf and vlSyms
|
||||||
|
funcp->addStmtsp(new AstCStmt(fl, EmitCBaseVisitor::voidSelfAssign()));
|
||||||
|
funcp->addStmtsp(new AstCStmt(fl, EmitCBaseVisitor::symClassAssign()));
|
||||||
|
|
||||||
|
// Invoke each mtask scheduled to this thread from the thread function
|
||||||
|
for (const ExecMTask* const mtaskp : thread) {
|
||||||
|
addMTaskToFunction(schedule, threadId, funcp, mtaskp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unblock the fake "final" mtask when this thread is finished
|
||||||
|
funcp->addStmtsp(
|
||||||
|
new AstCStmt(fl, "vlSelf->__Vm_mtaskstate_final.signalUpstreamDone(even_cycle);\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the fake "final" mtask state variable
|
||||||
|
AstBasicDType* const mtaskStateDtypep
|
||||||
|
= v3Global.rootp()->typeTablep()->findBasicDType(fl, AstBasicDTypeKwd::MTASKSTATE);
|
||||||
|
AstVar* const varp
|
||||||
|
= new AstVar(fl, AstVarType::MODULETEMP, "__Vm_mtaskstate_final", mtaskStateDtypep);
|
||||||
|
varp->valuep(new AstConst(fl, funcps.size()));
|
||||||
|
varp->protect(false); // Do not protect as we still have references in AstText
|
||||||
|
modp->addStmtp(varp);
|
||||||
|
|
||||||
|
return funcps;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void addThreadStartToExecGraph(AstExecGraph* const execGraphp,
|
||||||
|
const std::vector<AstCFunc*>& funcps) {
|
||||||
|
// FileLine used for constructing nodes below
|
||||||
|
FileLine* const fl = v3Global.rootp()->fileline();
|
||||||
|
|
||||||
|
// Add thread function invocations to execGraph
|
||||||
|
const auto addStrStmt = [=](const string& stmt) -> void { //
|
||||||
|
execGraphp->addStmtsp(new AstCStmt(fl, stmt));
|
||||||
|
};
|
||||||
|
const auto addTextStmt = [=](const string& text) -> void {
|
||||||
|
execGraphp->addStmtsp(new AstText(fl, text, /* tracking: */ true));
|
||||||
|
};
|
||||||
|
|
||||||
|
addStrStmt("vlSelf->__Vm_even_cycle = !vlSelf->__Vm_even_cycle;\n");
|
||||||
|
|
||||||
|
const uint32_t last = funcps.size() - 1;
|
||||||
|
for (uint32_t i = 0; i <= last; ++i) {
|
||||||
|
AstCFunc* const funcp = funcps.at(i);
|
||||||
|
if (i != last) {
|
||||||
|
// The first N-1 will run on the thread pool.
|
||||||
|
addTextStmt("vlSelf->__Vm_threadPoolp->workerp(" + cvtToStr(i) + ")->addTask(");
|
||||||
|
execGraphp->addStmtsp(new AstAddrOfCFunc(fl, funcp));
|
||||||
|
addTextStmt(", vlSelf, vlSelf->__Vm_even_cycle);\n");
|
||||||
|
} else {
|
||||||
|
// The last will run on the main thread.
|
||||||
|
AstCCall* const callp = new AstCCall(fl, funcp);
|
||||||
|
callp->argTypes("vlSelf, vlSelf->__Vm_even_cycle");
|
||||||
|
execGraphp->addStmtsp(callp);
|
||||||
|
addStrStmt("Verilated::mtaskId(0);\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
addStrStmt("vlSelf->__Vm_mtaskstate_final.waitUntilUpstreamDone(vlSelf->__Vm_even_cycle);\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void implementExecGraph(AstExecGraph* const execGraphp) {
|
||||||
|
// Nothing to be done if there are no MTasks in the graph at all.
|
||||||
|
if (execGraphp->depGraphp()->empty()) return;
|
||||||
|
|
||||||
|
// Schedule the mtasks: statically associate each mtask with a thread,
|
||||||
|
// and determine the order in which each thread will run its mtasks.
|
||||||
|
const ThreadSchedule& schedule = PartPackMTasks().pack(*execGraphp->mutableDepGraphp());
|
||||||
|
|
||||||
|
// Create a function to be run by each thread. Note this moves all AstMTaskBody nodes from the
|
||||||
|
// AstExecGraph into the AstCFunc created
|
||||||
|
const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule);
|
||||||
|
UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?");
|
||||||
|
|
||||||
|
// Start the thread functions at the point this AstExecGraph is located in the tree.
|
||||||
|
addThreadStartToExecGraph(execGraphp, funcps);
|
||||||
|
}
|
||||||
|
|
||||||
void V3Partition::finalize() {
|
void V3Partition::finalize() {
|
||||||
// Called by Verilator top stage
|
// Called by Verilator top stage
|
||||||
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
|
AstExecGraph* const execGraphp = v3Global.rootp()->execGraphp();
|
||||||
UASSERT(execGraphp, "Couldn't find AstExecGraph singleton.");
|
UASSERT(execGraphp, "Couldn't find AstExecGraph singleton.");
|
||||||
|
|
||||||
// Back in V3Order, we partitioned mtasks using provisional cost
|
// Back in V3Order, we partitioned mtasks using provisional cost
|
||||||
|
|
@ -2556,9 +2761,8 @@ void V3Partition::finalize() {
|
||||||
// ExecMTask.
|
// ExecMTask.
|
||||||
finalizeCosts(execGraphp->mutableDepGraphp());
|
finalizeCosts(execGraphp->mutableDepGraphp());
|
||||||
|
|
||||||
// "Pack" the mtasks: statically associate each mtask with a thread,
|
// Replace the graph body with its multi-threaded implementation.
|
||||||
// and determine the order in which each thread will runs its mtasks.
|
implementExecGraph(execGraphp);
|
||||||
PartPackMTasks(execGraphp->mutableDepGraphp()).go();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void V3Partition::selfTest() {
|
void V3Partition::selfTest() {
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,6 @@ public:
|
||||||
static void finalize();
|
static void finalize();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static void finalizeCosts(V3Graph* execMTaskGraphp);
|
|
||||||
static void setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp);
|
static void setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp);
|
||||||
|
|
||||||
VL_DEBUG_FUNC; // Declare debug()
|
VL_DEBUG_FUNC; // Declare debug()
|
||||||
|
|
|
||||||
|
|
@ -54,17 +54,13 @@ public:
|
||||||
|
|
||||||
class ExecMTask final : public AbstractMTask {
|
class ExecMTask final : public AbstractMTask {
|
||||||
private:
|
private:
|
||||||
AstMTaskBody* m_bodyp; // Task body
|
AstMTaskBody* const m_bodyp; // Task body
|
||||||
uint32_t m_id; // Unique id of this mtask.
|
const uint32_t m_id; // Unique id of this mtask.
|
||||||
uint32_t m_priority = 0; // Predicted critical path from the start of
|
uint32_t m_priority = 0; // Predicted critical path from the start of
|
||||||
// this mtask to the ends of the graph that are reachable from this
|
// this mtask to the ends of the graph that are reachable from this
|
||||||
// mtask. In abstract time units.
|
// mtask. In abstract time units.
|
||||||
uint32_t m_cost = 0; // Predicted runtime of this mtask, in the same
|
uint32_t m_cost = 0; // Predicted runtime of this mtask, in the same
|
||||||
// abstract time units as priority().
|
// abstract time units as priority().
|
||||||
uint32_t m_thread = 0xffffffff; // Thread for static (pack_mtasks) scheduling,
|
|
||||||
// or 0xffffffff if not yet assigned.
|
|
||||||
const ExecMTask* m_packNextp = nullptr; // Next for static (pack_mtasks) scheduling
|
|
||||||
bool m_threadRoot = false; // Is root thread
|
|
||||||
VL_UNCOPYABLE(ExecMTask);
|
VL_UNCOPYABLE(ExecMTask);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
@ -78,12 +74,6 @@ public:
|
||||||
void priority(uint32_t pri) { m_priority = pri; }
|
void priority(uint32_t pri) { m_priority = pri; }
|
||||||
virtual uint32_t cost() const override { return m_cost; }
|
virtual uint32_t cost() const override { return m_cost; }
|
||||||
void cost(uint32_t cost) { m_cost = cost; }
|
void cost(uint32_t cost) { m_cost = cost; }
|
||||||
void thread(uint32_t thread) { m_thread = thread; }
|
|
||||||
uint32_t thread() const { return m_thread; }
|
|
||||||
void packNextp(const ExecMTask* nextp) { m_packNextp = nextp; }
|
|
||||||
const ExecMTask* packNextp() const { return m_packNextp; }
|
|
||||||
bool threadRoot() const { return m_threadRoot; }
|
|
||||||
void threadRoot(bool threadRoot) { m_threadRoot = threadRoot; }
|
|
||||||
string cFuncName() const {
|
string cFuncName() const {
|
||||||
// If this MTask maps to a C function, this should be the name
|
// If this MTask maps to a C function, this should be the name
|
||||||
return string("__Vmtask") + "__" + cvtToStr(m_id);
|
return string("__Vmtask") + "__" + cvtToStr(m_id);
|
||||||
|
|
@ -92,9 +82,6 @@ public:
|
||||||
void dump(std::ostream& str) const {
|
void dump(std::ostream& str) const {
|
||||||
str << name() << "." << cvtToHex(this);
|
str << name() << "." << cvtToHex(this);
|
||||||
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
|
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
|
||||||
if (thread() != 0xffffffff) str << " th=" << thread();
|
|
||||||
if (threadRoot()) str << " [ROOT]";
|
|
||||||
if (packNextp()) str << " nx=" << packNextp()->name();
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) {
|
inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) {
|
||||||
|
|
|
||||||
|
|
@ -490,6 +490,14 @@ static void process() {
|
||||||
V3CCtors::cctorsAll();
|
V3CCtors::cctorsAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!v3Global.opt.xmlOnly() && v3Global.opt.mtasks()) {
|
||||||
|
// Finalize our MTask cost estimates and pack the mtasks into
|
||||||
|
// threads. Must happen pre-EmitC which relies on the packing
|
||||||
|
// order. Must happen post-V3LifePost which changes the relative
|
||||||
|
// costs of mtasks.
|
||||||
|
V3Partition::finalize();
|
||||||
|
}
|
||||||
|
|
||||||
// Output the text
|
// Output the text
|
||||||
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && !v3Global.opt.dpiHdrOnly()) {
|
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && !v3Global.opt.dpiHdrOnly()) {
|
||||||
// Create AstCUse to determine what class forward declarations/#includes needed in C
|
// Create AstCUse to determine what class forward declarations/#includes needed in C
|
||||||
|
|
@ -504,13 +512,6 @@ static void process() {
|
||||||
} else if (v3Global.opt.dpiHdrOnly()) {
|
} else if (v3Global.opt.dpiHdrOnly()) {
|
||||||
V3EmitC::emitcSyms(true);
|
V3EmitC::emitcSyms(true);
|
||||||
}
|
}
|
||||||
if (!v3Global.opt.xmlOnly() && v3Global.opt.mtasks()) {
|
|
||||||
// Finalize our MTask cost estimates and pack the mtasks into
|
|
||||||
// threads. Must happen pre-EmitC which relies on the packing
|
|
||||||
// order. Must happen post-V3LifePost which changes the relative
|
|
||||||
// costs of mtasks.
|
|
||||||
V3Partition::finalize();
|
|
||||||
}
|
|
||||||
if (!v3Global.opt.xmlOnly()
|
if (!v3Global.opt.xmlOnly()
|
||||||
&& !v3Global.opt.dpiHdrOnly()) { // Unfortunately we have some lint checks in emitc.
|
&& !v3Global.opt.dpiHdrOnly()) { // Unfortunately we have some lint checks in emitc.
|
||||||
V3EmitC::emitc();
|
V3EmitC::emitc();
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ if ($Self->{vlt_all}) {
|
||||||
$text =~ s/vlSelf->.* = VL_RAND_RESET.*;//g;
|
$text =~ s/vlSelf->.* = VL_RAND_RESET.*;//g;
|
||||||
$text =~ s/vlSelf->__Vm_even_cycle//g;
|
$text =~ s/vlSelf->__Vm_even_cycle//g;
|
||||||
$text =~ s/vlSelf->__Vm_even_cycle//g;
|
$text =~ s/vlSelf->__Vm_even_cycle//g;
|
||||||
$text =~ s/vlSelf->__Vm_mt_(final|\d+)//g;
|
$text =~ s/vlSelf->__Vm_mtaskstate_(final|\d+)//g;
|
||||||
$text =~ s/vlSelf->__Vm_threadPoolp//g;
|
$text =~ s/vlSelf->__Vm_threadPoolp//g;
|
||||||
if ($text =~ m/this->/ || $text =~ m/vlSelf->/) {
|
if ($text =~ m/this->/ || $text =~ m/vlSelf->/) {
|
||||||
error("$file has unexpected this-> refs when --norelative-cfuncs");
|
error("$file has unexpected this-> refs when --norelative-cfuncs");
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,9 @@ compile(
|
||||||
);
|
);
|
||||||
|
|
||||||
if ($Self->{vlt_all}) {
|
if ($Self->{vlt_all}) {
|
||||||
file_grep("$out_filename", qr/\<var fl="e70" loc=".*?" name="u.u0.u0.z0" dtype_id="3" vartype="logic" origName="z0" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i);
|
file_grep("$out_filename", qr/\<var fl="e70" loc=".*?" name="u.u0.u0.z0" dtype_id="\d+" vartype="logic" origName="z0" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i);
|
||||||
file_grep("$out_filename", qr/\<var fl="e85" loc=".*?" name="u.u0.u0.u0.u0.z1" dtype_id="3" vartype="logic" origName="z1" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i);
|
file_grep("$out_filename", qr/\<var fl="e85" loc=".*?" name="u.u0.u0.u0.u0.z1" dtype_id="\d+" vartype="logic" origName="z1" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i);
|
||||||
file_grep("$out_filename", qr/\<var fl="e83" loc=".*?" name="u.u0.u1.u0.u0.z" dtype_id="3" vartype="logic" origName="z" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i);
|
file_grep("$out_filename", qr/\<var fl="e83" loc=".*?" name="u.u0.u1.u0.u0.z" dtype_id="\d+" vartype="logic" origName="z" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i);
|
||||||
}
|
}
|
||||||
|
|
||||||
execute(
|
execute(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue