Construct AstExecGraph implementation outside of V3EmitC. (#3022)

The goal of this patch is to move functionality related to constructing
the thread entry points and then invoking them out of V3EmitC (and into
V3Partition). The long-term goal is to enable V3EmitC to emit
functions partitioned based on header dependencies. Having V3EmitC
deal only with AstCFunc instances and no other magic will facilitate
this.

In this patch:
- We construct AstCFuncs for each thread entry point in
V3Partition::finalize and move AstMTaskBody nodes under these functions.
- Add the invocation of the threads as text statements within the
AstExecGraph, so they are still invoked where the exec graph is located
(the entry point functions are still referenced via AstCCall or
AstAddrOfCFunc, so lazy declarations of referenced functions are created
automatically).
- Explicitly handle MTask state variables (VlMTaskVertex in
verilated_threads.h) within Verilator, so there is no need to text bash a
lot of these any more (some text refs still remain, but they are all created
next to each other within V3Partition.cpp).

The effect of all this on the emitted code should be nothing but some
identifier/ordering changes. No functional change intended.
This commit is contained in:
Geza Lore 2021-06-16 12:18:56 +01:00 committed by GitHub
parent 65bfb4e5ff
commit a8f83d5758
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 400 additions and 371 deletions

View File

@ -457,6 +457,7 @@ public:
// Internal types for mid-steps // Internal types for mid-steps
SCOPEPTR, SCOPEPTR,
CHARPTR, CHARPTR,
MTASKSTATE,
// Unsigned and two state; fundamental types // Unsigned and two state; fundamental types
UINT32, UINT32,
UINT64, UINT64,
@ -467,18 +468,19 @@ public:
}; };
enum en m_e; enum en m_e;
const char* ascii() const { const char* ascii() const {
static const char* const names[] = { static const char* const names[]
"%E-unk", "bit", "byte", "chandle", "event", "int", "integer", = {"%E-unk", "bit", "byte", "chandle", "event",
"logic", "longint", "real", "shortint", "time", "string", "VerilatedScope*", "int", "integer", "logic", "longint", "real",
"char*", "IData", "QData", "LOGIC_IMPLICIT", " MAX"}; "shortint", "time", "string", "VerilatedScope*", "char*",
"VlMTaskState", "IData", "QData", "LOGIC_IMPLICIT", " MAX"};
return names[m_e]; return names[m_e];
} }
const char* dpiType() const { const char* dpiType() const {
static const char* const names[] static const char* const names[]
= {"%E-unk", "svBit", "char", "void*", "char", "int", = {"%E-unk", "svBit", "char", "void*", "char",
"%E-integer", "svLogic", "long long", "double", "short", "%E-time", "int", "%E-integer", "svLogic", "long long", "double",
"const char*", "dpiScope", "const char*", "IData", "QData", "%E-logic-implicit", "short", "%E-time", "const char*", "dpiScope", "const char*",
" MAX"}; "%E-mtaskstate", "IData", "QData", "%E-logic-implct", " MAX"};
return names[m_e]; return names[m_e];
} }
static void selfTest() { static void selfTest() {
@ -511,6 +513,7 @@ public:
case STRING: return 64; // opaque // Just the pointer, for today case STRING: return 64; // opaque // Just the pointer, for today
case SCOPEPTR: return 0; // opaque case SCOPEPTR: return 0; // opaque
case CHARPTR: return 0; // opaque case CHARPTR: return 0; // opaque
case MTASKSTATE: return 0; // opaque
case UINT32: return 32; case UINT32: return 32;
case UINT64: return 64; case UINT64: return 64;
default: return 0; default: return 0;
@ -549,11 +552,13 @@ public:
|| m_e == DOUBLE || m_e == SHORTINT || m_e == UINT32 || m_e == UINT64); || m_e == DOUBLE || m_e == SHORTINT || m_e == UINT32 || m_e == UINT64);
} }
bool isOpaque() const { // IE not a simple number we can bit optimize bool isOpaque() const { // IE not a simple number we can bit optimize
return (m_e == STRING || m_e == SCOPEPTR || m_e == CHARPTR || m_e == DOUBLE); return (m_e == STRING || m_e == SCOPEPTR || m_e == CHARPTR || m_e == MTASKSTATE
|| m_e == DOUBLE);
} }
bool isDouble() const { return m_e == DOUBLE; } bool isDouble() const { return m_e == DOUBLE; }
bool isEventValue() const { return m_e == EVENTVALUE; } bool isEventValue() const { return m_e == EVENTVALUE; }
bool isString() const { return m_e == STRING; } bool isString() const { return m_e == STRING; }
bool isMTaskState() const { return m_e == MTASKSTATE; }
}; };
inline bool operator==(const AstBasicDTypeKwd& lhs, const AstBasicDTypeKwd& rhs) { inline bool operator==(const AstBasicDTypeKwd& lhs, const AstBasicDTypeKwd& rhs) {
return lhs.m_e == rhs.m_e; return lhs.m_e == rhs.m_e;

View File

@ -238,26 +238,11 @@ AstNodeBiop* AstEqWild::newTyped(FileLine* fl, AstNode* lhsp, AstNode* rhsp) {
} }
AstExecGraph::AstExecGraph(FileLine* fileline) AstExecGraph::AstExecGraph(FileLine* fileline)
: ASTGEN_SUPER_ExecGraph(fileline) { : ASTGEN_SUPER_ExecGraph(fileline)
m_depGraphp = new V3Graph; , m_depGraphp{new V3Graph} {}
}
AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); } AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); }
std::vector<const ExecMTask*> AstExecGraph::rootMTasks() {
// Build the list of initial mtasks to start
std::vector<const ExecMTask*> execMTasks;
for (const V3GraphVertex* vxp = depGraphp()->verticesBeginp(); vxp;
vxp = vxp->verticesNextp()) {
const ExecMTask* etp = dynamic_cast<const ExecMTask*>(vxp);
if (etp->threadRoot()) execMTasks.push_back(etp);
}
UASSERT_OBJ(execMTasks.size() <= static_cast<unsigned>(v3Global.opt.threads()), this,
"More root mtasks than available threads");
return execMTasks;
}
AstNode* AstInsideRange::newAndFromInside(AstNode* exprp, AstNode* lhsp, AstNode* rhsp) { AstNode* AstInsideRange::newAndFromInside(AstNode* exprp, AstNode* lhsp, AstNode* rhsp) {
AstNode* ap = new AstGte(fileline(), exprp->cloneTree(true), lhsp); AstNode* ap = new AstGte(fileline(), exprp->cloneTree(true), lhsp);
AstNode* bp = new AstLte(fileline(), exprp->cloneTree(true), rhsp); AstNode* bp = new AstLte(fileline(), exprp->cloneTree(true), rhsp);
@ -717,10 +702,12 @@ AstNodeDType::CTypeRecursed AstNodeDType::cTypeRecurse(bool compound) const {
info.m_type = "const char*"; info.m_type = "const char*";
} else if (bdtypep->keyword() == AstBasicDTypeKwd::SCOPEPTR) { } else if (bdtypep->keyword() == AstBasicDTypeKwd::SCOPEPTR) {
info.m_type = "const VerilatedScope*"; info.m_type = "const VerilatedScope*";
} else if (bdtypep->keyword() == AstBasicDTypeKwd::DOUBLE) { } else if (bdtypep->keyword().isDouble()) {
info.m_type = "double"; info.m_type = "double";
} else if (bdtypep->keyword() == AstBasicDTypeKwd::STRING) { } else if (bdtypep->keyword().isString()) {
info.m_type = "std::string"; info.m_type = "std::string";
} else if (bdtypep->keyword().isMTaskState()) {
info.m_type = "VlMTaskVertex";
} else if (dtypep->widthMin() <= 8) { // Handle unpacked arrays; not bdtypep->width } else if (dtypep->widthMin() <= 8) { // Handle unpacked arrays; not bdtypep->width
info.m_type = "CData" + bitvec; info.m_type = "CData" + bitvec;
} else if (dtypep->widthMin() <= 16) { } else if (dtypep->widthMin() <= 16) {

View File

@ -2070,7 +2070,8 @@ public:
// (Slow) recurse down to find basic data type (Note don't need virtual - // (Slow) recurse down to find basic data type (Note don't need virtual -
// AstVar isn't a NodeDType) // AstVar isn't a NodeDType)
AstBasicDType* basicp() const { return subDTypep()->basicp(); } AstBasicDType* basicp() const { return subDTypep()->basicp(); }
// op3 = Initial value that never changes (static const) // op3 = Initial value that never changes (static const), or constructor argument for
// MTASKSTATE variables
AstNode* valuep() const { return op3p(); } AstNode* valuep() const { return op3p(); }
// It's valuep(), not constp(), as may be more complicated than an AstConst // It's valuep(), not constp(), as may be more complicated than an AstConst
void valuep(AstNode* nodep) { setOp3p(nodep); } void valuep(AstNode* nodep) { setOp3p(nodep); }
@ -9045,7 +9046,8 @@ class AstExecGraph final : public AstNode {
// them without traversing the graph (it's not always needed to // them without traversing the graph (it's not always needed to
// traverse the graph.) // traverse the graph.)
private: private:
V3Graph* m_depGraphp; // contains ExecMTask's V3Graph* const m_depGraphp; // contains ExecMTask's
public: public:
explicit AstExecGraph(FileLine* fl); explicit AstExecGraph(FileLine* fl);
ASTNODE_NODE_FUNCS_NO_DTOR(ExecGraph) ASTNODE_NODE_FUNCS_NO_DTOR(ExecGraph)
@ -9057,7 +9059,7 @@ public:
const V3Graph* depGraphp() const { return m_depGraphp; } const V3Graph* depGraphp() const { return m_depGraphp; }
V3Graph* mutableDepGraphp() { return m_depGraphp; } V3Graph* mutableDepGraphp() { return m_depGraphp; }
void addMTaskBody(AstMTaskBody* bodyp) { addOp1p(bodyp); } void addMTaskBody(AstMTaskBody* bodyp) { addOp1p(bodyp); }
std::vector<const ExecMTask*> rootMTasks(); void addStmtsp(AstNode* stmtp) { addOp2p(stmtp); }
}; };
class AstSplitPlaceholder final : public AstNode { class AstSplitPlaceholder final : public AstNode {

View File

@ -1429,18 +1429,6 @@ class EmitCLazyDecls final : public AstNVisitor {
lazyDeclare(nodep->funcp()); lazyDeclare(nodep->funcp());
} }
virtual void visit(AstExecGraph* nodep) override {
if (nodep->user2SetOnce()) return; // Already declared
// Build the list of initial mtasks to start
for (const ExecMTask* mtp : nodep->rootMTasks()) {
m_emitter.puts("void ");
m_emitter.puts(EmitCBaseVisitor::topClassName() + "__"
+ EmitCBaseVisitor::protect(mtp->cFuncName()));
m_emitter.puts("(void* voidSelf, bool even_cycle);\n");
m_needsBlankLine = true;
}
}
virtual void visit(AstVarRef* nodep) override { virtual void visit(AstVarRef* nodep) override {
AstVar* const varp = nodep->varp(); AstVar* const varp = nodep->varp();
// Only constant pool symbols are lazy declared for now ... // Only constant pool symbols are lazy declared for now ...
@ -1574,93 +1562,6 @@ class EmitCImp final : EmitCStmts {
return ofp; return ofp;
} }
// Returns the number of cross-thread dependencies into mtaskp.
// If >0, mtaskp must test whether its prereqs are done before starting,
// and may need to block.
static uint32_t packedMTaskMayBlock(const ExecMTask* mtaskp) {
uint32_t result = 0;
for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) {
const ExecMTask* prevp = dynamic_cast<ExecMTask*>(edgep->fromp());
if (prevp->thread() != mtaskp->thread()) ++result;
}
return result;
}
void emitMTaskBody(AstMTaskBody* nodep) {
ExecMTask* curExecMTaskp = nodep->execMTaskp();
if (packedMTaskMayBlock(curExecMTaskp)) {
puts("vlSelf->__Vm_mt_" + cvtToStr(curExecMTaskp->id())
+ ".waitUntilUpstreamDone(even_cycle);\n");
}
string recName;
if (v3Global.opt.profThreads()) {
recName = "__Vprfthr_" + cvtToStr(curExecMTaskp->id());
puts("VlProfileRec* " + recName + " = nullptr;\n");
// Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
puts("if (VL_UNLIKELY(vlSelf->__Vm_profile_cycle_start)) {\n");
puts(recName + " = vlSelf->__Vm_threadPoolp->profileAppend();\n");
puts(recName + "->startRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start,");
puts(" " + cvtToStr(curExecMTaskp->id()) + ",");
puts(" " + cvtToStr(curExecMTaskp->cost()) + ");\n");
puts("}\n");
}
puts("Verilated::mtaskId(" + cvtToStr(curExecMTaskp->id()) + ");\n");
// The actual body of calls to leaf functions
iterateAndNextNull(nodep->stmtsp());
if (v3Global.opt.profThreads()) {
// Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
puts("if (VL_UNLIKELY(" + recName + ")) {\n");
puts(recName + "->endRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start);\n");
puts("}\n");
}
// Flush message queue
puts("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n");
// For any downstream mtask that's on another thread, bump its
// counter and maybe notify it.
for (V3GraphEdge* edgep = curExecMTaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
const ExecMTask* nextp = dynamic_cast<ExecMTask*>(edgep->top());
if (nextp->thread() != curExecMTaskp->thread()) {
puts("vlSelf->__Vm_mt_" + cvtToStr(nextp->id())
+ ".signalUpstreamDone(even_cycle);\n");
}
}
// Run the next mtask inline
const ExecMTask* nextp = curExecMTaskp->packNextp();
if (nextp) {
emitMTaskBody(nextp->bodyp());
} else {
// Unblock the fake "final" mtask
puts("vlSelf->__Vm_mt_final.signalUpstreamDone(even_cycle);\n");
}
}
virtual void visit(AstMTaskBody* nodep) override {
VL_RESTORER(m_useSelfForThis);
maybeSplit();
splitSizeInc(10);
puts("\n");
for (const ExecMTask* mtp = nodep->execMTaskp(); mtp; mtp = mtp->packNextp()) {
m_lazyDecls.emit(mtp->bodyp());
}
puts("void ");
puts(topClassName() + "__" + protect(nodep->execMTaskp()->cFuncName()));
puts("(void* voidSelf, bool even_cycle) {\n");
puts(topClassName() + "* const vlSelf = static_cast<" + topClassName()
+ "*>(voidSelf);\n");
m_useSelfForThis = true;
puts(symClassAssign());
emitMTaskBody(nodep);
ensureNewLine();
puts("}\n");
}
//--------------------------------------- //---------------------------------------
// VISITORS // VISITORS
using EmitCStmts::visit; // Suppress hidden overloaded virtual function warning using EmitCStmts::visit; // Suppress hidden overloaded virtual function warning
@ -1734,6 +1635,12 @@ class EmitCImp final : EmitCStmts {
if (nodep->ifdef() != "") puts("#endif // " + nodep->ifdef() + "\n"); if (nodep->ifdef() != "") puts("#endif // " + nodep->ifdef() + "\n");
} }
// Emit an MTask body by emitting its children, with m_useSelfForThis forced to true
// for the duration of the visit.
virtual void visit(AstMTaskBody* nodep) override {
// Presumably puts back the previous m_useSelfForThis value when this scope exits --
// TODO confirm against the VL_RESTORER definition
VL_RESTORER(m_useSelfForThis);
m_useSelfForThis = true;
iterateChildrenConst(nodep);
}
void emitChangeDet() { void emitChangeDet() {
putsDecoration("// Change detection\n"); putsDecoration("// Change detection\n");
puts("QData __req = false; // Logically a bool\n"); // But not because it results in puts("QData __req = false; // Logically a bool\n"); // But not because it results in
@ -1833,34 +1740,8 @@ class EmitCImp final : EmitCStmts {
"ExecGraph should be a singleton!"); "ExecGraph should be a singleton!");
// The location of the AstExecGraph within the containing _eval() // The location of the AstExecGraph within the containing _eval()
// function is where we want to invoke the graph and wait for it to // function is where we want to invoke the graph and wait for it to
// complete. Do that now. // complete. Emitting the children does just that.
// iterateChildrenConst(nodep);
// Don't recurse to children -- this isn't the place to emit
// function definitions for the nested CFuncs. We'll do that at the
// end.
puts("vlSelf->__Vm_even_cycle = !vlSelf->__Vm_even_cycle;\n");
// Build the list of initial mtasks to start
std::vector<const ExecMTask*> execMTasks = nodep->rootMTasks();
if (!execMTasks.empty()) {
for (uint32_t i = 0; i < execMTasks.size(); ++i) {
const bool runInline = (i == execMTasks.size() - 1);
const string protName
= topClassName() + "__" + protect(execMTasks[i]->cFuncName());
if (runInline) {
// The thread calling eval() will run this mtask inline,
// along with its packed successors.
puts(protName + "(vlSelf, vlSelf->__Vm_even_cycle);\n");
puts("Verilated::mtaskId(0);\n");
} else {
// The other N-1 go to the thread pool.
puts("vlSelf->__Vm_threadPoolp->workerp(" + cvtToStr(i) + ")->addTask("
+ protName + ", vlSelf, vlSelf->__Vm_even_cycle);\n");
}
}
puts("vlSelf->__Vm_mt_final.waitUntilUpstreamDone(vlSelf->__Vm_even_cycle);\n");
}
} }
//--------------------------------------- //---------------------------------------
@ -2015,8 +1896,8 @@ class EmitCImp final : EmitCStmts {
void emitSettleLoop(bool initial); void emitSettleLoop(bool initial);
void emitWrapEval(); void emitWrapEval();
void emitWrapFast(); void emitWrapFast();
void emitMTaskState(); void emitThreadingState();
void emitMTaskVertexCtors(bool* firstp); void emitThreadingCtors(bool* firstp);
void emitIntTop(const AstNodeModule* modp); void emitIntTop(const AstNodeModule* modp);
void emitInt(AstNodeModule* modp); void emitInt(AstNodeModule* modp);
void maybeSplit(); void maybeSplit();
@ -2085,6 +1966,7 @@ void EmitCStmts::emitVarDecl(const AstVar* nodep, const string& prefixIfImp) {
if (nodep->isWide()) puts("," + cvtToStr(nodep->widthWords())); if (nodep->isWide()) puts("," + cvtToStr(nodep->widthWords()));
puts(");\n"); puts(");\n");
} else { } else {
if (basicp && basicp->keyword().isMTaskState()) { m_ctorVarsVec.push_back(nodep); }
// strings and other fundamental c types // strings and other fundamental c types
if (nodep->isFuncLocal() && nodep->isString()) { if (nodep->isFuncLocal() && nodep->isString()) {
const string name = nodep->name(); const string name = nodep->name();
@ -2114,13 +1996,19 @@ void EmitCStmts::emitCtorSep(bool* firstp) {
void EmitCStmts::emitVarCtors(bool* firstp) { void EmitCStmts::emitVarCtors(bool* firstp) {
if (!m_ctorVarsVec.empty()) { if (!m_ctorVarsVec.empty()) {
ofp()->indentInc(); ofp()->indentInc();
puts("\n"); if (*firstp) puts("\n");
for (const AstVar* varp : m_ctorVarsVec) { for (const AstVar* varp : m_ctorVarsVec) {
bool isArray = !VN_CAST(varp->dtypeSkipRefp(), BasicDType); const AstBasicDType* const dtypep = VN_CAST(varp->dtypeSkipRefp(), BasicDType);
if (isArray) { if (!dtypep) {
puts("// Skipping array: "); puts("// Skipping array: ");
puts(varp->nameProtect()); puts(varp->nameProtect());
puts("\n"); puts("\n");
} else if (dtypep->keyword().isMTaskState()) {
emitCtorSep(firstp);
puts(varp->nameProtect());
puts("(");
iterate(varp->valuep());
puts(")");
} else { } else {
emitCtorSep(firstp); emitCtorSep(firstp);
puts(varp->nameProtect()); puts(varp->nameProtect());
@ -2555,36 +2443,17 @@ void EmitCImp::emitCoverageDecl(AstNodeModule*) {
} }
} }
void EmitCImp::emitMTaskVertexCtors(bool* firstp) { void EmitCImp::emitThreadingCtors(bool* firstp) {
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp(); ofp()->indentInc();
UASSERT_OBJ(execGraphp, v3Global.rootp(), "Root should have an execGraphp");
const V3Graph* depGraphp = execGraphp->depGraphp();
unsigned finalEdgesInCt = 0;
for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
const ExecMTask* mtp = dynamic_cast<const ExecMTask*>(vxp);
unsigned edgesInCt = packedMTaskMayBlock(mtp);
if (packedMTaskMayBlock(mtp) > 0) {
emitCtorSep(firstp);
puts("__Vm_mt_" + cvtToStr(mtp->id()) + "(" + cvtToStr(edgesInCt) + ")");
}
// Each mtask with no packed successor will become a dependency
// for the final node:
if (!mtp->packNextp()) ++finalEdgesInCt;
}
emitCtorSep(firstp);
puts("__Vm_mt_final(" + cvtToStr(finalEdgesInCt) + ")");
// This will flip to 'true' before the start of the 0th cycle.
emitCtorSep(firstp); emitCtorSep(firstp);
puts("__Vm_threadPoolp(nullptr)"); puts("__Vm_threadPoolp(nullptr)");
emitCtorSep(firstp);
puts("__Vm_even_cycle(false)");
if (v3Global.opt.profThreads()) { if (v3Global.opt.profThreads()) {
emitCtorSep(firstp); emitCtorSep(firstp);
puts("__Vm_profile_cycle_start(0)"); puts("__Vm_profile_cycle_start(0)");
} }
emitCtorSep(firstp); ofp()->indentDec();
puts("__Vm_even_cycle(false)");
} }
void EmitCImp::emitCtorImp(AstNodeModule* modp) { void EmitCImp::emitCtorImp(AstNodeModule* modp) {
@ -2612,9 +2481,10 @@ void EmitCImp::emitCtorImp(AstNodeModule* modp) {
} else { } else {
puts(modName + "::" + modName + "(const char* _vcname__)\n"); puts(modName + "::" + modName + "(const char* _vcname__)\n");
puts(" : VerilatedModule(_vcname__)\n"); puts(" : VerilatedModule(_vcname__)\n");
first = false; // printed the first ':'
} }
emitVarCtors(&first); emitVarCtors(&first);
if (modp->isTop() && v3Global.opt.mtasks()) emitMTaskVertexCtors(&first); if (modp->isTop() && v3Global.opt.mtasks()) emitThreadingCtors(&first);
puts(" {\n"); puts(" {\n");
emitCellCtors(modp); emitCellCtors(modp);
@ -2804,9 +2674,11 @@ void EmitCImp::emitSavableImp(AstNodeModule* modp) {
puts("; ++" + ivar + ") {\n"); puts("; ++" + ivar + ") {\n");
elementp = arrayp->subDTypep()->skipRefp(); elementp = arrayp->subDTypep()->skipRefp();
} }
const AstBasicDType* const basicp = elementp->basicp();
// Do not save MTask state, only matters within an evaluation
if (basicp && basicp->keyword().isMTaskState()) continue;
// Want to detect types that are represented as arrays // Want to detect types that are represented as arrays
// (i.e. packed types of more than 64 bits). // (i.e. packed types of more than 64 bits).
AstBasicDType* basicp = elementp->basicp();
if (elementp->isWide() if (elementp->isWide()
&& !(basicp && basicp->keyword() == AstBasicDTypeKwd::STRING)) { && !(basicp && basicp->keyword() == AstBasicDTypeKwd::STRING)) {
int vecnum = vects++; int vecnum = vects++;
@ -3249,26 +3121,13 @@ void EmitCStmts::emitSortedVarList(const VarVec& anons, const VarVec& nonanons,
} }
} }
void EmitCImp::emitMTaskState() { void EmitCImp::emitThreadingState() {
ofp()->putsPrivate(false); // Accessed from loose function ofp()->putsPrivate(false); // Accessed from loose function
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp(); AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
UASSERT_OBJ(execGraphp, v3Global.rootp(), "Root should have an execGraphp"); UASSERT_OBJ(execGraphp, v3Global.rootp(), "Root should have an execGraphp");
const V3Graph* depGraphp = execGraphp->depGraphp();
for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
const ExecMTask* mtp = dynamic_cast<const ExecMTask*>(vxp);
if (packedMTaskMayBlock(mtp) > 0) {
puts("VlMTaskVertex __Vm_mt_" + cvtToStr(mtp->id()) + ";\n");
}
}
// This fake mtask depends on all the real ones. We use it to block
// eval() until all mtasks are done.
//
// In the future we might allow _eval() to return before the graph is
// fully done executing, for "half wave" scheduling. For now we wait
// for all mtasks though.
puts("VlMTaskVertex __Vm_mt_final;\n");
puts("VlThreadPool* __Vm_threadPoolp;\n"); puts("VlThreadPool* __Vm_threadPoolp;\n");
puts("bool __Vm_even_cycle;\n");
if (v3Global.opt.profThreads()) { if (v3Global.opt.profThreads()) {
// rdtsc() at current cycle start // rdtsc() at current cycle start
@ -3278,8 +3137,6 @@ void EmitCImp::emitMTaskState() {
// Track our position in the cache warmup and actual profile window // Track our position in the cache warmup and actual profile window
puts("vluint32_t __Vm_profile_window_ct;\n"); puts("vluint32_t __Vm_profile_window_ct;\n");
} }
puts("bool __Vm_even_cycle;\n");
} }
void EmitCImp::emitIntTop(const AstNodeModule* modp) { void EmitCImp::emitIntTop(const AstNodeModule* modp) {
@ -3380,7 +3237,7 @@ void EmitCImp::emitInt(AstNodeModule* modp) {
if (v3Global.opt.inhibitSim()) { if (v3Global.opt.inhibitSim()) {
puts("bool __Vm_inhibitSim; ///< Set true to disable evaluation of module\n"); puts("bool __Vm_inhibitSim; ///< Set true to disable evaluation of module\n");
} }
if (v3Global.opt.mtasks()) emitMTaskState(); if (v3Global.opt.mtasks()) emitThreadingState();
} }
emitCoverageDecl(modp); // may flip public/private emitCoverageDecl(modp); // may flip public/private
@ -3634,23 +3491,6 @@ void EmitCImp::mainImp(AstNodeModule* modp, bool slow) {
m_modp = modp; m_modp = modp;
} }
if (m_fast && modp->isTop() && v3Global.opt.mtasks()) {
// Make a final pass and emit function definitions for the mtasks
// in the ExecGraph
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
const V3Graph* depGraphp = execGraphp->depGraphp();
for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp;
vxp = vxp->verticesNextp()) {
const ExecMTask* mtaskp = dynamic_cast<const ExecMTask*>(vxp);
if (mtaskp->threadRoot()) {
// Only define one function for all the mtasks packed on
// a given thread. We'll name this function after the
// root mtask though it contains multiple mtasks' worth
// of logic.
iterate(mtaskp->bodyp());
}
}
}
VL_DO_CLEAR(delete m_ofp, m_ofp = nullptr); VL_DO_CLEAR(delete m_ofp, m_ofp = nullptr);
} }

View File

@ -53,6 +53,10 @@ public:
return VIdProtect::protectWordsIf(name, doIt); return VIdProtect::protectWordsIf(name, doIt);
} }
static string ifNoProtect(const string& in) { return v3Global.opt.protectIds() ? "" : in; } static string ifNoProtect(const string& in) { return v3Global.opt.protectIds() ? "" : in; }
// Build the C++ statement text that declares 'vlSelf' as a pointer to the top class,
// initialized by static_cast from a variable named 'voidSelf'. The returned text ends
// with a newline.
static string voidSelfAssign() {
    string stmt = topClassName();
    stmt += "* const __restrict vlSelf VL_ATTR_UNUSED = static_cast<";
    stmt += topClassName();
    stmt += "*>(voidSelf);\n";
    return stmt;
}
static string symClassName() { return v3Global.opt.prefix() + "_" + protect("_Syms"); } static string symClassName() { return v3Global.opt.prefix() + "_" + protect("_Syms"); }
static string symClassVar() { return symClassName() + "* __restrict vlSymsp"; } static string symClassVar() { return symClassName() + "* __restrict vlSymsp"; }
static string symClassAssign() { static string symClassAssign() {

View File

@ -17,6 +17,7 @@
#include "config_build.h" #include "config_build.h"
#include "verilatedos.h" #include "verilatedos.h"
#include "V3EmitCBase.h"
#include "V3Os.h" #include "V3Os.h"
#include "V3File.h" #include "V3File.h"
#include "V3GraphAlg.h" #include "V3GraphAlg.h"
@ -1997,6 +1998,48 @@ private:
VL_DEBUG_FUNC; VL_DEBUG_FUNC;
}; };
//######################################################################
// ThreadSchedule
class PartPackMTasks;
// The thread schedule, containing all information needed later. Note that this is a simple
// aggregate data type, and the only way to get hold of an instance of it is via
// PartPackMTasks::pack, which moves it out and returns it as const. This means we can only
// acquire a const reference to it, so no further modifications are allowed, and therefore all
// members are public (attributes).
class ThreadSchedule final {
public:
// Allocation of sequence of MTasks to threads. Can be considered a map from thread ID to
// the sequence of MTasks to be executed by that thread.
std::vector<std::vector<const ExecMTask*>> threads;
// Map from MTask to ID of thread it is assigned to.
std::unordered_map<const ExecMTask*, uint32_t> threadId;
private:
friend class PartPackMTasks;
explicit ThreadSchedule(uint32_t nThreads)
: threads{nThreads} {}
VL_UNCOPYABLE(ThreadSchedule); // But movable
ThreadSchedule(ThreadSchedule&&) = default;
ThreadSchedule& operator=(ThreadSchedule&&) = default;
public:
// Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must
// test whether its dependencies are ready before starting, and therefore may need to block.
uint32_t crossThreadDependencies(const ExecMTask* mtaskp) const {
const uint32_t thisThreadId = threadId.at(mtaskp);
uint32_t result = 0;
for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) {
const ExecMTask* const prevp = dynamic_cast<ExecMTask*>(edgep->fromp());
if (threadId.at(prevp) != thisThreadId) ++result;
}
return result;
}
};
//###################################################################### //######################################################################
// PartPackMTasks // PartPackMTasks
@ -2016,49 +2059,45 @@ private:
// thread A checks the end time of an mtask running on thread B. This extra // thread A checks the end time of an mtask running on thread B. This extra
// "padding" avoids tight "layovers" at cross-thread dependencies. // "padding" avoids tight "layovers" at cross-thread dependencies.
class PartPackMTasks final { class PartPackMTasks final {
private: // CONSTANTS
static constexpr uint32_t UNASSIGNED = 0xffffffff;
// TYPES // TYPES
struct MTaskState { struct MTaskState {
uint32_t completionTime; // Estimated time this mtask will complete uint32_t completionTime = 0; // Estimated time this mtask will complete
uint32_t threadId = UNASSIGNED; // Thread id this MTask is assigned to
const ExecMTask* nextp = nullptr; // Next MTask on same thread after this
}; };
struct MTaskCmp { struct MTaskCmp {
bool operator()(const ExecMTask* ap, ExecMTask* bp) const { return ap->id() < bp->id(); } bool operator()(const ExecMTask* ap, const ExecMTask* bp) const {
return ap->id() < bp->id();
}
}; };
// MEMBERS // MEMBERS
V3Graph* m_mtasksp; // Mtask graph const uint32_t m_nThreads; // Number of threads
uint32_t m_nThreads; // Number of threads const uint32_t m_sandbagNumerator; // Numerator padding for est runtime
uint32_t m_sandbagNumerator; // Numerator padding for est runtime const uint32_t m_sandbagDenom; // Denominator padding for est runtime
uint32_t m_sandbagDenom; // Denomerator padding for est runtime
using MTaskStateMap = std::unordered_map<const ExecMTask*, MTaskState>; std::unordered_map<const ExecMTask*, MTaskState> m_mtaskState; // State for each mtask.
MTaskStateMap m_mtaskState; // State for each mtask.
MTaskCmp m_mtaskCmp; // Comparison functor
using ReadyMTasks = std::set<ExecMTask*, MTaskCmp&>;
ReadyMTasks m_ready; // MTasks ready to be assigned next; all their
// // dependencies are already assigned.
std::vector<ExecMTask*> m_prevMTask; // Previous mtask scheduled to each thread.
std::vector<uint32_t> m_busyUntil; // Time each thread is occupied until
public: public:
// CONSTRUCTORS // CONSTRUCTORS
explicit PartPackMTasks(V3Graph* mtasksp, uint32_t nThreads = v3Global.opt.threads(), explicit PartPackMTasks(uint32_t nThreads = v3Global.opt.threads(),
unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100) unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100)
: m_mtasksp{mtasksp} : m_nThreads{nThreads}
, m_nThreads{nThreads}
, m_sandbagNumerator{sandbagNumerator} , m_sandbagNumerator{sandbagNumerator}
, m_sandbagDenom{sandbagDenom} , m_sandbagDenom{sandbagDenom} {}
, m_ready{m_mtaskCmp} {}
~PartPackMTasks() = default; ~PartPackMTasks() = default;
private:
// METHODS // METHODS
uint32_t completionTime(const ExecMTask* mtaskp, uint32_t thread) { uint32_t completionTime(const ExecMTask* mtaskp, uint32_t threadId) {
const MTaskState& state = m_mtaskState[mtaskp]; const MTaskState& state = m_mtaskState[mtaskp];
UASSERT(mtaskp->thread() != 0xffffffff, "Mtask should have assigned thread"); UASSERT(state.threadId != UNASSIGNED, "Mtask should have assigned thread");
if (thread == mtaskp->thread()) { if (threadId == state.threadId) {
// No overhead on native thread // No overhead on same thread
return state.completionTime; return state.completionTime;
} }
@ -2068,157 +2107,162 @@ public:
= state.completionTime + (m_sandbagNumerator * mtaskp->cost()) / m_sandbagDenom; = state.completionTime + (m_sandbagNumerator * mtaskp->cost()) / m_sandbagDenom;
// If task B is packed after task A on thread 0, don't let thread 1 // If task B is packed after task A on thread 0, don't let thread 1
// think that A finishes later than thread 0 thinks that B // think that A finishes earlier than thread 0 thinks that B
// finishes, otherwise we get priority inversions and fail the self // finishes, otherwise we get priority inversions and fail the self
// test. // test.
if (mtaskp->packNextp()) { if (state.nextp) {
uint32_t successorEndTime = completionTime(mtaskp->packNextp(), mtaskp->thread()); const uint32_t successorEndTime = completionTime(state.nextp, state.threadId);
if ((sandbaggedEndTime >= successorEndTime) && (successorEndTime > 1)) { if ((sandbaggedEndTime >= successorEndTime) && (successorEndTime > 1)) {
sandbaggedEndTime = successorEndTime - 1; sandbaggedEndTime = successorEndTime - 1;
} }
} }
UINFO(6, "Sandbagged end time for " << mtaskp->name() << " on th " << thread << " = " UINFO(6, "Sandbagged end time for " << mtaskp->name() << " on th " << threadId << " = "
<< sandbaggedEndTime << endl); << sandbaggedEndTime << endl);
return sandbaggedEndTime; return sandbaggedEndTime;
} }
void setCompletionTime(ExecMTask* mtaskp, uint32_t time) { bool isReady(const ExecMTask* mtaskp) {
MTaskState& state = m_mtaskState[mtaskp]; for (V3GraphEdge* edgeInp = mtaskp->inBeginp(); edgeInp; edgeInp = edgeInp->inNextp()) {
state.completionTime = time; const ExecMTask* const prevp = dynamic_cast<ExecMTask*>(edgeInp->fromp());
if (m_mtaskState[prevp].threadId == UNASSIGNED) {
// This predecessor is not assigned yet
return false;
}
}
return true;
} }
void go() { public:
// Pack the MTasks from the given graph into m_nThreads threads and return the schedule.
const ThreadSchedule pack(const V3Graph& mtaskGraph) {
// The result
ThreadSchedule schedule(m_nThreads);
// Time each thread is occupied until
std::vector<uint32_t> busyUntil(m_nThreads, 0);
// MTasks ready to be assigned next. All their dependencies are already assigned.
std::set<const ExecMTask*, MTaskCmp> readyMTasks;
// Build initial ready list // Build initial ready list
for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { for (V3GraphVertex* vxp = mtaskGraph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) {
ExecMTask* mtaskp = dynamic_cast<ExecMTask*>(vxp); const ExecMTask* const mtaskp = dynamic_cast<ExecMTask*>(vxp);
if (vxp->inEmpty()) m_ready.insert(mtaskp); if (isReady(mtaskp)) readyMTasks.insert(mtaskp);
} }
m_prevMTask.clear(); // Clear algorithm state
m_prevMTask.resize(m_nThreads); m_mtaskState.clear();
m_busyUntil.clear();
m_busyUntil.resize(m_nThreads);
while (!m_ready.empty()) { while (!readyMTasks.empty()) {
// For each task in the ready set, compute when it might start // For each task in the ready set, compute when it might start
// on each thread (in that thread's local time frame.) // on each thread (in that thread's local time frame.)
uint32_t bestTime = 0xffffffff; uint32_t bestTime = 0xffffffff;
uint32_t bestTh = 0; uint32_t bestThreadId = 0;
ExecMTask* bestMtaskp = nullptr; const ExecMTask* bestMtaskp = nullptr; // Todo: const ExecMTask*
for (uint32_t th = 0; th < m_nThreads; ++th) { for (uint32_t threadId = 0; threadId < m_nThreads; ++threadId) {
for (ReadyMTasks::iterator taskIt = m_ready.begin(); taskIt != m_ready.end(); for (const ExecMTask* const mtaskp : readyMTasks) {
++taskIt) { uint32_t timeBegin = busyUntil[threadId];
uint32_t timeBegin = m_busyUntil[th];
if (timeBegin > bestTime) { if (timeBegin > bestTime) {
UINFO(6, "th " << th << " busy until " << timeBegin UINFO(6, "th " << threadId << " busy until " << timeBegin
<< ", later than bestTime " << bestTime << ", later than bestTime " << bestTime
<< ", skipping thread.\n"); << ", skipping thread.\n");
break; break;
} }
ExecMTask* taskp = *taskIt; for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep;
for (V3GraphEdge* edgep = taskp->inBeginp(); edgep; edgep = edgep->inNextp()) { edgep = edgep->inNextp()) {
ExecMTask* priorp = dynamic_cast<ExecMTask*>(edgep->fromp()); const ExecMTask* const priorp = dynamic_cast<ExecMTask*>(edgep->fromp());
uint32_t priorEndTime = completionTime(priorp, th); const uint32_t priorEndTime = completionTime(priorp, threadId);
if (priorEndTime > timeBegin) timeBegin = priorEndTime; if (priorEndTime > timeBegin) timeBegin = priorEndTime;
} }
UINFO(6, "Task " << taskp->name() << " start at " << timeBegin << " on thread " UINFO(6, "Task " << mtaskp->name() << " start at " << timeBegin
<< th << endl); << " on thread " << threadId << endl);
if ((timeBegin < bestTime) if ((timeBegin < bestTime)
|| ((timeBegin == bestTime) || ((timeBegin == bestTime)
&& bestMtaskp // Redundant, but appeases static analysis tools && bestMtaskp // Redundant, but appeases static analysis tools
&& (taskp->priority() > bestMtaskp->priority()))) { && (mtaskp->priority() > bestMtaskp->priority()))) {
bestTime = timeBegin; bestTime = timeBegin;
bestTh = th; bestThreadId = threadId;
bestMtaskp = taskp; bestMtaskp = mtaskp;
} }
} }
} }
if (!bestMtaskp) v3fatalSrc("Should have found some task"); UASSERT(bestMtaskp, "Should have found some task");
UINFO(6, "Will schedule " << bestMtaskp->name() << " onto thread " << bestTh << endl); UINFO(6, "Will schedule " << bestMtaskp->name() << " onto thread " << bestThreadId
uint32_t bestEndTime = bestTime + bestMtaskp->cost(); << endl);
setCompletionTime(bestMtaskp, bestEndTime);
// Reference to thread in schedule we are assigning this MTask to.
std::vector<const ExecMTask*>& bestThread = schedule.threads[bestThreadId];
// Update algorithm state
const uint32_t bestEndTime = bestTime + bestMtaskp->cost();
m_mtaskState[bestMtaskp].completionTime = bestEndTime;
m_mtaskState[bestMtaskp].threadId = bestThreadId;
if (!bestThread.empty()) { m_mtaskState[bestThread.back()].nextp = bestMtaskp; }
busyUntil[bestThreadId] = bestEndTime;
// Add the MTask to the schedule
bestThread.push_back(bestMtaskp);
schedule.threadId[bestMtaskp] = bestThreadId;
// Update the ready list // Update the ready list
size_t erased = m_ready.erase(bestMtaskp); const size_t erased = readyMTasks.erase(bestMtaskp);
UASSERT_OBJ(erased > 0, bestMtaskp, "Should have erased something?"); UASSERT_OBJ(erased > 0, bestMtaskp, "Should have erased something?");
for (V3GraphEdge* edgeOutp = bestMtaskp->outBeginp(); edgeOutp; for (V3GraphEdge* edgeOutp = bestMtaskp->outBeginp(); edgeOutp;
edgeOutp = edgeOutp->outNextp()) { edgeOutp = edgeOutp->outNextp()) {
ExecMTask* nextp = dynamic_cast<ExecMTask*>(edgeOutp->top()); const ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgeOutp->top());
// Dependent MTask should not yet be assigned to a thread
UASSERT(nextp->thread() == 0xffffffff, UASSERT(m_mtaskState[nextp].threadId == UNASSIGNED,
"Tasks after one being assigned should not be assigned yet"); "Tasks after one being assigned should not be assigned yet");
// They also should not be ready yet, since they only now // Dependent MTask should not be ready yet, since dependency is just being assigned
// may become ready UASSERT_OBJ(readyMTasks.find(nextp) == readyMTasks.end(), nextp,
UASSERT_OBJ(m_ready.find(nextp) == m_ready.end(), nextp,
"Tasks after one being assigned should not be ready"); "Tasks after one being assigned should not be ready");
bool isReady = true; if (isReady(nextp)) {
for (V3GraphEdge* edgeInp = nextp->inBeginp(); edgeInp; readyMTasks.insert(nextp);
edgeInp = edgeInp->inNextp()) {
ExecMTask* priorp = dynamic_cast<ExecMTask*>(edgeInp->fromp());
if (priorp == bestMtaskp) continue;
if (priorp->thread() == 0xffffffff) {
// This prior is not assigned yet
isReady = false;
}
}
if (isReady) {
m_ready.insert(nextp);
UINFO(6, "Inserted " << nextp->name() << " into ready\n"); UINFO(6, "Inserted " << nextp->name() << " into ready\n");
} }
} }
// Update the ExecMTask itself
if (m_prevMTask[bestTh]) {
m_prevMTask[bestTh]->packNextp(bestMtaskp);
UINFO(6, "Packing " << bestMtaskp->name() << " after "
<< m_prevMTask[bestTh]->name() << endl);
} else {
UINFO(6, "Marking " << bestMtaskp->name() << " as thread root\n");
bestMtaskp->threadRoot(true);
} }
bestMtaskp->thread(bestTh);
// Update the thread state return schedule;
m_prevMTask[bestTh] = bestMtaskp;
m_busyUntil[bestTh] = bestEndTime;
}
} }
// SELF TEST // SELF TEST
static void selfTest() { static void selfTest() {
V3Graph graph; V3Graph graph;
ExecMTask* t0 = new ExecMTask(&graph, nullptr, 0); ExecMTask* const t0 = new ExecMTask(&graph, nullptr, 0);
t0->cost(1000); t0->cost(1000);
t0->priority(1100); t0->priority(1100);
ExecMTask* t1 = new ExecMTask(&graph, nullptr, 1); ExecMTask* const t1 = new ExecMTask(&graph, nullptr, 1);
t1->cost(100); t1->cost(100);
t1->priority(100); t1->priority(100);
ExecMTask* t2 = new ExecMTask(&graph, nullptr, 2); ExecMTask* const t2 = new ExecMTask(&graph, nullptr, 2);
t2->cost(100); t2->cost(100);
t2->priority(100); t2->priority(100);
new V3GraphEdge(&graph, t0, t1, 1); new V3GraphEdge(&graph, t0, t1, 1);
new V3GraphEdge(&graph, t0, t2, 1); new V3GraphEdge(&graph, t0, t2, 1);
PartPackMTasks packer(&graph, PartPackMTasks packer(2, // Threads
2, // Threads
3, // Sandbag numerator 3, // Sandbag numerator
10); // Sandbag denom 10); // Sandbag denom
packer.go(); const ThreadSchedule& schedule = packer.pack(graph);
UASSERT_SELFTEST(bool, t0->threadRoot(), true); UASSERT_SELFTEST(size_t, schedule.threads.size(), 2);
UASSERT_SELFTEST(uint32_t, t0->thread(), 0);
UASSERT_SELFTEST(const void*, t0->packNextp(), t1);
UASSERT_SELFTEST(uint32_t, t1->thread(), 0); UASSERT_SELFTEST(size_t, schedule.threads[0].size(), 2);
UASSERT_SELFTEST(bool, t1->threadRoot(), false); UASSERT_SELFTEST(size_t, schedule.threads[1].size(), 1);
UASSERT_SELFTEST(const void*, t1->packNextp(), nullptr);
UASSERT_SELFTEST(uint32_t, t2->thread(), 1); UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][0], t0);
UASSERT_SELFTEST(bool, t2->threadRoot(), true); UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][1], t1);
UASSERT_SELFTEST(const void*, t2->packNextp(), nullptr); UASSERT_SELFTEST(const ExecMTask*, schedule.threads[1][0], t2);
UASSERT_SELFTEST(size_t, schedule.threadId.size(), 3);
UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t0), 0);
UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t1), 0);
UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t2), 1);
// On its native thread, we see the actual end time for t0: // On its native thread, we see the actual end time for t0:
UASSERT_SELFTEST(uint32_t, packer.completionTime(t0, 0), 1000); UASSERT_SELFTEST(uint32_t, packer.completionTime(t0, 0), 1000);
@ -2478,7 +2522,7 @@ void V3Partition::go(V3Graph* mtasksp) {
} }
} }
void V3Partition::finalizeCosts(V3Graph* execMTaskGraphp) { static void finalizeCosts(V3Graph* execMTaskGraphp) {
GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE); GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE);
while (const V3GraphVertex* vxp = ser.nextp()) { while (const V3GraphVertex* vxp = ser.nextp()) {
@ -2544,9 +2588,170 @@ void V3Partition::finalizeCosts(V3Graph* execMTaskGraphp) {
} }
} }
static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId,
AstCFunc* funcp, const ExecMTask* mtaskp) {
AstNodeModule* const modp = v3Global.rootp()->topModulep();
FileLine* const fl = modp->fileline();
// Helper function to make the code a bit more legible
const auto addStrStmt = [=](const string& stmt) -> void { //
funcp->addStmtsp(new AstCStmt(fl, stmt));
};
if (const uint32_t nDependencies = schedule.crossThreadDependencies(mtaskp)) {
// This mtask has dependencies executed on another thread, so it may block. Create the task
// state variable and wait to be notified.
const string name = "__Vm_mtaskstate_" + cvtToStr(mtaskp->id());
AstBasicDType* const mtaskStateDtypep
= v3Global.rootp()->typeTablep()->findBasicDType(fl, AstBasicDTypeKwd::MTASKSTATE);
AstVar* const varp = new AstVar(fl, AstVarType::MODULETEMP, name, mtaskStateDtypep);
varp->valuep(new AstConst(fl, nDependencies));
varp->protect(false); // Do not protect as we still have references in AstText
modp->addStmtp(varp);
// For now, reference is still via text bashing
addStrStmt("vlSelf->" + name + +".waitUntilUpstreamDone(even_cycle);\n");
}
string recName;
if (v3Global.opt.profThreads()) {
recName = "__Vprfthr_" + cvtToStr(mtaskp->id());
addStrStmt("VlProfileRec* " + recName + " = nullptr;\n");
// Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
addStrStmt("if (VL_UNLIKELY(vlSelf->__Vm_profile_cycle_start)) {\n" + //
recName + " = vlSelf->__Vm_threadPoolp->profileAppend();\n" + //
recName + "->startRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start," + //
" " + cvtToStr(mtaskp->id()) + "," + //
" " + cvtToStr(mtaskp->cost()) + ");\n" + //
"}\n");
}
//
addStrStmt("Verilated::mtaskId(" + cvtToStr(mtaskp->id()) + ");\n");
// Move the the actual body of calls to leaf functions into this function
funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack());
if (v3Global.opt.profThreads()) {
// Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling
addStrStmt("if (VL_UNLIKELY(" + recName + ")) {\n" + //
recName + "->endRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start);\n"
+ "}\n");
}
// Flush message queue
addStrStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n");
// For any dependent mtask that's on another thread, signal one dependency completion.
for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) {
const ExecMTask* const nextp = dynamic_cast<ExecMTask*>(edgep->top());
if (schedule.threadId.at(nextp) != threadId) {
addStrStmt("vlSelf->__Vm_mtaskstate_" + cvtToStr(nextp->id())
+ ".signalUpstreamDone(even_cycle);\n");
}
}
}
static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule) {
AstNodeModule* const modp = v3Global.rootp()->topModulep();
FileLine* const fl = modp->fileline();
std::vector<AstCFunc*> funcps;
// For each thread, create a function representing its entry point
for (const std::vector<const ExecMTask*>& thread : schedule.threads) {
if (thread.empty()) continue;
const uint32_t threadId = schedule.threadId.at(thread.front());
string name = "__Vthread_";
name += cvtToStr(threadId);
AstCFunc* const funcp = new AstCFunc(fl, name, nullptr, "void");
modp->addStmtp(funcp);
funcps.push_back(funcp);
funcp->isStatic(true); // Uses void self pointer, so static and hand rolled
funcp->isLoose(true);
funcp->entryPoint(true);
funcp->argTypes("void* voidSelf, bool even_cycle");
// Setup vlSelf an vlSyms
funcp->addStmtsp(new AstCStmt(fl, EmitCBaseVisitor::voidSelfAssign()));
funcp->addStmtsp(new AstCStmt(fl, EmitCBaseVisitor::symClassAssign()));
// Invoke each mtask scheduled to this thread from the thread function
for (const ExecMTask* const mtaskp : thread) {
addMTaskToFunction(schedule, threadId, funcp, mtaskp);
}
// Unblock the fake "final" mtask when this thread is finished
funcp->addStmtsp(
new AstCStmt(fl, "vlSelf->__Vm_mtaskstate_final.signalUpstreamDone(even_cycle);\n"));
}
// Create the fake "final" mtask state variable
AstBasicDType* const mtaskStateDtypep
= v3Global.rootp()->typeTablep()->findBasicDType(fl, AstBasicDTypeKwd::MTASKSTATE);
AstVar* const varp
= new AstVar(fl, AstVarType::MODULETEMP, "__Vm_mtaskstate_final", mtaskStateDtypep);
varp->valuep(new AstConst(fl, funcps.size()));
varp->protect(false); // Do not protect as we still have references in AstText
modp->addStmtp(varp);
return funcps;
}
// Add to 'execGraphp' the statements that start every thread function in 'funcps' and then
// wait on the "final" mtask state. All functions but the last are handed to thread pool
// workers (worker index == position in 'funcps'); the last one is called directly.
// 'funcps' is expected to be non-empty; the visible caller asserts this before calling.
static void addThreadStartToExecGraph(AstExecGraph* const execGraphp,
                                      const std::vector<AstCFunc*>& funcps) {
    // FileLine for all the nodes created here
    FileLine* const flp = v3Global.rootp()->fileline();

    // Emit helpers: a complete C statement, and a tracked fragment of text
    const auto emitStmt = [=](const string& code) -> void {  //
        execGraphp->addStmtsp(new AstCStmt(flp, code));
    };
    const auto emitText = [=](const string& fragment) -> void {
        execGraphp->addStmtsp(new AstText(flp, fragment, /* tracking: */ true));
    };

    // Toggle the cycle parity flag that is passed to every thread function
    emitStmt("vlSelf->__Vm_even_cycle = !vlSelf->__Vm_even_cycle;\n");

    const uint32_t lastIdx = funcps.size() - 1;
    for (uint32_t idx = 0; idx <= lastIdx; ++idx) {
        AstCFunc* const threadFuncp = funcps.at(idx);

        if (idx == lastIdx) {
            // The final function is invoked directly, right here
            AstCCall* const callp = new AstCCall(flp, threadFuncp);
            callp->argTypes("vlSelf, vlSelf->__Vm_even_cycle");
            execGraphp->addStmtsp(callp);
            emitStmt("Verilated::mtaskId(0);\n");
            continue;
        }

        // Every earlier function is queued on a thread pool worker. The call is assembled
        // from text around an address-of-function node.
        emitText("vlSelf->__Vm_threadPoolp->workerp(" + cvtToStr(idx) + ")->addTask(");
        execGraphp->addStmtsp(new AstAddrOfCFunc(flp, threadFuncp));
        emitText(", vlSelf, vlSelf->__Vm_even_cycle);\n");
    }

    // Block until every thread function has signalled the "final" mtask state
    emitStmt("vlSelf->__Vm_mtaskstate_final.waitUntilUpstreamDone(vlSelf->__Vm_even_cycle);\n");
}
// Turn the dependency graph held by 'execGraphp' into thread entry point functions plus the
// statements that start them. Does nothing when the graph has no mtasks.
static void implementExecGraph(AstExecGraph* const execGraphp) {
    // An exec graph without any MTasks needs no implementation
    if (execGraphp->depGraphp()->empty()) return;

    // Pack the mtasks: statically assign each mtask to a thread, and fix the order in which
    // each thread runs its mtasks.
    const ThreadSchedule& threadSchedule
        = PartPackMTasks().pack(*execGraphp->mutableDepGraphp());

    // One function per thread. Note this moves the AstMTaskBody nodes from the AstExecGraph
    // into the AstCFuncs created.
    const std::vector<AstCFunc*>& threadFuncps = createThreadFunctions(threadSchedule);
    UASSERT(!threadFuncps.empty(), "Non-empty ExecGraph yields no threads?");

    // Kick off the thread functions at the point in the tree where this AstExecGraph sits
    addThreadStartToExecGraph(execGraphp, threadFuncps);
}
void V3Partition::finalize() { void V3Partition::finalize() {
// Called by Verilator top stage // Called by Verilator top stage
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp(); AstExecGraph* const execGraphp = v3Global.rootp()->execGraphp();
UASSERT(execGraphp, "Couldn't find AstExecGraph singleton."); UASSERT(execGraphp, "Couldn't find AstExecGraph singleton.");
// Back in V3Order, we partitioned mtasks using provisional cost // Back in V3Order, we partitioned mtasks using provisional cost
@ -2556,9 +2761,8 @@ void V3Partition::finalize() {
// ExecMTask. // ExecMTask.
finalizeCosts(execGraphp->mutableDepGraphp()); finalizeCosts(execGraphp->mutableDepGraphp());
// "Pack" the mtasks: statically associate each mtask with a thread, // Replace the graph body with it's multi-threaded implementation.
// and determine the order in which each thread will runs its mtasks. implementExecGraph(execGraphp);
PartPackMTasks(execGraphp->mutableDepGraphp()).go();
} }
void V3Partition::selfTest() { void V3Partition::selfTest() {

View File

@ -63,7 +63,6 @@ public:
static void finalize(); static void finalize();
private: private:
static void finalizeCosts(V3Graph* execMTaskGraphp);
static void setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp); static void setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp);
VL_DEBUG_FUNC; // Declare debug() VL_DEBUG_FUNC; // Declare debug()

View File

@ -54,17 +54,13 @@ public:
class ExecMTask final : public AbstractMTask { class ExecMTask final : public AbstractMTask {
private: private:
AstMTaskBody* m_bodyp; // Task body AstMTaskBody* const m_bodyp; // Task body
uint32_t m_id; // Unique id of this mtask. const uint32_t m_id; // Unique id of this mtask.
uint32_t m_priority = 0; // Predicted critical path from the start of uint32_t m_priority = 0; // Predicted critical path from the start of
// this mtask to the ends of the graph that are reachable from this // this mtask to the ends of the graph that are reachable from this
// mtask. In abstract time units. // mtask. In abstract time units.
uint32_t m_cost = 0; // Predicted runtime of this mtask, in the same uint32_t m_cost = 0; // Predicted runtime of this mtask, in the same
// abstract time units as priority(). // abstract time units as priority().
uint32_t m_thread = 0xffffffff; // Thread for static (pack_mtasks) scheduling,
// or 0xffffffff if not yet assigned.
const ExecMTask* m_packNextp = nullptr; // Next for static (pack_mtasks) scheduling
bool m_threadRoot = false; // Is root thread
VL_UNCOPYABLE(ExecMTask); VL_UNCOPYABLE(ExecMTask);
public: public:
@ -78,12 +74,6 @@ public:
void priority(uint32_t pri) { m_priority = pri; } void priority(uint32_t pri) { m_priority = pri; }
virtual uint32_t cost() const override { return m_cost; } virtual uint32_t cost() const override { return m_cost; }
void cost(uint32_t cost) { m_cost = cost; } void cost(uint32_t cost) { m_cost = cost; }
void thread(uint32_t thread) { m_thread = thread; }
uint32_t thread() const { return m_thread; }
void packNextp(const ExecMTask* nextp) { m_packNextp = nextp; }
const ExecMTask* packNextp() const { return m_packNextp; }
bool threadRoot() const { return m_threadRoot; }
void threadRoot(bool threadRoot) { m_threadRoot = threadRoot; }
string cFuncName() const { string cFuncName() const {
// If this MTask maps to a C function, this should be the name // If this MTask maps to a C function, this should be the name
return string("__Vmtask") + "__" + cvtToStr(m_id); return string("__Vmtask") + "__" + cvtToStr(m_id);
@ -92,9 +82,6 @@ public:
void dump(std::ostream& str) const { void dump(std::ostream& str) const {
str << name() << "." << cvtToHex(this); str << name() << "." << cvtToHex(this);
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]"; if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
if (thread() != 0xffffffff) str << " th=" << thread();
if (threadRoot()) str << " [ROOT]";
if (packNextp()) str << " nx=" << packNextp()->name();
} }
}; };
inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) { inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) {

View File

@ -490,6 +490,14 @@ static void process() {
V3CCtors::cctorsAll(); V3CCtors::cctorsAll();
} }
if (!v3Global.opt.xmlOnly() && v3Global.opt.mtasks()) {
// Finalize our MTask cost estimates and pack the mtasks into
// threads. Must happen pre-EmitC which relies on the packing
// order. Must happen post-V3LifePost which changes the relative
// costs of mtasks.
V3Partition::finalize();
}
// Output the text // Output the text
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && !v3Global.opt.dpiHdrOnly()) { if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && !v3Global.opt.dpiHdrOnly()) {
// Create AstCUse to determine what class forward declarations/#includes needed in C // Create AstCUse to determine what class forward declarations/#includes needed in C
@ -504,13 +512,6 @@ static void process() {
} else if (v3Global.opt.dpiHdrOnly()) { } else if (v3Global.opt.dpiHdrOnly()) {
V3EmitC::emitcSyms(true); V3EmitC::emitcSyms(true);
} }
if (!v3Global.opt.xmlOnly() && v3Global.opt.mtasks()) {
// Finalize our MTask cost estimates and pack the mtasks into
// threads. Must happen pre-EmitC which relies on the packing
// order. Must happen post-V3LifePost which changes the relative
// costs of mtasks.
V3Partition::finalize();
}
if (!v3Global.opt.xmlOnly() if (!v3Global.opt.xmlOnly()
&& !v3Global.opt.dpiHdrOnly()) { // Unfortunately we have some lint checks in emitc. && !v3Global.opt.dpiHdrOnly()) { // Unfortunately we have some lint checks in emitc.
V3EmitC::emitc(); V3EmitC::emitc();

View File

@ -33,7 +33,7 @@ if ($Self->{vlt_all}) {
$text =~ s/vlSelf->.* = VL_RAND_RESET.*;//g; $text =~ s/vlSelf->.* = VL_RAND_RESET.*;//g;
$text =~ s/vlSelf->__Vm_even_cycle//g; $text =~ s/vlSelf->__Vm_even_cycle//g;
$text =~ s/vlSelf->__Vm_even_cycle//g; $text =~ s/vlSelf->__Vm_even_cycle//g;
$text =~ s/vlSelf->__Vm_mt_(final|\d+)//g; $text =~ s/vlSelf->__Vm_mtaskstate_(final|\d+)//g;
$text =~ s/vlSelf->__Vm_threadPoolp//g; $text =~ s/vlSelf->__Vm_threadPoolp//g;
if ($text =~ m/this->/ || $text =~ m/vlSelf->/) { if ($text =~ m/this->/ || $text =~ m/vlSelf->/) {
error("$file has unexpected this-> refs when --norelative-cfuncs"); error("$file has unexpected this-> refs when --norelative-cfuncs");

View File

@ -19,9 +19,9 @@ compile(
); );
if ($Self->{vlt_all}) { if ($Self->{vlt_all}) {
file_grep("$out_filename", qr/\<var fl="e70" loc=".*?" name="u.u0.u0.z0" dtype_id="3" vartype="logic" origName="z0" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i); file_grep("$out_filename", qr/\<var fl="e70" loc=".*?" name="u.u0.u0.z0" dtype_id="\d+" vartype="logic" origName="z0" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i);
file_grep("$out_filename", qr/\<var fl="e85" loc=".*?" name="u.u0.u0.u0.u0.z1" dtype_id="3" vartype="logic" origName="z1" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i); file_grep("$out_filename", qr/\<var fl="e85" loc=".*?" name="u.u0.u0.u0.u0.z1" dtype_id="\d+" vartype="logic" origName="z1" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i);
file_grep("$out_filename", qr/\<var fl="e83" loc=".*?" name="u.u0.u1.u0.u0.z" dtype_id="3" vartype="logic" origName="z" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i); file_grep("$out_filename", qr/\<var fl="e83" loc=".*?" name="u.u0.u1.u0.u0.z" dtype_id="\d+" vartype="logic" origName="z" public="true" public_flat_rd="true" public_flat_rw="true"\/\>/i);
} }
execute( execute(