diff --git a/src/V3Ast.h b/src/V3Ast.h index a8628dee1..c72e62b3a 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -457,6 +457,7 @@ public: // Internal types for mid-steps SCOPEPTR, CHARPTR, + MTASKSTATE, // Unsigned and two state; fundamental types UINT32, UINT64, @@ -467,18 +468,19 @@ public: }; enum en m_e; const char* ascii() const { - static const char* const names[] = { - "%E-unk", "bit", "byte", "chandle", "event", "int", "integer", - "logic", "longint", "real", "shortint", "time", "string", "VerilatedScope*", - "char*", "IData", "QData", "LOGIC_IMPLICIT", " MAX"}; + static const char* const names[] + = {"%E-unk", "bit", "byte", "chandle", "event", + "int", "integer", "logic", "longint", "real", + "shortint", "time", "string", "VerilatedScope*", "char*", + "VlMTaskState", "IData", "QData", "LOGIC_IMPLICIT", " MAX"}; return names[m_e]; } const char* dpiType() const { static const char* const names[] - = {"%E-unk", "svBit", "char", "void*", "char", "int", - "%E-integer", "svLogic", "long long", "double", "short", "%E-time", - "const char*", "dpiScope", "const char*", "IData", "QData", "%E-logic-implicit", - " MAX"}; + = {"%E-unk", "svBit", "char", "void*", "char", + "int", "%E-integer", "svLogic", "long long", "double", + "short", "%E-time", "const char*", "dpiScope", "const char*", + "%E-mtaskstate", "IData", "QData", "%E-logic-implct", " MAX"}; return names[m_e]; } static void selfTest() { @@ -511,6 +513,7 @@ public: case STRING: return 64; // opaque // Just the pointer, for today case SCOPEPTR: return 0; // opaque case CHARPTR: return 0; // opaque + case MTASKSTATE: return 0; // opaque case UINT32: return 32; case UINT64: return 64; default: return 0; @@ -549,11 +552,13 @@ public: || m_e == DOUBLE || m_e == SHORTINT || m_e == UINT32 || m_e == UINT64); } bool isOpaque() const { // IE not a simple number we can bit optimize - return (m_e == STRING || m_e == SCOPEPTR || m_e == CHARPTR || m_e == DOUBLE); + return (m_e == STRING || m_e == SCOPEPTR || m_e == CHARPTR || m_e == MTASKSTATE + || m_e == DOUBLE); } bool isDouble() const { return m_e == DOUBLE; } bool isEventValue() const { return m_e == EVENTVALUE; } bool isString() const { return m_e == STRING; } + bool isMTaskState() const { return m_e == MTASKSTATE; } }; inline bool operator==(const AstBasicDTypeKwd& lhs, const AstBasicDTypeKwd& rhs) { return lhs.m_e == rhs.m_e; diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 4160df3e1..399ceb3b9 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -238,26 +238,11 @@ AstNodeBiop* AstEqWild::newTyped(FileLine* fl, AstNode* lhsp, AstNode* rhsp) { } AstExecGraph::AstExecGraph(FileLine* fileline) - : ASTGEN_SUPER_ExecGraph(fileline) { - m_depGraphp = new V3Graph; -} + : ASTGEN_SUPER_ExecGraph(fileline) + , m_depGraphp{new V3Graph} {} + AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); } -std::vector AstExecGraph::rootMTasks() { - // Build the list of initial mtasks to start - std::vector execMTasks; - - for (const V3GraphVertex* vxp = depGraphp()->verticesBeginp(); vxp; - vxp = vxp->verticesNextp()) { - const ExecMTask* etp = dynamic_cast(vxp); - if (etp->threadRoot()) execMTasks.push_back(etp); - } - UASSERT_OBJ(execMTasks.size() <= static_cast(v3Global.opt.threads()), this, - "More root mtasks than available threads"); - - return execMTasks; -} - AstNode* AstInsideRange::newAndFromInside(AstNode* exprp, AstNode* lhsp, AstNode* rhsp) { AstNode* ap = new AstGte(fileline(), exprp->cloneTree(true), lhsp); AstNode* bp = new AstLte(fileline(), 
exprp->cloneTree(true), rhsp); @@ -717,10 +702,12 @@ AstNodeDType::CTypeRecursed AstNodeDType::cTypeRecurse(bool compound) const { info.m_type = "const char*"; } else if (bdtypep->keyword() == AstBasicDTypeKwd::SCOPEPTR) { info.m_type = "const VerilatedScope*"; - } else if (bdtypep->keyword() == AstBasicDTypeKwd::DOUBLE) { + } else if (bdtypep->keyword().isDouble()) { info.m_type = "double"; - } else if (bdtypep->keyword() == AstBasicDTypeKwd::STRING) { + } else if (bdtypep->keyword().isString()) { info.m_type = "std::string"; + } else if (bdtypep->keyword().isMTaskState()) { + info.m_type = "VlMTaskVertex"; } else if (dtypep->widthMin() <= 8) { // Handle unpacked arrays; not bdtypep->width info.m_type = "CData" + bitvec; } else if (dtypep->widthMin() <= 16) { diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index 6416bb965..7f583f929 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -2070,7 +2070,8 @@ public: // (Slow) recurse down to find basic data type (Note don't need virtual - // AstVar isn't a NodeDType) AstBasicDType* basicp() const { return subDTypep()->basicp(); } - // op3 = Initial value that never changes (static const) + // op3 = Initial value that never changes (static const), or constructor argument for + // MTASKSTATE variables AstNode* valuep() const { return op3p(); } // It's valuep(), not constp(), as may be more complicated than an AstConst void valuep(AstNode* nodep) { setOp3p(nodep); } @@ -9045,7 +9046,8 @@ class AstExecGraph final : public AstNode { // them without traversing the graph (it's not always needed to // traverse the graph.) private: - V3Graph* m_depGraphp; // contains ExecMTask's + V3Graph* const m_depGraphp; // contains ExecMTask's + public: explicit AstExecGraph(FileLine* fl); ASTNODE_NODE_FUNCS_NO_DTOR(ExecGraph) @@ -9057,7 +9059,7 @@ public: const V3Graph* depGraphp() const { return m_depGraphp; } V3Graph* mutableDepGraphp() { return m_depGraphp; } void addMTaskBody(AstMTaskBody* bodyp) { addOp1p(bodyp); } - std::vector rootMTasks(); + void addStmtsp(AstNode* stmtp) { addOp2p(stmtp); } }; class AstSplitPlaceholder final : public AstNode { diff --git a/src/V3EmitC.cpp b/src/V3EmitC.cpp index 128ff512b..290608961 100644 --- a/src/V3EmitC.cpp +++ b/src/V3EmitC.cpp @@ -1429,18 +1429,6 @@ class EmitCLazyDecls final : public AstNVisitor { lazyDeclare(nodep->funcp()); } - virtual void visit(AstExecGraph* nodep) override { - if (nodep->user2SetOnce()) return; // Already declared - // Build the list of initial mtasks to start - for (const ExecMTask* mtp : nodep->rootMTasks()) { - m_emitter.puts("void "); - m_emitter.puts(EmitCBaseVisitor::topClassName() + "__" - + EmitCBaseVisitor::protect(mtp->cFuncName())); - m_emitter.puts("(void* voidSelf, bool even_cycle);\n"); - m_needsBlankLine = true; - } - } - virtual void visit(AstVarRef* nodep) override { AstVar* const varp = nodep->varp(); // Only constant pool symbols are lazy declared for now ... @@ -1574,93 +1562,6 @@ class EmitCImp final : EmitCStmts { return ofp; } - // Returns the number of cross-thread dependencies into mtaskp. - // If >0, mtaskp must test whether its prereqs are done before starting, - // and may need to block. 
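// [Illustrative sketch, not part of this patch] The "test whether its prereqs are done and maybe block" protocol referred to in the comment above is what the generated code's waitUntilUpstreamDone()/signalUpstreamDone() calls provide: a per-mtask counter constructed with the number of cross-thread dependencies. A minimal, simplified version of those semantics is sketched below; the real VlMTaskVertex in verilated_threads.h differs in implementation, and the class/member names here are hypothetical.
#include <condition_variable>
#include <mutex>

class MTaskStateSketch final {
    const unsigned m_upstreamCount;  // Number of cross-thread dependencies feeding this mtask
    unsigned m_doneCount = 0;  // Dependencies that have signalled in the current cycle
    bool m_evenCycle = false;  // Parity of the cycle the counts refer to
    std::mutex m_mutex;
    std::condition_variable m_cv;

public:
    explicit MTaskStateSketch(unsigned upstreamCount)
        : m_upstreamCount{upstreamCount} {}

    // Called by an upstream mtask running on another thread when it completes
    void signalUpstreamDone(bool evenCycle) {
        std::lock_guard<std::mutex> lock{m_mutex};
        if (m_evenCycle != evenCycle) {  // First signal of a new cycle resets the count
            m_evenCycle = evenCycle;
            m_doneCount = 0;
        }
        if (++m_doneCount == m_upstreamCount) m_cv.notify_all();
    }

    // Called by the dependent mtask before its body runs; blocks until all signals arrive
    void waitUntilUpstreamDone(bool evenCycle) {
        std::unique_lock<std::mutex> lock{m_mutex};
        m_cv.wait(lock,
                  [&] { return m_evenCycle == evenCycle && m_doneCount == m_upstreamCount; });
    }
};
// The even/odd cycle flag lets the same counter be reused across eval() calls without an
// explicit reset barrier: counts left over from the previous cycle carry the opposite parity.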
- static uint32_t packedMTaskMayBlock(const ExecMTask* mtaskp) { - uint32_t result = 0; - for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) { - const ExecMTask* prevp = dynamic_cast(edgep->fromp()); - if (prevp->thread() != mtaskp->thread()) ++result; - } - return result; - } - - void emitMTaskBody(AstMTaskBody* nodep) { - ExecMTask* curExecMTaskp = nodep->execMTaskp(); - if (packedMTaskMayBlock(curExecMTaskp)) { - puts("vlSelf->__Vm_mt_" + cvtToStr(curExecMTaskp->id()) - + ".waitUntilUpstreamDone(even_cycle);\n"); - } - - string recName; - if (v3Global.opt.profThreads()) { - recName = "__Vprfthr_" + cvtToStr(curExecMTaskp->id()); - puts("VlProfileRec* " + recName + " = nullptr;\n"); - // Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling - puts("if (VL_UNLIKELY(vlSelf->__Vm_profile_cycle_start)) {\n"); - puts(recName + " = vlSelf->__Vm_threadPoolp->profileAppend();\n"); - puts(recName + "->startRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start,"); - puts(" " + cvtToStr(curExecMTaskp->id()) + ","); - puts(" " + cvtToStr(curExecMTaskp->cost()) + ");\n"); - puts("}\n"); - } - puts("Verilated::mtaskId(" + cvtToStr(curExecMTaskp->id()) + ");\n"); - - // The actual body of calls to leaf functions - iterateAndNextNull(nodep->stmtsp()); - - if (v3Global.opt.profThreads()) { - // Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling - puts("if (VL_UNLIKELY(" + recName + ")) {\n"); - puts(recName + "->endRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start);\n"); - puts("}\n"); - } - - // Flush message queue - puts("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n"); - - // For any downstream mtask that's on another thread, bump its - // counter and maybe notify it. - for (V3GraphEdge* edgep = curExecMTaskp->outBeginp(); edgep; edgep = edgep->outNextp()) { - const ExecMTask* nextp = dynamic_cast(edgep->top()); - if (nextp->thread() != curExecMTaskp->thread()) { - puts("vlSelf->__Vm_mt_" + cvtToStr(nextp->id()) - + ".signalUpstreamDone(even_cycle);\n"); - } - } - - // Run the next mtask inline - const ExecMTask* nextp = curExecMTaskp->packNextp(); - if (nextp) { - emitMTaskBody(nextp->bodyp()); - } else { - // Unblock the fake "final" mtask - puts("vlSelf->__Vm_mt_final.signalUpstreamDone(even_cycle);\n"); - } - } - - virtual void visit(AstMTaskBody* nodep) override { - VL_RESTORER(m_useSelfForThis); - maybeSplit(); - splitSizeInc(10); - - puts("\n"); - for (const ExecMTask* mtp = nodep->execMTaskp(); mtp; mtp = mtp->packNextp()) { - m_lazyDecls.emit(mtp->bodyp()); - } - puts("void "); - puts(topClassName() + "__" + protect(nodep->execMTaskp()->cFuncName())); - puts("(void* voidSelf, bool even_cycle) {\n"); - puts(topClassName() + "* const vlSelf = static_cast<" + topClassName() - + "*>(voidSelf);\n"); - m_useSelfForThis = true; - puts(symClassAssign()); - emitMTaskBody(nodep); - ensureNewLine(); - puts("}\n"); - } - //--------------------------------------- // VISITORS using EmitCStmts::visit; // Suppress hidden overloaded virtual function warning @@ -1734,6 +1635,12 @@ class EmitCImp final : EmitCStmts { if (nodep->ifdef() != "") puts("#endif // " + nodep->ifdef() + "\n"); } + virtual void visit(AstMTaskBody* nodep) override { + VL_RESTORER(m_useSelfForThis); + m_useSelfForThis = true; + iterateChildrenConst(nodep); + } + void emitChangeDet() { putsDecoration("// Change detection\n"); puts("QData __req = false; // Logically a bool\n"); // But not because it results in @@ -1833,34 +1740,8 @@ class EmitCImp final : 
EmitCStmts { "ExecGraph should be a singleton!"); // The location of the AstExecGraph within the containing _eval() // function is where we want to invoke the graph and wait for it to - // complete. Do that now. - // - // Don't recurse to children -- this isn't the place to emit - // function definitions for the nested CFuncs. We'll do that at the - // end. - puts("vlSelf->__Vm_even_cycle = !vlSelf->__Vm_even_cycle;\n"); - - // Build the list of initial mtasks to start - std::vector execMTasks = nodep->rootMTasks(); - - if (!execMTasks.empty()) { - for (uint32_t i = 0; i < execMTasks.size(); ++i) { - const bool runInline = (i == execMTasks.size() - 1); - const string protName - = topClassName() + "__" + protect(execMTasks[i]->cFuncName()); - if (runInline) { - // The thread calling eval() will run this mtask inline, - // along with its packed successors. - puts(protName + "(vlSelf, vlSelf->__Vm_even_cycle);\n"); - puts("Verilated::mtaskId(0);\n"); - } else { - // The other N-1 go to the thread pool. - puts("vlSelf->__Vm_threadPoolp->workerp(" + cvtToStr(i) + ")->addTask(" - + protName + ", vlSelf, vlSelf->__Vm_even_cycle);\n"); - } - } - puts("vlSelf->__Vm_mt_final.waitUntilUpstreamDone(vlSelf->__Vm_even_cycle);\n"); - } + // complete. Emitting the children does just that. + iterateChildrenConst(nodep); } //--------------------------------------- @@ -2015,8 +1896,8 @@ class EmitCImp final : EmitCStmts { void emitSettleLoop(bool initial); void emitWrapEval(); void emitWrapFast(); - void emitMTaskState(); - void emitMTaskVertexCtors(bool* firstp); + void emitThreadingState(); + void emitThreadingCtors(bool* firstp); void emitIntTop(const AstNodeModule* modp); void emitInt(AstNodeModule* modp); void maybeSplit(); @@ -2085,6 +1966,7 @@ void EmitCStmts::emitVarDecl(const AstVar* nodep, const string& prefixIfImp) { if (nodep->isWide()) puts("," + cvtToStr(nodep->widthWords())); puts(");\n"); } else { + if (basicp && basicp->keyword().isMTaskState()) { m_ctorVarsVec.push_back(nodep); } // strings and other fundamental c types if (nodep->isFuncLocal() && nodep->isString()) { const string name = nodep->name(); @@ -2114,13 +1996,19 @@ void EmitCStmts::emitCtorSep(bool* firstp) { void EmitCStmts::emitVarCtors(bool* firstp) { if (!m_ctorVarsVec.empty()) { ofp()->indentInc(); - puts("\n"); + if (*firstp) puts("\n"); for (const AstVar* varp : m_ctorVarsVec) { - bool isArray = !VN_CAST(varp->dtypeSkipRefp(), BasicDType); - if (isArray) { + const AstBasicDType* const dtypep = VN_CAST(varp->dtypeSkipRefp(), BasicDType); + if (!dtypep) { puts("// Skipping array: "); puts(varp->nameProtect()); puts("\n"); + } else if (dtypep->keyword().isMTaskState()) { + emitCtorSep(firstp); + puts(varp->nameProtect()); + puts("("); + iterate(varp->valuep()); + puts(")"); } else { emitCtorSep(firstp); puts(varp->nameProtect()); @@ -2555,36 +2443,17 @@ void EmitCImp::emitCoverageDecl(AstNodeModule*) { } } -void EmitCImp::emitMTaskVertexCtors(bool* firstp) { - AstExecGraph* execGraphp = v3Global.rootp()->execGraphp(); - UASSERT_OBJ(execGraphp, v3Global.rootp(), "Root should have an execGraphp"); - const V3Graph* depGraphp = execGraphp->depGraphp(); - - unsigned finalEdgesInCt = 0; - for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - const ExecMTask* mtp = dynamic_cast(vxp); - unsigned edgesInCt = packedMTaskMayBlock(mtp); - if (packedMTaskMayBlock(mtp) > 0) { - emitCtorSep(firstp); - puts("__Vm_mt_" + cvtToStr(mtp->id()) + "(" + cvtToStr(edgesInCt) + ")"); - } - // Each mtask 
with no packed successor will become a dependency - // for the final node: - if (!mtp->packNextp()) ++finalEdgesInCt; - } - - emitCtorSep(firstp); - puts("__Vm_mt_final(" + cvtToStr(finalEdgesInCt) + ")"); - - // This will flip to 'true' before the start of the 0th cycle. +void EmitCImp::emitThreadingCtors(bool* firstp) { + ofp()->indentInc(); emitCtorSep(firstp); puts("__Vm_threadPoolp(nullptr)"); + emitCtorSep(firstp); + puts("__Vm_even_cycle(false)"); if (v3Global.opt.profThreads()) { emitCtorSep(firstp); puts("__Vm_profile_cycle_start(0)"); } - emitCtorSep(firstp); - puts("__Vm_even_cycle(false)"); + ofp()->indentDec(); } void EmitCImp::emitCtorImp(AstNodeModule* modp) { @@ -2612,9 +2481,10 @@ void EmitCImp::emitCtorImp(AstNodeModule* modp) { } else { puts(modName + "::" + modName + "(const char* _vcname__)\n"); puts(" : VerilatedModule(_vcname__)\n"); + first = false; // printed the first ':' } emitVarCtors(&first); - if (modp->isTop() && v3Global.opt.mtasks()) emitMTaskVertexCtors(&first); + if (modp->isTop() && v3Global.opt.mtasks()) emitThreadingCtors(&first); puts(" {\n"); emitCellCtors(modp); @@ -2804,9 +2674,11 @@ void EmitCImp::emitSavableImp(AstNodeModule* modp) { puts("; ++" + ivar + ") {\n"); elementp = arrayp->subDTypep()->skipRefp(); } + const AstBasicDType* const basicp = elementp->basicp(); + // Do not save MTask state, only matters within an evaluation + if (basicp && basicp->keyword().isMTaskState()) continue; // Want to detect types that are represented as arrays // (i.e. packed types of more than 64 bits). - AstBasicDType* basicp = elementp->basicp(); if (elementp->isWide() && !(basicp && basicp->keyword() == AstBasicDTypeKwd::STRING)) { int vecnum = vects++; @@ -3249,26 +3121,13 @@ void EmitCStmts::emitSortedVarList(const VarVec& anons, const VarVec& nonanons, } } -void EmitCImp::emitMTaskState() { +void EmitCImp::emitThreadingState() { ofp()->putsPrivate(false); // Accessed from loose function AstExecGraph* execGraphp = v3Global.rootp()->execGraphp(); UASSERT_OBJ(execGraphp, v3Global.rootp(), "Root should have an execGraphp"); - const V3Graph* depGraphp = execGraphp->depGraphp(); - for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - const ExecMTask* mtp = dynamic_cast(vxp); - if (packedMTaskMayBlock(mtp) > 0) { - puts("VlMTaskVertex __Vm_mt_" + cvtToStr(mtp->id()) + ";\n"); - } - } - // This fake mtask depends on all the real ones. We use it to block - // eval() until all mtasks are done. - // - // In the future we might allow _eval() to return before the graph is - // fully done executing, for "half wave" scheduling. For now we wait - // for all mtasks though. 
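// [Illustrative sketch, not part of this patch] With the emitVarCtors()/emitThreadingCtors()
// changes above, the MTASKSTATE variables are constructed like any other member, using their
// valuep() as the constructor argument, so the generated constructor ends up shaped roughly
// like this ("Vtop" and the specific names/counts are hypothetical; --prof-threads adds a few
// more members):
//     Vtop::Vtop(const char* _vcname__)
//         : VerilatedModule(_vcname__)
//         , __Vm_mtaskstate_3(2)      // VlMTaskVertex with 2 cross-thread dependencies
//         , __Vm_mtaskstate_final(4)  // One "upstream" per generated thread function
//         , __Vm_threadPoolp(nullptr)
//         , __Vm_even_cycle(false) {
//         // ... body emitted by emitCtorImp() as before ...
//     }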
- puts("VlMTaskVertex __Vm_mt_final;\n"); puts("VlThreadPool* __Vm_threadPoolp;\n"); + puts("bool __Vm_even_cycle;\n"); if (v3Global.opt.profThreads()) { // rdtsc() at current cycle start @@ -3278,8 +3137,6 @@ void EmitCImp::emitMTaskState() { // Track our position in the cache warmup and actual profile window puts("vluint32_t __Vm_profile_window_ct;\n"); } - - puts("bool __Vm_even_cycle;\n"); } void EmitCImp::emitIntTop(const AstNodeModule* modp) { @@ -3380,7 +3237,7 @@ void EmitCImp::emitInt(AstNodeModule* modp) { if (v3Global.opt.inhibitSim()) { puts("bool __Vm_inhibitSim; ///< Set true to disable evaluation of module\n"); } - if (v3Global.opt.mtasks()) emitMTaskState(); + if (v3Global.opt.mtasks()) emitThreadingState(); } emitCoverageDecl(modp); // may flip public/private @@ -3634,23 +3491,6 @@ void EmitCImp::mainImp(AstNodeModule* modp, bool slow) { m_modp = modp; } - if (m_fast && modp->isTop() && v3Global.opt.mtasks()) { - // Make a final pass and emit function definitions for the mtasks - // in the ExecGraph - AstExecGraph* execGraphp = v3Global.rootp()->execGraphp(); - const V3Graph* depGraphp = execGraphp->depGraphp(); - for (const V3GraphVertex* vxp = depGraphp->verticesBeginp(); vxp; - vxp = vxp->verticesNextp()) { - const ExecMTask* mtaskp = dynamic_cast(vxp); - if (mtaskp->threadRoot()) { - // Only define one function for all the mtasks packed on - // a given thread. We'll name this function after the - // root mtask though it contains multiple mtasks' worth - // of logic. - iterate(mtaskp->bodyp()); - } - } - } VL_DO_CLEAR(delete m_ofp, m_ofp = nullptr); } diff --git a/src/V3EmitCBase.h b/src/V3EmitCBase.h index b125e180c..005956543 100644 --- a/src/V3EmitCBase.h +++ b/src/V3EmitCBase.h @@ -53,6 +53,10 @@ public: return VIdProtect::protectWordsIf(name, doIt); } static string ifNoProtect(const string& in) { return v3Global.opt.protectIds() ? "" : in; } + static string voidSelfAssign() { + return topClassName() + "* const __restrict vlSelf VL_ATTR_UNUSED = static_cast<" + + topClassName() + "*>(voidSelf);\n"; + } static string symClassName() { return v3Global.opt.prefix() + "_" + protect("_Syms"); } static string symClassVar() { return symClassName() + "* __restrict vlSymsp"; } static string symClassAssign() { diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 3d443f4ca..d00870844 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -17,6 +17,7 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3EmitCBase.h" #include "V3Os.h" #include "V3File.h" #include "V3GraphAlg.h" @@ -1997,6 +1998,48 @@ private: VL_DEBUG_FUNC; }; +//###################################################################### +// ThreadSchedule + +class PartPackMTasks; + +// The thread schedule, containing all information needed later. Note that this is a simple +// aggregate data type and the only way to get hold of an instance of it is via +// PartPackMTasks::pack, which is moved from there and is const, which means we can only acquire a +// const reference to is so no further modifications are allowed, so all members are public +// (attributes). +class ThreadSchedule final { +public: + // Allocation of sequence of MTasks to threads. Can be considered a map from thread ID to + // the sequence of MTasks to be executed by that thread. + std::vector> threads; + + // Map from MTask to ID of thread it is assigned to. 
+ std::unordered_map<const ExecMTask*, uint32_t> threadId; + +private: + friend class PartPackMTasks; + + explicit ThreadSchedule(uint32_t nThreads) + : threads{nThreads} {} + VL_UNCOPYABLE(ThreadSchedule); // But movable + ThreadSchedule(ThreadSchedule&&) = default; + ThreadSchedule& operator=(ThreadSchedule&&) = default; + +public: + // Returns the number of cross-thread dependencies of the given MTask. If > 0, the MTask must + // test whether its dependencies are ready before starting, and therefore may need to block. + uint32_t crossThreadDependencies(const ExecMTask* mtaskp) const { + const uint32_t thisThreadId = threadId.at(mtaskp); + uint32_t result = 0; + for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; edgep = edgep->inNextp()) { + const ExecMTask* const prevp = dynamic_cast<const ExecMTask*>(edgep->fromp()); + if (threadId.at(prevp) != thisThreadId) ++result; + } + return result; + } +}; + //###################################################################### // PartPackMTasks @@ -2016,49 +2059,45 @@ private: // thread A checks the end time of an mtask running on thread B. This extra // "padding" avoids tight "layovers" at cross-thread dependencies. class PartPackMTasks final { -private: + // CONSTANTS + static constexpr uint32_t UNASSIGNED = 0xffffffff; + // TYPES struct MTaskState { - uint32_t completionTime; // Estimated time this mtask will complete + uint32_t completionTime = 0; // Estimated time this mtask will complete + uint32_t threadId = UNASSIGNED; // Thread id this MTask is assigned to + const ExecMTask* nextp = nullptr; // Next MTask on same thread after this }; + struct MTaskCmp { - bool operator()(const ExecMTask* ap, ExecMTask* bp) const { return ap->id() < bp->id(); } + bool operator()(const ExecMTask* ap, const ExecMTask* bp) const { + return ap->id() < bp->id(); + } }; // MEMBERS - V3Graph* m_mtasksp; // Mtask graph - uint32_t m_nThreads; // Number of threads - uint32_t m_sandbagNumerator; // Numerator padding for est runtime - uint32_t m_sandbagDenom; // Denomerator padding for est runtime + const uint32_t m_nThreads; // Number of threads + const uint32_t m_sandbagNumerator; // Numerator padding for est runtime + const uint32_t m_sandbagDenom; // Denominator padding for est runtime - using MTaskStateMap = std::unordered_map<const ExecMTask*, MTaskState>; - MTaskStateMap m_mtaskState; // State for each mtask. - - MTaskCmp m_mtaskCmp; // Comparison functor - using ReadyMTasks = std::set<ExecMTask*, MTaskCmp>; - ReadyMTasks m_ready; // MTasks ready to be assigned next; all their - // // dependencies are already assigned. - - std::vector<ExecMTask*> m_prevMTask; // Previous mtask scheduled to each thread. - std::vector<uint32_t> m_busyUntil; // Time each thread is occupied until + std::unordered_map<const ExecMTask*, MTaskState> m_mtaskState; // State for each mtask.
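// [Illustrative sketch, not part of this patch] Worked example of the sandbagging the members
// above parameterize, using the numbers from the selfTest below (cost 1000, numerator 3,
// denominator 10), as computed by completionTime():
#include <cassert>
#include <cstdint>

inline void sandbagExampleSketch() {
    const uint32_t cost = 1000;  // Estimated cost of mtask t0
    const uint32_t endOnOwnThread = 1000;  // t0 starts at time 0, so it ends at its cost
    const uint32_t sandbagNumerator = 3;
    const uint32_t sandbagDenom = 10;
    // Other threads see the completion time padded by numerator/denom of the cost:
    const uint32_t endSeenCrossThread
        = endOnOwnThread + (sandbagNumerator * cost) / sandbagDenom;
    assert(endSeenCrossThread == 1300);
    // If a successor packed on the same thread is already known to end at 1100, the padded
    // value is clamped to just below it (1099) so the cross-thread view stays consistently
    // ordered with the owning thread's view.
}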
public: // CONSTRUCTORS - explicit PartPackMTasks(V3Graph* mtasksp, uint32_t nThreads = v3Global.opt.threads(), + explicit PartPackMTasks(uint32_t nThreads = v3Global.opt.threads(), unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100) - : m_mtasksp{mtasksp} - , m_nThreads{nThreads} + : m_nThreads{nThreads} , m_sandbagNumerator{sandbagNumerator} - , m_sandbagDenom{sandbagDenom} - , m_ready{m_mtaskCmp} {} + , m_sandbagDenom{sandbagDenom} {} ~PartPackMTasks() = default; +private: // METHODS - uint32_t completionTime(const ExecMTask* mtaskp, uint32_t thread) { + uint32_t completionTime(const ExecMTask* mtaskp, uint32_t threadId) { const MTaskState& state = m_mtaskState[mtaskp]; - UASSERT(mtaskp->thread() != 0xffffffff, "Mtask should have assigned thread"); - if (thread == mtaskp->thread()) { - // No overhead on native thread + UASSERT(state.threadId != UNASSIGNED, "Mtask should have assigned thread"); + if (threadId == state.threadId) { + // No overhead on same thread return state.completionTime; } @@ -2068,157 +2107,162 @@ public: = state.completionTime + (m_sandbagNumerator * mtaskp->cost()) / m_sandbagDenom; // If task B is packed after task A on thread 0, don't let thread 1 - think that A finishes later than thread 0 thinks that B + think that A finishes earlier than thread 0 thinks that B finishes, otherwise we get priority inversions and fail the self // test. - if (mtaskp->packNextp()) { - uint32_t successorEndTime = completionTime(mtaskp->packNextp(), mtaskp->thread()); + if (state.nextp) { + const uint32_t successorEndTime = completionTime(state.nextp, state.threadId); if ((sandbaggedEndTime >= successorEndTime) && (successorEndTime > 1)) { sandbaggedEndTime = successorEndTime - 1; } } - UINFO(6, "Sandbagged end time for " << mtaskp->name() << " on th " << thread << " = " + UINFO(6, "Sandbagged end time for " << mtaskp->name() << " on th " << threadId << " = " << sandbaggedEndTime << endl); return sandbaggedEndTime; } - void setCompletionTime(ExecMTask* mtaskp, uint32_t time) { - MTaskState& state = m_mtaskState[mtaskp]; - state.completionTime = time; + bool isReady(const ExecMTask* mtaskp) { + for (V3GraphEdge* edgeInp = mtaskp->inBeginp(); edgeInp; edgeInp = edgeInp->inNextp()) { + const ExecMTask* const prevp = dynamic_cast<const ExecMTask*>(edgeInp->fromp()); + if (m_mtaskState[prevp].threadId == UNASSIGNED) { + // This predecessor is not assigned yet + return false; + } + } + return true; } - void go() { +public: + // Pack the MTasks from the given graph into m_nThreads threads, return the schedule. + const ThreadSchedule pack(const V3Graph& mtaskGraph) { + // The result + ThreadSchedule schedule(m_nThreads); + + // Time each thread is occupied until + std::vector<uint32_t> busyUntil(m_nThreads, 0); + + // MTasks ready to be assigned next. All their dependencies are already assigned.
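// [Illustration, not part of this patch] The loop that follows is a standard greedy list
// scheduler; in outline (simplified, leaving out the priority tie-break and the sandbagging
// detailed above):
//     ready = all MTasks whose predecessors are all assigned (initially: no predecessors)
//     while (!ready.empty()) {
//         pick the (mtask, thread) pair minimizing
//             start = max(busyUntil[thread], completion of mtask's predecessors as seen
//                         from that thread);
//         append mtask to schedule.threads[thread];
//         busyUntil[thread] = start + mtask cost;
//         move newly-ready successors of mtask into ready;
//     }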
+ std::set<const ExecMTask*, MTaskCmp> readyMTasks; + // Build initial ready list - for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - ExecMTask* mtaskp = dynamic_cast<ExecMTask*>(vxp); - if (vxp->inEmpty()) m_ready.insert(mtaskp); + for (V3GraphVertex* vxp = mtaskGraph.verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { + const ExecMTask* const mtaskp = dynamic_cast<const ExecMTask*>(vxp); + if (isReady(mtaskp)) readyMTasks.insert(mtaskp); } - m_prevMTask.clear(); - m_prevMTask.resize(m_nThreads); - m_busyUntil.clear(); - m_busyUntil.resize(m_nThreads); + // Clear algorithm state + m_mtaskState.clear(); - while (!m_ready.empty()) { + while (!readyMTasks.empty()) { // For each task in the ready set, compute when it might start // on each thread (in that thread's local time frame.) uint32_t bestTime = 0xffffffff; - uint32_t bestTh = 0; - ExecMTask* bestMtaskp = nullptr; + uint32_t bestThreadId = 0; + const ExecMTask* bestMtaskp = nullptr; - for (uint32_t th = 0; th < m_nThreads; ++th) { - for (ReadyMTasks::iterator taskIt = m_ready.begin(); taskIt != m_ready.end(); - ++taskIt) { - uint32_t timeBegin = m_busyUntil[th]; + for (uint32_t threadId = 0; threadId < m_nThreads; ++threadId) { + for (const ExecMTask* const mtaskp : readyMTasks) { + uint32_t timeBegin = busyUntil[threadId]; if (timeBegin > bestTime) { - UINFO(6, "th " << th << " busy until " << timeBegin + UINFO(6, "th " << threadId << " busy until " << timeBegin << ", later than bestTime " << bestTime << ", skipping thread.\n"); break; } - ExecMTask* taskp = *taskIt; - for (V3GraphEdge* edgep = taskp->inBeginp(); edgep; edgep = edgep->inNextp()) { - ExecMTask* priorp = dynamic_cast<ExecMTask*>(edgep->fromp()); - uint32_t priorEndTime = completionTime(priorp, th); + for (V3GraphEdge* edgep = mtaskp->inBeginp(); edgep; + edgep = edgep->inNextp()) { + const ExecMTask* const priorp = dynamic_cast<const ExecMTask*>(edgep->fromp()); + const uint32_t priorEndTime = completionTime(priorp, threadId); if (priorEndTime > timeBegin) timeBegin = priorEndTime; } - UINFO(6, "Task " << taskp->name() << " start at " << timeBegin << " on thread " - << th << endl); + UINFO(6, "Task " << mtaskp->name() << " start at " << timeBegin + << " on thread " << threadId << endl); if ((timeBegin < bestTime) || ((timeBegin == bestTime) && bestMtaskp // Redundant, but appeases static analysis tools - && (taskp->priority() > bestMtaskp->priority()))) { + && (mtaskp->priority() > bestMtaskp->priority()))) { bestTime = timeBegin; - bestTh = th; - bestMtaskp = taskp; + bestThreadId = threadId; + bestMtaskp = mtaskp; } } } - if (!bestMtaskp) v3fatalSrc("Should have found some task"); - UINFO(6, "Will schedule " << bestMtaskp->name() << " onto thread " << bestTh << endl); - uint32_t bestEndTime = bestTime + bestMtaskp->cost(); - setCompletionTime(bestMtaskp, bestEndTime); + UASSERT(bestMtaskp, "Should have found some task"); + UINFO(6, "Will schedule " << bestMtaskp->name() << " onto thread " << bestThreadId + << endl); + + // Reference to thread in schedule we are assigning this MTask to.
+ std::vector<const ExecMTask*>& bestThread = schedule.threads[bestThreadId]; + + // Update algorithm state + const uint32_t bestEndTime = bestTime + bestMtaskp->cost(); + m_mtaskState[bestMtaskp].completionTime = bestEndTime; + m_mtaskState[bestMtaskp].threadId = bestThreadId; + if (!bestThread.empty()) { m_mtaskState[bestThread.back()].nextp = bestMtaskp; } + busyUntil[bestThreadId] = bestEndTime; + + // Add the MTask to the schedule + bestThread.push_back(bestMtaskp); + schedule.threadId[bestMtaskp] = bestThreadId; // Update the ready list - size_t erased = m_ready.erase(bestMtaskp); + const size_t erased = readyMTasks.erase(bestMtaskp); UASSERT_OBJ(erased > 0, bestMtaskp, "Should have erased something?"); for (V3GraphEdge* edgeOutp = bestMtaskp->outBeginp(); edgeOutp; edgeOutp = edgeOutp->outNextp()) { - ExecMTask* nextp = dynamic_cast<ExecMTask*>(edgeOutp->top()); - - UASSERT(nextp->thread() == 0xffffffff, + const ExecMTask* const nextp = dynamic_cast<const ExecMTask*>(edgeOutp->top()); + // Dependent MTask should not yet be assigned to a thread + UASSERT(m_mtaskState[nextp].threadId == UNASSIGNED, "Tasks after one being assigned should not be assigned yet"); - // They also should not be ready yet, since they only now - // may become ready - UASSERT_OBJ(m_ready.find(nextp) == m_ready.end(), nextp, + // Dependent MTask should not be ready yet, since dependency is just being assigned + UASSERT_OBJ(readyMTasks.find(nextp) == readyMTasks.end(), nextp, "Tasks after one being assigned should not be ready"); - bool isReady = true; - for (V3GraphEdge* edgeInp = nextp->inBeginp(); edgeInp; - edgeInp = edgeInp->inNextp()) { - ExecMTask* priorp = dynamic_cast<ExecMTask*>(edgeInp->fromp()); - if (priorp == bestMtaskp) continue; - if (priorp->thread() == 0xffffffff) { - // This prior is not assigned yet - isReady = false; - } - } - if (isReady) { - m_ready.insert(nextp); + if (isReady(nextp)) { + readyMTasks.insert(nextp); UINFO(6, "Inserted " << nextp->name() << " into ready\n"); } } - - // Update the ExecMTask itself - if (m_prevMTask[bestTh]) { - m_prevMTask[bestTh]->packNextp(bestMtaskp); - UINFO(6, "Packing " << bestMtaskp->name() << " after " - << m_prevMTask[bestTh]->name() << endl); - } else { - UINFO(6, "Marking " << bestMtaskp->name() << " as thread root\n"); - bestMtaskp->threadRoot(true); - } - bestMtaskp->thread(bestTh); - - // Update the thread state - m_prevMTask[bestTh] = bestMtaskp; - m_busyUntil[bestTh] = bestEndTime; } + + return schedule; } // SELF TEST static void selfTest() { V3Graph graph; - ExecMTask* t0 = new ExecMTask(&graph, nullptr, 0); + ExecMTask* const t0 = new ExecMTask(&graph, nullptr, 0); t0->cost(1000); t0->priority(1100); - ExecMTask* t1 = new ExecMTask(&graph, nullptr, 1); + ExecMTask* const t1 = new ExecMTask(&graph, nullptr, 1); t1->cost(100); t1->priority(100); - ExecMTask* t2 = new ExecMTask(&graph, nullptr, 2); + ExecMTask* const t2 = new ExecMTask(&graph, nullptr, 2); t2->cost(100); t2->priority(100); new V3GraphEdge(&graph, t0, t1, 1); new V3GraphEdge(&graph, t0, t2, 1); - PartPackMTasks packer(&graph, - 2, // Threads + PartPackMTasks packer(2, // Threads 3, // Sandbag numerator 10); // Sandbag denom - packer.go(); + const ThreadSchedule& schedule = packer.pack(graph); - UASSERT_SELFTEST(bool, t0->threadRoot(), true); - UASSERT_SELFTEST(uint32_t, t0->thread(), 0); - UASSERT_SELFTEST(const void*, t0->packNextp(), t1); + UASSERT_SELFTEST(size_t, schedule.threads.size(), 2); - UASSERT_SELFTEST(uint32_t, t1->thread(), 0); - UASSERT_SELFTEST(bool, t1->threadRoot(), false); - UASSERT_SELFTEST(const void*,
t1->packNextp(), nullptr); + UASSERT_SELFTEST(size_t, schedule.threads[0].size(), 2); + UASSERT_SELFTEST(size_t, schedule.threads[1].size(), 1); - UASSERT_SELFTEST(uint32_t, t2->thread(), 1); - UASSERT_SELFTEST(bool, t2->threadRoot(), true); - UASSERT_SELFTEST(const void*, t2->packNextp(), nullptr); + UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][0], t0); + UASSERT_SELFTEST(const ExecMTask*, schedule.threads[0][1], t1); + UASSERT_SELFTEST(const ExecMTask*, schedule.threads[1][0], t2); + + UASSERT_SELFTEST(size_t, schedule.threadId.size(), 3); + + UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t0), 0); + UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t1), 0); + UASSERT_SELFTEST(uint32_t, schedule.threadId.at(t2), 1); // On its native thread, we see the actual end time for t0: UASSERT_SELFTEST(uint32_t, packer.completionTime(t0, 0), 1000); @@ -2478,7 +2522,7 @@ void V3Partition::go(V3Graph* mtasksp) { } } -void V3Partition::finalizeCosts(V3Graph* execMTaskGraphp) { +static void finalizeCosts(V3Graph* execMTaskGraphp) { GraphStreamUnordered ser(execMTaskGraphp, GraphWay::REVERSE); while (const V3GraphVertex* vxp = ser.nextp()) { @@ -2544,9 +2588,170 @@ void V3Partition::finalizeCosts(V3Graph* execMTaskGraphp) { } } +static void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, + AstCFunc* funcp, const ExecMTask* mtaskp) { + AstNodeModule* const modp = v3Global.rootp()->topModulep(); + FileLine* const fl = modp->fileline(); + + // Helper function to make the code a bit more legible + const auto addStrStmt = [=](const string& stmt) -> void { // + funcp->addStmtsp(new AstCStmt(fl, stmt)); + }; + + if (const uint32_t nDependencies = schedule.crossThreadDependencies(mtaskp)) { + // This mtask has dependencies executed on another thread, so it may block. Create the task + // state variable and wait to be notified. 
+ const string name = "__Vm_mtaskstate_" + cvtToStr(mtaskp->id()); + AstBasicDType* const mtaskStateDtypep + = v3Global.rootp()->typeTablep()->findBasicDType(fl, AstBasicDTypeKwd::MTASKSTATE); + AstVar* const varp = new AstVar(fl, AstVarType::MODULETEMP, name, mtaskStateDtypep); + varp->valuep(new AstConst(fl, nDependencies)); + varp->protect(false); // Do not protect as we still have references in AstText + modp->addStmtp(varp); + // For now, reference is still via text bashing + addStrStmt("vlSelf->" + name + +".waitUntilUpstreamDone(even_cycle);\n"); + } + + string recName; + if (v3Global.opt.profThreads()) { + recName = "__Vprfthr_" + cvtToStr(mtaskp->id()); + addStrStmt("VlProfileRec* " + recName + " = nullptr;\n"); + // Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling + addStrStmt("if (VL_UNLIKELY(vlSelf->__Vm_profile_cycle_start)) {\n" + // + recName + " = vlSelf->__Vm_threadPoolp->profileAppend();\n" + // + recName + "->startRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start," + // + " " + cvtToStr(mtaskp->id()) + "," + // + " " + cvtToStr(mtaskp->cost()) + ");\n" + // + "}\n"); + } + + // + addStrStmt("Verilated::mtaskId(" + cvtToStr(mtaskp->id()) + ");\n"); + + // Move the the actual body of calls to leaf functions into this function + funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack()); + + if (v3Global.opt.profThreads()) { + // Leave this if() here, as don't want to call VL_RDTSC_Q unless profiling + addStrStmt("if (VL_UNLIKELY(" + recName + ")) {\n" + // + recName + "->endRecord(VL_RDTSC_Q() - vlSelf->__Vm_profile_cycle_start);\n" + + "}\n"); + } + + // Flush message queue + addStrStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n"); + + // For any dependent mtask that's on another thread, signal one dependency completion. 
+ for (V3GraphEdge* edgep = mtaskp->outBeginp(); edgep; edgep = edgep->outNextp()) { + const ExecMTask* const nextp = dynamic_cast<const ExecMTask*>(edgep->top()); + if (schedule.threadId.at(nextp) != threadId) { + addStrStmt("vlSelf->__Vm_mtaskstate_" + cvtToStr(nextp->id()) + + ".signalUpstreamDone(even_cycle);\n"); + } + } +} + +static const std::vector<AstCFunc*> createThreadFunctions(const ThreadSchedule& schedule) { + AstNodeModule* const modp = v3Global.rootp()->topModulep(); + FileLine* const fl = modp->fileline(); + + std::vector<AstCFunc*> funcps; + + // For each thread, create a function representing its entry point + for (const std::vector<const ExecMTask*>& thread : schedule.threads) { + if (thread.empty()) continue; + const uint32_t threadId = schedule.threadId.at(thread.front()); + string name = "__Vthread_"; + name += cvtToStr(threadId); + AstCFunc* const funcp = new AstCFunc(fl, name, nullptr, "void"); + modp->addStmtp(funcp); + funcps.push_back(funcp); + funcp->isStatic(true); // Uses void self pointer, so static and hand rolled + funcp->isLoose(true); + funcp->entryPoint(true); + funcp->argTypes("void* voidSelf, bool even_cycle"); + + // Set up vlSelf and vlSyms + funcp->addStmtsp(new AstCStmt(fl, EmitCBaseVisitor::voidSelfAssign())); + funcp->addStmtsp(new AstCStmt(fl, EmitCBaseVisitor::symClassAssign())); + + // Invoke each mtask scheduled to this thread from the thread function + for (const ExecMTask* const mtaskp : thread) { + addMTaskToFunction(schedule, threadId, funcp, mtaskp); + } + + // Unblock the fake "final" mtask when this thread is finished + funcp->addStmtsp( + new AstCStmt(fl, "vlSelf->__Vm_mtaskstate_final.signalUpstreamDone(even_cycle);\n")); + } + + // Create the fake "final" mtask state variable + AstBasicDType* const mtaskStateDtypep + = v3Global.rootp()->typeTablep()->findBasicDType(fl, AstBasicDTypeKwd::MTASKSTATE); + AstVar* const varp + = new AstVar(fl, AstVarType::MODULETEMP, "__Vm_mtaskstate_final", mtaskStateDtypep); + varp->valuep(new AstConst(fl, funcps.size())); + varp->protect(false); // Do not protect as we still have references in AstText + modp->addStmtp(varp); + + return funcps; +} + +static void addThreadStartToExecGraph(AstExecGraph* const execGraphp, + const std::vector<AstCFunc*>& funcps) { + // FileLine used for constructing nodes below + FileLine* const fl = v3Global.rootp()->fileline(); + + // Add thread function invocations to execGraph + const auto addStrStmt = [=](const string& stmt) -> void { // + execGraphp->addStmtsp(new AstCStmt(fl, stmt)); + }; + const auto addTextStmt = [=](const string& text) -> void { + execGraphp->addStmtsp(new AstText(fl, text, /* tracking: */ true)); + }; + + addStrStmt("vlSelf->__Vm_even_cycle = !vlSelf->__Vm_even_cycle;\n"); + + const uint32_t last = funcps.size() - 1; + for (uint32_t i = 0; i <= last; ++i) { + AstCFunc* const funcp = funcps.at(i); + if (i != last) { + // The first N-1 will run on the thread pool. + addTextStmt("vlSelf->__Vm_threadPoolp->workerp(" + cvtToStr(i) + ")->addTask("); + execGraphp->addStmtsp(new AstAddrOfCFunc(fl, funcp)); + addTextStmt(", vlSelf, vlSelf->__Vm_even_cycle);\n"); + } else { + // The last will run on the main thread.
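// [Illustration, not part of this patch] The statements built here replace the launch code
// that EmitCImp used to hand-emit. For two threads, the AstExecGraph region of _eval() ends
// up roughly as follows (the "Vtop" prefix and exact spelling are hypothetical):
//     vlSelf->__Vm_even_cycle = !vlSelf->__Vm_even_cycle;
//     vlSelf->__Vm_threadPoolp->workerp(0)->addTask(&Vtop__Vthread_0, vlSelf,
//                                                   vlSelf->__Vm_even_cycle);
//     Vtop__Vthread_1(vlSelf, vlSelf->__Vm_even_cycle);  // Last thread runs inline
//     Verilated::mtaskId(0);
//     vlSelf->__Vm_mtaskstate_final.waitUntilUpstreamDone(vlSelf->__Vm_even_cycle);
// Each __Vthread_N function signals __Vm_mtaskstate_final when it finishes, so _eval() blocks
// on the last line until every thread has completed the current cycle.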
+ AstCCall* const callp = new AstCCall(fl, funcp); + callp->argTypes("vlSelf, vlSelf->__Vm_even_cycle"); + execGraphp->addStmtsp(callp); + addStrStmt("Verilated::mtaskId(0);\n"); + } + } + + addStrStmt("vlSelf->__Vm_mtaskstate_final.waitUntilUpstreamDone(vlSelf->__Vm_even_cycle);\n"); +} + +static void implementExecGraph(AstExecGraph* const execGraphp) { + // Nothing to be done if there are no MTasks in the graph at all. + if (execGraphp->depGraphp()->empty()) return; + + // Schedule the mtasks: statically associate each mtask with a thread, + // and determine the order in which each thread will run its mtasks. + const ThreadSchedule& schedule = PartPackMTasks().pack(*execGraphp->mutableDepGraphp()); + + // Create a function to be run by each thread. Note this moves all AstMTaskBody nodes from the + // AstExecGraph into the AstCFuncs created + const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule); + UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?"); + + // Start the thread functions at the point this AstExecGraph is located in the tree. + addThreadStartToExecGraph(execGraphp, funcps); +} + void V3Partition::finalize() { // Called by Verilator top stage - AstExecGraph* execGraphp = v3Global.rootp()->execGraphp(); + AstExecGraph* const execGraphp = v3Global.rootp()->execGraphp(); UASSERT(execGraphp, "Couldn't find AstExecGraph singleton."); // Back in V3Order, we partitioned mtasks using provisional cost @@ -2556,9 +2761,8 @@ void V3Partition::finalize() { // ExecMTask. finalizeCosts(execGraphp->mutableDepGraphp()); - // "Pack" the mtasks: statically associate each mtask with a thread, - // and determine the order in which each thread will runs its mtasks. - PartPackMTasks(execGraphp->mutableDepGraphp()).go(); + // Replace the graph body with its multi-threaded implementation. + implementExecGraph(execGraphp); } void V3Partition::selfTest() { diff --git a/src/V3Partition.h b/src/V3Partition.h index 12f58f830..0c9ca80d2 100644 --- a/src/V3Partition.h +++ b/src/V3Partition.h @@ -63,7 +63,6 @@ public: static void finalize(); private: - static void finalizeCosts(V3Graph* execMTaskGraphp); static void setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp); VL_DEBUG_FUNC; // Declare debug() diff --git a/src/V3PartitionGraph.h b/src/V3PartitionGraph.h index 8bc4baf3f..24e081d90 100644 --- a/src/V3PartitionGraph.h +++ b/src/V3PartitionGraph.h @@ -54,17 +54,13 @@ public: class ExecMTask final : public AbstractMTask { private: - AstMTaskBody* m_bodyp; // Task body - uint32_t m_id; // Unique id of this mtask. + AstMTaskBody* const m_bodyp; // Task body + const uint32_t m_id; // Unique id of this mtask. uint32_t m_priority = 0; // Predicted critical path from the start of - // this mtask to the ends of the graph that are reachable from this - // mtask. In abstract time units. + // this mtask to the ends of the graph that are reachable from this + // mtask. In abstract time units. uint32_t m_cost = 0; // Predicted runtime of this mtask, in the same - // abstract time units as priority(). - uint32_t m_thread = 0xffffffff; // Thread for static (pack_mtasks) scheduling, - // or 0xffffffff if not yet assigned. - const ExecMTask* m_packNextp = nullptr; // Next for static (pack_mtasks) scheduling - bool m_threadRoot = false; // Is root thread + // abstract time units as priority().
VL_UNCOPYABLE(ExecMTask); public: @@ -78,12 +74,6 @@ public: void priority(uint32_t pri) { m_priority = pri; } virtual uint32_t cost() const override { return m_cost; } void cost(uint32_t cost) { m_cost = cost; } - void thread(uint32_t thread) { m_thread = thread; } - uint32_t thread() const { return m_thread; } - void packNextp(const ExecMTask* nextp) { m_packNextp = nextp; } - const ExecMTask* packNextp() const { return m_packNextp; } - bool threadRoot() const { return m_threadRoot; } - void threadRoot(bool threadRoot) { m_threadRoot = threadRoot; } string cFuncName() const { // If this MTask maps to a C function, this should be the name return string("__Vmtask") + "__" + cvtToStr(m_id); @@ -92,9 +82,6 @@ public: void dump(std::ostream& str) const { str << name() << "." << cvtToHex(this); if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]"; - if (thread() != 0xffffffff) str << " th=" << thread(); - if (threadRoot()) str << " [ROOT]"; - if (packNextp()) str << " nx=" << packNextp()->name(); } }; inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) { diff --git a/src/Verilator.cpp b/src/Verilator.cpp index 041a2705c..a73fe58bf 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -490,6 +490,14 @@ static void process() { V3CCtors::cctorsAll(); } + if (!v3Global.opt.xmlOnly() && v3Global.opt.mtasks()) { + // Finalize our MTask cost estimates and pack the mtasks into + // threads. Must happen pre-EmitC which relies on the packing + // order. Must happen post-V3LifePost which changes the relative + // costs of mtasks. + V3Partition::finalize(); + } + // Output the text if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && !v3Global.opt.dpiHdrOnly()) { // Create AstCUse to determine what class forward declarations/#includes needed in C @@ -504,13 +512,6 @@ static void process() { } else if (v3Global.opt.dpiHdrOnly()) { V3EmitC::emitcSyms(true); } - if (!v3Global.opt.xmlOnly() && v3Global.opt.mtasks()) { - // Finalize our MTask cost estimates and pack the mtasks into - // threads. Must happen pre-EmitC which relies on the packing - // order. Must happen post-V3LifePost which changes the relative - // costs of mtasks. - V3Partition::finalize(); - } if (!v3Global.opt.xmlOnly() && !v3Global.opt.dpiHdrOnly()) { // Unfortunately we have some lint checks in emitc. 
V3EmitC::emitc(); diff --git a/test_regress/t/t_inst_tree_inl0_pub1_norelcfuncs.pl b/test_regress/t/t_inst_tree_inl0_pub1_norelcfuncs.pl index 27c1c82b3..3e2ae31fc 100755 --- a/test_regress/t/t_inst_tree_inl0_pub1_norelcfuncs.pl +++ b/test_regress/t/t_inst_tree_inl0_pub1_norelcfuncs.pl @@ -33,7 +33,7 @@ if ($Self->{vlt_all}) { $text =~ s/vlSelf->.* = VL_RAND_RESET.*;//g; $text =~ s/vlSelf->__Vm_even_cycle//g; $text =~ s/vlSelf->__Vm_even_cycle//g; - $text =~ s/vlSelf->__Vm_mt_(final|\d+)//g; + $text =~ s/vlSelf->__Vm_mtaskstate_(final|\d+)//g; $text =~ s/vlSelf->__Vm_threadPoolp//g; if ($text =~ m/this->/ || $text =~ m/vlSelf->/) { error("$file has unexpected this-> refs when --norelative-cfuncs"); diff --git a/test_regress/t/t_inst_tree_inl1_pub1.pl b/test_regress/t/t_inst_tree_inl1_pub1.pl index 624135b06..ed87d615c 100755 --- a/test_regress/t/t_inst_tree_inl1_pub1.pl +++ b/test_regress/t/t_inst_tree_inl1_pub1.pl @@ -19,9 +19,9 @@ compile( ); if ($Self->{vlt_all}) { - file_grep("$out_filename", qr/\/i); - file_grep("$out_filename", qr/\/i); - file_grep("$out_filename", qr/\/i); + file_grep("$out_filename", qr/\/i); + file_grep("$out_filename", qr/\/i); + file_grep("$out_filename", qr/\/i); } execute(