diff --git a/include/verilated_profiler.h b/include/verilated_profiler.h index 2cabb39ff..1d8a2c5d4 100644 --- a/include/verilated_profiler.h +++ b/include/verilated_profiler.h @@ -120,7 +120,7 @@ public: m_type = Type::SECTION_PUSH; } void sectionPop() { m_type = Type::SECTION_POP; } - void mtaskBegin(uint32_t id, uint32_t predictStart, const char* hierBlock = "") { + void mtaskBegin(uint32_t id, uint32_t predictStart, const char* hierBlock) { m_payload.mtaskBegin.m_id = id; m_payload.mtaskBegin.m_predictStart = predictStart; m_payload.mtaskBegin.m_cpu = VlOs::getcpu(); diff --git a/src/V3AstNodeOther.h b/src/V3AstNodeOther.h index 0ed7cf774..be568f2d6 100644 --- a/src/V3AstNodeOther.h +++ b/src/V3AstNodeOther.h @@ -1100,32 +1100,6 @@ public: string name() const override VL_MT_STABLE { return m_name; } ASTGEN_MEMBERS_AstIntfRef; }; -class AstMTaskBody final : public AstNode { - // Hold statements for each MTask - // @astgen op1 := stmtsp : List[AstNode] - ExecMTask* m_execMTaskp = nullptr; - -public: - explicit AstMTaskBody(FileLine* fl) - : ASTGEN_SUPER_MTaskBody(fl) {} - ASTGEN_MEMBERS_AstMTaskBody; - void cloneRelink() override { UASSERT(!clonep(), "Not cloneable"); } - const char* broken() const override { - BROKEN_RTN(!m_execMTaskp); - return nullptr; - } - void addStmtsFirstp(AstNode* nodep) { - if (stmtsp()) { - stmtsp()->addHereThisAsNext(nodep); - } else { - addStmtsp(nodep); - } - } - ExecMTask* execMTaskp() const { return m_execMTaskp; } - void execMTaskp(ExecMTask* execMTaskp) { m_execMTaskp = execMTaskp; } - void dump(std::ostream& str = std::cout) const override; - void dumpJson(std::ostream& str = std::cout) const override; -}; class AstModport final : public AstNode { // A modport in an interface // @astgen op1 := varsp : List[AstNode] diff --git a/src/V3AstNodeStmt.h b/src/V3AstNodeStmt.h index 0825e6b27..1369bdefa 100644 --- a/src/V3AstNodeStmt.h +++ b/src/V3AstNodeStmt.h @@ -601,20 +601,21 @@ public: int instrCount() const override { return 0; } 
}; class AstExecGraph final : public AstNodeStmt { - // For parallel execution, this node contains a dependency graph. Each - // vertex in the graph is an ExecMTask, which contains a body for the - // mtask (an AstMTaskBody), which contains sequentially executed statements. - // - // The AstMTaskBody nodes are also children of this node, so we can visit - // them without traversing the graph. + // For parallel execution, this node contains a dependency graph. Each + // vertex in the graph is an ExecMTask, which includes a function that + // holds the sequential body of the mtask. // // The location where AstExecGraph appears as a procedural statement is // where the parallel graph will be executed. Execution proceeds after // the AstExecGraph when all threads have joined. // - // @astgen op1 := mTaskBodiesp : List[AstMTaskBody] - // In later phases, the statements that start the parallel execution - // @astgen op2 := stmtsp : List[AstNode] + // For code analysis purposes after scheduling, we keep a call to each + // MTask function as children of the AstExecGraph in 'stmtsp'. These + // are in a topological order so they represent a valid sequential + // execution of the graph. In `V3ExecGraph::implement`, we replace these + // statements with statements that dispatch to the thread pool for + // parallel execution. 
+ // @astgen op1 := stmtsp : List[AstNode] V3Graph* const m_depGraphp; // contains ExecMTask vertices const string m_name; // Name of this AstExecGraph (for uniqueness at code generation) @@ -623,10 +624,7 @@ public: ~AstExecGraph() override; ASTGEN_MEMBERS_AstExecGraph; void cloneRelink() override { V3ERROR_NA; } // Not cloneable - const char* broken() const override { - BROKEN_RTN(!m_depGraphp); - return nullptr; - } + const char* broken() const override; string name() const override VL_MT_STABLE { return m_name; } V3Graph* depGraphp() { return m_depGraphp; } const V3Graph* depGraphp() const { return m_depGraphp; } diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index c264c7ec9..778784ba0 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -340,6 +340,16 @@ AstExecGraph::AstExecGraph(FileLine* fileline, const string& name) VL_MT_DISABLE AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); } +const char* AstExecGraph::broken() const { + BROKEN_RTN(!m_depGraphp); + for (const V3GraphVertex& vtx : m_depGraphp->vertices()) { + const ExecMTask* const mtaskp = vtx.as(); + AstCFunc* const funcp = mtaskp->funcp(); + BROKEN_RTN(!funcp || !funcp->brokeExists()); + } + return nullptr; +} + AstNodeExpr* AstInsideRange::newAndFromInside(AstNodeExpr* exprp, AstNodeExpr* lhsp, AstNodeExpr* rhsp) { AstNodeExpr* const ap = new AstGte{fileline(), exprp, lhsp}; @@ -2538,17 +2548,6 @@ void AstSystemCSection::dumpJson(std::ostream& str) const { dumpJsonStr(str, "sectionType", sectionType().ascii()); dumpJsonGen(str); } -void AstMTaskBody::dump(std::ostream& str) const { - this->AstNode::dump(str); - str << " "; - m_execMTaskp->dump(str); -} -void AstMTaskBody::dumpJson(std::ostream& str) const { - str << ',' << '"' << "execMTask" << '"' << ':' << '"'; - m_execMTaskp->dump(str); // TODO: Consider dumping it as json object - str << '"'; - dumpJsonGen(str); -} void AstTypeTable::dump(std::ostream& str) const { this->AstNode::dump(str); for (int i 
= 0; i < static_cast(VBasicDTypeKwd::_ENUM_MAX); ++i) { diff --git a/src/V3Depth.cpp b/src/V3Depth.cpp index f1728812f..4c9b0082d 100644 --- a/src/V3Depth.cpp +++ b/src/V3Depth.cpp @@ -38,7 +38,6 @@ class DepthVisitor final : public VNVisitor { // STATE - for current visit position (use VL_RESTORER) AstCFunc* m_cfuncp = nullptr; // Current block - AstMTaskBody* m_mtaskbodyp = nullptr; // Current mtaskbody AstNode* m_stmtp = nullptr; // Current statement int m_depth = 0; // How deep in an expression int m_maxdepth = 0; // Maximum depth in an expression @@ -53,8 +52,6 @@ class DepthVisitor final : public VNVisitor { m_tempNames.get(nodep), nodep->dtypep()}; if (m_cfuncp) { m_cfuncp->addVarsp(varp); - } else if (m_mtaskbodyp) { - m_mtaskbodyp->addStmtsFirstp(varp); } else { nodep->v3fatalSrc("Deep expression not under a function"); } @@ -70,28 +67,14 @@ class DepthVisitor final : public VNVisitor { // VISITORS void visit(AstCFunc* nodep) override { VL_RESTORER(m_cfuncp); - VL_RESTORER(m_mtaskbodyp); VL_RESTORER(m_depth); VL_RESTORER(m_maxdepth); m_cfuncp = nodep; - m_mtaskbodyp = nullptr; m_depth = 0; m_maxdepth = 0; m_tempNames.reset(); iterateChildren(nodep); } - void visit(AstMTaskBody* nodep) override { - VL_RESTORER(m_cfuncp); - VL_RESTORER(m_mtaskbodyp); - VL_RESTORER(m_depth); - VL_RESTORER(m_maxdepth); - m_cfuncp = nullptr; - m_mtaskbodyp = nodep; - m_depth = 0; - m_maxdepth = 0; - // We don't reset the names, as must share across tasks - iterateChildren(nodep); - } void visitStmt(AstNodeStmt* nodep) { VL_RESTORER(m_stmtp); VL_RESTORER(m_depth); diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h index b04556c38..f9763cfd2 100644 --- a/src/V3EmitCFunc.h +++ b/src/V3EmitCFunc.h @@ -1623,11 +1623,6 @@ public: } // - void visit(AstMTaskBody* nodep) override { - VL_RESTORER(m_useSelfForThis); - m_useSelfForThis = true; - iterateChildrenConst(nodep); - } void visit(AstConsAssoc* nodep) override { putnbs(nodep, nodep->dtypep()->cType("", false, false)); puts("()"); 
@@ -1723,7 +1718,6 @@ public: void visit(AstExecGraph* nodep) override { // The location of the AstExecGraph within the containing AstCFunc is where we want to // invoke the graph and wait for it to complete. Emitting the children does just that. - UASSERT_OBJ(!nodep->mTaskBodiesp(), nodep, "These should have been lowered"); iterateChildrenConst(nodep); } diff --git a/src/V3ExecGraph.cpp b/src/V3ExecGraph.cpp index ca20bcd81..28f5b86db 100644 --- a/src/V3ExecGraph.cpp +++ b/src/V3ExecGraph.cpp @@ -33,16 +33,24 @@ VL_DEFINE_DEBUG_FUNCTIONS; -ExecMTask::ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED // - : V3GraphVertex{graphp}, - m_bodyp{bodyp}, - m_id{s_nextId++}, - m_hashName{V3Hasher::uncachedHash(bodyp).toString()} { - UASSERT_OBJ(bodyp->stmtsp(), bodyp, "AstMTaskBody should already be populated for hashing"); - UASSERT_OBJ(!bodyp->execMTaskp(), bodyp, "AstMTaskBody already linked to an ExecMTask"); - bodyp->execMTaskp(this); +AstCFunc* ExecMTask::createCFunc(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp, + uint32_t id) { + const std::string name = execGraphp->name() + "_mtask" + std::to_string(id); + AstCFunc* const funcp = new AstCFunc{execGraphp->fileline(), name, scopep}; + funcp->isLoose(true); + funcp->dontCombine(true); + funcp->addStmtsp(stmtsp); + if (scopep) scopep->addBlocksp(funcp); + return funcp; } +ExecMTask::ExecMTask(AstExecGraph* execGraphp, AstScope* scopep, + AstNodeStmt* stmtsp) VL_MT_DISABLED // + : V3GraphVertex{execGraphp->depGraphp()}, + m_id{s_nextId++}, + m_funcp{createCFunc(execGraphp, scopep, stmtsp, m_id)}, + m_hashName{V3Hasher::uncachedHash(m_funcp).toString()} {} + void ExecMTask::dump(std::ostream& str) const { str << name() << "." 
<< cvtToHex(this); if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]"; @@ -538,37 +546,32 @@ public: selfTestNormalFirst(); } static void selfTestNormalFirst() { - V3Graph graph; FileLine* const flp = v3Global.rootp()->fileline(); - std::vector mTaskBodyps; - const auto makeBody = [&]() { - AstMTaskBody* const bodyp = new AstMTaskBody{flp}; - mTaskBodyps.push_back(bodyp); - bodyp->addStmtsp(new AstComment{flp, ""}); - return bodyp; - }; - ExecMTask* const t0 = new ExecMTask{&graph, makeBody()}; + AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"}; + V3Graph& graph = *execGraphp->depGraphp(); + const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; }; + ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()}; t0->cost(1000); t0->priority(1100); - ExecMTask* const t1 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()}; t1->cost(100); t1->priority(100); - ExecMTask* const t2 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t2 = new ExecMTask{execGraphp, nullptr, makeBody()}; t2->cost(100); t2->priority(100); t2->threads(2); - ExecMTask* const t3 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t3 = new ExecMTask{execGraphp, nullptr, makeBody()}; t3->cost(100); t3->priority(100); t3->threads(3); - ExecMTask* const t4 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t4 = new ExecMTask{execGraphp, nullptr, makeBody()}; t4->cost(100); t4->priority(100); t4->threads(3); - ExecMTask* const t5 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t5 = new ExecMTask{execGraphp, nullptr, makeBody()}; t5->cost(100); t5->priority(100); - ExecMTask* const t6 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t6 = new ExecMTask{execGraphp, nullptr, makeBody()}; t6->cost(100); t6->priority(100); @@ -666,24 +669,20 @@ public: UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 4), 1360); 
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 5), 1360); - for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree(); + for (V3GraphVertex& vtx : graph.vertices()) vtx.as()->funcp()->deleteTree(); + VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp); ThreadSchedule::s_mtaskState.clear(); } static void selfTestHierFirst() { - V3Graph graph; FileLine* const flp = v3Global.rootp()->fileline(); - std::vector mTaskBodyps; - const auto makeBody = [&]() { - AstMTaskBody* const bodyp = new AstMTaskBody{flp}; - mTaskBodyps.push_back(bodyp); - bodyp->addStmtsp(new AstComment{flp, ""}); - return bodyp; - }; - ExecMTask* const t0 = new ExecMTask{&graph, makeBody()}; + AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"}; + V3Graph& graph = *execGraphp->depGraphp(); + const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; }; + ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()}; t0->cost(1000); t0->priority(1100); t0->threads(2); - ExecMTask* const t1 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()}; t1->cost(100); t1->priority(100); @@ -725,7 +724,8 @@ public: UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 0), 1100); UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 1), 1130); - for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree(); + for (V3GraphVertex& vtx : graph.vertices()) vtx.as()->funcp()->deleteTree(); + VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp); ThreadSchedule::s_mtaskState.clear(); } @@ -790,6 +790,24 @@ void normalizeCosts(Costs& costs) { } } +void removeEmptyMTasks(V3Graph* execMTaskGraphp) { + for (V3GraphVertex* const vtxp : execMTaskGraphp->vertices().unlinkable()) { + ExecMTask* const mtaskp = vtxp->as(); + AstCFunc* const funcp = mtaskp->funcp(); + if (funcp->stmtsp()) continue; + + UINFO(6, "Removing empty MTask " << mtaskp->name()); + // Redirect edges + 
mtaskp->rerouteEdges(execMTaskGraphp); + // Delete the MTask function + VL_DO_DANGLING(funcp->unlinkFrBack()->deleteTree(), funcp); + // Delete the MTask vertex + VL_DO_DANGLING(mtaskp->unlinkDelete(execMTaskGraphp), mtaskp); + } + // Remove redundant dependencies + execMTaskGraphp->removeRedundantEdgesMax(&V3GraphEdge::followAlwaysTrue); +} + void fillinCosts(V3Graph* execMTaskGraphp) { // Pass 1: See what profiling data applies Costs costs; // For each mtask, costs @@ -797,7 +815,7 @@ void fillinCosts(V3Graph* execMTaskGraphp) { for (V3GraphVertex& vtx : execMTaskGraphp->vertices()) { ExecMTask* const mtp = vtx.as(); // This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits - const uint64_t costEstimate = V3InstrCount::count(mtp->bodyp(), false); + const uint64_t costEstimate = V3InstrCount::count(mtp->funcp(), false); const uint64_t costProfiled = V3Control::getProfileData(v3Global.opt.prefix(), mtp->hashName()); if (costProfiled) { @@ -857,30 +875,6 @@ void finalizeCosts(V3Graph* execMTaskGraphp) { } } - // Some MTasks may now have zero cost, eliminate those. - // (It's common for tasks to shrink to nothing when V3LifePost - // removes dly assignments.) - for (V3GraphVertex* const vtxp : execMTaskGraphp->vertices().unlinkable()) { - ExecMTask* const mtp = vtxp->as(); - - // Don't rely on checking mtp->cost() == 0 to detect an empty task. - // Our cost-estimating logic is just an estimate. Instead, check - // the MTaskBody to see if it's empty. That's the source of truth. - AstMTaskBody* const bodyp = mtp->bodyp(); - if (!bodyp->stmtsp()) { // Kill this empty mtask - UINFO(6, "Removing zero-cost " << mtp->name()); - for (V3GraphEdge& in : mtp->inEdges()) { - for (V3GraphEdge& out : mtp->outEdges()) { - new V3GraphEdge{execMTaskGraphp, in.fromp(), out.top(), 1}; - } - } - VL_DO_DANGLING(mtp->unlinkDelete(execMTaskGraphp), mtp); - // Also remove and delete the AstMTaskBody, otherwise it would - // keep a dangling pointer to the ExecMTask. 
- VL_DO_DANGLING(bodyp->unlinkFrBack()->deleteTree(), bodyp); - } - } - // Removing tasks may cause edges that were formerly non-transitive to // become transitive. Also we just created new edges around the removed // tasks, which could be transitive. Prune out all transitive edges. @@ -907,6 +901,7 @@ void finalizeCosts(V3Graph* execMTaskGraphp) { void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, AstCFunc* funcp, const ExecMTask* mtaskp) { + AstScope* const scopep = v3Global.rootp()->topScopep()->scopep(); AstNodeModule* const modp = v3Global.rootp()->topModulep(); FileLine* const fl = modp->fileline(); @@ -940,8 +935,11 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, addCStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->id()) + ");"); } - // Move the actual body into this function - funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack()); + // Call the MTask function + AstCCall* const callp = new AstCCall{fl, mtaskp->funcp()}; + callp->selfPointer(VSelfPointerText{VSelfPointerText::VlSyms{}, scopep->nameDotless()}); + callp->dtypeSetVoid(); + funcp->addStmtsp(callp->makeStmt()); if (v3Global.opt.profPgo()) { // No lock around stopCounter, as counter numbers are unique per thread @@ -1093,56 +1091,38 @@ void addThreadStartToExecGraph(AstExecGraph* const execGraphp, } } -void wrapMTaskBodies(AstExecGraph* const execGraphp) { - FileLine* const flp = execGraphp->fileline(); - const string& tag = execGraphp->name(); - AstNodeModule* const modp = v3Global.rootp()->topModulep(); - - for (AstMTaskBody* mtaskBodyp = execGraphp->mTaskBodiesp(); mtaskBodyp; - mtaskBodyp = VN_AS(mtaskBodyp->nextp(), MTaskBody)) { - ExecMTask* const mtaskp = mtaskBodyp->execMTaskp(); - const std::string name = tag + "_mtask" + std::to_string(mtaskp->id()); - AstCFunc* const funcp = new AstCFunc{flp, name, nullptr}; - funcp->isLoose(true); - modp->addStmtsp(funcp); +void processMTaskBodies(AstExecGraph* const 
execGraphp) { + for (V3GraphVertex* const vtxp : execGraphp->depGraphp()->vertices().unlinkable()) { + ExecMTask* const mtaskp = vtxp->as(); + AstCFunc* const funcp = mtaskp->funcp(); + // Temporarily unlink function body so we can add more statements + AstNode* stmtsp = funcp->stmtsp()->unlinkFrBackWithNext(); // Helper function to make the code a bit more legible const auto addCStmt = [=](const string& stmt) -> void { // - funcp->addStmtsp(new AstCStmt{flp, stmt}); + funcp->addStmtsp(new AstCStmt{execGraphp->fileline(), stmt}); }; - addCStmt("static constexpr unsigned taskId = " + cvtToStr(mtaskp->id()) + ";"); - + // Profiling mtaskStart if (v3Global.opt.profExec()) { - const string& predictStart = std::to_string(mtaskp->predictStart()); - if (v3Global.opt.hierChild()) { - addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(taskId, " + predictStart - + ", \"" + v3Global.opt.topModule() + "\");"); - } else { - addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(taskId, " + predictStart - + ");"); - } + std::string args = std::to_string(mtaskp->id()); + args += ", " + std::to_string(mtaskp->predictStart()); + args += ", \""; + if (v3Global.opt.hierChild()) args += v3Global.opt.topModule(); + args += "\""; + addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(" + args + ");"); } - // Set mtask ID in the run-time system - addCStmt("Verilated::mtaskId(taskId);"); - - // Run body - funcp->addStmtsp(mtaskBodyp->stmtsp()->unlinkFrBackWithNext()); - + addCStmt("Verilated::mtaskId(" + std::to_string(mtaskp->id()) + ");"); + // Add back the body + funcp->addStmtsp(stmtsp); // Flush message queue addCStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);"); - + // Profiling mtaskEnd if (v3Global.opt.profExec()) { - const string& predictCost = std::to_string(mtaskp->cost()); - addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + predictCost + ");"); + const std::string& args = std::to_string(mtaskp->cost()); + 
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + args + ");"); } - - // AstMTask will simply contain a call - AstCCall* const callp = new AstCCall{flp, funcp}; - callp->selfPointer(VSelfPointerText{VSelfPointerText::This{}}); - callp->dtypeSetVoid(); - mtaskBodyp->addStmtsp(callp->makeStmt()); } } @@ -1150,8 +1130,7 @@ void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& sc // Nothing to be done if there are no MTasks in the graph at all. if (execGraphp->depGraphp()->empty()) return; - // Create a function to be run by each thread. Note this moves all AstMTaskBody nodes form the - // AstExecGraph into the AstCFunc created + // Create a function to be run by each thread. const std::vector& funcps = createThreadFunctions(schedule, execGraphp->name()); UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?"); @@ -1159,9 +1138,30 @@ void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& sc addThreadStartToExecGraph(execGraphp, funcps, schedule.id()); } +// Called by Verilator top stage void implement(AstNetlist* netlistp) { - // Called by Verilator top stage - netlistp->topModulep()->foreach([&](AstExecGraph* execGraphp) { + // Gather all ExecGraphs + std::vector execGraphps; + netlistp->topModulep()->foreach([&](AstExecGraph* egp) { execGraphps.emplace_back(egp); }); + + // Process each + for (AstExecGraph* const execGraphp : execGraphps) { + // We can delete the placeholder calls to the MTask functions that + // were used for code analysis until now. We will replace them with + // statements that dispatch execution to the thread pool. + if (execGraphp->stmtsp()) execGraphp->stmtsp()->unlinkFrBackWithNext()->deleteTree(); + + // Some MTasks may have become empty after scheduling due to + // optimizations after scheduling. Remove those. + removeEmptyMTasks(execGraphp->depGraphp()); + + // In some very small test cases, we might end up with a completely + // empty ExecGraph, if so just delete it. 
+ if (execGraphp->depGraphp()->empty()) { + VL_DO_DANGLING(execGraphp->unlinkFrBack()->deleteTree(), execGraphp); + continue; // Only this graph was empty; the remaining ExecGraphs must still be processed + } + // Back in V3Order, we partitioned mtasks using provisional cost // estimates. However, V3Order precedes some optimizations (notably // V3LifePost) that can change the cost of logic within each mtask. @@ -1180,8 +1180,8 @@ void implement(AstNetlist* netlistp) { V3Stats::addStatSum("Optimizations, Thread schedule count", static_cast(packed.size())); - // Wrap each MTask body into a CFunc for better profiling/debugging - wrapMTaskBodies(execGraphp); + // Process MTask function bodies to add additional code + processMTaskBodies(execGraphp); for (const ThreadSchedule& schedule : packed) { // Replace the graph body with its multi-threaded implementation. @@ -1189,7 +1189,7 @@ void implement(AstNetlist* netlistp) { } addThreadEndWrapper(execGraphp); - }); + } } void selfTest() { diff --git a/src/V3ExecGraph.h b/src/V3ExecGraph.h index d16941b5c..2c13b92f9 100644 --- a/src/V3ExecGraph.h +++ b/src/V3ExecGraph.h @@ -25,7 +25,10 @@ #include class AstNetlist; -class AstMTaskBody; +class AstCFunc; +class AstExecGraph; +class AstNodeStmt; +class AstScope; //************************************************************************* // MTasks and graph structures @@ -33,9 +36,9 @@ class AstMTaskBody; class ExecMTask final : public V3GraphVertex { VL_RTTI_IMPL(ExecMTask, V3GraphVertex) private: - AstMTaskBody* const m_bodyp; // Task body const uint32_t m_id; // Unique ID of this ExecMTask. static std::atomic s_nextId; // Next ID to use + AstCFunc* const m_funcp; // The function that contains the task body const std::string m_hashName; // Hashed name based on body for profile-driven optimization // Predicted critical path from the start of this mtask to the ends of the graph that are // reachable from this mtask. In abstract time units. 
@@ -46,9 +49,12 @@ private: int m_threads = 1; // Threads used by this mtask VL_UNCOPYABLE(ExecMTask); + static AstCFunc* createCFunc(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp, + uint32_t id); + public: - ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED; - AstMTaskBody* bodyp() const { return m_bodyp; } + ExecMTask(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp) VL_MT_DISABLED; + AstCFunc* funcp() const { return m_funcp; } uint32_t id() const VL_MT_SAFE { return m_id; } uint32_t priority() const { return m_priority; } void priority(uint32_t pri) { m_priority = pri; } diff --git a/src/V3Hasher.cpp b/src/V3Hasher.cpp index 14438c196..03e6261ca 100644 --- a/src/V3Hasher.cpp +++ b/src/V3Hasher.cpp @@ -513,9 +513,6 @@ class HasherVisitor final : public VNVisitorConst { iterateConstNull(nodep->ftaskp()); }); } - void visit(AstMTaskBody* nodep) override { - m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, []() {}); - } void visit(AstNodeProcedure* nodep) override { m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, []() {}); } diff --git a/src/V3LifePost.cpp b/src/V3LifePost.cpp index 4703ea0f9..c39463967 100644 --- a/src/V3LifePost.cpp +++ b/src/V3LifePost.cpp @@ -290,7 +290,7 @@ class LifePostDlyVisitor final : public VNVisitorConst { const ExecMTask* const mtaskp = mtaskVtx.as(); VL_RESTORER(m_execMTaskp); m_execMTaskp = mtaskp; - iterateConst(mtaskp->bodyp()); + trace(mtaskp->funcp()); } } void visit(AstCFunc* nodep) override { diff --git a/src/V3OrderParallel.cpp b/src/V3OrderParallel.cpp index b84d8a3cd..d3b4b3904 100644 --- a/src/V3OrderParallel.cpp +++ b/src/V3OrderParallel.cpp @@ -1763,7 +1763,7 @@ class DpiThreadsVisitor final : public VNVisitorConst { public: // CONSTRUCTORS - explicit DpiThreadsVisitor(AstMTaskBody* nodep) { iterateConst(nodep); } + explicit DpiThreadsVisitor(AstCFunc* nodep) { iterateConst(nodep); } int threads() const { return m_threads; } ~DpiThreadsVisitor() 
override = default; @@ -2431,8 +2431,9 @@ AstNodeStmt* V3Order::createParallel(OrderGraph& orderGraph, OrderMoveGraph& mov if (dumpGraphLevel() >= 9) moveGraph.dumpDotFilePrefixed(tag + "_ordermv_pruned"); // Create the AstExecGraph node which represents the execution of the MTask graph. - FileLine* const rootFlp = v3Global.rootp()->fileline(); - AstExecGraph* const execGraphp = new AstExecGraph{rootFlp, tag}; + FileLine* const flp = v3Global.rootp()->fileline(); + AstScope* const scopep = v3Global.rootp()->topScopep()->scopep(); + AstExecGraph* const execGraphp = new AstExecGraph{flp, tag}; V3Graph* const depGraphp = execGraphp->depGraphp(); // Translate the LogicMTask graph into the corresponding ExecMTask graph, @@ -2468,24 +2469,23 @@ AstNodeStmt* V3Order::createParallel(OrderGraph& orderGraph, OrderMoveGraph& mov VL_DO_DANGLING(mVtxp->unlinkDelete(&moveGraph), mVtxp); } - // We have 2 objects, because AstMTaskBody is an AstNode, and ExecMTask is a GraphVertex. - // To combine them would involve multiple inheritance. 
- - // Construct the actual MTaskBody - AstMTaskBody* const bodyp = new AstMTaskBody{rootFlp}; - execGraphp->addMTaskBodiesp(bodyp); - bodyp->addStmtsp(emitter.getStmts()); - UASSERT_OBJ(bodyp->stmtsp(), bodyp, "Should not try to create empty MTask"); - // Create the ExecMTask - ExecMTask* const execMTaskp = new ExecMTask{depGraphp, bodyp}; - if (!v3Global.opt.hierBlocks().empty()) - execMTaskp->threads(DpiThreadsVisitor{bodyp}.threads()); + ExecMTask* const execMTaskp = new ExecMTask{execGraphp, scopep, emitter.getStmts()}; + if (!v3Global.opt.hierBlocks().empty()) { + execMTaskp->threads(DpiThreadsVisitor{execMTaskp->funcp()}.threads()); + } const bool newEntry = logicMTaskToExecMTask.emplace(mTaskp, execMTaskp).second; UASSERT_OBJ(newEntry, mTaskp, "LogicMTasks should be processed in dependencyorder"); UINFO(3, "Final '" << tag << "' LogicMTask " << mTaskp->id() << " maps to ExecMTask" << execMTaskp->id()); + // For code analysis purposes, we can pretend the AstExecGraph runs the + // MTasks sequentially, in some topological order that respects edges. + // The order they are created here happens to be just such an order. + AstCCall* const callp = new AstCCall{flp, execMTaskp->funcp()}; + callp->dtypeSetVoid(); + execGraphp->addStmtsp(callp->makeStmt()); + // Add the dependency edges between ExecMTasks for (const V3GraphEdge& edge : mTaskp->inEdges()) { const V3GraphVertex* fromVxp = edge.fromp(); diff --git a/src/V3VariableOrder.cpp b/src/V3VariableOrder.cpp index e241ae495..7560fc01d 100644 --- a/src/V3VariableOrder.cpp +++ b/src/V3VariableOrder.cpp @@ -53,7 +53,7 @@ class GatherMTaskAffinity final : VNVisitorConst { GatherMTaskAffinity(const ExecMTask* mTaskp, MTaskAffinityMap& results) : m_results{results} , m_id{mTaskp->id()} { - iterateChildrenConst(mTaskp->bodyp()); + iterateConst(mTaskp->funcp()); } ~GatherMTaskAffinity() = default; VL_UNMOVABLE(GatherMTaskAffinity);