From d3ca79368c7e55a9f7af771878e1df0a1a7f9fb4 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 3 Nov 2025 07:32:03 +0100 Subject: [PATCH] Internals: Replace AstMTaskBody with AstCFunc(#6280) (#6628) AstMTaskBody is somewhat redundant and is problematic for #6280. We used to wrap all MTasks in a CFunc before emit anyway. Now we create that CFunc when we create the ExecMTask in V3OrderParallel, and subsequently use the CFunc to represent the contents of the MTask. Final output and optimizations are the same, but internals are simplified to move towards #6280. No functional change. --- include/verilated_profiler.h | 2 +- src/V3AstNodeOther.h | 26 ----- src/V3AstNodeStmt.h | 24 ++-- src/V3AstNodes.cpp | 21 ++-- src/V3Depth.cpp | 17 --- src/V3EmitCFunc.h | 6 - src/V3ExecGraph.cpp | 214 +++++++++++++++++------------------ src/V3ExecGraph.h | 14 ++- src/V3Hasher.cpp | 3 - src/V3LifePost.cpp | 2 +- src/V3OrderParallel.cpp | 30 ++--- src/V3VariableOrder.cpp | 2 +- 12 files changed, 156 insertions(+), 205 deletions(-) diff --git a/include/verilated_profiler.h b/include/verilated_profiler.h index 2cabb39ff..1d8a2c5d4 100644 --- a/include/verilated_profiler.h +++ b/include/verilated_profiler.h @@ -120,7 +120,7 @@ public: m_type = Type::SECTION_PUSH; } void sectionPop() { m_type = Type::SECTION_POP; } - void mtaskBegin(uint32_t id, uint32_t predictStart, const char* hierBlock = "") { + void mtaskBegin(uint32_t id, uint32_t predictStart, const char* hierBlock) { m_payload.mtaskBegin.m_id = id; m_payload.mtaskBegin.m_predictStart = predictStart; m_payload.mtaskBegin.m_cpu = VlOs::getcpu(); diff --git a/src/V3AstNodeOther.h b/src/V3AstNodeOther.h index 0ed7cf774..be568f2d6 100644 --- a/src/V3AstNodeOther.h +++ b/src/V3AstNodeOther.h @@ -1100,32 +1100,6 @@ public: string name() const override VL_MT_STABLE { return m_name; } ASTGEN_MEMBERS_AstIntfRef; }; -class AstMTaskBody final : public AstNode { - // Hold statements for each MTask - // @astgen op1 := stmtsp : List[AstNode] - 
ExecMTask* m_execMTaskp = nullptr; - -public: - explicit AstMTaskBody(FileLine* fl) - : ASTGEN_SUPER_MTaskBody(fl) {} - ASTGEN_MEMBERS_AstMTaskBody; - void cloneRelink() override { UASSERT(!clonep(), "Not cloneable"); } - const char* broken() const override { - BROKEN_RTN(!m_execMTaskp); - return nullptr; - } - void addStmtsFirstp(AstNode* nodep) { - if (stmtsp()) { - stmtsp()->addHereThisAsNext(nodep); - } else { - addStmtsp(nodep); - } - } - ExecMTask* execMTaskp() const { return m_execMTaskp; } - void execMTaskp(ExecMTask* execMTaskp) { m_execMTaskp = execMTaskp; } - void dump(std::ostream& str = std::cout) const override; - void dumpJson(std::ostream& str = std::cout) const override; -}; class AstModport final : public AstNode { // A modport in an interface // @astgen op1 := varsp : List[AstNode] diff --git a/src/V3AstNodeStmt.h b/src/V3AstNodeStmt.h index 0825e6b27..1369bdefa 100644 --- a/src/V3AstNodeStmt.h +++ b/src/V3AstNodeStmt.h @@ -601,20 +601,21 @@ public: int instrCount() const override { return 0; } }; class AstExecGraph final : public AstNodeStmt { - // For parallel execution, this node contains a dependency graph. Each - // vertex in the graph is an ExecMTask, which contains a body for the - // mtask (an AstMTaskBody), which contains sequentially executed statements. - // - // The AstMTaskBody nodes are also children of this node, so we can visit - // them without traversing the graph. + // For parallel execution, this node contains a dependency graph. Each + // vertex in the graph is an ExecMTask, which includes a function that + // holds the sequential body of the mtask. // // The location where AstExecGraph appears as a procedural statement is // where the parallel graph will be executed. Execution proceeds after // the AstExecGraph when all threads have joined. 
// - // @astgen op1 := mTaskBodiesp : List[AstMTaskBody] - // In later phases, the statements that start the parallel execution - // @astgen op2 := stmtsp : List[AstNode] + // For code analysis purposes after scheduling, we keep a call to each + // MTask function as children of the AstExecGraph in 'stmtsp'. These + // are in a topological order so they represent a valid sequential + // execution of the graph. In `V3ExecGraph::implement`, we replace these + // statements with statements that dispatch to the thread pool for + // parallel execution. + // @astgen op1 := stmtsp : List[AstNode] V3Graph* const m_depGraphp; // contains ExecMTask vertices const string m_name; // Name of this AstExecGraph (for uniqueness at code generation) @@ -623,10 +624,7 @@ public: ~AstExecGraph() override; ASTGEN_MEMBERS_AstExecGraph; void cloneRelink() override { V3ERROR_NA; } // Not cloneable - const char* broken() const override { - BROKEN_RTN(!m_depGraphp); - return nullptr; - } + const char* broken() const override; string name() const override VL_MT_STABLE { return m_name; } V3Graph* depGraphp() { return m_depGraphp; } const V3Graph* depGraphp() const { return m_depGraphp; } diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index c264c7ec9..778784ba0 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -340,6 +340,16 @@ AstExecGraph::AstExecGraph(FileLine* fileline, const string& name) VL_MT_DISABLE AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); } +const char* AstExecGraph::broken() const { + BROKEN_RTN(!m_depGraphp); + for (const V3GraphVertex& vtx : m_depGraphp->vertices()) { + const ExecMTask* const mtaskp = vtx.as(); + AstCFunc* const funcp = mtaskp->funcp(); + BROKEN_RTN(!funcp || !funcp->brokeExists()); + } + return nullptr; +} + AstNodeExpr* AstInsideRange::newAndFromInside(AstNodeExpr* exprp, AstNodeExpr* lhsp, AstNodeExpr* rhsp) { AstNodeExpr* const ap = new AstGte{fileline(), exprp, lhsp}; @@ -2538,17 +2548,6 @@ void 
AstSystemCSection::dumpJson(std::ostream& str) const { dumpJsonStr(str, "sectionType", sectionType().ascii()); dumpJsonGen(str); } -void AstMTaskBody::dump(std::ostream& str) const { - this->AstNode::dump(str); - str << " "; - m_execMTaskp->dump(str); -} -void AstMTaskBody::dumpJson(std::ostream& str) const { - str << ',' << '"' << "execMTask" << '"' << ':' << '"'; - m_execMTaskp->dump(str); // TODO: Consider dumping it as json object - str << '"'; - dumpJsonGen(str); -} void AstTypeTable::dump(std::ostream& str) const { this->AstNode::dump(str); for (int i = 0; i < static_cast(VBasicDTypeKwd::_ENUM_MAX); ++i) { diff --git a/src/V3Depth.cpp b/src/V3Depth.cpp index f1728812f..4c9b0082d 100644 --- a/src/V3Depth.cpp +++ b/src/V3Depth.cpp @@ -38,7 +38,6 @@ class DepthVisitor final : public VNVisitor { // STATE - for current visit position (use VL_RESTORER) AstCFunc* m_cfuncp = nullptr; // Current block - AstMTaskBody* m_mtaskbodyp = nullptr; // Current mtaskbody AstNode* m_stmtp = nullptr; // Current statement int m_depth = 0; // How deep in an expression int m_maxdepth = 0; // Maximum depth in an expression @@ -53,8 +52,6 @@ class DepthVisitor final : public VNVisitor { m_tempNames.get(nodep), nodep->dtypep()}; if (m_cfuncp) { m_cfuncp->addVarsp(varp); - } else if (m_mtaskbodyp) { - m_mtaskbodyp->addStmtsFirstp(varp); } else { nodep->v3fatalSrc("Deep expression not under a function"); } @@ -70,28 +67,14 @@ class DepthVisitor final : public VNVisitor { // VISITORS void visit(AstCFunc* nodep) override { VL_RESTORER(m_cfuncp); - VL_RESTORER(m_mtaskbodyp); VL_RESTORER(m_depth); VL_RESTORER(m_maxdepth); m_cfuncp = nodep; - m_mtaskbodyp = nullptr; m_depth = 0; m_maxdepth = 0; m_tempNames.reset(); iterateChildren(nodep); } - void visit(AstMTaskBody* nodep) override { - VL_RESTORER(m_cfuncp); - VL_RESTORER(m_mtaskbodyp); - VL_RESTORER(m_depth); - VL_RESTORER(m_maxdepth); - m_cfuncp = nullptr; - m_mtaskbodyp = nodep; - m_depth = 0; - m_maxdepth = 0; - // We don't reset the 
names, as must share across tasks - iterateChildren(nodep); - } void visitStmt(AstNodeStmt* nodep) { VL_RESTORER(m_stmtp); VL_RESTORER(m_depth); diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h index b04556c38..f9763cfd2 100644 --- a/src/V3EmitCFunc.h +++ b/src/V3EmitCFunc.h @@ -1623,11 +1623,6 @@ public: } // - void visit(AstMTaskBody* nodep) override { - VL_RESTORER(m_useSelfForThis); - m_useSelfForThis = true; - iterateChildrenConst(nodep); - } void visit(AstConsAssoc* nodep) override { putnbs(nodep, nodep->dtypep()->cType("", false, false)); puts("()"); @@ -1723,7 +1718,6 @@ public: void visit(AstExecGraph* nodep) override { // The location of the AstExecGraph within the containing AstCFunc is where we want to // invoke the graph and wait for it to complete. Emitting the children does just that. - UASSERT_OBJ(!nodep->mTaskBodiesp(), nodep, "These should have been lowered"); iterateChildrenConst(nodep); } diff --git a/src/V3ExecGraph.cpp b/src/V3ExecGraph.cpp index ca20bcd81..28f5b86db 100644 --- a/src/V3ExecGraph.cpp +++ b/src/V3ExecGraph.cpp @@ -33,16 +33,24 @@ VL_DEFINE_DEBUG_FUNCTIONS; -ExecMTask::ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED // - : V3GraphVertex{graphp}, - m_bodyp{bodyp}, - m_id{s_nextId++}, - m_hashName{V3Hasher::uncachedHash(bodyp).toString()} { - UASSERT_OBJ(bodyp->stmtsp(), bodyp, "AstMTaskBody should already be populated for hashing"); - UASSERT_OBJ(!bodyp->execMTaskp(), bodyp, "AstMTaskBody already linked to an ExecMTask"); - bodyp->execMTaskp(this); +AstCFunc* ExecMTask::createCFunc(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp, + uint32_t id) { + const std::string name = execGraphp->name() + "_mtask" + std::to_string(id); + AstCFunc* const funcp = new AstCFunc{execGraphp->fileline(), name, scopep}; + funcp->isLoose(true); + funcp->dontCombine(true); + funcp->addStmtsp(stmtsp); + if (scopep) scopep->addBlocksp(funcp); + return funcp; } +ExecMTask::ExecMTask(AstExecGraph* execGraphp, AstScope* 
scopep, + AstNodeStmt* stmtsp) VL_MT_DISABLED // + : V3GraphVertex{execGraphp->depGraphp()}, + m_id{s_nextId++}, + m_funcp{createCFunc(execGraphp, scopep, stmtsp, m_id)}, + m_hashName{V3Hasher::uncachedHash(m_funcp).toString()} {} + void ExecMTask::dump(std::ostream& str) const { str << name() << "." << cvtToHex(this); if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]"; @@ -538,37 +546,32 @@ public: selfTestNormalFirst(); } static void selfTestNormalFirst() { - V3Graph graph; FileLine* const flp = v3Global.rootp()->fileline(); - std::vector mTaskBodyps; - const auto makeBody = [&]() { - AstMTaskBody* const bodyp = new AstMTaskBody{flp}; - mTaskBodyps.push_back(bodyp); - bodyp->addStmtsp(new AstComment{flp, ""}); - return bodyp; - }; - ExecMTask* const t0 = new ExecMTask{&graph, makeBody()}; + AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"}; + V3Graph& graph = *execGraphp->depGraphp(); + const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; }; + ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()}; t0->cost(1000); t0->priority(1100); - ExecMTask* const t1 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()}; t1->cost(100); t1->priority(100); - ExecMTask* const t2 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t2 = new ExecMTask{execGraphp, nullptr, makeBody()}; t2->cost(100); t2->priority(100); t2->threads(2); - ExecMTask* const t3 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t3 = new ExecMTask{execGraphp, nullptr, makeBody()}; t3->cost(100); t3->priority(100); t3->threads(3); - ExecMTask* const t4 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t4 = new ExecMTask{execGraphp, nullptr, makeBody()}; t4->cost(100); t4->priority(100); t4->threads(3); - ExecMTask* const t5 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t5 = new ExecMTask{execGraphp, nullptr, makeBody()}; t5->cost(100); 
t5->priority(100); - ExecMTask* const t6 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t6 = new ExecMTask{execGraphp, nullptr, makeBody()}; t6->cost(100); t6->priority(100); @@ -666,24 +669,20 @@ public: UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 4), 1360); UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 5), 1360); - for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree(); + for (V3GraphVertex& vtx : graph.vertices()) vtx.as()->funcp()->deleteTree(); + VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp); ThreadSchedule::s_mtaskState.clear(); } static void selfTestHierFirst() { - V3Graph graph; FileLine* const flp = v3Global.rootp()->fileline(); - std::vector mTaskBodyps; - const auto makeBody = [&]() { - AstMTaskBody* const bodyp = new AstMTaskBody{flp}; - mTaskBodyps.push_back(bodyp); - bodyp->addStmtsp(new AstComment{flp, ""}); - return bodyp; - }; - ExecMTask* const t0 = new ExecMTask{&graph, makeBody()}; + AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"}; + V3Graph& graph = *execGraphp->depGraphp(); + const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; }; + ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()}; t0->cost(1000); t0->priority(1100); t0->threads(2); - ExecMTask* const t1 = new ExecMTask{&graph, makeBody()}; + ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()}; t1->cost(100); t1->priority(100); @@ -725,7 +724,8 @@ public: UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 0), 1100); UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 1), 1130); - for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree(); + for (V3GraphVertex& vtx : graph.vertices()) vtx.as()->funcp()->deleteTree(); + VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp); ThreadSchedule::s_mtaskState.clear(); } @@ -790,6 +790,24 @@ void normalizeCosts(Costs& costs) { } } +void removeEmptyMTasks(V3Graph* execMTaskGraphp) { 
+ for (V3GraphVertex* const vtxp : execMTaskGraphp->vertices().unlinkable()) { + ExecMTask* const mtaskp = vtxp->as(); + AstCFunc* const funcp = mtaskp->funcp(); + if (funcp->stmtsp()) continue; + + UINFO(6, "Removing empty MTask " << mtaskp->name()); + // Redirect edges + mtaskp->rerouteEdges(execMTaskGraphp); + // Delete the MTask function + VL_DO_DANGLING(funcp->unlinkFrBack()->deleteTree(), funcp); + // Delete the MTask vertex + VL_DO_DANGLING(mtaskp->unlinkDelete(execMTaskGraphp), mtaskp); + } + // Remove redundant dependencies + execMTaskGraphp->removeRedundantEdgesMax(&V3GraphEdge::followAlwaysTrue); +} + void fillinCosts(V3Graph* execMTaskGraphp) { // Pass 1: See what profiling data applies Costs costs; // For each mtask, costs @@ -797,7 +815,7 @@ void fillinCosts(V3Graph* execMTaskGraphp) { for (V3GraphVertex& vtx : execMTaskGraphp->vertices()) { ExecMTask* const mtp = vtx.as(); // This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits - const uint64_t costEstimate = V3InstrCount::count(mtp->bodyp(), false); + const uint64_t costEstimate = V3InstrCount::count(mtp->funcp(), false); const uint64_t costProfiled = V3Control::getProfileData(v3Global.opt.prefix(), mtp->hashName()); if (costProfiled) { @@ -857,30 +875,6 @@ void finalizeCosts(V3Graph* execMTaskGraphp) { } } - // Some MTasks may now have zero cost, eliminate those. - // (It's common for tasks to shrink to nothing when V3LifePost - // removes dly assignments.) - for (V3GraphVertex* const vtxp : execMTaskGraphp->vertices().unlinkable()) { - ExecMTask* const mtp = vtxp->as(); - - // Don't rely on checking mtp->cost() == 0 to detect an empty task. - // Our cost-estimating logic is just an estimate. Instead, check - // the MTaskBody to see if it's empty. That's the source of truth. 
- AstMTaskBody* const bodyp = mtp->bodyp(); - if (!bodyp->stmtsp()) { // Kill this empty mtask - UINFO(6, "Removing zero-cost " << mtp->name()); - for (V3GraphEdge& in : mtp->inEdges()) { - for (V3GraphEdge& out : mtp->outEdges()) { - new V3GraphEdge{execMTaskGraphp, in.fromp(), out.top(), 1}; - } - } - VL_DO_DANGLING(mtp->unlinkDelete(execMTaskGraphp), mtp); - // Also remove and delete the AstMTaskBody, otherwise it would - // keep a dangling pointer to the ExecMTask. - VL_DO_DANGLING(bodyp->unlinkFrBack()->deleteTree(), bodyp); - } - } - // Removing tasks may cause edges that were formerly non-transitive to // become transitive. Also we just created new edges around the removed // tasks, which could be transitive. Prune out all transitive edges. @@ -907,6 +901,7 @@ void finalizeCosts(V3Graph* execMTaskGraphp) { void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, AstCFunc* funcp, const ExecMTask* mtaskp) { + AstScope* const scopep = v3Global.rootp()->topScopep()->scopep(); AstNodeModule* const modp = v3Global.rootp()->topModulep(); FileLine* const fl = modp->fileline(); @@ -940,8 +935,11 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, addCStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->id()) + ");"); } - // Move the actual body into this function - funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack()); + // Call the MTask function + AstCCall* const callp = new AstCCall{fl, mtaskp->funcp()}; + callp->selfPointer(VSelfPointerText{VSelfPointerText::VlSyms{}, scopep->nameDotless()}); + callp->dtypeSetVoid(); + funcp->addStmtsp(callp->makeStmt()); if (v3Global.opt.profPgo()) { // No lock around stopCounter, as counter numbers are unique per thread @@ -1093,56 +1091,38 @@ void addThreadStartToExecGraph(AstExecGraph* const execGraphp, } } -void wrapMTaskBodies(AstExecGraph* const execGraphp) { - FileLine* const flp = execGraphp->fileline(); - const string& tag = execGraphp->name(); - 
AstNodeModule* const modp = v3Global.rootp()->topModulep(); - - for (AstMTaskBody* mtaskBodyp = execGraphp->mTaskBodiesp(); mtaskBodyp; - mtaskBodyp = VN_AS(mtaskBodyp->nextp(), MTaskBody)) { - ExecMTask* const mtaskp = mtaskBodyp->execMTaskp(); - const std::string name = tag + "_mtask" + std::to_string(mtaskp->id()); - AstCFunc* const funcp = new AstCFunc{flp, name, nullptr}; - funcp->isLoose(true); - modp->addStmtsp(funcp); +void processMTaskBodies(AstExecGraph* const execGraphp) { + for (V3GraphVertex* const vtxp : execGraphp->depGraphp()->vertices().unlinkable()) { + ExecMTask* const mtaskp = vtxp->as<ExecMTask>(); + AstCFunc* const funcp = mtaskp->funcp(); + // Temporarily unlink function body so we can add more statements + AstNode* stmtsp = funcp->stmtsp()->unlinkFrBackWithNext(); // Helper function to make the code a bit more legible const auto addCStmt = [=](const string& stmt) -> void { // - funcp->addStmtsp(new AstCStmt{flp, stmt}); + funcp->addStmtsp(new AstCStmt{execGraphp->fileline(), stmt}); }; - addCStmt("static constexpr unsigned taskId = " + cvtToStr(mtaskp->id()) + ";"); - + // Profiling mtaskStart if (v3Global.opt.profExec()) { - const string& predictStart = std::to_string(mtaskp->predictStart()); - if (v3Global.opt.hierChild()) { - addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(taskId, " + predictStart - + ", \"" + v3Global.opt.topModule() + "\");"); - } else { - addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(taskId, " + predictStart - + ");"); - } + std::string args = std::to_string(mtaskp->id()); + args += ", " + std::to_string(mtaskp->predictStart()); + args += ", \""; + if (v3Global.opt.hierChild()) args += v3Global.opt.topModule(); + args += "\""; + addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(" + args + ");"); } - // Set mtask ID in the run-time system - addCStmt("Verilated::mtaskId(taskId);"); - - // Run body - funcp->addStmtsp(mtaskBodyp->stmtsp()->unlinkFrBackWithNext()); - + addCStmt("Verilated::mtaskId(" + 
std::to_string(mtaskp->id()) + ");"); + // Add back the body + funcp->addStmtsp(stmtsp); // Flush message queue addCStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);"); - + // Profiling mtaskEnd if (v3Global.opt.profExec()) { - const string& predictCost = std::to_string(mtaskp->cost()); - addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + predictCost + ");"); + const std::string& args = std::to_string(mtaskp->cost()); + addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + args + ");"); } - - // AstMTask will simply contain a call - AstCCall* const callp = new AstCCall{flp, funcp}; - callp->selfPointer(VSelfPointerText{VSelfPointerText::This{}}); - callp->dtypeSetVoid(); - mtaskBodyp->addStmtsp(callp->makeStmt()); } } @@ -1150,8 +1130,7 @@ void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& sc // Nothing to be done if there are no MTasks in the graph at all. if (execGraphp->depGraphp()->empty()) return; - // Create a function to be run by each thread. Note this moves all AstMTaskBody nodes form the - // AstExecGraph into the AstCFunc created + // Create a function to be run by each thread. const std::vector& funcps = createThreadFunctions(schedule, execGraphp->name()); UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?"); @@ -1159,9 +1138,30 @@ void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& sc addThreadStartToExecGraph(execGraphp, funcps, schedule.id()); } +// Called by Verilator top stage void implement(AstNetlist* netlistp) { - // Called by Verilator top stage - netlistp->topModulep()->foreach([&](AstExecGraph* execGraphp) { + // Gather all ExecGraphs + std::vector<AstExecGraph*> execGraphps; + netlistp->topModulep()->foreach([&](AstExecGraph* egp) { execGraphps.emplace_back(egp); }); + + // Process each + for (AstExecGraph* const execGraphp : execGraphps) { + // We can delete the placeholder calls to the MTask functions that + // were used for code analysis until now. 
We will replace them with + // statements that dispatch execution to the thread pool. + if (execGraphp->stmtsp()) execGraphp->stmtsp()->unlinkFrBackWithNext()->deleteTree(); + + // Some MTasks may have become empty after scheduling due to + // optimizations after scheduling. Remove those. + removeEmptyMTasks(execGraphp->depGraphp()); + + // In some very small test cases, we might end up with a completely + // empty ExecGraph, if so just delete it and move on to the next one. + if (execGraphp->depGraphp()->empty()) { + VL_DO_DANGLING(execGraphp->unlinkFrBack()->deleteTree(), execGraphp); + continue; + } + // Back in V3Order, we partitioned mtasks using provisional cost // estimates. However, V3Order precedes some optimizations (notably // V3LifePost) that can change the cost of logic within each mtask. @@ -1180,8 +1180,8 @@ void implement(AstNetlist* netlistp) { V3Stats::addStatSum("Optimizations, Thread schedule count", static_cast(packed.size())); - // Wrap each MTask body into a CFunc for better profiling/debugging - wrapMTaskBodies(execGraphp); + // Process MTask function bodies to add additional code + processMTaskBodies(execGraphp); for (const ThreadSchedule& schedule : packed) { // Replace the graph body with its multi-threaded implementation. @@ -1189,7 +1189,7 @@ void implement(AstNetlist* netlistp) { } addThreadEndWrapper(execGraphp); - }); + } } void selfTest() { diff --git a/src/V3ExecGraph.h b/src/V3ExecGraph.h index d16941b5c..2c13b92f9 100644 --- a/src/V3ExecGraph.h +++ b/src/V3ExecGraph.h @@ -25,7 +25,10 @@ #include class AstNetlist; -class AstMTaskBody; +class AstCFunc; +class AstExecGraph; +class AstNodeStmt; +class AstScope; //************************************************************************* // MTasks and graph structures @@ -33,9 +36,9 @@ class AstMTaskBody; class ExecMTask final : public V3GraphVertex { VL_RTTI_IMPL(ExecMTask, V3GraphVertex) private: - AstMTaskBody* const m_bodyp; // Task body const uint32_t m_id; // Unique ID of this ExecMTask. 
static std::atomic s_nextId; // Next ID to use + AstCFunc* const m_funcp; // The function that contains the task body const std::string m_hashName; // Hashed name based on body for profile-driven optimization // Predicted critical path from the start of this mtask to the ends of the graph that are // reachable from this mtask. In abstract time units. @@ -46,9 +49,12 @@ private: int m_threads = 1; // Threads used by this mtask VL_UNCOPYABLE(ExecMTask); + static AstCFunc* createCFunc(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp, + uint32_t id); + public: - ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED; - AstMTaskBody* bodyp() const { return m_bodyp; } + ExecMTask(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp) VL_MT_DISABLED; + AstCFunc* funcp() const { return m_funcp; } uint32_t id() const VL_MT_SAFE { return m_id; } uint32_t priority() const { return m_priority; } void priority(uint32_t pri) { m_priority = pri; } diff --git a/src/V3Hasher.cpp b/src/V3Hasher.cpp index 14438c196..03e6261ca 100644 --- a/src/V3Hasher.cpp +++ b/src/V3Hasher.cpp @@ -513,9 +513,6 @@ class HasherVisitor final : public VNVisitorConst { iterateConstNull(nodep->ftaskp()); }); } - void visit(AstMTaskBody* nodep) override { - m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, []() {}); - } void visit(AstNodeProcedure* nodep) override { m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, []() {}); } diff --git a/src/V3LifePost.cpp b/src/V3LifePost.cpp index 4703ea0f9..c39463967 100644 --- a/src/V3LifePost.cpp +++ b/src/V3LifePost.cpp @@ -290,7 +290,7 @@ class LifePostDlyVisitor final : public VNVisitorConst { const ExecMTask* const mtaskp = mtaskVtx.as(); VL_RESTORER(m_execMTaskp); m_execMTaskp = mtaskp; - iterateConst(mtaskp->bodyp()); + trace(mtaskp->funcp()); } } void visit(AstCFunc* nodep) override { diff --git a/src/V3OrderParallel.cpp b/src/V3OrderParallel.cpp index b84d8a3cd..d3b4b3904 100644 --- 
a/src/V3OrderParallel.cpp +++ b/src/V3OrderParallel.cpp @@ -1763,7 +1763,7 @@ class DpiThreadsVisitor final : public VNVisitorConst { public: // CONSTRUCTORS - explicit DpiThreadsVisitor(AstMTaskBody* nodep) { iterateConst(nodep); } + explicit DpiThreadsVisitor(AstCFunc* nodep) { iterateConst(nodep); } int threads() const { return m_threads; } ~DpiThreadsVisitor() override = default; @@ -2431,8 +2431,9 @@ AstNodeStmt* V3Order::createParallel(OrderGraph& orderGraph, OrderMoveGraph& mov if (dumpGraphLevel() >= 9) moveGraph.dumpDotFilePrefixed(tag + "_ordermv_pruned"); // Create the AstExecGraph node which represents the execution of the MTask graph. - FileLine* const rootFlp = v3Global.rootp()->fileline(); - AstExecGraph* const execGraphp = new AstExecGraph{rootFlp, tag}; + FileLine* const flp = v3Global.rootp()->fileline(); + AstScope* const scopep = v3Global.rootp()->topScopep()->scopep(); + AstExecGraph* const execGraphp = new AstExecGraph{flp, tag}; V3Graph* const depGraphp = execGraphp->depGraphp(); // Translate the LogicMTask graph into the corresponding ExecMTask graph, @@ -2468,24 +2469,23 @@ AstNodeStmt* V3Order::createParallel(OrderGraph& orderGraph, OrderMoveGraph& mov VL_DO_DANGLING(mVtxp->unlinkDelete(&moveGraph), mVtxp); } - // We have 2 objects, because AstMTaskBody is an AstNode, and ExecMTask is a GraphVertex. - // To combine them would involve multiple inheritance. 
- - // Construct the actual MTaskBody - AstMTaskBody* const bodyp = new AstMTaskBody{rootFlp}; - execGraphp->addMTaskBodiesp(bodyp); - bodyp->addStmtsp(emitter.getStmts()); - UASSERT_OBJ(bodyp->stmtsp(), bodyp, "Should not try to create empty MTask"); - // Create the ExecMTask - ExecMTask* const execMTaskp = new ExecMTask{depGraphp, bodyp}; - if (!v3Global.opt.hierBlocks().empty()) - execMTaskp->threads(DpiThreadsVisitor{bodyp}.threads()); + ExecMTask* const execMTaskp = new ExecMTask{execGraphp, scopep, emitter.getStmts()}; + if (!v3Global.opt.hierBlocks().empty()) { + execMTaskp->threads(DpiThreadsVisitor{execMTaskp->funcp()}.threads()); + } const bool newEntry = logicMTaskToExecMTask.emplace(mTaskp, execMTaskp).second; UASSERT_OBJ(newEntry, mTaskp, "LogicMTasks should be processed in dependencyorder"); UINFO(3, "Final '" << tag << "' LogicMTask " << mTaskp->id() << " maps to ExecMTask" << execMTaskp->id()); + // For code analysis purposes, we can pretend the AstExecGraph runs the + // MTasks sequentially, in some topological order that respects edges. + // The order they are created here happens to be just such an order. + AstCCall* const callp = new AstCCall{flp, execMTaskp->funcp()}; + callp->dtypeSetVoid(); + execGraphp->addStmtsp(callp->makeStmt()); + // Add the dependency edges between ExecMTasks for (const V3GraphEdge& edge : mTaskp->inEdges()) { const V3GraphVertex* fromVxp = edge.fromp(); diff --git a/src/V3VariableOrder.cpp b/src/V3VariableOrder.cpp index e241ae495..7560fc01d 100644 --- a/src/V3VariableOrder.cpp +++ b/src/V3VariableOrder.cpp @@ -53,7 +53,7 @@ class GatherMTaskAffinity final : VNVisitorConst { GatherMTaskAffinity(const ExecMTask* mTaskp, MTaskAffinityMap& results) : m_results{results} , m_id{mTaskp->id()} { - iterateChildrenConst(mTaskp->bodyp()); + iterateConst(mTaskp->funcp()); } ~GatherMTaskAffinity() = default; VL_UNMOVABLE(GatherMTaskAffinity);