Internals: Replace AstMTaskBody with AstCFunc (#6280) (#6628)

AstMTaskBody is somewhat redundant and is problematic for #6280. We used
to wrap all MTasks in a CFunc before emit anyway. Now we create that
CFunc when we create the ExecMTask in V3OrderParallel, and subsequently
use the CFunc to represent the contents of the MTask. Final output and
optimizations are the same, but internals are simplified to move
towards #6280.

No functional change.
This commit is contained in:
Geza Lore 2025-11-03 07:32:03 +01:00 committed by GitHub
parent d066504bb9
commit d3ca79368c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 156 additions and 205 deletions

View File

@ -120,7 +120,7 @@ public:
m_type = Type::SECTION_PUSH; m_type = Type::SECTION_PUSH;
} }
void sectionPop() { m_type = Type::SECTION_POP; } void sectionPop() { m_type = Type::SECTION_POP; }
void mtaskBegin(uint32_t id, uint32_t predictStart, const char* hierBlock = "") { void mtaskBegin(uint32_t id, uint32_t predictStart, const char* hierBlock) {
m_payload.mtaskBegin.m_id = id; m_payload.mtaskBegin.m_id = id;
m_payload.mtaskBegin.m_predictStart = predictStart; m_payload.mtaskBegin.m_predictStart = predictStart;
m_payload.mtaskBegin.m_cpu = VlOs::getcpu(); m_payload.mtaskBegin.m_cpu = VlOs::getcpu();

View File

@ -1100,32 +1100,6 @@ public:
string name() const override VL_MT_STABLE { return m_name; } string name() const override VL_MT_STABLE { return m_name; }
ASTGEN_MEMBERS_AstIntfRef; ASTGEN_MEMBERS_AstIntfRef;
}; };
class AstMTaskBody final : public AstNode {
// Hold statements for each MTask
// @astgen op1 := stmtsp : List[AstNode]
ExecMTask* m_execMTaskp = nullptr;
public:
explicit AstMTaskBody(FileLine* fl)
: ASTGEN_SUPER_MTaskBody(fl) {}
ASTGEN_MEMBERS_AstMTaskBody;
void cloneRelink() override { UASSERT(!clonep(), "Not cloneable"); }
const char* broken() const override {
BROKEN_RTN(!m_execMTaskp);
return nullptr;
}
void addStmtsFirstp(AstNode* nodep) {
if (stmtsp()) {
stmtsp()->addHereThisAsNext(nodep);
} else {
addStmtsp(nodep);
}
}
ExecMTask* execMTaskp() const { return m_execMTaskp; }
void execMTaskp(ExecMTask* execMTaskp) { m_execMTaskp = execMTaskp; }
void dump(std::ostream& str = std::cout) const override;
void dumpJson(std::ostream& str = std::cout) const override;
};
class AstModport final : public AstNode { class AstModport final : public AstNode {
// A modport in an interface // A modport in an interface
// @astgen op1 := varsp : List[AstNode] // @astgen op1 := varsp : List[AstNode]

View File

@ -601,20 +601,21 @@ public:
int instrCount() const override { return 0; } int instrCount() const override { return 0; }
}; };
class AstExecGraph final : public AstNodeStmt { class AstExecGraph final : public AstNodeStmt {
// For parallel execution, this node contains a dependency graph. Each // For parallel execution, this node contains a dependency graph. Each
// vertex in the graph is an ExecMTask, which contains a body for the // vertex in the graph is an ExecMTask, which includes a function that
// mtask (an AstMTaskBody), which contains sequentially executed statements. // holds the sequential body of the mtask.
//
// The AstMTaskBody nodes are also children of this node, so we can visit
// them without traversing the graph.
// //
// The location where AstExecGraph appears as a procedural statement is // The location where AstExecGraph appears as a procedural statement is
// where the parallel graph will be executed. Execution proceeds after // where the parallel graph will be executed. Execution proceeds after
// the AstExecGraph when all threads have joined. // the AstExecGraph when all threads have joined.
// //
// @astgen op1 := mTaskBodiesp : List[AstMTaskBody] // For code analysis purposes after scheduling, we keep a call to each
// In later phases, the statements that start the parallel execution // MTask function as children of the AstExecGraph in 'stmtsp'. These
// @astgen op2 := stmtsp : List[AstNode] // are in a topological order so they represent a valid sequential
// execution of the graph. In `V3ExecGraph::implement`, we replace these
// statements with statements that dispatch to the thread pool for
// parallel execution.
// @astgen op1 := stmtsp : List[AstNode]
V3Graph* const m_depGraphp; // contains ExecMTask vertices V3Graph* const m_depGraphp; // contains ExecMTask vertices
const string m_name; // Name of this AstExecGraph (for uniqueness at code generation) const string m_name; // Name of this AstExecGraph (for uniqueness at code generation)
@ -623,10 +624,7 @@ public:
~AstExecGraph() override; ~AstExecGraph() override;
ASTGEN_MEMBERS_AstExecGraph; ASTGEN_MEMBERS_AstExecGraph;
void cloneRelink() override { V3ERROR_NA; } // Not cloneable void cloneRelink() override { V3ERROR_NA; } // Not cloneable
const char* broken() const override { const char* broken() const override;
BROKEN_RTN(!m_depGraphp);
return nullptr;
}
string name() const override VL_MT_STABLE { return m_name; } string name() const override VL_MT_STABLE { return m_name; }
V3Graph* depGraphp() { return m_depGraphp; } V3Graph* depGraphp() { return m_depGraphp; }
const V3Graph* depGraphp() const { return m_depGraphp; } const V3Graph* depGraphp() const { return m_depGraphp; }

View File

@ -340,6 +340,16 @@ AstExecGraph::AstExecGraph(FileLine* fileline, const string& name) VL_MT_DISABLE
AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); } AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); }
// Consistency check for an AstExecGraph. Trips if the dependency graph
// pointer is null, or if any ExecMTask vertex in the graph has no function,
// or has a function for which brokeExists() is false. Returns nullptr when
// none of the checks trips.
// NOTE(review): BROKEN_RTN is a project macro not visible here; presumably it
// returns a diagnostic string when its condition holds - confirm.
const char* AstExecGraph::broken() const {
BROKEN_RTN(!m_depGraphp);
// Every vertex of the dependency graph is treated as an ExecMTask
for (const V3GraphVertex& vtx : m_depGraphp->vertices()) {
const ExecMTask* const mtaskp = vtx.as<ExecMTask>();
AstCFunc* const funcp = mtaskp->funcp();
// The MTask must reference a function, and brokeExists() must hold for it
BROKEN_RTN(!funcp || !funcp->brokeExists());
}
return nullptr;
}
AstNodeExpr* AstInsideRange::newAndFromInside(AstNodeExpr* exprp, AstNodeExpr* lhsp, AstNodeExpr* AstInsideRange::newAndFromInside(AstNodeExpr* exprp, AstNodeExpr* lhsp,
AstNodeExpr* rhsp) { AstNodeExpr* rhsp) {
AstNodeExpr* const ap = new AstGte{fileline(), exprp, lhsp}; AstNodeExpr* const ap = new AstGte{fileline(), exprp, lhsp};
@ -2538,17 +2548,6 @@ void AstSystemCSection::dumpJson(std::ostream& str) const {
dumpJsonStr(str, "sectionType", sectionType().ascii()); dumpJsonStr(str, "sectionType", sectionType().ascii());
dumpJsonGen(str); dumpJsonGen(str);
} }
void AstMTaskBody::dump(std::ostream& str) const {
this->AstNode::dump(str);
str << " ";
m_execMTaskp->dump(str);
}
void AstMTaskBody::dumpJson(std::ostream& str) const {
str << ',' << '"' << "execMTask" << '"' << ':' << '"';
m_execMTaskp->dump(str); // TODO: Consider dumping it as json object
str << '"';
dumpJsonGen(str);
}
void AstTypeTable::dump(std::ostream& str) const { void AstTypeTable::dump(std::ostream& str) const {
this->AstNode::dump(str); this->AstNode::dump(str);
for (int i = 0; i < static_cast<int>(VBasicDTypeKwd::_ENUM_MAX); ++i) { for (int i = 0; i < static_cast<int>(VBasicDTypeKwd::_ENUM_MAX); ++i) {

View File

@ -38,7 +38,6 @@ class DepthVisitor final : public VNVisitor {
// STATE - for current visit position (use VL_RESTORER) // STATE - for current visit position (use VL_RESTORER)
AstCFunc* m_cfuncp = nullptr; // Current block AstCFunc* m_cfuncp = nullptr; // Current block
AstMTaskBody* m_mtaskbodyp = nullptr; // Current mtaskbody
AstNode* m_stmtp = nullptr; // Current statement AstNode* m_stmtp = nullptr; // Current statement
int m_depth = 0; // How deep in an expression int m_depth = 0; // How deep in an expression
int m_maxdepth = 0; // Maximum depth in an expression int m_maxdepth = 0; // Maximum depth in an expression
@ -53,8 +52,6 @@ class DepthVisitor final : public VNVisitor {
m_tempNames.get(nodep), nodep->dtypep()}; m_tempNames.get(nodep), nodep->dtypep()};
if (m_cfuncp) { if (m_cfuncp) {
m_cfuncp->addVarsp(varp); m_cfuncp->addVarsp(varp);
} else if (m_mtaskbodyp) {
m_mtaskbodyp->addStmtsFirstp(varp);
} else { } else {
nodep->v3fatalSrc("Deep expression not under a function"); nodep->v3fatalSrc("Deep expression not under a function");
} }
@ -70,28 +67,14 @@ class DepthVisitor final : public VNVisitor {
// VISITORS // VISITORS
void visit(AstCFunc* nodep) override { void visit(AstCFunc* nodep) override {
VL_RESTORER(m_cfuncp); VL_RESTORER(m_cfuncp);
VL_RESTORER(m_mtaskbodyp);
VL_RESTORER(m_depth); VL_RESTORER(m_depth);
VL_RESTORER(m_maxdepth); VL_RESTORER(m_maxdepth);
m_cfuncp = nodep; m_cfuncp = nodep;
m_mtaskbodyp = nullptr;
m_depth = 0; m_depth = 0;
m_maxdepth = 0; m_maxdepth = 0;
m_tempNames.reset(); m_tempNames.reset();
iterateChildren(nodep); iterateChildren(nodep);
} }
void visit(AstMTaskBody* nodep) override {
VL_RESTORER(m_cfuncp);
VL_RESTORER(m_mtaskbodyp);
VL_RESTORER(m_depth);
VL_RESTORER(m_maxdepth);
m_cfuncp = nullptr;
m_mtaskbodyp = nodep;
m_depth = 0;
m_maxdepth = 0;
// We don't reset the names, as must share across tasks
iterateChildren(nodep);
}
void visitStmt(AstNodeStmt* nodep) { void visitStmt(AstNodeStmt* nodep) {
VL_RESTORER(m_stmtp); VL_RESTORER(m_stmtp);
VL_RESTORER(m_depth); VL_RESTORER(m_depth);

View File

@ -1623,11 +1623,6 @@ public:
} }
// //
void visit(AstMTaskBody* nodep) override {
VL_RESTORER(m_useSelfForThis);
m_useSelfForThis = true;
iterateChildrenConst(nodep);
}
void visit(AstConsAssoc* nodep) override { void visit(AstConsAssoc* nodep) override {
putnbs(nodep, nodep->dtypep()->cType("", false, false)); putnbs(nodep, nodep->dtypep()->cType("", false, false));
puts("()"); puts("()");
@ -1723,7 +1718,6 @@ public:
void visit(AstExecGraph* nodep) override { void visit(AstExecGraph* nodep) override {
// The location of the AstExecGraph within the containing AstCFunc is where we want to // The location of the AstExecGraph within the containing AstCFunc is where we want to
// invoke the graph and wait for it to complete. Emitting the children does just that. // invoke the graph and wait for it to complete. Emitting the children does just that.
UASSERT_OBJ(!nodep->mTaskBodiesp(), nodep, "These should have been lowered");
iterateChildrenConst(nodep); iterateChildrenConst(nodep);
} }

View File

@ -33,16 +33,24 @@
VL_DEFINE_DEBUG_FUNCTIONS; VL_DEFINE_DEBUG_FUNCTIONS;
ExecMTask::ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED // AstCFunc* ExecMTask::createCFunc(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp,
: V3GraphVertex{graphp}, uint32_t id) {
m_bodyp{bodyp}, const std::string name = execGraphp->name() + "_mtask" + std::to_string(id);
m_id{s_nextId++}, AstCFunc* const funcp = new AstCFunc{execGraphp->fileline(), name, scopep};
m_hashName{V3Hasher::uncachedHash(bodyp).toString()} { funcp->isLoose(true);
UASSERT_OBJ(bodyp->stmtsp(), bodyp, "AstMTaskBody should already be populated for hashing"); funcp->dontCombine(true);
UASSERT_OBJ(!bodyp->execMTaskp(), bodyp, "AstMTaskBody already linked to an ExecMTask"); funcp->addStmtsp(stmtsp);
bodyp->execMTaskp(this); if (scopep) scopep->addBlocksp(funcp);
return funcp;
} }
ExecMTask::ExecMTask(AstExecGraph* execGraphp, AstScope* scopep,
AstNodeStmt* stmtsp) VL_MT_DISABLED //
: V3GraphVertex{execGraphp->depGraphp()},
m_id{s_nextId++},
m_funcp{createCFunc(execGraphp, scopep, stmtsp, m_id)},
m_hashName{V3Hasher::uncachedHash(m_funcp).toString()} {}
void ExecMTask::dump(std::ostream& str) const { void ExecMTask::dump(std::ostream& str) const {
str << name() << "." << cvtToHex(this); str << name() << "." << cvtToHex(this);
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]"; if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
@ -538,37 +546,32 @@ public:
selfTestNormalFirst(); selfTestNormalFirst();
} }
static void selfTestNormalFirst() { static void selfTestNormalFirst() {
V3Graph graph;
FileLine* const flp = v3Global.rootp()->fileline(); FileLine* const flp = v3Global.rootp()->fileline();
std::vector<AstMTaskBody*> mTaskBodyps; AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"};
const auto makeBody = [&]() { V3Graph& graph = *execGraphp->depGraphp();
AstMTaskBody* const bodyp = new AstMTaskBody{flp}; const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; };
mTaskBodyps.push_back(bodyp); ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()};
bodyp->addStmtsp(new AstComment{flp, ""});
return bodyp;
};
ExecMTask* const t0 = new ExecMTask{&graph, makeBody()};
t0->cost(1000); t0->cost(1000);
t0->priority(1100); t0->priority(1100);
ExecMTask* const t1 = new ExecMTask{&graph, makeBody()}; ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()};
t1->cost(100); t1->cost(100);
t1->priority(100); t1->priority(100);
ExecMTask* const t2 = new ExecMTask{&graph, makeBody()}; ExecMTask* const t2 = new ExecMTask{execGraphp, nullptr, makeBody()};
t2->cost(100); t2->cost(100);
t2->priority(100); t2->priority(100);
t2->threads(2); t2->threads(2);
ExecMTask* const t3 = new ExecMTask{&graph, makeBody()}; ExecMTask* const t3 = new ExecMTask{execGraphp, nullptr, makeBody()};
t3->cost(100); t3->cost(100);
t3->priority(100); t3->priority(100);
t3->threads(3); t3->threads(3);
ExecMTask* const t4 = new ExecMTask{&graph, makeBody()}; ExecMTask* const t4 = new ExecMTask{execGraphp, nullptr, makeBody()};
t4->cost(100); t4->cost(100);
t4->priority(100); t4->priority(100);
t4->threads(3); t4->threads(3);
ExecMTask* const t5 = new ExecMTask{&graph, makeBody()}; ExecMTask* const t5 = new ExecMTask{execGraphp, nullptr, makeBody()};
t5->cost(100); t5->cost(100);
t5->priority(100); t5->priority(100);
ExecMTask* const t6 = new ExecMTask{&graph, makeBody()}; ExecMTask* const t6 = new ExecMTask{execGraphp, nullptr, makeBody()};
t6->cost(100); t6->cost(100);
t6->priority(100); t6->priority(100);
@ -666,24 +669,20 @@ public:
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 4), 1360); UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 4), 1360);
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 5), 1360); UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 5), 1360);
for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree(); for (V3GraphVertex& vtx : graph.vertices()) vtx.as<ExecMTask>()->funcp()->deleteTree();
VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp);
ThreadSchedule::s_mtaskState.clear(); ThreadSchedule::s_mtaskState.clear();
} }
static void selfTestHierFirst() { static void selfTestHierFirst() {
V3Graph graph;
FileLine* const flp = v3Global.rootp()->fileline(); FileLine* const flp = v3Global.rootp()->fileline();
std::vector<AstMTaskBody*> mTaskBodyps; AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"};
const auto makeBody = [&]() { V3Graph& graph = *execGraphp->depGraphp();
AstMTaskBody* const bodyp = new AstMTaskBody{flp}; const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; };
mTaskBodyps.push_back(bodyp); ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()};
bodyp->addStmtsp(new AstComment{flp, ""});
return bodyp;
};
ExecMTask* const t0 = new ExecMTask{&graph, makeBody()};
t0->cost(1000); t0->cost(1000);
t0->priority(1100); t0->priority(1100);
t0->threads(2); t0->threads(2);
ExecMTask* const t1 = new ExecMTask{&graph, makeBody()}; ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()};
t1->cost(100); t1->cost(100);
t1->priority(100); t1->priority(100);
@ -725,7 +724,8 @@ public:
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 0), 1100); UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 0), 1100);
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 1), 1130); UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 1), 1130);
for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree(); for (V3GraphVertex& vtx : graph.vertices()) vtx.as<ExecMTask>()->funcp()->deleteTree();
VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp);
ThreadSchedule::s_mtaskState.clear(); ThreadSchedule::s_mtaskState.clear();
} }
@ -790,6 +790,24 @@ void normalizeCosts(Costs& costs) {
} }
} }
// Remove from 'execMTaskGraphp' every ExecMTask whose function has no
// statements. For each such MTask, its edges are rerouted, its AstCFunc is
// unlinked and deleted, and the vertex itself is deleted. Once all vertices
// have been visited, redundant edges are removed from the graph.
void removeEmptyMTasks(V3Graph* execMTaskGraphp) {
// NOTE(review): 'unlinkable()' presumably permits deleting the current vertex
// while iterating - confirm against the V3Graph list implementation
for (V3GraphVertex* const vtxp : execMTaskGraphp->vertices().unlinkable()) {
ExecMTask* const mtaskp = vtxp->as<ExecMTask>();
AstCFunc* const funcp = mtaskp->funcp();
// An MTask whose function still has statements is kept as is
if (funcp->stmtsp()) continue;
UINFO(6, "Removing empty MTask " << mtaskp->name());
// Redirect edges
mtaskp->rerouteEdges(execMTaskGraphp);
// Delete the MTask function
VL_DO_DANGLING(funcp->unlinkFrBack()->deleteTree(), funcp);
// Delete the MTask vertex
VL_DO_DANGLING(mtaskp->unlinkDelete(execMTaskGraphp), mtaskp);
}
// Remove redundant dependencies
execMTaskGraphp->removeRedundantEdgesMax(&V3GraphEdge::followAlwaysTrue);
}
void fillinCosts(V3Graph* execMTaskGraphp) { void fillinCosts(V3Graph* execMTaskGraphp) {
// Pass 1: See what profiling data applies // Pass 1: See what profiling data applies
Costs costs; // For each mtask, costs Costs costs; // For each mtask, costs
@ -797,7 +815,7 @@ void fillinCosts(V3Graph* execMTaskGraphp) {
for (V3GraphVertex& vtx : execMTaskGraphp->vertices()) { for (V3GraphVertex& vtx : execMTaskGraphp->vertices()) {
ExecMTask* const mtp = vtx.as<ExecMTask>(); ExecMTask* const mtp = vtx.as<ExecMTask>();
// This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits // This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits
const uint64_t costEstimate = V3InstrCount::count(mtp->bodyp(), false); const uint64_t costEstimate = V3InstrCount::count(mtp->funcp(), false);
const uint64_t costProfiled const uint64_t costProfiled
= V3Control::getProfileData(v3Global.opt.prefix(), mtp->hashName()); = V3Control::getProfileData(v3Global.opt.prefix(), mtp->hashName());
if (costProfiled) { if (costProfiled) {
@ -857,30 +875,6 @@ void finalizeCosts(V3Graph* execMTaskGraphp) {
} }
} }
// Some MTasks may now have zero cost, eliminate those.
// (It's common for tasks to shrink to nothing when V3LifePost
// removes dly assignments.)
for (V3GraphVertex* const vtxp : execMTaskGraphp->vertices().unlinkable()) {
ExecMTask* const mtp = vtxp->as<ExecMTask>();
// Don't rely on checking mtp->cost() == 0 to detect an empty task.
// Our cost-estimating logic is just an estimate. Instead, check
// the MTaskBody to see if it's empty. That's the source of truth.
AstMTaskBody* const bodyp = mtp->bodyp();
if (!bodyp->stmtsp()) { // Kill this empty mtask
UINFO(6, "Removing zero-cost " << mtp->name());
for (V3GraphEdge& in : mtp->inEdges()) {
for (V3GraphEdge& out : mtp->outEdges()) {
new V3GraphEdge{execMTaskGraphp, in.fromp(), out.top(), 1};
}
}
VL_DO_DANGLING(mtp->unlinkDelete(execMTaskGraphp), mtp);
// Also remove and delete the AstMTaskBody, otherwise it would
// keep a dangling pointer to the ExecMTask.
VL_DO_DANGLING(bodyp->unlinkFrBack()->deleteTree(), bodyp);
}
}
// Removing tasks may cause edges that were formerly non-transitive to // Removing tasks may cause edges that were formerly non-transitive to
// become transitive. Also we just created new edges around the removed // become transitive. Also we just created new edges around the removed
// tasks, which could be transitive. Prune out all transitive edges. // tasks, which could be transitive. Prune out all transitive edges.
@ -907,6 +901,7 @@ void finalizeCosts(V3Graph* execMTaskGraphp) {
void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, AstCFunc* funcp, void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, AstCFunc* funcp,
const ExecMTask* mtaskp) { const ExecMTask* mtaskp) {
AstScope* const scopep = v3Global.rootp()->topScopep()->scopep();
AstNodeModule* const modp = v3Global.rootp()->topModulep(); AstNodeModule* const modp = v3Global.rootp()->topModulep();
FileLine* const fl = modp->fileline(); FileLine* const fl = modp->fileline();
@ -940,8 +935,11 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId,
addCStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->id()) + ");"); addCStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->id()) + ");");
} }
// Move the actual body into this function // Call the MTask function
funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack()); AstCCall* const callp = new AstCCall{fl, mtaskp->funcp()};
callp->selfPointer(VSelfPointerText{VSelfPointerText::VlSyms{}, scopep->nameDotless()});
callp->dtypeSetVoid();
funcp->addStmtsp(callp->makeStmt());
if (v3Global.opt.profPgo()) { if (v3Global.opt.profPgo()) {
// No lock around stopCounter, as counter numbers are unique per thread // No lock around stopCounter, as counter numbers are unique per thread
@ -1093,56 +1091,38 @@ void addThreadStartToExecGraph(AstExecGraph* const execGraphp,
} }
} }
void wrapMTaskBodies(AstExecGraph* const execGraphp) { void processMTaskBodies(AstExecGraph* const execGraphp) {
FileLine* const flp = execGraphp->fileline(); for (V3GraphVertex* const vtxp : execGraphp->depGraphp()->vertices().unlinkable()) {
const string& tag = execGraphp->name(); ExecMTask* const mtaskp = vtxp->as<ExecMTask>();
AstNodeModule* const modp = v3Global.rootp()->topModulep(); AstCFunc* const funcp = mtaskp->funcp();
// Temporarily unlink function body so we can add more statements
for (AstMTaskBody* mtaskBodyp = execGraphp->mTaskBodiesp(); mtaskBodyp; AstNode* stmtsp = funcp->stmtsp()->unlinkFrBackWithNext();
mtaskBodyp = VN_AS(mtaskBodyp->nextp(), MTaskBody)) {
ExecMTask* const mtaskp = mtaskBodyp->execMTaskp();
const std::string name = tag + "_mtask" + std::to_string(mtaskp->id());
AstCFunc* const funcp = new AstCFunc{flp, name, nullptr};
funcp->isLoose(true);
modp->addStmtsp(funcp);
// Helper function to make the code a bit more legible // Helper function to make the code a bit more legible
const auto addCStmt = [=](const string& stmt) -> void { // const auto addCStmt = [=](const string& stmt) -> void { //
funcp->addStmtsp(new AstCStmt{flp, stmt}); funcp->addStmtsp(new AstCStmt{execGraphp->fileline(), stmt});
}; };
addCStmt("static constexpr unsigned taskId = " + cvtToStr(mtaskp->id()) + ";"); // Profiling mtaskStart
if (v3Global.opt.profExec()) { if (v3Global.opt.profExec()) {
const string& predictStart = std::to_string(mtaskp->predictStart()); std::string args = std::to_string(mtaskp->id());
if (v3Global.opt.hierChild()) { args += ", " + std::to_string(mtaskp->predictStart());
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(taskId, " + predictStart args += ", \"";
+ ", \"" + v3Global.opt.topModule() + "\");"); if (v3Global.opt.hierChild()) args += v3Global.opt.topModule();
} else { args += "\"";
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(taskId, " + predictStart addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(" + args + ");");
+ ");");
}
} }
// Set mtask ID in the run-time system // Set mtask ID in the run-time system
addCStmt("Verilated::mtaskId(taskId);"); addCStmt("Verilated::mtaskId(" + std::to_string(mtaskp->id()) + ");");
// Add back the body
// Run body funcp->addStmtsp(stmtsp);
funcp->addStmtsp(mtaskBodyp->stmtsp()->unlinkFrBackWithNext());
// Flush message queue // Flush message queue
addCStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);"); addCStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);");
// Profiling mtaskEnd
if (v3Global.opt.profExec()) { if (v3Global.opt.profExec()) {
const string& predictCost = std::to_string(mtaskp->cost()); const std::string& args = std::to_string(mtaskp->cost());
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + predictCost + ");"); addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + args + ");");
} }
// AstMTask will simply contain a call
AstCCall* const callp = new AstCCall{flp, funcp};
callp->selfPointer(VSelfPointerText{VSelfPointerText::This{}});
callp->dtypeSetVoid();
mtaskBodyp->addStmtsp(callp->makeStmt());
} }
} }
@ -1150,8 +1130,7 @@ void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& sc
// Nothing to be done if there are no MTasks in the graph at all. // Nothing to be done if there are no MTasks in the graph at all.
if (execGraphp->depGraphp()->empty()) return; if (execGraphp->depGraphp()->empty()) return;
// Create a function to be run by each thread. Note this moves all AstMTaskBody nodes form the // Create a function to be run by each thread.
// AstExecGraph into the AstCFunc created
const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule, execGraphp->name()); const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule, execGraphp->name());
UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?"); UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?");
@ -1159,9 +1138,30 @@ void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& sc
addThreadStartToExecGraph(execGraphp, funcps, schedule.id()); addThreadStartToExecGraph(execGraphp, funcps, schedule.id());
} }
// Called by Verilator top stage
void implement(AstNetlist* netlistp) { void implement(AstNetlist* netlistp) {
// Called by Verilator top stage // Gather all ExecGraphs
netlistp->topModulep()->foreach([&](AstExecGraph* execGraphp) { std::vector<AstExecGraph*> execGraphps;
netlistp->topModulep()->foreach([&](AstExecGraph* egp) { execGraphps.emplace_back(egp); });
// Process each
for (AstExecGraph* const execGraphp : execGraphps) {
// We can delete the placeholder calls to the MTask functions that
// were used for code analysis until now. We will replace them with
// statements that dispatch execution to the thread pool.
if (execGraphp->stmtsp()) execGraphp->stmtsp()->unlinkFrBackWithNext()->deleteTree();
// Some MTasks may have become empty after scheduling due to
// optimizations after scheduling. Remove those.
removeEmptyMTasks(execGraphp->depGraphp());
// In some very small test cases, we might end up with a completely
// empty ExecGraph, if so just delete it.
if (execGraphp->depGraphp()->empty()) {
VL_DO_DANGLING(execGraphp->unlinkFrBack()->deleteTree(), execGraphp);
// Skip only this graph. This is the body of a range-for over all
// gathered ExecGraphs, so a 'return' here would exit the function and
// leave the remaining graphs unprocessed.
continue;
}
// Back in V3Order, we partitioned mtasks using provisional cost // Back in V3Order, we partitioned mtasks using provisional cost
// estimates. However, V3Order precedes some optimizations (notably // estimates. However, V3Order precedes some optimizations (notably
// V3LifePost) that can change the cost of logic within each mtask. // V3LifePost) that can change the cost of logic within each mtask.
@ -1180,8 +1180,8 @@ void implement(AstNetlist* netlistp) {
V3Stats::addStatSum("Optimizations, Thread schedule count", V3Stats::addStatSum("Optimizations, Thread schedule count",
static_cast<double>(packed.size())); static_cast<double>(packed.size()));
// Wrap each MTask body into a CFunc for better profiling/debugging // Process MTask function bodies to add additional code
wrapMTaskBodies(execGraphp); processMTaskBodies(execGraphp);
for (const ThreadSchedule& schedule : packed) { for (const ThreadSchedule& schedule : packed) {
// Replace the graph body with its multi-threaded implementation. // Replace the graph body with its multi-threaded implementation.
@ -1189,7 +1189,7 @@ void implement(AstNetlist* netlistp) {
} }
addThreadEndWrapper(execGraphp); addThreadEndWrapper(execGraphp);
}); }
} }
void selfTest() { void selfTest() {

View File

@ -25,7 +25,10 @@
#include <atomic> #include <atomic>
class AstNetlist; class AstNetlist;
class AstMTaskBody; class AstCFunc;
class AstExecGraph;
class AstNodeStmt;
class AstScope;
//************************************************************************* //*************************************************************************
// MTasks and graph structures // MTasks and graph structures
@ -33,9 +36,9 @@ class AstMTaskBody;
class ExecMTask final : public V3GraphVertex { class ExecMTask final : public V3GraphVertex {
VL_RTTI_IMPL(ExecMTask, V3GraphVertex) VL_RTTI_IMPL(ExecMTask, V3GraphVertex)
private: private:
AstMTaskBody* const m_bodyp; // Task body
const uint32_t m_id; // Unique ID of this ExecMTask. const uint32_t m_id; // Unique ID of this ExecMTask.
static std::atomic<uint32_t> s_nextId; // Next ID to use static std::atomic<uint32_t> s_nextId; // Next ID to use
AstCFunc* const m_funcp; // The function that contains the task body
const std::string m_hashName; // Hashed name based on body for profile-driven optimization const std::string m_hashName; // Hashed name based on body for profile-driven optimization
// Predicted critical path from the start of this mtask to the ends of the graph that are // Predicted critical path from the start of this mtask to the ends of the graph that are
// reachable from this mtask. In abstract time units. // reachable from this mtask. In abstract time units.
@ -46,9 +49,12 @@ private:
int m_threads = 1; // Threads used by this mtask int m_threads = 1; // Threads used by this mtask
VL_UNCOPYABLE(ExecMTask); VL_UNCOPYABLE(ExecMTask);
static AstCFunc* createCFunc(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp,
uint32_t id);
public: public:
ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED; ExecMTask(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp) VL_MT_DISABLED;
AstMTaskBody* bodyp() const { return m_bodyp; } AstCFunc* funcp() const { return m_funcp; }
uint32_t id() const VL_MT_SAFE { return m_id; } uint32_t id() const VL_MT_SAFE { return m_id; }
uint32_t priority() const { return m_priority; } uint32_t priority() const { return m_priority; }
void priority(uint32_t pri) { m_priority = pri; } void priority(uint32_t pri) { m_priority = pri; }

View File

@ -513,9 +513,6 @@ class HasherVisitor final : public VNVisitorConst {
iterateConstNull(nodep->ftaskp()); iterateConstNull(nodep->ftaskp());
}); });
} }
void visit(AstMTaskBody* nodep) override {
m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, []() {});
}
void visit(AstNodeProcedure* nodep) override { void visit(AstNodeProcedure* nodep) override {
m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, []() {}); m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, []() {});
} }

View File

@ -290,7 +290,7 @@ class LifePostDlyVisitor final : public VNVisitorConst {
const ExecMTask* const mtaskp = mtaskVtx.as<ExecMTask>(); const ExecMTask* const mtaskp = mtaskVtx.as<ExecMTask>();
VL_RESTORER(m_execMTaskp); VL_RESTORER(m_execMTaskp);
m_execMTaskp = mtaskp; m_execMTaskp = mtaskp;
iterateConst(mtaskp->bodyp()); trace(mtaskp->funcp());
} }
} }
void visit(AstCFunc* nodep) override { void visit(AstCFunc* nodep) override {

View File

@ -1763,7 +1763,7 @@ class DpiThreadsVisitor final : public VNVisitorConst {
public: public:
// CONSTRUCTORS // CONSTRUCTORS
explicit DpiThreadsVisitor(AstMTaskBody* nodep) { iterateConst(nodep); } explicit DpiThreadsVisitor(AstCFunc* nodep) { iterateConst(nodep); }
int threads() const { return m_threads; } int threads() const { return m_threads; }
~DpiThreadsVisitor() override = default; ~DpiThreadsVisitor() override = default;
@ -2431,8 +2431,9 @@ AstNodeStmt* V3Order::createParallel(OrderGraph& orderGraph, OrderMoveGraph& mov
if (dumpGraphLevel() >= 9) moveGraph.dumpDotFilePrefixed(tag + "_ordermv_pruned"); if (dumpGraphLevel() >= 9) moveGraph.dumpDotFilePrefixed(tag + "_ordermv_pruned");
// Create the AstExecGraph node which represents the execution of the MTask graph. // Create the AstExecGraph node which represents the execution of the MTask graph.
FileLine* const rootFlp = v3Global.rootp()->fileline(); FileLine* const flp = v3Global.rootp()->fileline();
AstExecGraph* const execGraphp = new AstExecGraph{rootFlp, tag}; AstScope* const scopep = v3Global.rootp()->topScopep()->scopep();
AstExecGraph* const execGraphp = new AstExecGraph{flp, tag};
V3Graph* const depGraphp = execGraphp->depGraphp(); V3Graph* const depGraphp = execGraphp->depGraphp();
// Translate the LogicMTask graph into the corresponding ExecMTask graph, // Translate the LogicMTask graph into the corresponding ExecMTask graph,
@ -2468,24 +2469,23 @@ AstNodeStmt* V3Order::createParallel(OrderGraph& orderGraph, OrderMoveGraph& mov
VL_DO_DANGLING(mVtxp->unlinkDelete(&moveGraph), mVtxp); VL_DO_DANGLING(mVtxp->unlinkDelete(&moveGraph), mVtxp);
} }
// We have 2 objects, because AstMTaskBody is an AstNode, and ExecMTask is a GraphVertex.
// To combine them would involve multiple inheritance.
// Construct the actual MTaskBody
AstMTaskBody* const bodyp = new AstMTaskBody{rootFlp};
execGraphp->addMTaskBodiesp(bodyp);
bodyp->addStmtsp(emitter.getStmts());
UASSERT_OBJ(bodyp->stmtsp(), bodyp, "Should not try to create empty MTask");
// Create the ExecMTask // Create the ExecMTask
ExecMTask* const execMTaskp = new ExecMTask{depGraphp, bodyp}; ExecMTask* const execMTaskp = new ExecMTask{execGraphp, scopep, emitter.getStmts()};
if (!v3Global.opt.hierBlocks().empty()) if (!v3Global.opt.hierBlocks().empty()) {
execMTaskp->threads(DpiThreadsVisitor{bodyp}.threads()); execMTaskp->threads(DpiThreadsVisitor{execMTaskp->funcp()}.threads());
}
const bool newEntry = logicMTaskToExecMTask.emplace(mTaskp, execMTaskp).second; const bool newEntry = logicMTaskToExecMTask.emplace(mTaskp, execMTaskp).second;
UASSERT_OBJ(newEntry, mTaskp, "LogicMTasks should be processed in dependencyorder"); UASSERT_OBJ(newEntry, mTaskp, "LogicMTasks should be processed in dependencyorder");
UINFO(3, "Final '" << tag << "' LogicMTask " << mTaskp->id() << " maps to ExecMTask" UINFO(3, "Final '" << tag << "' LogicMTask " << mTaskp->id() << " maps to ExecMTask"
<< execMTaskp->id()); << execMTaskp->id());
// For code analysis purposes, we can pretend the AstExecGraph runs the
// MTasks sequentially, in some topological order that respects edges.
// The order they are created here happens to be just such an order.
AstCCall* const callp = new AstCCall{flp, execMTaskp->funcp()};
callp->dtypeSetVoid();
execGraphp->addStmtsp(callp->makeStmt());
// Add the dependency edges between ExecMTasks // Add the dependency edges between ExecMTasks
for (const V3GraphEdge& edge : mTaskp->inEdges()) { for (const V3GraphEdge& edge : mTaskp->inEdges()) {
const V3GraphVertex* fromVxp = edge.fromp(); const V3GraphVertex* fromVxp = edge.fromp();

View File

@ -53,7 +53,7 @@ class GatherMTaskAffinity final : VNVisitorConst {
GatherMTaskAffinity(const ExecMTask* mTaskp, MTaskAffinityMap& results) GatherMTaskAffinity(const ExecMTask* mTaskp, MTaskAffinityMap& results)
: m_results{results} : m_results{results}
, m_id{mTaskp->id()} { , m_id{mTaskp->id()} {
iterateChildrenConst(mTaskp->bodyp()); iterateConst(mTaskp->funcp());
} }
~GatherMTaskAffinity() = default; ~GatherMTaskAffinity() = default;
VL_UNMOVABLE(GatherMTaskAffinity); VL_UNMOVABLE(GatherMTaskAffinity);