Internals: Replace AstMTaskBody with AstCFunc (#6280) (#6628)

AstMTaskBody is somewhat redundant and is problematic for #6280. We used
to wrap all MTasks in a CFunc before emit anyway. Now we create that
CFunc when we create the ExecMTask in V3OrderParallel, and subsequently
use the CFunc to represent the contents of the MTask. Final output and
optimizations are the same, but internals are simplified to move
towards #6280.

No functional change.
This commit is contained in:
Geza Lore 2025-11-03 07:32:03 +01:00 committed by GitHub
parent d066504bb9
commit d3ca79368c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 156 additions and 205 deletions

View File

@ -120,7 +120,7 @@ public:
m_type = Type::SECTION_PUSH;
}
void sectionPop() { m_type = Type::SECTION_POP; }
void mtaskBegin(uint32_t id, uint32_t predictStart, const char* hierBlock = "") {
void mtaskBegin(uint32_t id, uint32_t predictStart, const char* hierBlock) {
m_payload.mtaskBegin.m_id = id;
m_payload.mtaskBegin.m_predictStart = predictStart;
m_payload.mtaskBegin.m_cpu = VlOs::getcpu();

View File

@ -1100,32 +1100,6 @@ public:
string name() const override VL_MT_STABLE { return m_name; }
ASTGEN_MEMBERS_AstIntfRef;
};
class AstMTaskBody final : public AstNode {
// Hold statements for each MTask
// @astgen op1 := stmtsp : List[AstNode]
ExecMTask* m_execMTaskp = nullptr;
public:
explicit AstMTaskBody(FileLine* fl)
: ASTGEN_SUPER_MTaskBody(fl) {}
ASTGEN_MEMBERS_AstMTaskBody;
void cloneRelink() override { UASSERT(!clonep(), "Not cloneable"); }
const char* broken() const override {
BROKEN_RTN(!m_execMTaskp);
return nullptr;
}
void addStmtsFirstp(AstNode* nodep) {
if (stmtsp()) {
stmtsp()->addHereThisAsNext(nodep);
} else {
addStmtsp(nodep);
}
}
ExecMTask* execMTaskp() const { return m_execMTaskp; }
void execMTaskp(ExecMTask* execMTaskp) { m_execMTaskp = execMTaskp; }
void dump(std::ostream& str = std::cout) const override;
void dumpJson(std::ostream& str = std::cout) const override;
};
class AstModport final : public AstNode {
// A modport in an interface
// @astgen op1 := varsp : List[AstNode]

View File

@ -601,20 +601,21 @@ public:
int instrCount() const override { return 0; }
};
class AstExecGraph final : public AstNodeStmt {
// For parallel execution, this node contains a dependency graph. Each
// vertex in the graph is an ExecMTask, which contains a body for the
// mtask (an AstMTaskBody), which contains sequentially executed statements.
//
// The AstMTaskBody nodes are also children of this node, so we can visit
// them without traversing the graph.
// For parallel execution, this node contains a dependency graph. Each
// vertex in the graph is an ExecMTask, which includes a function that
// holds the sequential body of the mtask.
//
// The location where AstExecGraph appears as a procedural statement is
// where the parallel graph will be executed. Execution proceeds after
// the AstExecGraph when all threads have joined.
//
// @astgen op1 := mTaskBodiesp : List[AstMTaskBody]
// In later phases, the statements that start the parallel execution
// @astgen op2 := stmtsp : List[AstNode]
// For code analysis purposes after scheduling, we keep a call to each
// MTask function as children of the AstExecGraph in 'stmtsp'. These
// are in a topological order so they represent a valid sequential
// execution of the graph. In `V3ExecGraph::implement`, we replace these
// statements with statements that dispatch to the thread pool for
// parallel execution.
// @astgen op1 := stmtsp : List[AstNode]
V3Graph* const m_depGraphp; // contains ExecMTask vertices
const string m_name; // Name of this AstExecGraph (for uniqueness at code generation)
@ -623,10 +624,7 @@ public:
~AstExecGraph() override;
ASTGEN_MEMBERS_AstExecGraph;
void cloneRelink() override { V3ERROR_NA; } // Not cloneable
const char* broken() const override {
BROKEN_RTN(!m_depGraphp);
return nullptr;
}
const char* broken() const override;
string name() const override VL_MT_STABLE { return m_name; }
V3Graph* depGraphp() { return m_depGraphp; }
const V3Graph* depGraphp() const { return m_depGraphp; }

View File

@ -340,6 +340,16 @@ AstExecGraph::AstExecGraph(FileLine* fileline, const string& name) VL_MT_DISABLE
AstExecGraph::~AstExecGraph() { VL_DO_DANGLING(delete m_depGraphp, m_depGraphp); }
// Consistency check for this AstExecGraph.
// Returns nullptr when none of the checks below trigger. Each BROKEN_RTN
// presumably returns early with a description of the failed condition
// (the macro is defined elsewhere - confirm).
const char* AstExecGraph::broken() const {
// The dependency graph pointer must be set
BROKEN_RTN(!m_depGraphp);
// Each vertex of the dependency graph is accessed as an ExecMTask. Every one
// must have a function, and that function must pass brokeExists().
for (const V3GraphVertex& vtx : m_depGraphp->vertices()) {
const ExecMTask* const mtaskp = vtx.as<ExecMTask>();
AstCFunc* const funcp = mtaskp->funcp();
BROKEN_RTN(!funcp || !funcp->brokeExists());
}
return nullptr;
}
AstNodeExpr* AstInsideRange::newAndFromInside(AstNodeExpr* exprp, AstNodeExpr* lhsp,
AstNodeExpr* rhsp) {
AstNodeExpr* const ap = new AstGte{fileline(), exprp, lhsp};
@ -2538,17 +2548,6 @@ void AstSystemCSection::dumpJson(std::ostream& str) const {
dumpJsonStr(str, "sectionType", sectionType().ascii());
dumpJsonGen(str);
}
void AstMTaskBody::dump(std::ostream& str) const {
this->AstNode::dump(str);
str << " ";
m_execMTaskp->dump(str);
}
void AstMTaskBody::dumpJson(std::ostream& str) const {
str << ',' << '"' << "execMTask" << '"' << ':' << '"';
m_execMTaskp->dump(str); // TODO: Consider dumping it as json object
str << '"';
dumpJsonGen(str);
}
void AstTypeTable::dump(std::ostream& str) const {
this->AstNode::dump(str);
for (int i = 0; i < static_cast<int>(VBasicDTypeKwd::_ENUM_MAX); ++i) {

View File

@ -38,7 +38,6 @@ class DepthVisitor final : public VNVisitor {
// STATE - for current visit position (use VL_RESTORER)
AstCFunc* m_cfuncp = nullptr; // Current block
AstMTaskBody* m_mtaskbodyp = nullptr; // Current mtaskbody
AstNode* m_stmtp = nullptr; // Current statement
int m_depth = 0; // How deep in an expression
int m_maxdepth = 0; // Maximum depth in an expression
@ -53,8 +52,6 @@ class DepthVisitor final : public VNVisitor {
m_tempNames.get(nodep), nodep->dtypep()};
if (m_cfuncp) {
m_cfuncp->addVarsp(varp);
} else if (m_mtaskbodyp) {
m_mtaskbodyp->addStmtsFirstp(varp);
} else {
nodep->v3fatalSrc("Deep expression not under a function");
}
@ -70,28 +67,14 @@ class DepthVisitor final : public VNVisitor {
// VISITORS
void visit(AstCFunc* nodep) override {
VL_RESTORER(m_cfuncp);
VL_RESTORER(m_mtaskbodyp);
VL_RESTORER(m_depth);
VL_RESTORER(m_maxdepth);
m_cfuncp = nodep;
m_mtaskbodyp = nullptr;
m_depth = 0;
m_maxdepth = 0;
m_tempNames.reset();
iterateChildren(nodep);
}
void visit(AstMTaskBody* nodep) override {
VL_RESTORER(m_cfuncp);
VL_RESTORER(m_mtaskbodyp);
VL_RESTORER(m_depth);
VL_RESTORER(m_maxdepth);
m_cfuncp = nullptr;
m_mtaskbodyp = nodep;
m_depth = 0;
m_maxdepth = 0;
// We don't reset the names, as must share across tasks
iterateChildren(nodep);
}
void visitStmt(AstNodeStmt* nodep) {
VL_RESTORER(m_stmtp);
VL_RESTORER(m_depth);

View File

@ -1623,11 +1623,6 @@ public:
}
//
void visit(AstMTaskBody* nodep) override {
VL_RESTORER(m_useSelfForThis);
m_useSelfForThis = true;
iterateChildrenConst(nodep);
}
void visit(AstConsAssoc* nodep) override {
putnbs(nodep, nodep->dtypep()->cType("", false, false));
puts("()");
@ -1723,7 +1718,6 @@ public:
void visit(AstExecGraph* nodep) override {
// The location of the AstExecGraph within the containing AstCFunc is where we want to
// invoke the graph and wait for it to complete. Emitting the children does just that.
UASSERT_OBJ(!nodep->mTaskBodiesp(), nodep, "These should have been lowered");
iterateChildrenConst(nodep);
}

View File

@ -33,16 +33,24 @@
VL_DEFINE_DEBUG_FUNCTIONS;
ExecMTask::ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED //
: V3GraphVertex{graphp},
m_bodyp{bodyp},
m_id{s_nextId++},
m_hashName{V3Hasher::uncachedHash(bodyp).toString()} {
UASSERT_OBJ(bodyp->stmtsp(), bodyp, "AstMTaskBody should already be populated for hashing");
UASSERT_OBJ(!bodyp->execMTaskp(), bodyp, "AstMTaskBody already linked to an ExecMTask");
bodyp->execMTaskp(this);
// Build the AstCFunc that holds the statements of one MTask.
//  execGraphp: supplies the FileLine and the name prefix of the new function
//  scopep:     passed to the AstCFunc constructor; if non-null, the new
//              function is also added to this scope's blocks
//  stmtsp:     statements added as the body of the new function
//  id:         appended to the name as "<execGraph name>_mtask<id>"
// The function is marked loose and not to be combined.
AstCFunc* ExecMTask::createCFunc(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp,
                                 uint32_t id) {
    std::string funcName = execGraphp->name();
    funcName += "_mtask";
    funcName += std::to_string(id);
    AstCFunc* const newFuncp = new AstCFunc{execGraphp->fileline(), funcName, scopep};
    newFuncp->isLoose(true);
    newFuncp->dontCombine(true);
    newFuncp->addStmtsp(stmtsp);
    if (scopep != nullptr) { scopep->addBlocksp(newFuncp); }
    return newFuncp;
}
ExecMTask::ExecMTask(AstExecGraph* execGraphp, AstScope* scopep,
AstNodeStmt* stmtsp) VL_MT_DISABLED //
: V3GraphVertex{execGraphp->depGraphp()},
m_id{s_nextId++},
m_funcp{createCFunc(execGraphp, scopep, stmtsp, m_id)},
m_hashName{V3Hasher::uncachedHash(m_funcp).toString()} {}
void ExecMTask::dump(std::ostream& str) const {
str << name() << "." << cvtToHex(this);
if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]";
@ -538,37 +546,32 @@ public:
selfTestNormalFirst();
}
static void selfTestNormalFirst() {
V3Graph graph;
FileLine* const flp = v3Global.rootp()->fileline();
std::vector<AstMTaskBody*> mTaskBodyps;
const auto makeBody = [&]() {
AstMTaskBody* const bodyp = new AstMTaskBody{flp};
mTaskBodyps.push_back(bodyp);
bodyp->addStmtsp(new AstComment{flp, ""});
return bodyp;
};
ExecMTask* const t0 = new ExecMTask{&graph, makeBody()};
AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"};
V3Graph& graph = *execGraphp->depGraphp();
const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; };
ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()};
t0->cost(1000);
t0->priority(1100);
ExecMTask* const t1 = new ExecMTask{&graph, makeBody()};
ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()};
t1->cost(100);
t1->priority(100);
ExecMTask* const t2 = new ExecMTask{&graph, makeBody()};
ExecMTask* const t2 = new ExecMTask{execGraphp, nullptr, makeBody()};
t2->cost(100);
t2->priority(100);
t2->threads(2);
ExecMTask* const t3 = new ExecMTask{&graph, makeBody()};
ExecMTask* const t3 = new ExecMTask{execGraphp, nullptr, makeBody()};
t3->cost(100);
t3->priority(100);
t3->threads(3);
ExecMTask* const t4 = new ExecMTask{&graph, makeBody()};
ExecMTask* const t4 = new ExecMTask{execGraphp, nullptr, makeBody()};
t4->cost(100);
t4->priority(100);
t4->threads(3);
ExecMTask* const t5 = new ExecMTask{&graph, makeBody()};
ExecMTask* const t5 = new ExecMTask{execGraphp, nullptr, makeBody()};
t5->cost(100);
t5->priority(100);
ExecMTask* const t6 = new ExecMTask{&graph, makeBody()};
ExecMTask* const t6 = new ExecMTask{execGraphp, nullptr, makeBody()};
t6->cost(100);
t6->priority(100);
@ -666,24 +669,20 @@ public:
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 4), 1360);
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 5), 1360);
for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree();
for (V3GraphVertex& vtx : graph.vertices()) vtx.as<ExecMTask>()->funcp()->deleteTree();
VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp);
ThreadSchedule::s_mtaskState.clear();
}
static void selfTestHierFirst() {
V3Graph graph;
FileLine* const flp = v3Global.rootp()->fileline();
std::vector<AstMTaskBody*> mTaskBodyps;
const auto makeBody = [&]() {
AstMTaskBody* const bodyp = new AstMTaskBody{flp};
mTaskBodyps.push_back(bodyp);
bodyp->addStmtsp(new AstComment{flp, ""});
return bodyp;
};
ExecMTask* const t0 = new ExecMTask{&graph, makeBody()};
AstExecGraph* const execGraphp = new AstExecGraph{flp, "test"};
V3Graph& graph = *execGraphp->depGraphp();
const auto makeBody = [&]() -> AstNodeStmt* { return new AstComment{flp, ""}; };
ExecMTask* const t0 = new ExecMTask{execGraphp, nullptr, makeBody()};
t0->cost(1000);
t0->priority(1100);
t0->threads(2);
ExecMTask* const t1 = new ExecMTask{&graph, makeBody()};
ExecMTask* const t1 = new ExecMTask{execGraphp, nullptr, makeBody()};
t1->cost(100);
t1->priority(100);
@ -725,7 +724,8 @@ public:
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 0), 1100);
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t1, 1), 1130);
for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree();
for (V3GraphVertex& vtx : graph.vertices()) vtx.as<ExecMTask>()->funcp()->deleteTree();
VL_DO_DANGLING(execGraphp->deleteTree(), execGraphp);
ThreadSchedule::s_mtaskState.clear();
}
@ -790,6 +790,24 @@ void normalizeCosts(Costs& costs) {
}
}
// Drop every ExecMTask whose function has no statements.
// For each such MTask: reroute its edges, unlink and delete its function,
// then unlink and delete the vertex itself. Afterwards, redundant edges are
// removed from the whole graph.
void removeEmptyMTasks(V3Graph* execMTaskGraphp) {
    for (V3GraphVertex* const vertexp : execMTaskGraphp->vertices().unlinkable()) {
        ExecMTask* const execMTaskp = vertexp->as<ExecMTask>();
        AstCFunc* const bodyFuncp = execMTaskp->funcp();
        if (!bodyFuncp->stmtsp()) {
            UINFO(6, "Removing empty MTask " << execMTaskp->name());
            // Reroute the edges of the vertex before it is deleted
            execMTaskp->rerouteEdges(execMTaskGraphp);
            // The function goes first, then the vertex
            VL_DO_DANGLING(bodyFuncp->unlinkFrBack()->deleteTree(), bodyFuncp);
            VL_DO_DANGLING(execMTaskp->unlinkDelete(execMTaskGraphp), execMTaskp);
        }
    }

    // Prune redundant dependencies from the graph
    execMTaskGraphp->removeRedundantEdgesMax(&V3GraphEdge::followAlwaysTrue);
}
void fillinCosts(V3Graph* execMTaskGraphp) {
// Pass 1: See what profiling data applies
Costs costs; // For each mtask, costs
@ -797,7 +815,7 @@ void fillinCosts(V3Graph* execMTaskGraphp) {
for (V3GraphVertex& vtx : execMTaskGraphp->vertices()) {
ExecMTask* const mtp = vtx.as<ExecMTask>();
// This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits
const uint64_t costEstimate = V3InstrCount::count(mtp->bodyp(), false);
const uint64_t costEstimate = V3InstrCount::count(mtp->funcp(), false);
const uint64_t costProfiled
= V3Control::getProfileData(v3Global.opt.prefix(), mtp->hashName());
if (costProfiled) {
@ -857,30 +875,6 @@ void finalizeCosts(V3Graph* execMTaskGraphp) {
}
}
// Some MTasks may now have zero cost, eliminate those.
// (It's common for tasks to shrink to nothing when V3LifePost
// removes dly assignments.)
for (V3GraphVertex* const vtxp : execMTaskGraphp->vertices().unlinkable()) {
ExecMTask* const mtp = vtxp->as<ExecMTask>();
// Don't rely on checking mtp->cost() == 0 to detect an empty task.
// Our cost-estimating logic is just an estimate. Instead, check
// the MTaskBody to see if it's empty. That's the source of truth.
AstMTaskBody* const bodyp = mtp->bodyp();
if (!bodyp->stmtsp()) { // Kill this empty mtask
UINFO(6, "Removing zero-cost " << mtp->name());
for (V3GraphEdge& in : mtp->inEdges()) {
for (V3GraphEdge& out : mtp->outEdges()) {
new V3GraphEdge{execMTaskGraphp, in.fromp(), out.top(), 1};
}
}
VL_DO_DANGLING(mtp->unlinkDelete(execMTaskGraphp), mtp);
// Also remove and delete the AstMTaskBody, otherwise it would
// keep a dangling pointer to the ExecMTask.
VL_DO_DANGLING(bodyp->unlinkFrBack()->deleteTree(), bodyp);
}
}
// Removing tasks may cause edges that were formerly non-transitive to
// become transitive. Also we just created new edges around the removed
// tasks, which could be transitive. Prune out all transitive edges.
@ -907,6 +901,7 @@ void finalizeCosts(V3Graph* execMTaskGraphp) {
void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, AstCFunc* funcp,
const ExecMTask* mtaskp) {
AstScope* const scopep = v3Global.rootp()->topScopep()->scopep();
AstNodeModule* const modp = v3Global.rootp()->topModulep();
FileLine* const fl = modp->fileline();
@ -940,8 +935,11 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId,
addCStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->id()) + ");");
}
// Move the actual body into this function
funcp->addStmtsp(mtaskp->bodyp()->unlinkFrBack());
// Call the MTask function
AstCCall* const callp = new AstCCall{fl, mtaskp->funcp()};
callp->selfPointer(VSelfPointerText{VSelfPointerText::VlSyms{}, scopep->nameDotless()});
callp->dtypeSetVoid();
funcp->addStmtsp(callp->makeStmt());
if (v3Global.opt.profPgo()) {
// No lock around stopCounter, as counter numbers are unique per thread
@ -1093,56 +1091,38 @@ void addThreadStartToExecGraph(AstExecGraph* const execGraphp,
}
}
void wrapMTaskBodies(AstExecGraph* const execGraphp) {
FileLine* const flp = execGraphp->fileline();
const string& tag = execGraphp->name();
AstNodeModule* const modp = v3Global.rootp()->topModulep();
for (AstMTaskBody* mtaskBodyp = execGraphp->mTaskBodiesp(); mtaskBodyp;
mtaskBodyp = VN_AS(mtaskBodyp->nextp(), MTaskBody)) {
ExecMTask* const mtaskp = mtaskBodyp->execMTaskp();
const std::string name = tag + "_mtask" + std::to_string(mtaskp->id());
AstCFunc* const funcp = new AstCFunc{flp, name, nullptr};
funcp->isLoose(true);
modp->addStmtsp(funcp);
void processMTaskBodies(AstExecGraph* const execGraphp) {
for (V3GraphVertex* const vtxp : execGraphp->depGraphp()->vertices().unlinkable()) {
ExecMTask* const mtaskp = vtxp->as<ExecMTask>();
AstCFunc* const funcp = mtaskp->funcp();
// Temporarily unlink function body so we can add more statements
AstNode* stmtsp = funcp->stmtsp()->unlinkFrBackWithNext();
// Helper function to make the code a bit more legible
const auto addCStmt = [=](const string& stmt) -> void { //
funcp->addStmtsp(new AstCStmt{flp, stmt});
funcp->addStmtsp(new AstCStmt{execGraphp->fileline(), stmt});
};
addCStmt("static constexpr unsigned taskId = " + cvtToStr(mtaskp->id()) + ";");
// Profiling mtaskStart
if (v3Global.opt.profExec()) {
const string& predictStart = std::to_string(mtaskp->predictStart());
if (v3Global.opt.hierChild()) {
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(taskId, " + predictStart
+ ", \"" + v3Global.opt.topModule() + "\");");
} else {
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(taskId, " + predictStart
+ ");");
}
std::string args = std::to_string(mtaskp->id());
args += ", " + std::to_string(mtaskp->predictStart());
args += ", \"";
if (v3Global.opt.hierChild()) args += v3Global.opt.topModule();
args += "\"";
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(" + args + ");");
}
// Set mtask ID in the run-time system
addCStmt("Verilated::mtaskId(taskId);");
// Run body
funcp->addStmtsp(mtaskBodyp->stmtsp()->unlinkFrBackWithNext());
addCStmt("Verilated::mtaskId(" + std::to_string(mtaskp->id()) + ");");
// Add back the body
funcp->addStmtsp(stmtsp);
// Flush message queue
addCStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);");
// Profiling mtaskEnd
if (v3Global.opt.profExec()) {
const string& predictCost = std::to_string(mtaskp->cost());
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + predictCost + ");");
const std::string& args = std::to_string(mtaskp->cost());
addCStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + args + ");");
}
// AstMTask will simply contain a call
AstCCall* const callp = new AstCCall{flp, funcp};
callp->selfPointer(VSelfPointerText{VSelfPointerText::This{}});
callp->dtypeSetVoid();
mtaskBodyp->addStmtsp(callp->makeStmt());
}
}
@ -1150,8 +1130,7 @@ void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& sc
// Nothing to be done if there are no MTasks in the graph at all.
if (execGraphp->depGraphp()->empty()) return;
// Create a function to be run by each thread. Note this moves all AstMTaskBody nodes form the
// AstExecGraph into the AstCFunc created
// Create a function to be run by each thread.
const std::vector<AstCFunc*>& funcps = createThreadFunctions(schedule, execGraphp->name());
UASSERT(!funcps.empty(), "Non-empty ExecGraph yields no threads?");
@ -1159,9 +1138,30 @@ void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& sc
addThreadStartToExecGraph(execGraphp, funcps, schedule.id());
}
// Called by Verilator top stage
void implement(AstNetlist* netlistp) {
// Called by Verilator top stage
netlistp->topModulep()->foreach([&](AstExecGraph* execGraphp) {
// Gather all ExecGraphs
std::vector<AstExecGraph*> execGraphps;
netlistp->topModulep()->foreach([&](AstExecGraph* egp) { execGraphps.emplace_back(egp); });
// Process each
for (AstExecGraph* const execGraphp : execGraphps) {
// We can delete the placeholder calls to the MTask functions that
// were used for code analysis until now. We will replace them with
// statements that dispatch execution to the thread pool.
if (execGraphp->stmtsp()) execGraphp->stmtsp()->unlinkFrBackWithNext()->deleteTree();
// Some MTasks may have become empty after scheduling due to
// optimizations after scheduling. Remove those.
removeEmptyMTasks(execGraphp->depGraphp());
// In some very small test cases, we might end up with a completely
// empty ExecGraph, if so just delete it.
if (execGraphp->depGraphp()->empty()) {
VL_DO_DANGLING(execGraphp->unlinkFrBack()->deleteTree(), execGraphp);
return;
}
// Back in V3Order, we partitioned mtasks using provisional cost
// estimates. However, V3Order precedes some optimizations (notably
// V3LifePost) that can change the cost of logic within each mtask.
@ -1180,8 +1180,8 @@ void implement(AstNetlist* netlistp) {
V3Stats::addStatSum("Optimizations, Thread schedule count",
static_cast<double>(packed.size()));
// Wrap each MTask body into a CFunc for better profiling/debugging
wrapMTaskBodies(execGraphp);
// Process MTask function bodies to add additional code
processMTaskBodies(execGraphp);
for (const ThreadSchedule& schedule : packed) {
// Replace the graph body with its multi-threaded implementation.
@ -1189,7 +1189,7 @@ void implement(AstNetlist* netlistp) {
}
addThreadEndWrapper(execGraphp);
});
}
}
void selfTest() {

View File

@ -25,7 +25,10 @@
#include <atomic>
class AstNetlist;
class AstMTaskBody;
class AstCFunc;
class AstExecGraph;
class AstNodeStmt;
class AstScope;
//*************************************************************************
// MTasks and graph structures
@ -33,9 +36,9 @@ class AstMTaskBody;
class ExecMTask final : public V3GraphVertex {
VL_RTTI_IMPL(ExecMTask, V3GraphVertex)
private:
AstMTaskBody* const m_bodyp; // Task body
const uint32_t m_id; // Unique ID of this ExecMTask.
static std::atomic<uint32_t> s_nextId; // Next ID to use
AstCFunc* const m_funcp; // The function that contains the task body
const std::string m_hashName; // Hashed name based on body for profile-driven optimization
// Predicted critical path from the start of this mtask to the ends of the graph that are
// reachable from this mtask. In abstract time units.
@ -46,9 +49,12 @@ private:
int m_threads = 1; // Threads used by this mtask
VL_UNCOPYABLE(ExecMTask);
static AstCFunc* createCFunc(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp,
uint32_t id);
public:
ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED;
AstMTaskBody* bodyp() const { return m_bodyp; }
ExecMTask(AstExecGraph* execGraphp, AstScope* scopep, AstNodeStmt* stmtsp) VL_MT_DISABLED;
AstCFunc* funcp() const { return m_funcp; }
uint32_t id() const VL_MT_SAFE { return m_id; }
uint32_t priority() const { return m_priority; }
void priority(uint32_t pri) { m_priority = pri; }

View File

@ -513,9 +513,6 @@ class HasherVisitor final : public VNVisitorConst {
iterateConstNull(nodep->ftaskp());
});
}
void visit(AstMTaskBody* nodep) override {
m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, []() {});
}
void visit(AstNodeProcedure* nodep) override {
m_hash += hashNodeAndIterate(nodep, HASH_DTYPE, HASH_CHILDREN, []() {});
}

View File

@ -290,7 +290,7 @@ class LifePostDlyVisitor final : public VNVisitorConst {
const ExecMTask* const mtaskp = mtaskVtx.as<ExecMTask>();
VL_RESTORER(m_execMTaskp);
m_execMTaskp = mtaskp;
iterateConst(mtaskp->bodyp());
trace(mtaskp->funcp());
}
}
void visit(AstCFunc* nodep) override {

View File

@ -1763,7 +1763,7 @@ class DpiThreadsVisitor final : public VNVisitorConst {
public:
// CONSTRUCTORS
explicit DpiThreadsVisitor(AstMTaskBody* nodep) { iterateConst(nodep); }
explicit DpiThreadsVisitor(AstCFunc* nodep) { iterateConst(nodep); }
int threads() const { return m_threads; }
~DpiThreadsVisitor() override = default;
@ -2431,8 +2431,9 @@ AstNodeStmt* V3Order::createParallel(OrderGraph& orderGraph, OrderMoveGraph& mov
if (dumpGraphLevel() >= 9) moveGraph.dumpDotFilePrefixed(tag + "_ordermv_pruned");
// Create the AstExecGraph node which represents the execution of the MTask graph.
FileLine* const rootFlp = v3Global.rootp()->fileline();
AstExecGraph* const execGraphp = new AstExecGraph{rootFlp, tag};
FileLine* const flp = v3Global.rootp()->fileline();
AstScope* const scopep = v3Global.rootp()->topScopep()->scopep();
AstExecGraph* const execGraphp = new AstExecGraph{flp, tag};
V3Graph* const depGraphp = execGraphp->depGraphp();
// Translate the LogicMTask graph into the corresponding ExecMTask graph,
@ -2468,24 +2469,23 @@ AstNodeStmt* V3Order::createParallel(OrderGraph& orderGraph, OrderMoveGraph& mov
VL_DO_DANGLING(mVtxp->unlinkDelete(&moveGraph), mVtxp);
}
// We have 2 objects, because AstMTaskBody is an AstNode, and ExecMTask is a GraphVertex.
// To combine them would involve multiple inheritance.
// Construct the actual MTaskBody
AstMTaskBody* const bodyp = new AstMTaskBody{rootFlp};
execGraphp->addMTaskBodiesp(bodyp);
bodyp->addStmtsp(emitter.getStmts());
UASSERT_OBJ(bodyp->stmtsp(), bodyp, "Should not try to create empty MTask");
// Create the ExecMTask
ExecMTask* const execMTaskp = new ExecMTask{depGraphp, bodyp};
if (!v3Global.opt.hierBlocks().empty())
execMTaskp->threads(DpiThreadsVisitor{bodyp}.threads());
ExecMTask* const execMTaskp = new ExecMTask{execGraphp, scopep, emitter.getStmts()};
if (!v3Global.opt.hierBlocks().empty()) {
execMTaskp->threads(DpiThreadsVisitor{execMTaskp->funcp()}.threads());
}
const bool newEntry = logicMTaskToExecMTask.emplace(mTaskp, execMTaskp).second;
UASSERT_OBJ(newEntry, mTaskp, "LogicMTasks should be processed in dependencyorder");
UINFO(3, "Final '" << tag << "' LogicMTask " << mTaskp->id() << " maps to ExecMTask"
<< execMTaskp->id());
// For code analysis purposes, we can pretend the AstExecGraph runs the
// MTasks sequentially, in some topological order that respects edges.
// The order they are created here happens to be just such an order.
AstCCall* const callp = new AstCCall{flp, execMTaskp->funcp()};
callp->dtypeSetVoid();
execGraphp->addStmtsp(callp->makeStmt());
// Add the dependency edges between ExecMTasks
for (const V3GraphEdge& edge : mTaskp->inEdges()) {
const V3GraphVertex* fromVxp = edge.fromp();

View File

@ -53,7 +53,7 @@ class GatherMTaskAffinity final : VNVisitorConst {
GatherMTaskAffinity(const ExecMTask* mTaskp, MTaskAffinityMap& results)
: m_results{results}
, m_id{mTaskp->id()} {
iterateChildrenConst(mTaskp->bodyp());
iterateConst(mTaskp->funcp());
}
~GatherMTaskAffinity() = default;
VL_UNMOVABLE(GatherMTaskAffinity);