Improve hierarchical scheduling visualization in V3ExecGraph (#6009)

Bartłomiej Chmiel 2025-05-20 18:15:09 +02:00 committed by GitHub
parent 25cb31c38b
commit 6d257ef52c
2 changed files with 141 additions and 46 deletions

File 1 of 2

@@ -96,8 +96,18 @@ public:
 private:
     VL_UNCOPYABLE(ThreadSchedule);
 
+    static constexpr double s_threadBoxWidth = 2.5;
+    static constexpr double s_threadBoxHeight = 1.5;
+    static constexpr double s_horizontalGap = s_threadBoxWidth / 2;
+
     // Debugging
-    void dumpDotFile(const V3Graph& graph, const string& filename) const {
+    // Variant of dumpDotFilePrefixed without --dump option check
+    static void dumpDotFilePrefixedAlways(const std::vector<ThreadSchedule>& schedules,
+                                          const string& nameComment, uint32_t nThreads) {
+        dumpDotFile(schedules, v3Global.debugFilename(nameComment) + ".dot", nThreads);
+    }
+    static void dumpDotFile(const std::vector<ThreadSchedule>& schedules, const string& filename,
+                            uint32_t nThreads) {
         // This generates a file used by graphviz, https://www.graphviz.org
         const std::unique_ptr<std::ofstream> logp{V3File::new_ofstream(filename)};
         if (logp->fail()) v3fatal("Can't write file: " << filename);
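
The box-size constants and static entry points above drive a fixed-position, Gantt-style layout: every node is written with a pos="x,y!" attribute, and the trailing '!' pins the node, so a Graphviz engine that honors supplied positions (for example neato run with -n2) draws one row per thread and a left-to-right time axis instead of computing its own layout. Below is a minimal standalone sketch of that emission style, not Verilator code; emitBlock and the driver in main are illustrative stand-ins for dumpDotFileEmitBlock and its callers.

    #include <fstream>
    #include <string>

    // Illustrative stand-in for dumpDotFileEmitBlock: one pinned, filled box per node.
    static void emitBlock(std::ofstream& os, const std::string& name, const std::string& label,
                          double width, double height, double x, double y,
                          const std::string& fill) {
        os << "  " << name << " [label=\"" << label << "\" width=" << width
           << " height=" << height << " pos=\"" << x << "," << y
           << "!\" style=\"filled\" fillcolor=\"" << fill << "\"]\n";
    }

    int main() {
        std::ofstream os{"sketch.dot"};
        os << "digraph sketch {\n";
        // Thread label column at x = -s_horizontalGap, one row per thread,
        // rows spaced s_threadBoxHeight apart (constants as in the patch).
        emitBlock(os, "t0", "Thread 0", 2.5, 1.5, -1.25, 0.0, "grey");
        emitBlock(os, "t1", "Thread 1", 2.5, 1.5, -1.25, -1.5, "grey");
        os << "}\n";
        return 0;
    }
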
@@ -109,52 +119,102 @@ private:
         // Thread labels
         *logp << "\n  // Threads\n";
 
-        const int threadBoxWidth = 2;
-        for (int i = 0; i < v3Global.opt.threads(); i++) {
-            *logp << "  t" << i << " [label=\"Thread " << i << "\" width=" << threadBoxWidth
-                  << " pos=\"" << (-threadBoxWidth / 2) << "," << -i
-                  << "!\" style=\"filled\" fillcolor=\"grey\"] \n";
+        for (uint32_t i = 0; i < nThreads; ++i) {
+            const string name = "t" + std::to_string(i);
+            const string label = "Thread " + std::to_string(i);
+            constexpr double posX = -s_horizontalGap;
+            const double posY = -static_cast<double>(i) * s_threadBoxHeight;
+            dumpDotFileEmitBlock(logp, name, label, s_threadBoxWidth, s_threadBoxHeight, posX,
+                                 posY, "grey");
         }
 
         // MTask nodes
         *logp << "\n  // MTasks\n";
 
-        // Find minimum cost MTask for scaling MTask node widths
-        uint32_t minCost = UINT32_MAX;
-        for (const V3GraphVertex& vtx : graph.vertices()) {
-            if (const ExecMTask* const mtaskp = vtx.cast<const ExecMTask>()) {
-                minCost = minCost > mtaskp->cost() ? mtaskp->cost() : minCost;
-            }
-        }
-        const double minWidth = 2.0;
-        const auto mtaskXPos = [&](const ExecMTask* mtaskp, const double nodeWidth) {
-            const double startPosX = (minWidth * startTime(mtaskp)) / minCost;
-            return nodeWidth / minWidth + startPosX;
-        };
-        const auto emitMTask = [&](const ExecMTask* mtaskp) {
-            const int thread = threadId(mtaskp);
-            const double nodeWidth = minWidth * (static_cast<double>(mtaskp->cost()) / minCost);
-            const double x = mtaskXPos(mtaskp, nodeWidth);
-            const int y = -thread;
-            const string label = "label=\"" + mtaskp->name() + " (" + cvtToStr(startTime(mtaskp))
-                                 + ":" + std::to_string(endTime(mtaskp)) + ")" + "\"";
-            *logp << "  " << mtaskp->name() << " [" << label << " width=" << nodeWidth << " pos=\""
-                  << x << "," << y << "!\"]\n";
-        };
-
-        // Emit MTasks
-        for (const V3GraphVertex& vtx : graph.vertices()) {
-            if (const ExecMTask* const mtaskp = vtx.cast<const ExecMTask>()) emitMTask(mtaskp);
-        }
-
-        // Emit MTask dependency edges
-        *logp << "\n  // MTask dependencies\n";
-        for (const V3GraphVertex& vtx : graph.vertices()) {
-            if (const ExecMTask* const mtaskp = vtx.cast<const ExecMTask>()) {
-                for (const V3GraphEdge& edge : mtaskp->outEdges()) {
-                    const V3GraphVertex* const top = edge.top();
-                    *logp << "  " << vtx.name() << " -> " << top->name() << "\n";
-                }
-            }
-        }
+        uint32_t maxCost = 0;
+        for (const auto& state : ThreadSchedule::mtaskState) {
+            const ExecMTask* const mtaskp = state.first;
+            maxCost = std::max(maxCost, mtaskp->cost());
+        }
+        // To avoid segments getting too large, limit maximal mtask length.
+        // Based on the mtask cost, normalize it using segment cost
+        constexpr uint32_t segmentsPerLongestMtask = 4;
+        const uint32_t segmentCost = maxCost / segmentsPerLongestMtask;
+
+        // Create columns of tasks whose execution intervals overlaps.
+        // Keep offset for each column for correctly aligned tasks.
+        std::vector<double> offsets(nThreads, 0.0);
+        for (const ThreadSchedule& schedule : schedules) {
+            if (schedule.mtasks.empty()) continue;
+
+            using Column = std::vector<const ExecMTask*>;
+            std::vector<Column> columns = {{}};
+
+            // Order tasks based on their start time
+            struct Cmp final {
+                bool operator()(const ExecMTask* const a, const ExecMTask* const b) const {
+                    if (startTime(a) == startTime(b)) return threadId(a) < threadId(b);
+                    return startTime(a) < startTime(b);
+                }
+            };
+            const std::multiset<const ExecMTask*, Cmp> tasks(schedule.mtasks.begin(),
+                                                             schedule.mtasks.end());
+
+            for (const ExecMTask* const mtaskp : tasks) {
+                Column& column = columns.back();
+                UASSERT(column.size() <= nThreads, "Invalid partitioning");
+                bool intersects = true;
+                for (const ExecMTask* const earlierMtask : column) {
+                    if (endTime(mtaskp) <= startTime(earlierMtask)
+                        || startTime(mtaskp) >= endTime(earlierMtask)) {
+                        intersects = false;
+                        break;
+                    }
+                }
+                if (intersects) {
+                    column.emplace_back(mtaskp);
+                } else {
+                    columns.emplace_back(Column{mtaskp});
+                }
+            }
+
+            UASSERT(!columns.front().empty(), "Should be populated by mtasks");
+
+            for (const Column& column : columns) {
+                double lastColumnOffset = 0;
+                for (const ExecMTask* const mtaskp : column) {
+                    dumpDotFileEmitMTask(logp, mtaskp, schedule, segmentCost, offsets);
+                    lastColumnOffset = std::max(lastColumnOffset, offsets[threadId(mtaskp)]);
+                }
+                // Even out column offset
+                std::fill(offsets.begin(), offsets.end(), lastColumnOffset);
+            }
+
+            dumpDotFileEmitFork(logp, offsets.front(), nThreads);
+
+            // Emit MTask dependency edges
+            *logp << "\n  // MTask dependencies\n";
+
+            for (const std::vector<const ExecMTask*>& thread : schedule.threads) {
+                if (thread.empty()) break;  // No more threads
+                // Show that schedule ends when all tasks are finished
+                *logp << "  " << thread.back()->name() << " -> fork_"
+                      << static_cast<int>(offsets.front()) << "\n";
+                // Show that tasks from the same thread are executed in a sequence
+                for (size_t i = 1; i < thread.size(); ++i)
+                    *logp << "  " << thread[i - 1]->name() << " -> " << thread[i]->name() << "\n";
+                // Emit cross-task dependencies
+                for (const ExecMTask* const mtaskp : thread) {
+                    for (const V3GraphEdge& edge : mtaskp->outEdges()) {
+                        const ExecMTask* const topMTaskp = edge.top()->cast<const ExecMTask>();
+                        if (topMTaskp && schedule.contains(topMTaskp)
+                            && threadId(topMTaskp) != threadId(mtaskp))
+                            *logp << "  " << mtaskp->name() << " -> " << topMTaskp->name() << "\n";
+                    }
+                }
+            }
+        }
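
The heart of the new layout is the column packing above: the MTasks of a schedule are sorted by start time, and each task joins the current column only if its execution interval overlaps every task already in that column; otherwise it opens a new column to the right. After a column is emitted, every row's offset is advanced to the widest box in that column (the std::fill), so the next column starts at a common x position. A self-contained sketch of just that grouping rule follows; the Task struct and packColumns are illustrative names, not part of Verilator.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Toy stand-in for an ExecMTask; only the execution interval matters here.
    struct Task {
        uint32_t start;  // execution start time on its thread
        uint32_t end;    // execution end time
    };

    // Group tasks (already sorted by start time) into columns of mutually
    // overlapping intervals, mirroring the packing rule in dumpDotFile:
    // a task joins the current column only if it overlaps every task in it,
    // otherwise a new column is started.
    static std::vector<std::vector<Task>> packColumns(const std::vector<Task>& sorted) {
        std::vector<std::vector<Task>> columns{{}};
        for (const Task& t : sorted) {
            std::vector<Task>& column = columns.back();
            bool overlapsAll = true;
            for (const Task& earlier : column) {
                if (t.end <= earlier.start || t.start >= earlier.end) {
                    overlapsAll = false;
                    break;
                }
            }
            if (overlapsAll) {
                column.push_back(t);
            } else {
                columns.push_back({t});
            }
        }
        return columns;
    }

    int main() {
        // The first two tasks run concurrently and share a column; the third
        // starts later, lands in a second column, and is drawn further right.
        const std::vector<Task> tasks{{0, 10}, {2, 8}, {12, 20}};
        const auto columns = packColumns(tasks);
        return columns.size() == 2 ? 0 : 1;
    }
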
@@ -163,10 +223,47 @@ private:
         *logp << "}\n";
         logp->close();
     }
 
-    // Variant of dumpDotFilePrefixed without --dump option check
-    void dumpDotFilePrefixedAlways(const V3Graph& graph, const string& nameComment) const {
-        dumpDotFile(graph, v3Global.debugFilename(nameComment) + ".dot");
-    }
+    static void dumpDotFileEmitBlock(const std::unique_ptr<std::ofstream>& logp,
+                                     const string& name, const string& label, double width,
+                                     double height, double xPos, double yPos,
+                                     const string& fillColor) {
+        *logp << "  " << name << " [label=\"" << label << "\" width=" << width
+              << " height=" << height << " pos=\"" << xPos << "," << yPos
+              << "!\" style=\"filled\" fillcolor=\"" << fillColor << "\"]\n";
+    }
+
+    static void dumpDotFileEmitMTask(const std::unique_ptr<std::ofstream>& logp,
+                                     const ExecMTask* const mtaskp, const ThreadSchedule& schedule,
+                                     uint32_t segmentCost, std::vector<double>& offsets) {
+        for (int i = 0; i < mtaskp->threads(); ++i) {
+            // Keep original name for the original thread of hierarchical task to keep
+            // dependency tracking, add '_' for the rest to differentiate them.
+            const string name = i == 0 ? mtaskp->name() : mtaskp->name() + '_' + std::to_string(i);
+            const string label = mtaskp->name() + " (" + std::to_string(startTime(mtaskp)) + ':'
+                                 + std::to_string(endTime(mtaskp)) + ')'
+                                 + "\\ncost=" + std::to_string(mtaskp->cost())
+                                 + "\\npriority=" + std::to_string(mtaskp->priority());
+            const double width
+                = std::max(s_threadBoxWidth,
+                           s_threadBoxWidth * static_cast<double>(mtaskp->cost()) / segmentCost);
+            const uint32_t mtaskThreadId = threadId(mtaskp) + i * schedule.threads.size();
+            const double xPos = width / 2 + offsets[mtaskThreadId];
+            offsets[mtaskThreadId] += width + s_horizontalGap;
+            const double yPos = -s_threadBoxHeight * static_cast<double>(mtaskThreadId);
+            const string fillColor = i == 0 ? "white" : "lightgreen";
+            dumpDotFileEmitBlock(logp, name, label, width, s_threadBoxHeight, xPos, yPos,
+                                 fillColor);
+        }
+    }
+
+    static void dumpDotFileEmitFork(const std::unique_ptr<std::ofstream>& logp, double offset,
+                                    uint32_t nThreads) {
+        const string& name = "fork_" + std::to_string(static_cast<int>(offset));
+        constexpr double width = s_threadBoxWidth / 8;
+        const double height = s_threadBoxHeight * nThreads;
+        const double xPos = offset - s_horizontalGap / 2;
+        const double yPos
+            = -static_cast<double>(nThreads) / 2 * s_threadBoxHeight + s_threadBoxHeight / 2;
+        dumpDotFileEmitBlock(logp, name, "", width, height, xPos, yPos, "black");
+    }
 
 public:
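
Two pieces of arithmetic above determine the picture: a box's width scales with its cost relative to segmentCost (a quarter of the most expensive MTask's cost), clamped to at least one thread-box width, and a hierarchical MTask occupying several workers is drawn once per worker, with clone i shifted down by i * threads.size() rows and filled lightgreen. The fork_N node is a thin black bar spanning all thread rows at the end of each schedule. A small sketch of the width and row calculations, using the constants from this patch and illustrative function names:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Constants as declared in the patch; function names are illustrative.
    static constexpr double kBoxWidth = 2.5;   // s_threadBoxWidth
    static constexpr double kBoxHeight = 1.5;  // s_threadBoxHeight

    // Box width grows with cost, normalized so the most expensive MTask spans
    // about four boxes (segmentCost = maxCost / 4), but never below one box.
    static double boxWidth(uint32_t cost, uint32_t segmentCost) {
        return std::max(kBoxWidth, kBoxWidth * static_cast<double>(cost) / segmentCost);
    }

    // A hierarchical MTask using several workers is drawn once per worker:
    // clone i lands 'i * nThreads' rows below the scheduling thread's own row.
    static double rowY(uint32_t threadId, uint32_t clone, uint32_t nThreads) {
        return -kBoxHeight * static_cast<double>(threadId + clone * nThreads);
    }

    int main() {
        const uint32_t maxCost = 400;
        const uint32_t segmentCost = maxCost / 4;  // segmentsPerLongestMtask == 4
        std::printf("width(cost=50)  = %.2f\n", boxWidth(50, segmentCost));   // clamped to 2.50
        std::printf("width(cost=400) = %.2f\n", boxWidth(400, segmentCost));  // 10.00, four boxes
        std::printf("y of clone 1, thread 0 of 4 = %.2f\n", rowY(0, 1, 4));   // -6.00
        return 0;
    }
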
@@ -424,7 +521,8 @@ class PackThreads final {
         }
 
         // All schedules are combined on a single graph
-        if (dumpGraphLevel() >= 4) result.back().dumpDotFilePrefixedAlways(mtaskGraph, "schedule");
+        if (dumpGraphLevel() >= 4)
+            ThreadSchedule::dumpDotFilePrefixedAlways(result, "schedule", m_nThreads);
         return result;
     }

File 2 of 2

@@ -1749,10 +1749,7 @@ class DpiThreadsVisitor final : public VNVisitorConst {
         m_threads = std::max(m_threads, V3Config::getHierWorkers(nodep->cname()));
         iterateChildrenConst(nodep);
     }
-    void visit(AstNodeCCall* nodep) override {
-        iterateChildrenConst(nodep);
-        iterateConst(nodep->funcp());
-    }
+    void visit(AstNodeCCall* nodep) override { iterateConst(nodep->funcp()); }
     void visit(AstNode* nodep) override { iterateChildrenConst(nodep); }
 
 public:
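
The second file's change makes the AstNodeCCall handler recurse only into the called function body (iterateConst(nodep->funcp())) rather than also walking the call node's own children, so the visitor accumulates the maximum hier-worker count over the functions reachable through the call graph. A toy sketch of that idea, with illustrative types rather than Verilator's AST classes:

    #include <algorithm>
    #include <vector>

    // Illustrative stand-ins: a function carries its own worker requirement
    // (analogous to V3Config::getHierWorkers) and the calls made from its body.
    struct Func;
    struct Call { const Func* funcp; };
    struct Func {
        int hierWorkers;
        std::vector<Call> calls;
    };

    // Take the maximum worker count over everything reachable from f by
    // following call targets, the way the visitor follows funcp().
    static int maxWorkers(const Func& f) {
        int threads = f.hierWorkers;
        for (const Call& c : f.calls) threads = std::max(threads, maxWorkers(*c.funcp));
        return threads;
    }

    int main() {
        Func leaf{4, {}};
        Func top{1, {{&leaf}}};
        return maxWorkers(top) == 4 ? 0 : 1;
    }
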