From 6d257ef52c4af35634dc85e74e005f19f447cc2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bart=C5=82omiej=20Chmiel?=
Date: Tue, 20 May 2025 18:15:09 +0200
Subject: [PATCH] Improve hierarchical scheduling visualization in V3ExecGraph (#6009)

---
 src/V3ExecGraph.cpp     | 182 ++++++++++++++++++++++++++++++----------
 src/V3OrderParallel.cpp |   5 +-
 2 files changed, 141 insertions(+), 46 deletions(-)

diff --git a/src/V3ExecGraph.cpp b/src/V3ExecGraph.cpp
index 7d1d466d7..318009efe 100644
--- a/src/V3ExecGraph.cpp
+++ b/src/V3ExecGraph.cpp
@@ -96,8 +96,18 @@ public:
 private:
     VL_UNCOPYABLE(ThreadSchedule);
 
+    static constexpr double s_threadBoxWidth = 2.5;
+    static constexpr double s_threadBoxHeight = 1.5;
+    static constexpr double s_horizontalGap = s_threadBoxWidth / 2;
+
     // Debugging
-    void dumpDotFile(const V3Graph& graph, const string& filename) const {
+    // Variant of dumpDotFilePrefixed without --dump option check
+    static void dumpDotFilePrefixedAlways(const std::vector<ThreadSchedule>& schedules,
+                                          const string& nameComment, uint32_t nThreads) {
+        dumpDotFile(schedules, v3Global.debugFilename(nameComment) + ".dot", nThreads);
+    }
+    static void dumpDotFile(const std::vector<ThreadSchedule>& schedules, const string& filename,
+                            uint32_t nThreads) {
         // This generates a file used by graphviz, https://www.graphviz.org
         const std::unique_ptr<std::ofstream> logp{V3File::new_ofstream(filename)};
         if (logp->fail()) v3fatal("Can't write file: " << filename);
@@ -109,52 +119,102 @@ private:
 
         // Thread labels
         *logp << "\n  // Threads\n";
-        const int threadBoxWidth = 2;
-        for (int i = 0; i < v3Global.opt.threads(); i++) {
-            *logp << "  t" << i << " [label=\"Thread " << i << "\" width=" << threadBoxWidth
-                  << " pos=\"" << (-threadBoxWidth / 2) << "," << -i
-                  << "!\" style=\"filled\" fillcolor=\"grey\"] \n";
+
+        for (uint32_t i = 0; i < nThreads; ++i) {
+            const string name = "t" + std::to_string(i);
+            const string label = "Thread " + std::to_string(i);
+            constexpr double posX = -s_horizontalGap;
+            const double posY = -static_cast<double>(i) * s_threadBoxHeight;
+            dumpDotFileEmitBlock(logp, name, label, s_threadBoxWidth, s_threadBoxHeight, posX,
+                                 posY, "grey");
         }
 
         // MTask nodes
         *logp << "\n  // MTasks\n";
 
-        // Find minimum cost MTask for scaling MTask node widths
-        uint32_t minCost = UINT32_MAX;
-        for (const V3GraphVertex& vtx : graph.vertices()) {
-            if (const ExecMTask* const mtaskp = vtx.cast<const ExecMTask>()) {
-                minCost = minCost > mtaskp->cost() ? mtaskp->cost() : minCost;
+        uint32_t maxCost = 0;
+        for (const auto& state : ThreadSchedule::mtaskState) {
+            const ExecMTask* const mtaskp = state.first;
+            maxCost = std::max(maxCost, mtaskp->cost());
+        }
+        // To avoid segments getting too long, limit the maximal mtask length.
+        // Each mtask's cost is normalized against this segment cost.
+        constexpr uint32_t segmentsPerLongestMtask = 4;
+        const uint32_t segmentCost = maxCost / segmentsPerLongestMtask;
+
+        // Create columns of tasks whose execution intervals overlap.
+        // Keep an offset for each column so tasks stay correctly aligned.
+        std::vector<double> offsets(nThreads, 0.0);
+        for (const ThreadSchedule& schedule : schedules) {
+            if (schedule.mtasks.empty()) continue;
+            using Column = std::vector<const ExecMTask*>;
+            std::vector<Column> columns = {{}};
+
+            // Order tasks based on their start time
+            struct Cmp final {
+                bool operator()(const ExecMTask* const a, const ExecMTask* const b) const {
+                    if (startTime(a) == startTime(b)) return threadId(a) < threadId(b);
+                    return startTime(a) < startTime(b);
+                }
+            };
+            const std::multiset<const ExecMTask*, Cmp> tasks(schedule.mtasks.begin(),
+                                                             schedule.mtasks.end());
+
+            for (const ExecMTask* const mtaskp : tasks) {
+                Column& column = columns.back();
+                UASSERT(column.size() <= nThreads, "Invalid partitioning");
+
+                bool intersects = true;
+                for (const ExecMTask* const earlierMtask : column) {
+                    if (endTime(mtaskp) <= startTime(earlierMtask)
+                        || startTime(mtaskp) >= endTime(earlierMtask)) {
+                        intersects = false;
+                        break;
+                    }
+                }
+                if (intersects) {
+                    column.emplace_back(mtaskp);
+                } else {
+                    columns.emplace_back(Column{mtaskp});
+                }
             }
-        }
 
-        const double minWidth = 2.0;
-        const auto mtaskXPos = [&](const ExecMTask* mtaskp, const double nodeWidth) {
-            const double startPosX = (minWidth * startTime(mtaskp)) / minCost;
-            return nodeWidth / minWidth + startPosX;
-        };
-        const auto emitMTask = [&](const ExecMTask* mtaskp) {
-            const int thread = threadId(mtaskp);
-            const double nodeWidth = minWidth * (static_cast<double>(mtaskp->cost()) / minCost);
-            const double x = mtaskXPos(mtaskp, nodeWidth);
-            const int y = -thread;
-            const string label = "label=\"" + mtaskp->name() + " (" + cvtToStr(startTime(mtaskp))
-                                 + ":" + std::to_string(endTime(mtaskp)) + ")" + "\"";
-            *logp << "  " << mtaskp->name() << " [" << label << " width=" << nodeWidth << " pos=\""
-                  << x << "," << y << "!\"]\n";
-        };
+            UASSERT(!columns.front().empty(), "Should be populated by mtasks");
 
-        // Emit MTasks
-        for (const V3GraphVertex& vtx : graph.vertices()) {
-            if (const ExecMTask* const mtaskp = vtx.cast<const ExecMTask>()) emitMTask(mtaskp);
-        }
+            for (const Column& column : columns) {
+                double lastColumnOffset = 0;
+                for (const ExecMTask* const mtaskp : column) {
+                    dumpDotFileEmitMTask(logp, mtaskp, schedule, segmentCost, offsets);
+                    lastColumnOffset = std::max(lastColumnOffset, offsets[threadId(mtaskp)]);
+                }
+                // Even out the column offsets
+                std::fill(offsets.begin(), offsets.end(), lastColumnOffset);
+            }
 
-        // Emit MTask dependency edges
-        *logp << "\n  // MTask dependencies\n";
-        for (const V3GraphVertex& vtx : graph.vertices()) {
-            if (const ExecMTask* const mtaskp = vtx.cast<const ExecMTask>()) {
-                for (const V3GraphEdge& edge : mtaskp->outEdges()) {
-                    const V3GraphVertex* const top = edge.top();
-                    *logp << "  " << vtx.name() << " -> " << top->name() << "\n";
+            dumpDotFileEmitFork(logp, offsets.front(), nThreads);
+
+            // Emit MTask dependency edges
+            *logp << "\n  // MTask dependencies\n";
+
+            for (const std::vector<const ExecMTask*>& thread : schedule.threads) {
+                if (thread.empty()) break;  // No more threads
+
+                // Show that the schedule ends when all tasks are finished
+                *logp << "  " << thread.back()->name() << " -> fork_"
+                      << static_cast<int>(offsets.front()) << "\n";
+
+                // Show that tasks from the same thread are executed in sequence
+                for (size_t i = 1; i < thread.size(); ++i)
+                    *logp << "  " << thread[i - 1]->name() << " -> " << thread[i]->name() << "\n";
+
+                // Emit cross-thread task dependencies
+                for (const ExecMTask* const mtaskp : thread) {
+                    for (const V3GraphEdge& edge : mtaskp->outEdges()) {
+                        const ExecMTask* const topMTaskp = edge.top()->cast<const ExecMTask>();
+                        if (topMTaskp && schedule.contains(topMTaskp)
+                            && threadId(topMTaskp) != threadId(mtaskp))
+                            *logp << "  " << mtaskp->name() << " -> " << topMTaskp->name() << "\n";
+                    }
                 }
             }
         }
@@ -163,10 +223,47 @@ private:
         *logp << "}\n";
         logp->close();
     }
+    static void dumpDotFileEmitBlock(const std::unique_ptr<std::ofstream>& logp,
+                                     const string& name, const string& label, double width,
+                                     double height, double xPos, double yPos,
+                                     const string& fillColor) {
+        *logp << "  " << name << " [label=\"" << label << "\" width=" << width
+              << " height=" << height << " pos=\"" << xPos << "," << yPos
+              << "!\" style=\"filled\" fillcolor=\"" << fillColor << "\"]\n";
+    }
+    static void dumpDotFileEmitMTask(const std::unique_ptr<std::ofstream>& logp,
+                                     const ExecMTask* const mtaskp, const ThreadSchedule& schedule,
+                                     uint32_t segmentCost, std::vector<double>& offsets) {
+        for (int i = 0; i < mtaskp->threads(); ++i) {
+            // Keep the original name for the first thread of a hierarchical task to preserve
+            // dependency tracking; suffix the rest with '_<index>' to differentiate them.
+            const string name = i == 0 ? mtaskp->name() : mtaskp->name() + '_' + std::to_string(i);
+            const string label = mtaskp->name() + " (" + std::to_string(startTime(mtaskp)) + ':'
+                                 + std::to_string(endTime(mtaskp)) + ')'
+                                 + "\\ncost=" + std::to_string(mtaskp->cost())
+                                 + "\\npriority=" + std::to_string(mtaskp->priority());
+            const double width
+                = std::max(s_threadBoxWidth,
+                           s_threadBoxWidth * static_cast<double>(mtaskp->cost()) / segmentCost);
+            const uint32_t mtaskThreadId = threadId(mtaskp) + i * schedule.threads.size();
+            const double xPos = width / 2 + offsets[mtaskThreadId];
+            offsets[mtaskThreadId] += width + s_horizontalGap;
+            const double yPos = -s_threadBoxHeight * static_cast<double>(mtaskThreadId);
+            const string fillColor = i == 0 ? "white" : "lightgreen";
+            dumpDotFileEmitBlock(logp, name, label, width, s_threadBoxHeight, xPos, yPos,
+                                 fillColor);
+        }
+    }
 
-    // Variant of dumpDotFilePrefixed without --dump option check
-    void dumpDotFilePrefixedAlways(const V3Graph& graph, const string& nameComment) const {
-        dumpDotFile(graph, v3Global.debugFilename(nameComment) + ".dot");
+    static void dumpDotFileEmitFork(const std::unique_ptr<std::ofstream>& logp, double offset,
+                                    uint32_t nThreads) {
+        const string& name = "fork_" + std::to_string(static_cast<int>(offset));
+        constexpr double width = s_threadBoxWidth / 8;
+        const double height = s_threadBoxHeight * nThreads;
+        const double xPos = offset - s_horizontalGap / 2;
+        const double yPos
+            = -static_cast<double>(nThreads) / 2 * s_threadBoxHeight + s_threadBoxHeight / 2;
+        dumpDotFileEmitBlock(logp, name, "", width, height, xPos, yPos, "black");
     }
 
 public:
@@ -424,7 +521,8 @@ class PackThreads final {
         }
 
         // All schedules are combined on a single graph
-        if (dumpGraphLevel() >= 4) result.back().dumpDotFilePrefixedAlways(mtaskGraph, "schedule");
+        if (dumpGraphLevel() >= 4)
+            ThreadSchedule::dumpDotFilePrefixedAlways(result, "schedule", m_nThreads);
 
         return result;
     }
diff --git a/src/V3OrderParallel.cpp b/src/V3OrderParallel.cpp
index aca1ac329..57ab4c3c5 100644
--- a/src/V3OrderParallel.cpp
+++ b/src/V3OrderParallel.cpp
@@ -1749,10 +1749,7 @@ class DpiThreadsVisitor final : public VNVisitorConst {
         m_threads = std::max(m_threads, V3Config::getHierWorkers(nodep->cname()));
         iterateChildrenConst(nodep);
     }
-    void visit(AstNodeCCall* nodep) override {
-        iterateChildrenConst(nodep);
-        iterateConst(nodep->funcp());
-    }
+    void visit(AstNodeCCall* nodep) override { iterateConst(nodep->funcp()); }
     void visit(AstNode* nodep) override { iterateChildrenConst(nodep); }
 
 public:
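
Note on the column-packing pass in the second hunk: mtasks are visited in start-time order, a task joins the current column only while it overlaps every task already in it, and a task disjoint from any member starts a new column; per-thread offsets are then evened out per column so concurrently running tasks share one x-band in the rendered graph. A minimal standalone sketch of that grouping, with a hypothetical Task struct standing in for ExecMTask (not the Verilator API):

    #include <cstdint>
    #include <vector>

    struct Task {  // Hypothetical stand-in for ExecMTask's execution interval
        uint32_t start;
        uint32_t end;
    };

    // Group tasks (already sorted by start time, as the multiset in the patch
    // guarantees) into columns of mutually overlapping intervals.
    std::vector<std::vector<Task>> packColumns(const std::vector<Task>& sorted) {
        std::vector<std::vector<Task>> columns{{}};
        for (const Task& t : sorted) {
            std::vector<Task>& column = columns.back();
            bool intersects = true;  // Overlaps every task already in the column?
            for (const Task& earlier : column) {
                if (t.end <= earlier.start || t.start >= earlier.end) {
                    intersects = false;  // Disjoint from at least one member
                    break;
                }
            }
            if (intersects) {
                column.push_back(t);  // The first task also lands here (empty column)
            } else {
                columns.push_back({t});  // Start a new column
            }
        }
        return columns;
    }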
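For orientation when reading the generated file: dumpDotFileEmitBlock writes one Graphviz node statement per block, with the position pinned via pos="x,y!". The sketch below mirrors that statement using the patch's constants (s_threadBoxWidth = 2.5, s_threadBoxHeight = 1.5), so the expected output is easy to check by hand:

    #include <iostream>
    #include <string>

    // Mirrors the node statement dumpDotFileEmitBlock produces.
    void emitBlock(std::ostream& os, const std::string& name, const std::string& label,
                   double width, double height, double xPos, double yPos,
                   const std::string& fillColor) {
        os << "  " << name << " [label=\"" << label << "\" width=" << width
           << " height=" << height << " pos=\"" << xPos << "," << yPos
           << "!\" style=\"filled\" fillcolor=\"" << fillColor << "\"]\n";
    }

    int main() {
        // Thread row 1: x = -s_horizontalGap = -1.25, y = -1 * s_threadBoxHeight = -1.5
        // Prints:
        //   t1 [label="Thread 1" width=2.5 height=1.5 pos="-1.25,-1.5!" style="filled" fillcolor="grey"]
        emitBlock(std::cout, "t1", "Thread 1", 2.5, 1.5, -1.25, -1.5, "grey");
    }

Because every node carries a pinned position, the resulting schedule .dot file is intended for a fixed-layout engine that honors the "!" pin suffix (e.g. neato) rather than dot's default ranked layout.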