From e8a9662eb551f672d11e68be913df2cf13ed8be2 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sat, 16 Mar 2024 14:02:17 +0000 Subject: [PATCH] Simplify LogicMTask/ExecMTask IDs (#4990) There is no strong need to re-map LogicMTask IDs and it just adds extra processing. Instead we just allocate a separate set of ExecMTask IDs as they are created, which can also be used as the unique profiling ID as well. The only effect on the output of this is the change in mtask IDs emitted, which was fairly arbitrary to begin with. --- src/CMakeLists.txt | 1 - src/V3AstNodeOther.h | 10 ++---- src/V3AstNodes.cpp | 2 +- src/V3EmitCSyms.cpp | 7 ++-- src/V3ExecGraph.cpp | 42 ++++++++++++++--------- src/V3ExecGraph.h | 42 +++++++++++++++++++++++ src/V3LifePost.cpp | 2 +- src/V3OrderParallel.cpp | 50 +++++++++------------------ src/V3PartitionGraph.h | 75 ----------------------------------------- 9 files changed, 92 insertions(+), 139 deletions(-) delete mode 100644 src/V3PartitionGraph.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 146dd611a..927b8b3e9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -131,7 +131,6 @@ set(HEADERS V3Parse.h V3ParseImp.h V3ParseSym.h - V3PartitionGraph.h V3PchAstMT.h V3PchAstNoMT.h V3PreExpr.h diff --git a/src/V3AstNodeOther.h b/src/V3AstNodeOther.h index 4a745a0c8..35e66738e 100644 --- a/src/V3AstNodeOther.h +++ b/src/V3AstNodeOther.h @@ -1115,8 +1115,9 @@ class AstExecGraph final : public AstNode { public: explicit AstExecGraph(FileLine* fl, const string& name) VL_MT_DISABLED; - ASTGEN_MEMBERS_AstExecGraph; ~AstExecGraph() override; + ASTGEN_MEMBERS_AstExecGraph; + void cloneRelink() override { V3ERROR_NA; } const char* broken() const override { BROKEN_RTN(!m_depGraphp); return nullptr; @@ -1170,6 +1171,7 @@ public: explicit AstMTaskBody(FileLine* fl) : ASTGEN_SUPER_MTaskBody(fl) {} ASTGEN_MEMBERS_AstMTaskBody; + void cloneRelink() override { V3ERROR_NA; } const char* broken() const override { BROKEN_RTN(!m_execMTaskp); return nullptr; @@ -1267,9 +1269,6 @@ class AstNetlist final : public AstNode { VTimescale m_timeunit; // Global time unit VTimescale m_timeprecision; // Global time precision bool m_timescaleSpecified = false; // Input HDL specified timescale - uint32_t m_nextFreeMTaskID = 1; // Next unique MTask ID within netlist - // starts at 1 so 0 means no MTask ID - uint32_t m_nextFreeMTaskProfilingID = 0; // Next unique ID to use for PGO public: AstNetlist(); ASTGEN_MEMBERS_AstNetlist; @@ -1310,9 +1309,6 @@ public: void timeprecisionMerge(FileLine*, const VTimescale& value); void timescaleSpecified(bool specified) { m_timescaleSpecified = specified; } bool timescaleSpecified() const { return m_timescaleSpecified; } - uint32_t allocNextMTaskID() { return m_nextFreeMTaskID++; } - uint32_t allocNextMTaskProfilingID() { return m_nextFreeMTaskProfilingID++; } - uint32_t usedMTaskProfilingIDs() const { return m_nextFreeMTaskProfilingID; } }; class AstPackageExport final : public AstNode { // A package export declaration diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 322a8793d..bd1919c52 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -17,11 +17,11 @@ #include "V3PchAstMT.h" #include "V3EmitCBase.h" +#include "V3ExecGraph.h" #include "V3File.h" #include "V3Global.h" #include "V3Graph.h" #include "V3Hasher.h" -#include "V3PartitionGraph.h" // Just for mtask dumping #include "V3String.h" #include "V3Ast__gen_impl.h" // Generated by 'astgen' diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index f961ec024..9e7ece0a3 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -18,8 +18,8 @@ #include "V3EmitC.h" #include "V3EmitCBase.h" +#include "V3ExecGraph.h" #include "V3LanguageWords.h" -#include "V3PartitionGraph.h" #include "V3StackCount.h" #include "V3Stats.h" @@ -500,8 +500,7 @@ void EmitCSyms::emitSymHdr() { if (v3Global.opt.profPgo()) { puts("\n// PGO PROFILING\n"); - const uint32_t usedMTaskProfilingIDs = v3Global.rootp()->usedMTaskProfilingIDs(); - puts("VlPgoProfiler<" + cvtToStr(usedMTaskProfilingIDs) + "> _vm_pgoProfiler;\n"); + puts("VlPgoProfiler<" + std::to_string(ExecMTask::numUsedIds()) + "> _vm_pgoProfiler;\n"); } if (!m_scopeNames.empty()) { // Scope names @@ -832,7 +831,7 @@ void EmitCSyms::emitSymImp() { for (const V3GraphVertex* vxp = execGraphp->depGraphp()->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { const ExecMTask* const mtp = static_cast(vxp); - puts("_vm_pgoProfiler.addCounter(" + cvtToStr(mtp->profilerId()) + ", \"" + puts("_vm_pgoProfiler.addCounter(" + cvtToStr(mtp->id()) + ", \"" + mtp->hashName() + "\");\n"); } }); diff --git a/src/V3ExecGraph.cpp b/src/V3ExecGraph.cpp index 8df206bb8..5b6d75309 100644 --- a/src/V3ExecGraph.cpp +++ b/src/V3ExecGraph.cpp @@ -22,11 +22,10 @@ #include "V3EmitCBase.h" #include "V3File.h" #include "V3GraphStream.h" +#include "V3Hasher.h" #include "V3InstrCount.h" #include "V3Os.h" -#include "V3PartitionGraph.h" #include "V3Stats.h" -#include "V3UniqueNames.h" #include #include @@ -34,6 +33,21 @@ VL_DEFINE_DEBUG_FUNCTIONS; +ExecMTask::ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED // + : V3GraphVertex{graphp}, + m_bodyp{bodyp}, + m_id{s_nextId++}, + m_hashName{V3Hasher::uncachedHash(bodyp).toString()} { + UASSERT_OBJ(bodyp->stmtsp(), bodyp, "AstMTaskBody should already be populated for hashing"); +} + +void ExecMTask::dump(std::ostream& str) const { + str << name() << "." << cvtToHex(this); + if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]"; +} + +uint32_t ExecMTask::s_nextId = 0; + namespace V3ExecGraph { //###################################################################### @@ -352,13 +366,19 @@ public: // SELF TEST static void selfTest() { V3Graph graph; - ExecMTask* const t0 = new ExecMTask{&graph, nullptr, 0}; + FileLine* const flp = v3Global.rootp()->fileline(); + const auto makeBody = [flp]() { + AstMTaskBody* const bodyp = new AstMTaskBody{flp}; + bodyp->addStmtsp(new AstComment{flp, ""}); + return bodyp; + }; + ExecMTask* const t0 = new ExecMTask{&graph, makeBody()}; t0->cost(1000); t0->priority(1100); - ExecMTask* const t1 = new ExecMTask{&graph, nullptr, 1}; + ExecMTask* const t1 = new ExecMTask{&graph, makeBody()}; t1->cost(100); t1->priority(100); - ExecMTask* const t2 = new ExecMTask{&graph, nullptr, 2}; + ExecMTask* const t2 = new ExecMTask{&graph, makeBody()}; t2->cost(100); t2->priority(100); @@ -464,8 +484,6 @@ void normalizeCosts(Costs& costs) { } void fillinCosts(V3Graph* execMTaskGraphp) { - V3UniqueNames m_uniqueNames; // For generating unique mtask profile hash names - // Pass 1: See what profiling data applies Costs costs; // For each mtask, costs @@ -473,7 +491,6 @@ void fillinCosts(V3Graph* execMTaskGraphp) { vxp = vxp->verticesNextp()) { ExecMTask* const mtp = const_cast(vxp)->as(); // Compute name of mtask, for hash lookup - mtp->hashName(m_uniqueNames.get(mtp->bodyp())); // This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits const uint64_t costEstimate = V3InstrCount::count(mtp->bodyp(), false); @@ -560,11 +577,6 @@ void finalizeCosts(V3Graph* execMTaskGraphp) { } } - // Assign profiler IDs - for (V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - static_cast(vxp)->profilerId(v3Global.rootp()->allocNextMTaskProfilingID()); - } - // Removing tasks may cause edges that were formerly non-transitive to // become transitive. Also we just created new edges around the removed // tasks, which could be transitive. Prune out all transitive edges. @@ -615,7 +627,7 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, if (v3Global.opt.profPgo()) { // No lock around startCounter, as counter numbers are unique per thread - addStrStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->profilerId()) + addStrStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->id()) + ");\n"); } @@ -624,7 +636,7 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, if (v3Global.opt.profPgo()) { // No lock around stopCounter, as counter numbers are unique per thread - addStrStmt("vlSymsp->_vm_pgoProfiler.stopCounter(" + std::to_string(mtaskp->profilerId()) + addStrStmt("vlSymsp->_vm_pgoProfiler.stopCounter(" + std::to_string(mtaskp->id()) + ");\n"); } diff --git a/src/V3ExecGraph.h b/src/V3ExecGraph.h index 660276e76..94c882791 100644 --- a/src/V3ExecGraph.h +++ b/src/V3ExecGraph.h @@ -20,9 +20,51 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3Graph.h" #include "V3ThreadSafety.h" class AstNetlist; +class AstMTaskBody; + +//************************************************************************* +// MTasks and graph structures + +class ExecMTask final : public V3GraphVertex { + VL_RTTI_IMPL(ExecMTask, V3GraphVertex) +private: + AstMTaskBody* const m_bodyp; // Task body + const uint32_t m_id; // Unique ID of this ExecMTask. + static uint32_t s_nextId; // Next ID to use + const std::string m_hashName; // Hashed name based on body for profile-driven optimization + // Predicted critical path from the start of this mtask to the ends of the graph that are + // reachable from this mtask. In abstract time units. + uint32_t m_priority = 0; + // Predicted runtime of this mtask, in the same abstract time units as priority(). + uint32_t m_cost = 0; + uint64_t m_predictStart = 0; // Predicted start time of task + VL_UNCOPYABLE(ExecMTask); + +public: + ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED; + AstMTaskBody* bodyp() const { return m_bodyp; } + uint32_t id() const VL_MT_SAFE { return m_id; } + uint32_t priority() const { return m_priority; } + void priority(uint32_t pri) { m_priority = pri; } + uint32_t cost() const { return m_cost; } + void cost(uint32_t cost) { m_cost = cost; } + void predictStart(uint64_t time) { m_predictStart = time; } + uint64_t predictStart() const { return m_predictStart; } + string name() const override VL_MT_STABLE { return "mt"s + std::to_string(id()); } + string hashName() const { return m_hashName; } + void dump(std::ostream& str) const; + + static uint32_t numUsedIds() { return s_nextId; } +}; + +inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) { + rhs.dump(os); + return os; +} namespace V3ExecGraph { void implement(AstNetlist*) VL_MT_DISABLED; diff --git a/src/V3LifePost.cpp b/src/V3LifePost.cpp index 3ca1f83ee..f6d0541ad 100644 --- a/src/V3LifePost.cpp +++ b/src/V3LifePost.cpp @@ -28,8 +28,8 @@ #include "V3LifePost.h" +#include "V3ExecGraph.h" #include "V3GraphPathChecker.h" -#include "V3PartitionGraph.h" #include "V3Stats.h" #include // for std::unique_ptr -> auto_ptr or unique_ptr diff --git a/src/V3OrderParallel.cpp b/src/V3OrderParallel.cpp index 159cd6f35..801834061 100644 --- a/src/V3OrderParallel.cpp +++ b/src/V3OrderParallel.cpp @@ -21,6 +21,7 @@ #include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT #include "V3Config.h" +#include "V3ExecGraph.h" #include "V3File.h" #include "V3Graph.h" #include "V3GraphStream.h" @@ -31,7 +32,6 @@ #include "V3OrderMoveGraphBuilder.h" #include "V3Os.h" #include "V3PairingHeap.h" -#include "V3PartitionGraph.h" #include "V3Scoreboard.h" #include "V3Stats.h" @@ -242,7 +242,8 @@ private: // the end of the vertex. Same units as m_cost. std::array m_critPathCost; - uint32_t m_serialId; // Unique MTask ID number + const uint32_t m_id; // Unique LogicMTask ID number + static uint32_t s_nextId; // Next ID number to use // Count "generations" which are just operations that scan through the // graph. We'll mark each node with the last generation that scanned @@ -265,7 +266,9 @@ private: public: // CONSTRUCTORS LogicMTask(V3Graph* graphp, MTaskMoveVertex* mtmvVxp) - : V3GraphVertex{graphp} { + : V3GraphVertex{graphp} + , m_id{s_nextId++} { + UASSERT(s_nextId < 0xFFFFFFFFUL, "Too many mTaskGraphp"); for (uint32_t& item : m_critPathCost) item = 0; if (mtmvVxp) { // Else null for test m_mvertices.push_back(mtmvVxp); @@ -273,10 +276,6 @@ public: m_cost += V3InstrCount::count(olvp->nodep(), true); } } - // Start at 1, so that 0 indicates no mtask ID. - static uint32_t s_nextId = 1; - m_serialId = s_nextId++; - UASSERT(s_nextId < 0xFFFFFFFFUL, "Too many mTaskGraphp"); } // METHODS @@ -299,8 +298,7 @@ public: // Use this instead of pointer-compares to compare LogicMTasks. Avoids // nondeterministic output. Also name mTaskGraphp based on this number in // the final C++ output. - uint32_t id() const { return m_serialId; } - void id(uint32_t id) { m_serialId = id; } + uint32_t id() const { return m_id; } // Abstract cost of every logic mtask uint32_t cost() const VL_MT_SAFE { return m_cost; } void setCost(uint32_t cost) { m_cost = cost; } // For tests only @@ -353,8 +351,8 @@ public: // Display forward and reverse critical path costs. This gives a quick // read on whether graph partitioning looks reasonable or bad. std::ostringstream out; - out << "mt" << m_serialId << "." << this << " [b" << m_critPathCost[GraphWay::FORWARD] - << " a" << m_critPathCost[GraphWay::REVERSE] << " c" << cost(); + out << "mt" << m_id << "." << this << " [b" << m_critPathCost[GraphWay::FORWARD] << " a" + << m_critPathCost[GraphWay::REVERSE] << " c" << cost(); return out.str(); } @@ -423,6 +421,9 @@ private: VL_UNCOPYABLE(LogicMTask); }; +// Start at 1, so that 0 indicates no mtask. +uint32_t LogicMTask::s_nextId = 1; + //###################################################################### // MTask utility classes @@ -2360,29 +2361,6 @@ class Partitioner final { m_mTaskGraphp->removeTransitiveEdges(); debugMTaskGraphStats(*m_mTaskGraphp, "transitive1"); - // Reassign MTask IDs onto smaller numbers, which should be more stable - // across small logic changes. Keep MTask IDs in the same relative - // order though, otherwise we break CmpLogicMTask for still-existing - // EdgeSet's that haven't destructed yet. - { - using SortedMTaskSet = std::set; - SortedMTaskSet sorted; - for (V3GraphVertex* itp = m_mTaskGraphp->verticesBeginp(); itp; - itp = itp->verticesNextp()) { - LogicMTask* const mtaskp = static_cast(itp); - sorted.insert(mtaskp); - } - for (auto it = sorted.begin(); it != sorted.end(); ++it) { - // We shouldn't perturb the sort order of the set, despite - // changing the IDs, they should all just remain in the same - // relative order. Confirm that: - const uint32_t nextId = v3Global.rootp()->allocNextMTaskID(); - UASSERT(nextId <= (*it)->id(), "Should only shrink MTaskIDs here"); - UINFO(4, "Reassigning MTask id " << (*it)->id() << " to id " << nextId << "\n"); - (*it)->id(nextId); - } - } - // Set color to indicate the mtaskId on every underlying logic MTaskMoveVertex. // Remove any MTasks that have no logic in it rerouting the edges. for (V3GraphVertex *vtxp = m_mTaskGraphp->verticesBeginp(), *nextp; vtxp; vtxp = nextp) { @@ -2533,7 +2511,9 @@ AstExecGraph* V3Order::createParallel(const OrderGraph& orderGraph, const std::s // and OrderLogicVertex's which are ephemeral to V3Order. // - The ExecMTask graph and the AstMTaskBody's produced here // persist until code generation time. - state.m_execMTaskp = new ExecMTask{depGraphp, bodyp, mtaskp->id()}; + state.m_execMTaskp = new ExecMTask{depGraphp, bodyp}; + UINFO(3, "Final '" << tag << "' LogicMTask " << mtaskp->id() << " maps to ExecMTask" + << state.m_execMTaskp->id() << std::endl); // Cross-link each ExecMTask and MTaskBody // Q: Why even have two objects? // A: One is an AstNode, the other is a GraphVertex, diff --git a/src/V3PartitionGraph.h b/src/V3PartitionGraph.h deleted file mode 100644 index 251e07cc6..000000000 --- a/src/V3PartitionGraph.h +++ /dev/null @@ -1,75 +0,0 @@ -// -*- mode: C++; c-file-style: "cc-mode" -*- -//************************************************************************* -// DESCRIPTION: Verilator: Threading's graph structures -// -// Code available from: https://verilator.org -// -//************************************************************************* -// -// Copyright 2003-2024 by Wilson Snyder. This program is free software; you -// can redistribute it and/or modify it under the terms of either the GNU -// Lesser General Public License Version 3 or the Perl Artistic License -// Version 2.0. -// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 -// -//************************************************************************* - -#ifndef VERILATOR_V3PARTITIONGRAPH_H_ -#define VERILATOR_V3PARTITIONGRAPH_H_ - -#include "config_build.h" -#include "verilatedos.h" - -#include "V3Graph.h" -#include "V3OrderGraph.h" - -#include - -//************************************************************************* -// MTasks and graph structures - -class ExecMTask final : public V3GraphVertex { - VL_RTTI_IMPL(ExecMTask, V3GraphVertex) -private: - AstMTaskBody* const m_bodyp; // Task body - const uint32_t m_id; // Unique id of this mtask. - string m_hashName; // Hashed name for profile-driven optimization - uint32_t m_priority = 0; // Predicted critical path from the start of - // this mtask to the ends of the graph that are reachable from this - // mtask. In abstract time units. - uint32_t m_cost = 0; // Predicted runtime of this mtask, in the same - // abstract time units as priority(). - uint64_t m_predictStart = 0; // Predicted start time of task - uint64_t m_profilerId = 0; // VerilatedCounter number for profiling - VL_UNCOPYABLE(ExecMTask); - -public: - ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp, uint32_t id) VL_MT_DISABLED - : V3GraphVertex{graphp}, - m_bodyp{bodyp}, - m_id{id} {} - AstMTaskBody* bodyp() const { return m_bodyp; } - uint32_t id() const VL_MT_SAFE { return m_id; } - uint32_t priority() const { return m_priority; } - void priority(uint32_t pri) { m_priority = pri; } - uint32_t cost() const { return m_cost; } - void cost(uint32_t cost) { m_cost = cost; } - void predictStart(uint64_t time) { m_predictStart = time; } - uint64_t predictStart() const { return m_predictStart; } - void profilerId(uint64_t id) { m_profilerId = id; } - uint64_t profilerId() const { return m_profilerId; } - string name() const override VL_MT_STABLE { return "mt"s + cvtToStr(id()); } - string hashName() const { return m_hashName; } - void hashName(const string& name) { m_hashName = name; } - void dump(std::ostream& str) const { - str << name() << "." << cvtToHex(this); - if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]"; - } -}; - -inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) { - rhs.dump(os); - return os; -} - -#endif // Guard