diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 146dd611a..927b8b3e9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -131,7 +131,6 @@ set(HEADERS V3Parse.h V3ParseImp.h V3ParseSym.h - V3PartitionGraph.h V3PchAstMT.h V3PchAstNoMT.h V3PreExpr.h diff --git a/src/V3AstNodeOther.h b/src/V3AstNodeOther.h index 4a745a0c8..35e66738e 100644 --- a/src/V3AstNodeOther.h +++ b/src/V3AstNodeOther.h @@ -1115,8 +1115,9 @@ class AstExecGraph final : public AstNode { public: explicit AstExecGraph(FileLine* fl, const string& name) VL_MT_DISABLED; - ASTGEN_MEMBERS_AstExecGraph; ~AstExecGraph() override; + ASTGEN_MEMBERS_AstExecGraph; + void cloneRelink() override { V3ERROR_NA; } const char* broken() const override { BROKEN_RTN(!m_depGraphp); return nullptr; @@ -1170,6 +1171,7 @@ public: explicit AstMTaskBody(FileLine* fl) : ASTGEN_SUPER_MTaskBody(fl) {} ASTGEN_MEMBERS_AstMTaskBody; + void cloneRelink() override { V3ERROR_NA; } const char* broken() const override { BROKEN_RTN(!m_execMTaskp); return nullptr; @@ -1267,9 +1269,6 @@ class AstNetlist final : public AstNode { VTimescale m_timeunit; // Global time unit VTimescale m_timeprecision; // Global time precision bool m_timescaleSpecified = false; // Input HDL specified timescale - uint32_t m_nextFreeMTaskID = 1; // Next unique MTask ID within netlist - // starts at 1 so 0 means no MTask ID - uint32_t m_nextFreeMTaskProfilingID = 0; // Next unique ID to use for PGO public: AstNetlist(); ASTGEN_MEMBERS_AstNetlist; @@ -1310,9 +1309,6 @@ public: void timeprecisionMerge(FileLine*, const VTimescale& value); void timescaleSpecified(bool specified) { m_timescaleSpecified = specified; } bool timescaleSpecified() const { return m_timescaleSpecified; } - uint32_t allocNextMTaskID() { return m_nextFreeMTaskID++; } - uint32_t allocNextMTaskProfilingID() { return m_nextFreeMTaskProfilingID++; } - uint32_t usedMTaskProfilingIDs() const { return m_nextFreeMTaskProfilingID; } }; class AstPackageExport final : public AstNode { // A package export declaration diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 322a8793d..bd1919c52 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -17,11 +17,11 @@ #include "V3PchAstMT.h" #include "V3EmitCBase.h" +#include "V3ExecGraph.h" #include "V3File.h" #include "V3Global.h" #include "V3Graph.h" #include "V3Hasher.h" -#include "V3PartitionGraph.h" // Just for mtask dumping #include "V3String.h" #include "V3Ast__gen_impl.h" // Generated by 'astgen' diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index f961ec024..9e7ece0a3 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -18,8 +18,8 @@ #include "V3EmitC.h" #include "V3EmitCBase.h" +#include "V3ExecGraph.h" #include "V3LanguageWords.h" -#include "V3PartitionGraph.h" #include "V3StackCount.h" #include "V3Stats.h" @@ -500,8 +500,7 @@ void EmitCSyms::emitSymHdr() { if (v3Global.opt.profPgo()) { puts("\n// PGO PROFILING\n"); - const uint32_t usedMTaskProfilingIDs = v3Global.rootp()->usedMTaskProfilingIDs(); - puts("VlPgoProfiler<" + cvtToStr(usedMTaskProfilingIDs) + "> _vm_pgoProfiler;\n"); + puts("VlPgoProfiler<" + std::to_string(ExecMTask::numUsedIds()) + "> _vm_pgoProfiler;\n"); } if (!m_scopeNames.empty()) { // Scope names @@ -832,7 +831,7 @@ void EmitCSyms::emitSymImp() { for (const V3GraphVertex* vxp = execGraphp->depGraphp()->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { const ExecMTask* const mtp = static_cast(vxp); - puts("_vm_pgoProfiler.addCounter(" + cvtToStr(mtp->profilerId()) + ", \"" + puts("_vm_pgoProfiler.addCounter(" + cvtToStr(mtp->id()) + ", \"" + mtp->hashName() + "\");\n"); } }); diff --git a/src/V3ExecGraph.cpp b/src/V3ExecGraph.cpp index 8df206bb8..5b6d75309 100644 --- a/src/V3ExecGraph.cpp +++ b/src/V3ExecGraph.cpp @@ -22,11 +22,10 @@ #include "V3EmitCBase.h" #include "V3File.h" #include "V3GraphStream.h" +#include "V3Hasher.h" #include "V3InstrCount.h" #include "V3Os.h" -#include "V3PartitionGraph.h" #include "V3Stats.h" -#include "V3UniqueNames.h" #include #include @@ -34,6 +33,21 @@ VL_DEFINE_DEBUG_FUNCTIONS; +ExecMTask::ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED // + : V3GraphVertex{graphp}, + m_bodyp{bodyp}, + m_id{s_nextId++}, + m_hashName{V3Hasher::uncachedHash(bodyp).toString()} { + UASSERT_OBJ(bodyp->stmtsp(), bodyp, "AstMTaskBody should already be populated for hashing"); +} + +void ExecMTask::dump(std::ostream& str) const { + str << name() << "." << cvtToHex(this); + if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]"; +} + +uint32_t ExecMTask::s_nextId = 0; + namespace V3ExecGraph { //###################################################################### @@ -352,13 +366,19 @@ public: // SELF TEST static void selfTest() { V3Graph graph; - ExecMTask* const t0 = new ExecMTask{&graph, nullptr, 0}; + FileLine* const flp = v3Global.rootp()->fileline(); + const auto makeBody = [flp]() { + AstMTaskBody* const bodyp = new AstMTaskBody{flp}; + bodyp->addStmtsp(new AstComment{flp, ""}); + return bodyp; + }; + ExecMTask* const t0 = new ExecMTask{&graph, makeBody()}; t0->cost(1000); t0->priority(1100); - ExecMTask* const t1 = new ExecMTask{&graph, nullptr, 1}; + ExecMTask* const t1 = new ExecMTask{&graph, makeBody()}; t1->cost(100); t1->priority(100); - ExecMTask* const t2 = new ExecMTask{&graph, nullptr, 2}; + ExecMTask* const t2 = new ExecMTask{&graph, makeBody()}; t2->cost(100); t2->priority(100); @@ -464,8 +484,6 @@ void normalizeCosts(Costs& costs) { } void fillinCosts(V3Graph* execMTaskGraphp) { - V3UniqueNames m_uniqueNames; // For generating unique mtask profile hash names - // Pass 1: See what profiling data applies Costs costs; // For each mtask, costs @@ -473,7 +491,6 @@ void fillinCosts(V3Graph* execMTaskGraphp) { vxp = vxp->verticesNextp()) { ExecMTask* const mtp = const_cast(vxp)->as(); // Compute name of mtask, for hash lookup - mtp->hashName(m_uniqueNames.get(mtp->bodyp())); // This estimate is 64 bits, but the final mtask graph algorithm needs 32 bits const uint64_t costEstimate = V3InstrCount::count(mtp->bodyp(), false); @@ -560,11 +577,6 @@ void finalizeCosts(V3Graph* execMTaskGraphp) { } } - // Assign profiler IDs - for (V3GraphVertex* vxp = execMTaskGraphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - static_cast(vxp)->profilerId(v3Global.rootp()->allocNextMTaskProfilingID()); - } - // Removing tasks may cause edges that were formerly non-transitive to // become transitive. Also we just created new edges around the removed // tasks, which could be transitive. Prune out all transitive edges. @@ -615,7 +627,7 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, if (v3Global.opt.profPgo()) { // No lock around startCounter, as counter numbers are unique per thread - addStrStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->profilerId()) + addStrStmt("vlSymsp->_vm_pgoProfiler.startCounter(" + std::to_string(mtaskp->id()) + ");\n"); } @@ -624,7 +636,7 @@ void addMTaskToFunction(const ThreadSchedule& schedule, const uint32_t threadId, if (v3Global.opt.profPgo()) { // No lock around stopCounter, as counter numbers are unique per thread - addStrStmt("vlSymsp->_vm_pgoProfiler.stopCounter(" + std::to_string(mtaskp->profilerId()) + addStrStmt("vlSymsp->_vm_pgoProfiler.stopCounter(" + std::to_string(mtaskp->id()) + ");\n"); } diff --git a/src/V3ExecGraph.h b/src/V3ExecGraph.h index 660276e76..94c882791 100644 --- a/src/V3ExecGraph.h +++ b/src/V3ExecGraph.h @@ -20,9 +20,51 @@ #include "config_build.h" #include "verilatedos.h" +#include "V3Graph.h" #include "V3ThreadSafety.h" class AstNetlist; +class AstMTaskBody; + +//************************************************************************* +// MTasks and graph structures + +class ExecMTask final : public V3GraphVertex { + VL_RTTI_IMPL(ExecMTask, V3GraphVertex) +private: + AstMTaskBody* const m_bodyp; // Task body + const uint32_t m_id; // Unique ID of this ExecMTask. + static uint32_t s_nextId; // Next ID to use + const std::string m_hashName; // Hashed name based on body for profile-driven optimization + // Predicted critical path from the start of this mtask to the ends of the graph that are + // reachable from this mtask. In abstract time units. + uint32_t m_priority = 0; + // Predicted runtime of this mtask, in the same abstract time units as priority(). + uint32_t m_cost = 0; + uint64_t m_predictStart = 0; // Predicted start time of task + VL_UNCOPYABLE(ExecMTask); + +public: + ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp) VL_MT_DISABLED; + AstMTaskBody* bodyp() const { return m_bodyp; } + uint32_t id() const VL_MT_SAFE { return m_id; } + uint32_t priority() const { return m_priority; } + void priority(uint32_t pri) { m_priority = pri; } + uint32_t cost() const { return m_cost; } + void cost(uint32_t cost) { m_cost = cost; } + void predictStart(uint64_t time) { m_predictStart = time; } + uint64_t predictStart() const { return m_predictStart; } + string name() const override VL_MT_STABLE { return "mt"s + std::to_string(id()); } + string hashName() const { return m_hashName; } + void dump(std::ostream& str) const; + + static uint32_t numUsedIds() { return s_nextId; } +}; + +inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) { + rhs.dump(os); + return os; +} namespace V3ExecGraph { void implement(AstNetlist*) VL_MT_DISABLED; diff --git a/src/V3LifePost.cpp b/src/V3LifePost.cpp index 3ca1f83ee..f6d0541ad 100644 --- a/src/V3LifePost.cpp +++ b/src/V3LifePost.cpp @@ -28,8 +28,8 @@ #include "V3LifePost.h" +#include "V3ExecGraph.h" #include "V3GraphPathChecker.h" -#include "V3PartitionGraph.h" #include "V3Stats.h" #include // for std::unique_ptr -> auto_ptr or unique_ptr diff --git a/src/V3OrderParallel.cpp b/src/V3OrderParallel.cpp index 159cd6f35..801834061 100644 --- a/src/V3OrderParallel.cpp +++ b/src/V3OrderParallel.cpp @@ -21,6 +21,7 @@ #include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT #include "V3Config.h" +#include "V3ExecGraph.h" #include "V3File.h" #include "V3Graph.h" #include "V3GraphStream.h" @@ -31,7 +32,6 @@ #include "V3OrderMoveGraphBuilder.h" #include "V3Os.h" #include "V3PairingHeap.h" -#include "V3PartitionGraph.h" #include "V3Scoreboard.h" #include "V3Stats.h" @@ -242,7 +242,8 @@ private: // the end of the vertex. Same units as m_cost. std::array m_critPathCost; - uint32_t m_serialId; // Unique MTask ID number + const uint32_t m_id; // Unique LogicMTask ID number + static uint32_t s_nextId; // Next ID number to use // Count "generations" which are just operations that scan through the // graph. We'll mark each node with the last generation that scanned @@ -265,7 +266,9 @@ private: public: // CONSTRUCTORS LogicMTask(V3Graph* graphp, MTaskMoveVertex* mtmvVxp) - : V3GraphVertex{graphp} { + : V3GraphVertex{graphp} + , m_id{s_nextId++} { + UASSERT(s_nextId < 0xFFFFFFFFUL, "Too many mTaskGraphp"); for (uint32_t& item : m_critPathCost) item = 0; if (mtmvVxp) { // Else null for test m_mvertices.push_back(mtmvVxp); @@ -273,10 +276,6 @@ public: m_cost += V3InstrCount::count(olvp->nodep(), true); } } - // Start at 1, so that 0 indicates no mtask ID. - static uint32_t s_nextId = 1; - m_serialId = s_nextId++; - UASSERT(s_nextId < 0xFFFFFFFFUL, "Too many mTaskGraphp"); } // METHODS @@ -299,8 +298,7 @@ public: // Use this instead of pointer-compares to compare LogicMTasks. Avoids // nondeterministic output. Also name mTaskGraphp based on this number in // the final C++ output. - uint32_t id() const { return m_serialId; } - void id(uint32_t id) { m_serialId = id; } + uint32_t id() const { return m_id; } // Abstract cost of every logic mtask uint32_t cost() const VL_MT_SAFE { return m_cost; } void setCost(uint32_t cost) { m_cost = cost; } // For tests only @@ -353,8 +351,8 @@ public: // Display forward and reverse critical path costs. This gives a quick // read on whether graph partitioning looks reasonable or bad. std::ostringstream out; - out << "mt" << m_serialId << "." << this << " [b" << m_critPathCost[GraphWay::FORWARD] - << " a" << m_critPathCost[GraphWay::REVERSE] << " c" << cost(); + out << "mt" << m_id << "." << this << " [b" << m_critPathCost[GraphWay::FORWARD] << " a" + << m_critPathCost[GraphWay::REVERSE] << " c" << cost(); return out.str(); } @@ -423,6 +421,9 @@ private: VL_UNCOPYABLE(LogicMTask); }; +// Start at 1, so that 0 indicates no mtask. +uint32_t LogicMTask::s_nextId = 1; + //###################################################################### // MTask utility classes @@ -2360,29 +2361,6 @@ class Partitioner final { m_mTaskGraphp->removeTransitiveEdges(); debugMTaskGraphStats(*m_mTaskGraphp, "transitive1"); - // Reassign MTask IDs onto smaller numbers, which should be more stable - // across small logic changes. Keep MTask IDs in the same relative - // order though, otherwise we break CmpLogicMTask for still-existing - // EdgeSet's that haven't destructed yet. - { - using SortedMTaskSet = std::set; - SortedMTaskSet sorted; - for (V3GraphVertex* itp = m_mTaskGraphp->verticesBeginp(); itp; - itp = itp->verticesNextp()) { - LogicMTask* const mtaskp = static_cast(itp); - sorted.insert(mtaskp); - } - for (auto it = sorted.begin(); it != sorted.end(); ++it) { - // We shouldn't perturb the sort order of the set, despite - // changing the IDs, they should all just remain in the same - // relative order. Confirm that: - const uint32_t nextId = v3Global.rootp()->allocNextMTaskID(); - UASSERT(nextId <= (*it)->id(), "Should only shrink MTaskIDs here"); - UINFO(4, "Reassigning MTask id " << (*it)->id() << " to id " << nextId << "\n"); - (*it)->id(nextId); - } - } - // Set color to indicate the mtaskId on every underlying logic MTaskMoveVertex. // Remove any MTasks that have no logic in it rerouting the edges. for (V3GraphVertex *vtxp = m_mTaskGraphp->verticesBeginp(), *nextp; vtxp; vtxp = nextp) { @@ -2533,7 +2511,9 @@ AstExecGraph* V3Order::createParallel(const OrderGraph& orderGraph, const std::s // and OrderLogicVertex's which are ephemeral to V3Order. // - The ExecMTask graph and the AstMTaskBody's produced here // persist until code generation time. - state.m_execMTaskp = new ExecMTask{depGraphp, bodyp, mtaskp->id()}; + state.m_execMTaskp = new ExecMTask{depGraphp, bodyp}; + UINFO(3, "Final '" << tag << "' LogicMTask " << mtaskp->id() << " maps to ExecMTask" + << state.m_execMTaskp->id() << std::endl); // Cross-link each ExecMTask and MTaskBody // Q: Why even have two objects? // A: One is an AstNode, the other is a GraphVertex, diff --git a/src/V3PartitionGraph.h b/src/V3PartitionGraph.h deleted file mode 100644 index 251e07cc6..000000000 --- a/src/V3PartitionGraph.h +++ /dev/null @@ -1,75 +0,0 @@ -// -*- mode: C++; c-file-style: "cc-mode" -*- -//************************************************************************* -// DESCRIPTION: Verilator: Threading's graph structures -// -// Code available from: https://verilator.org -// -//************************************************************************* -// -// Copyright 2003-2024 by Wilson Snyder. This program is free software; you -// can redistribute it and/or modify it under the terms of either the GNU -// Lesser General Public License Version 3 or the Perl Artistic License -// Version 2.0. -// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 -// -//************************************************************************* - -#ifndef VERILATOR_V3PARTITIONGRAPH_H_ -#define VERILATOR_V3PARTITIONGRAPH_H_ - -#include "config_build.h" -#include "verilatedos.h" - -#include "V3Graph.h" -#include "V3OrderGraph.h" - -#include - -//************************************************************************* -// MTasks and graph structures - -class ExecMTask final : public V3GraphVertex { - VL_RTTI_IMPL(ExecMTask, V3GraphVertex) -private: - AstMTaskBody* const m_bodyp; // Task body - const uint32_t m_id; // Unique id of this mtask. - string m_hashName; // Hashed name for profile-driven optimization - uint32_t m_priority = 0; // Predicted critical path from the start of - // this mtask to the ends of the graph that are reachable from this - // mtask. In abstract time units. - uint32_t m_cost = 0; // Predicted runtime of this mtask, in the same - // abstract time units as priority(). - uint64_t m_predictStart = 0; // Predicted start time of task - uint64_t m_profilerId = 0; // VerilatedCounter number for profiling - VL_UNCOPYABLE(ExecMTask); - -public: - ExecMTask(V3Graph* graphp, AstMTaskBody* bodyp, uint32_t id) VL_MT_DISABLED - : V3GraphVertex{graphp}, - m_bodyp{bodyp}, - m_id{id} {} - AstMTaskBody* bodyp() const { return m_bodyp; } - uint32_t id() const VL_MT_SAFE { return m_id; } - uint32_t priority() const { return m_priority; } - void priority(uint32_t pri) { m_priority = pri; } - uint32_t cost() const { return m_cost; } - void cost(uint32_t cost) { m_cost = cost; } - void predictStart(uint64_t time) { m_predictStart = time; } - uint64_t predictStart() const { return m_predictStart; } - void profilerId(uint64_t id) { m_profilerId = id; } - uint64_t profilerId() const { return m_profilerId; } - string name() const override VL_MT_STABLE { return "mt"s + cvtToStr(id()); } - string hashName() const { return m_hashName; } - void hashName(const string& name) { m_hashName = name; } - void dump(std::ostream& str) const { - str << name() << "." << cvtToHex(this); - if (priority() || cost()) str << " [pr=" << priority() << " c=" << cvtToStr(cost()) << "]"; - } -}; - -inline std::ostream& operator<<(std::ostream& os, const ExecMTask& rhs) { - rhs.dump(os); - return os; -} - -#endif // Guard