From 292cc547687ab9dfe47c880a59f8770d5cb7a95f Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sat, 16 Mar 2024 16:32:12 +0000 Subject: [PATCH] Compute MTask affinity in V3VariableOrder (#4991) Instead of carrying around MTask affinity from scheduling, compute it in V3VariableOrder (where it is used), by tracing through the code. This simplifies some code and has the benefit of handling variables introduced after scheduling. It's worth a few % speed at run-time, and the new implementation of V3VariableOrder is slightly more efficient, though the speed/space is still dominated by the TSP sort. --- src/V3Ast.h | 3 - src/V3AstNodeOther.h | 3 - src/V3OrderParallel.cpp | 16 ----- src/V3VariableOrder.cpp | 141 ++++++++++++++++++++++++++++++++-------- src/V3VariableOrder.h | 4 +- src/Verilator.cpp | 2 +- 6 files changed, 118 insertions(+), 51 deletions(-) diff --git a/src/V3Ast.h b/src/V3Ast.h index c36829cb2..56823c952 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -49,9 +49,6 @@ class VFlagLogicPacked {}; class VFlagBitPacked {}; class VFlagChildDType {}; // Used by parser.y to select constructor that sets childDType -// Used as key for another map, needs operator<, hence not an unordered_set -using MTaskIdSet = std::set; // Set of mtaskIds for Var sorting - //###################################################################### // For broken() function, return error string if have a match diff --git a/src/V3AstNodeOther.h b/src/V3AstNodeOther.h index 35e66738e..722ee1553 100644 --- a/src/V3AstNodeOther.h +++ b/src/V3AstNodeOther.h @@ -1742,7 +1742,6 @@ class AstVar final : public AstNode { VDirection m_declDirection; // Declared direction input/output etc VLifetime m_lifetime; // Lifetime VVarAttrClocker m_attrClocker; - MTaskIdSet m_mtaskIds; // MTaskID's that read or write this var int m_pinNum = 0; // For XML, if non-zero the connection pin number bool m_ansi : 1; // Params or pins declared in the module header, rather than the body bool m_declTyped : 1; // Declared 
as type (for dedup check) @@ -2096,8 +2095,6 @@ public: m_name = name; } static AstVar* scVarRecurse(AstNode* nodep); - void addMTaskId(int id) { m_mtaskIds.insert(id); } - const MTaskIdSet& mtaskIds() const { return m_mtaskIds; } void pinNum(int id) { m_pinNum = id; } int pinNum() const { return m_pinNum; } }; diff --git a/src/V3OrderParallel.cpp b/src/V3OrderParallel.cpp index 801834061..458de6f81 100644 --- a/src/V3OrderParallel.cpp +++ b/src/V3OrderParallel.cpp @@ -2464,22 +2464,6 @@ AstExecGraph* V3Order::createParallel(const OrderGraph& orderGraph, const std::s // Add this logic to the per-mtask order mtaskStates[mtaskId].m_logics.push_back(movep->logicp()); - - // Since we happen to be iterating over every logic node, - // take this opportunity to annotate each AstVar with the id's - // of mTaskGraphp that consume it and produce it. We'll use this - // information in V3EmitC when we lay out var's in memory. - const OrderLogicVertex* const logicp = movep->logicp(); - for (const V3GraphEdge* edgep = logicp->inBeginp(); edgep; edgep = edgep->inNextp()) { - const OrderVarVertex* const vVtxp = edgep->fromp()->cast(); - if (!vVtxp) continue; - vVtxp->vscp()->varp()->addMTaskId(mtaskId); - } - for (const V3GraphEdge* edgep = logicp->outBeginp(); edgep; edgep = edgep->outNextp()) { - const OrderVarVertex* const vVtxp = edgep->top()->cast(); - if (!vVtxp) continue; - vVtxp->vscp()->varp()->addMTaskId(mtaskId); - } } // Create the AstExecGraph node which represents the execution diff --git a/src/V3VariableOrder.cpp b/src/V3VariableOrder.cpp index 9c00de7b5..84671eb28 100644 --- a/src/V3VariableOrder.cpp +++ b/src/V3VariableOrder.cpp @@ -26,25 +26,85 @@ #include "V3AstUserAllocator.h" #include "V3EmitCBase.h" +#include "V3ExecGraph.h" #include "V3TSP.h" #include VL_DEFINE_DEBUG_FUNCTIONS; +using MTaskIdVec = std::vector; // Used as a bit-set indexed by MTask ID +using MTaskAffinityMap = std::unordered_map; + +// Trace through code reachable from an MTask and annotate 
referenced variables +class GatherMTaskAffinity final : VNVisitorConst { + // NODE STATE + // AstCFunc::user1() // bool: Already traced this function + // AstVar::user1() // bool: Already traced this variable + const VNUser1InUse m_user1InUse; + + // STATE + MTaskAffinityMap& m_results; // The result map being built; + const uint32_t m_id; // Id of mtask being analysed + const size_t m_usedIds = ExecMTask::numUsedIds(); // Value of max id + 1 + + // CONSTRUCTOR + GatherMTaskAffinity(const ExecMTask* mTaskp, MTaskAffinityMap& results) + : m_results{results} + , m_id{mTaskp->id()} { + iterateChildrenConst(mTaskp->bodyp()); + } + ~GatherMTaskAffinity() = default; + VL_UNMOVABLE(GatherMTaskAffinity); + + // VISIT + void visit(AstNodeVarRef* nodep) { + // Cheaper than relying on emplace().second + if (nodep->user1SetOnce()) return; + AstVar* const varp = nodep->varp(); + // Ignore TriggerVec. They are big and read-only in the MTask bodies + AstBasicDType* const basicp = varp->dtypep()->basicp(); + if (basicp && basicp->isTriggerVec()) return; + // Set affinity bit + MTaskIdVec& affinity = m_results + .emplace(std::piecewise_construct, // + std::forward_as_tuple(varp), // + std::forward_as_tuple(m_usedIds)) + .first->second; + affinity[m_id] = true; + } + + void visit(AstCFunc* nodep) { + if (nodep->user1SetOnce()) return; // Prevent repeat traversals/recursion + iterateChildrenConst(nodep); + } + + void visit(AstNodeCCall* nodep) { + iterateChildrenConst(nodep); // Arguments + iterateConst(nodep->funcp()); // Callee + } + + void visit(AstNode* nodep) { iterateChildrenConst(nodep); } + +public: + static void apply(const ExecMTask* mTaskp, MTaskAffinityMap& results) { + GatherMTaskAffinity{mTaskp, results}; + } +}; + //###################################################################### // Establish mtask variable sort order in mtasks mode class VarTspSorter final : public V3TSP::TspStateBase { // MEMBERS - const MTaskIdSet& m_mtaskIds; // Mtask we're ordering - static 
unsigned s_serialNext; // Unique ID to establish serial order - unsigned m_serial; // Serial ordering + const MTaskIdVec& m_mTaskIds; // Mtask we're ordering + static uint32_t s_serialNext; // Unique ID to establish serial order + const uint32_t m_serial = ++s_serialNext; // Serial ordering public: // CONSTRUCTORS - explicit VarTspSorter(const MTaskIdSet& mtaskIds) - : m_mtaskIds(mtaskIds) { // Cannot be {} or GCC 4.8 false warning - m_serial = ++s_serialNext; // Cannot be ()/{} or GCC 4.8 false warning + explicit VarTspSorter(const MTaskIdVec& mTaskIds) + : m_mTaskIds{mTaskIds} { + UASSERT(mTaskIds.size() == ExecMTask::numUsedIds(), "Wrong size for MTask ID vector"); } ~VarTspSorter() override = default; // METHODS @@ -52,26 +112,20 @@ public: return operator<(static_cast(other)); } bool operator<(const VarTspSorter& other) const { return m_serial < other.m_serial; } - const MTaskIdSet& mtaskIds() const { return m_mtaskIds; } + const MTaskIdVec& mTaskIds() const { return m_mTaskIds; } int cost(const TspStateBase* otherp) const override { return cost(static_cast(otherp)); } int cost(const VarTspSorter* otherp) const { - int cost = diffs(m_mtaskIds, otherp->m_mtaskIds); - cost += diffs(otherp->m_mtaskIds, m_mtaskIds); + // Compute the number of MTasks not shared (Hamming distance) + int cost = 0; + const size_t size = ExecMTask::numUsedIds(); + for (size_t i = 0; i < size; ++i) { cost += m_mTaskIds.at(i) ^ otherp->m_mTaskIds.at(i); } return cost; } - // Returns the number of elements in set_a that don't appear in set_b - static int diffs(const MTaskIdSet& set_a, const MTaskIdSet& set_b) { - int diffs = 0; - for (int i : set_a) { - if (set_b.find(i) == set_b.end()) ++diffs; - } - return diffs; - } }; -unsigned VarTspSorter::s_serialNext = 0; +uint32_t VarTspSorter::s_serialNext = 0; class VariableOrder final { // NODE STATE @@ -85,6 +139,15 @@ class VariableOrder final { AstUser1Allocator m_attributes; // Attributes used for sorting + const MTaskAffinityMap& 
m_mTaskAffinity; + + VariableOrder(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity) + : m_mTaskAffinity{mTaskAffinity} { + orderModuleVars(modp); + } + ~VariableOrder() = default; + VL_UNCOPYABLE(VariableOrder); + //###################################################################### // Simple sort @@ -106,14 +169,20 @@ class VariableOrder final { // Sort by MTask-affinity first, then the same as simpleSortVars void tspSortVars(std::vector& varps) { // Map from "MTask affinity" -> "variable list" - std::map> m2v; - for (AstVar* const varp : varps) m2v[varp->mtaskIds()].push_back(varp); + std::map> m2v; + const MTaskIdVec emptyVec(ExecMTask::numUsedIds(), false); + for (AstVar* const varp : varps) { + const auto it = m_mTaskAffinity.find(varp); + const MTaskIdVec& key = it == m_mTaskAffinity.end() ? emptyVec : it->second; + m2v[key].push_back(varp); + } // Create a TSP sort state for each unique MTaskIdSet, except for the empty set V3TSP::StateVec states; for (const auto& pair : m2v) { - if (pair.first.empty()) continue; - states.push_back(new VarTspSorter{pair.first}); + const MTaskIdVec& vec = pair.first; + const bool empty = std::find(vec.begin(), vec.end(), true) == vec.end(); + if (!empty) states.push_back(new VarTspSorter{vec}); } // Do the TSP sort @@ -131,12 +200,12 @@ class VariableOrder final { // Enumerate by sorted MTaskIdSet, sort within the set separately for (const V3TSP::TspStateBase* const stateBasep : sortedStates) { const VarTspSorter* const statep = dynamic_cast(stateBasep); - sortAndAppend(m2v[statep->mtaskIds()]); + sortAndAppend(m2v[statep->mTaskIds()]); VL_DO_DANGLING(delete statep, statep); } // Finally add the variables with no known MTask affinity - sortAndAppend(m2v[MTaskIdSet()]); + sortAndAppend(m2v[emptyVec]); } void orderModuleVars(AstNodeModule* modp) { @@ -190,17 +259,35 @@ class VariableOrder final { } public: - static void processModule(AstNodeModule* modp) { VariableOrder{}.orderModuleVars(modp); } + static void 
processModule(AstNodeModule* modp, const MTaskAffinityMap& mTaskAffinity) { + VariableOrder{modp, mTaskAffinity}; + } }; //###################################################################### // V3VariableOrder static functions -void V3VariableOrder::orderAll() { +void V3VariableOrder::orderAll(AstNetlist* netlistp) { UINFO(2, __FUNCTION__ << ": " << endl); + + MTaskAffinityMap mTaskAffinity; + + // Gather MTask affinities + if (v3Global.opt.mtasks()) { + netlistp->topModulep()->foreach([&](AstExecGraph* execGraphp) { + for (const V3GraphVertex* vtxp = execGraphp->depGraphp()->verticesBeginp(); vtxp; + vtxp = vtxp->verticesNextp()) { + GatherMTaskAffinity::apply(vtxp->as(), mTaskAffinity); + } + }); + } + + // Order variables in each module for (AstNodeModule* modp = v3Global.rootp()->modulesp(); modp; modp = VN_AS(modp->nextp(), NodeModule)) { - VariableOrder::processModule(modp); + VariableOrder::processModule(modp, mTaskAffinity); } + + // Done V3Global::dumpCheckGlobalTree("variableorder", 0, dumpTreeEitherLevel() >= 3); } diff --git a/src/V3VariableOrder.h b/src/V3VariableOrder.h index 4e556ebf6..1a23b62ca 100644 --- a/src/V3VariableOrder.h +++ b/src/V3VariableOrder.h @@ -22,11 +22,13 @@ #include "V3ThreadSafety.h" +class AstNetlist; + //============================================================================ class V3VariableOrder final { public: - static void orderAll() VL_MT_DISABLED; + static void orderAll(AstNetlist*) VL_MT_DISABLED; }; #endif // Guard diff --git a/src/Verilator.cpp b/src/Verilator.cpp index 029db638c..6b6aff6dc 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -563,7 +563,7 @@ static void process() { V3Common::commonAll(); // Order variables - V3VariableOrder::orderAll(); + V3VariableOrder::orderAll(v3Global.rootp()); // Create AstCUse to determine what class forward declarations/#includes needed in C V3CUse::cUseAll();