From 5c828b7e60a5dd95a805792c5f6285b5972b69b1 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Thu, 1 Sep 2022 18:03:51 +0100 Subject: [PATCH 1/2] V3Partition: use V3Lists to keep track of SiblingMCs Replace std::set with V3Lists to keep track of SiblingMCs associated with MTasks, use a std::set for ensuring uniqueness. This yields a bit more speed in PartContraction. --- src/V3Partition.cpp | 134 +++++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 51 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 1b11a00b5..a2978e93a 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -24,6 +24,7 @@ #include "V3File.h" #include "V3GraphStream.h" #include "V3InstrCount.h" +#include "V3List.h" #include "V3Os.h" #include "V3PairingHeap.h" #include "V3PartitionGraph.h" @@ -35,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -217,10 +219,12 @@ private: // Store the outgoing and incoming edges in a heap sorted by the critical path length std::array m_edgeHeap; - // SiblingMC for which storage is owned by this MTask - std::set m_ownSibs; - // SiblingMC for which storage is owned by the opposite MTask - std::set m_farSibps; + // MTasks for which a SiblingMC exists with 'this' as the higher ID MTask (m_ap in SiblingMC) + std::set m_siblings; + // List of SiblingMCs for which this is the higher ID MTask (m_ap in SiblingMC) + V3List m_aSiblingMCs; + // List of SiblingMCs for which this is the lower ID MTask (m_bp in SiblingMC) + V3List m_bSiblingMCs; public: // CONSTRUCTORS @@ -240,8 +244,9 @@ public: } // METHODS - std::set& ownSibs() { return m_ownSibs; }; - std::set& farSibs() { return m_farSibps; }; + std::set& siblings() { return m_siblings; }; + V3List& aSiblingMCs() { return m_aSiblingMCs; }; + V3List& bSiblingMCs() { return m_bSiblingMCs; }; void moveAllVerticesFrom(LogicMTask* otherp) { // splice() is constant time @@ -465,10 +470,12 @@ static_assert(sizeof(MergeCandidate) == sizeof(MergeCandidateScoreboard::Node), // A pair of associated LogicMTask's that are merge candidates for sibling // contraction class SiblingMC final : public MergeCandidate { -private: LogicMTask* const m_ap; LogicMTask* const m_bp; + V3ListEnt m_aEnt; // List entry for m_ap->aSiblingMCs() + V3ListEnt m_bEnt; // List entry for m_bp->bSiblingMCs() + public: // CONSTRUCTORS SiblingMC() = delete; @@ -476,26 +483,33 @@ public: : MergeCandidate{/* isSiblingMC: */ true} , m_ap{ap} , m_bp{bp} { - // operator< and storage management depends on this + // Storage management depends on this UASSERT(ap->id() > bp->id(), "Should be ordered"); + UDEBUGONLY(UASSERT(ap->siblings().count(bp), "Should be in sibling map");); + m_aEnt.pushBack(m_ap->aSiblingMCs(), this); + m_bEnt.pushBack(m_bp->bSiblingMCs(), this); } ~SiblingMC() = default; + // METHODS + SiblingMC* aNextp() const { return m_aEnt.nextp(); } + SiblingMC* bNextp() const { return m_bEnt.nextp(); } + void unlinkA() { + VL_ATTR_UNUSED const size_t removed = m_ap->siblings().erase(m_bp); + UDEBUGONLY(UASSERT(removed == 1, "Should have been in sibling set");); + m_aEnt.unlink(m_ap->aSiblingMCs(), this); + } + void unlinkB() { m_bEnt.unlink(m_bp->bSiblingMCs(), this); } + LogicMTask* ap() const { return m_ap; } LogicMTask* bp() const { return m_bp; } bool mergeWouldCreateCycle() const { return (LogicMTask::pathExistsFrom(m_ap, m_bp, nullptr) || LogicMTask::pathExistsFrom(m_bp, m_ap, nullptr)); } - bool operator<(const SiblingMC& other) const { - if (m_ap->id() < other.m_ap->id()) return true; - if (m_ap->id() > other.m_ap->id()) return false; - return m_bp->id() < other.m_bp->id(); - } }; -static_assert(sizeof(SiblingMC) == sizeof(MergeCandidate) + 2 * sizeof(LogicMTask*), - "Should not have a vtable"); +static_assert(!std::is_polymorphic::value, "Should not have a vtable"); // GraphEdge for the MTask graph class MTaskEdge final : public V3GraphEdge, public MergeCandidate { @@ -1385,12 +1399,13 @@ public: // New edges would be AC->B and B->AC which is not a DAG. // Do not allow this. if (mergeCanp->mergeWouldCreateCycle()) { - // Remove this edge from scoreboard so we don't keep + // Remove this candidate from scoreboard so we don't keep // reconsidering it on every loop. m_sb.remove(mergeCanp); if (SiblingMC* const smcp = mergeCanp->toSiblingMC()) { - smcp->bp()->farSibs().erase(smcp); - smcp->ap()->ownSibs().erase(*smcp); // Kills *smcp, so do last + smcp->unlinkA(); + smcp->unlinkB(); + delete smcp; } continue; } @@ -1455,29 +1470,45 @@ private: } void removeSiblingMCsWith(LogicMTask* mtaskp) { - for (const SiblingMC& pair : mtaskp->ownSibs()) { - m_sb.remove(const_cast(&pair)); - // Owner is always ap(), remove from the opposite side - pair.bp()->farSibs().erase(&pair); + for (SiblingMC *smcp = mtaskp->aSiblingMCs().begin(), *nextp; // lintok-begin-on-ref + smcp; smcp = nextp) { + nextp = smcp->aNextp(); + m_sb.remove(smcp); + smcp->unlinkB(); + delete smcp; } - for (const SiblingMC* const pairp : mtaskp->farSibs()) { - m_sb.remove(const_cast(pairp)); - // Owner is always ap(), remove from the opposite side - pairp->ap()->ownSibs().erase(*pairp); + for (SiblingMC *smcp = mtaskp->bSiblingMCs().begin(), *nextp; // lintok-begin-on-ref + smcp; smcp = nextp) { + nextp = smcp->bNextp(); + m_sb.remove(smcp); + smcp->unlinkA(); + delete smcp; } - mtaskp->ownSibs().clear(); - mtaskp->farSibs().clear(); + } + + void removeSiblingMCs(LogicMTask* recipientp, LogicMTask* donorp) { + // The lists here should be disjoint (there should be only one SiblingMC involving these + // two MTasks, and we removed that elsewhere), so no need for unlinking from the lists we + // are clearing. + removeSiblingMCsWith(recipientp); + removeSiblingMCsWith(donorp); + + // Clear the sibling map of the recipient. The donor will be deleted anyway, so we can + // leave that in a corrupt for efficiency. + recipientp->siblings().clear(); + recipientp->aSiblingMCs().reset(); + recipientp->bSiblingMCs().reset(); } void contract(MergeCandidate* mergeCanp) { LogicMTask* top = nullptr; LogicMTask* fromp = nullptr; - MTaskEdge* mergeEdgep = mergeCanp->toMTaskEdge(); + MTaskEdge* const mergeEdgep = mergeCanp->toMTaskEdge(); + SiblingMC* const mergeSibsp = mergeCanp->toSiblingMC(); if (mergeEdgep) { top = static_cast(mergeEdgep->top()); fromp = static_cast(mergeEdgep->fromp()); } else { - const SiblingMC* mergeSibsp = static_cast(mergeCanp); top = mergeSibsp->ap(); fromp = mergeSibsp->bp(); } @@ -1513,14 +1544,19 @@ private: const NewCp recipientNewCpRev = newCp(recipientp, donorp, mergeEdgep); const NewCp donorNewCpRev = newCp(donorp, recipientp, mergeEdgep); + m_sb.remove(mergeCanp); + if (mergeEdgep) { - // Remove and free the connecting edge. Must do this before - // propagating CP's below. - m_sb.remove(mergeCanp); + // Remove and free the connecting edge. Must do this before propagating CP's below. mergeEdgep->fromMTaskp()->removeRelativeMTask(mergeEdgep->toMTaskp()); mergeEdgep->fromMTaskp()->removeRelativeEdge(mergeEdgep); mergeEdgep->toMTaskp()->removeRelativeEdge(mergeEdgep); - VL_DO_CLEAR(mergeEdgep->unlinkDelete(), mergeEdgep = nullptr); + VL_DO_DANGLING(mergeEdgep->unlinkDelete(), mergeEdgep); + } else { + // Remove the siblingMC + mergeSibsp->unlinkA(); + mergeSibsp->unlinkB(); + VL_DO_DANGLING(delete mergeEdgep, mergeEdgep); } // This also updates cost and stepCost on recipientp @@ -1552,13 +1588,10 @@ private: m_forwardPropagator.go(); m_reversePropagator.go(); - // Remove all SiblingMCs that include donorp. This Includes the one - // we're merging, if we're merging a SiblingMC. - removeSiblingMCsWith(donorp); - // Remove all SiblingMCs that include recipientp also, so we can't - // get huge numbers of SiblingMCs. We'll recreate them below, up + // Remove all other SiblingMCs that include recipientp or donorp. We remove all siblingMCs + // of recipientp so we do not get huge numbers of SiblingMCs. We'll recreate them below, up // to a bounded number. - removeSiblingMCsWith(recipientp); + removeSiblingMCs(recipientp, donorp); // Redirect all edges, delete donorp partRedirectEdgesFrom(m_mtasksp, recipientp, donorp, &m_sb); @@ -1607,20 +1640,19 @@ private: void makeSiblingMC(LogicMTask* ap, LogicMTask* bp) { if (ap->id() < bp->id()) std::swap(ap, bp); - // The higher id vertex owns the storage - const auto emplaceResult = ap->ownSibs().emplace(ap, bp); - if (emplaceResult.second) { - SiblingMC* const newSibsp = const_cast(&(*emplaceResult.first)); - bp->farSibs().insert(newSibsp); - m_sb.add(newSibsp); - } else if (m_slowAsserts) { + // The higher id vertex owns the association set + const auto first = ap->siblings().insert(bp).second; + if (first) { + m_sb.add(new SiblingMC{ap, bp}); + } else if (VL_UNLIKELY(m_slowAsserts)) { // It's fine if we already have this SiblingMC, we may have // created it earlier. Just confirm that we have associated data. bool found = false; - for (const SiblingMC& sibs : ap->ownSibs()) { - UASSERT_OBJ(sibs.ap() == ap, ap, "Inconsistent SiblingMC"); - UASSERT_OBJ(m_sb.contains(&sibs), ap, "Must be on the scoreboard"); - if (sibs.bp() == bp) found = true; + for (const SiblingMC* smcp = ap->aSiblingMCs().begin(); // lintok-begin-on-ref + smcp; smcp = smcp->aNextp()) { + UASSERT_OBJ(smcp->ap() == ap, ap, "Inconsistent SiblingMC"); + UASSERT_OBJ(m_sb.contains(smcp), ap, "Must be on the scoreboard"); + if (smcp->bp() == bp) found = true; } UASSERT_OBJ(found, ap, "Sibling not found"); } From 2ba39b25f1a2c8f821a903cbceb28afad3da416c Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Fri, 2 Sep 2022 11:29:02 +0100 Subject: [PATCH 2/2] Replace dynamic_casts with static_casts dynamic_cast is not free. Replace obvious instances (where the result is unconditionally dereferenced) with static_cast in contexts with performance implications. --- src/V3Gate.cpp | 14 +++++++------- src/V3Order.cpp | 14 +++++++------- src/V3VariableOrder.cpp | 6 +++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index 88a9332d4..726bbbfa8 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -107,7 +107,7 @@ public: VNUser iterateInEdges(GateGraphBaseVisitor& v, VNUser vu = VNUser(0)) { VNUser ret = VNUser(0); for (V3GraphEdge* edgep = inBeginp(); edgep; edgep = edgep->inNextp()) { - ret = dynamic_cast(edgep->fromp())->accept(v, vu); + ret = static_cast(edgep->fromp())->accept(v, vu); } return ret; } @@ -121,7 +121,7 @@ public: for (V3GraphEdge* edgep = outBeginp(); edgep; edgep = next_edgep) { // Need to find the next edge before visiting in case the edge is deleted next_edgep = edgep->outNextp(); - ret = dynamic_cast(edgep->top())->accept(v, vu); + ret = static_cast(edgep->top())->accept(v, vu); } return ret; } @@ -683,7 +683,7 @@ bool GateVisitor::elimLogicOkOutputs(GateLogicVertex* consumeVertexp, varscopes.insert(vscp); } for (V3GraphEdge* edgep = consumeVertexp->outBeginp(); edgep; edgep = edgep->outNextp()) { - const GateVarVertex* const consVVertexp = dynamic_cast(edgep->top()); + const GateVarVertex* const consVVertexp = static_cast(edgep->top()); AstVarScope* const vscp = consVVertexp->varScp(); if (varscopes.find(vscp) != varscopes.end()) { UINFO(9, " Block-unopt, insertion generates input vscp " << vscp << endl); @@ -764,8 +764,8 @@ void GateVisitor::consumedMove() { if (!vvertexp->consumed() && !vvertexp->user()) { UINFO(8, "Unconsumed " << vvertexp->varScp() << endl); } - } - if (const GateLogicVertex* const lvertexp = dynamic_cast(vertexp)) { + } else { + const GateLogicVertex* const lvertexp = static_cast(vertexp); AstNode* const nodep = lvertexp->nodep(); const AstActive* const oldactp = lvertexp->activep(); // nullptr under cfunc if (!lvertexp->consumed() && oldactp) { @@ -1108,7 +1108,7 @@ private: // Replace all of this varvertex's consumers with dupVarRefp for (V3GraphEdge* outedgep = vvertexp->outBeginp(); outedgep;) { const GateLogicVertex* const consumeVertexp - = dynamic_cast(outedgep->top()); + = static_cast(outedgep->top()); AstNode* const consumerp = consumeVertexp->nodep(); // if (debug() >= 9) m_graphp->dumpDotFilePrefixed("gate_preelim"); UINFO(9, @@ -1283,7 +1283,7 @@ private: V3GraphEdge* oedgep = ledgep; ledgep = ledgep->inNextp(); GateEitherVertex* const fromvp - = dynamic_cast(oedgep->fromp()); + = static_cast(oedgep->fromp()); new V3GraphEdge(m_graphp, fromvp, m_logicvp, 1); VL_DO_DANGLING(oedgep->unlinkDelete(), oedgep); } diff --git a/src/V3Order.cpp b/src/V3Order.cpp index 1d83983ea..9c20558e4 100644 --- a/src/V3Order.cpp +++ b/src/V3Order.cpp @@ -1027,8 +1027,8 @@ class OrderVerticesByDomainThenScope final { public: virtual bool operator()(const V3GraphVertex* lhsp, const V3GraphVertex* rhsp) const { - const MTaskMoveVertex* const l_vxp = dynamic_cast(lhsp); - const MTaskMoveVertex* const r_vxp = dynamic_cast(rhsp); + const MTaskMoveVertex* const l_vxp = static_cast(lhsp); + const MTaskMoveVertex* const r_vxp = static_cast(rhsp); uint64_t l_id = m_ids.findId(l_vxp->domainp()); uint64_t r_id = m_ids.findId(r_vxp->domainp()); if (l_id < r_id) return true; @@ -1047,8 +1047,8 @@ public: // Sort vertex's, which must be AbstractMTask's, into a deterministic // order by comparing their serial IDs. virtual bool operator()(const V3GraphVertex* lhsp, const V3GraphVertex* rhsp) const { - const AbstractMTask* const lmtaskp = dynamic_cast(lhsp); - const AbstractMTask* const rmtaskp = dynamic_cast(rhsp); + const AbstractMTask* const lmtaskp = static_cast(lhsp); + const AbstractMTask* const rmtaskp = static_cast(rhsp); return lmtaskp->id() < rmtaskp->id(); } }; @@ -1932,7 +1932,7 @@ void OrderProcess::processMTasks() { GraphStream emit_logic(&logicGraph); const V3GraphVertex* moveVxp; while ((moveVxp = emit_logic.nextp())) { - const MTaskMoveVertex* const movep = dynamic_cast(moveVxp); + const MTaskMoveVertex* const movep = static_cast(moveVxp); const unsigned mtaskId = movep->color(); UASSERT(mtaskId > 0, "Every MTaskMoveVertex should have an mtask assignment >0"); if (movep->logicp()) { @@ -1976,7 +1976,7 @@ void OrderProcess::processMTasks() { GraphStream emit_mtasks(&mtasks); const V3GraphVertex* mtaskVxp; while ((mtaskVxp = emit_mtasks.nextp())) { - const AbstractLogicMTask* const mtaskp = dynamic_cast(mtaskVxp); + const AbstractLogicMTask* const mtaskp = static_cast(mtaskVxp); // Create a body for this mtask AstMTaskBody* const bodyp = new AstMTaskBody(rootFlp); @@ -2018,7 +2018,7 @@ void OrderProcess::processMTasks() { for (V3GraphEdge* inp = mtaskp->inBeginp(); inp; inp = inp->inNextp()) { const V3GraphVertex* fromVxp = inp->fromp(); const AbstractLogicMTask* const fromp - = dynamic_cast(fromVxp); + = static_cast(fromVxp); const MTaskState& fromState = mtaskStates[fromp->id()]; new V3GraphEdge(depGraphp, fromState.m_execMTaskp, state.m_execMTaskp, 1); } diff --git a/src/V3VariableOrder.cpp b/src/V3VariableOrder.cpp index d62cfb50f..d3ea7aa3d 100644 --- a/src/V3VariableOrder.cpp +++ b/src/V3VariableOrder.cpp @@ -52,14 +52,14 @@ public: ~VarTspSorter() override = default; // METHODS virtual bool operator<(const TspStateBase& other) const override { - return operator<(dynamic_cast(other)); + return operator<(static_cast(other)); } bool operator<(const VarTspSorter& other) const { return m_serial < other.m_serial; } const MTaskIdSet& mtaskIds() const { return m_mtaskIds; } virtual int cost(const TspStateBase* otherp) const override { - return cost(dynamic_cast(otherp)); + return cost(static_cast(otherp)); } - virtual int cost(const VarTspSorter* otherp) const { + int cost(const VarTspSorter* otherp) const { int cost = diffs(m_mtaskIds, otherp->m_mtaskIds); cost += diffs(otherp->m_mtaskIds, m_mtaskIds); return cost;