From cfb6fd8b3407357470193e6f302c4062be59098b Mon Sep 17 00:00:00 2001 From: Kamil Rakoczy Date: Tue, 2 Aug 2022 14:36:14 +0200 Subject: [PATCH 1/6] Reduce max RSS usage (#3483) By constant folding nodes earlier in V3Expand, we can save some max RSS on large designs. --- src/V3Const.cpp | 12 +++++++++++- src/V3Const.h | 3 +++ src/V3Expand.cpp | 10 +++++++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/V3Const.cpp b/src/V3Const.cpp index c9250a3f6..8a7670c14 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -3583,7 +3583,11 @@ public: } virtual ~ConstVisitor() override { if (m_doCpp) { - V3Stats::addStat("Optimizations, Const bit op reduction", m_statBitOpReduction); + if (m_globalPass) { + V3Stats::addStat("Optimizations, Const bit op reduction", m_statBitOpReduction); + } else { + V3Stats::addStatSum("Optimizations, Const bit op reduction", m_statBitOpReduction); + } } } @@ -3676,6 +3680,12 @@ AstNode* V3Const::constifyEdit(AstNode* nodep) { return nodep; } +AstNode* V3Const::constifyEditCpp(AstNode* nodep) { + ConstVisitor visitor{ConstVisitor::PROC_CPP, /* globalPass: */ false}; + nodep = visitor.mainAcceptEdit(nodep); + return nodep; +} + void V3Const::constifyAllLive(AstNetlist* nodep) { // Only call from Verilator.cpp, as it uses user#'s // This only pushes constants up, doesn't make any other edits diff --git a/src/V3Const.h b/src/V3Const.h index 59f5a5f76..ada396efd 100644 --- a/src/V3Const.h +++ b/src/V3Const.h @@ -39,6 +39,9 @@ public: static void constifyCpp(AstNetlist* nodep); // Only the current node and lower // Return new node that may have replaced nodep + static AstNode* constifyEditCpp(AstNode* nodep); + // Only the current node and lower + // Return new node that may have replaced nodep static AstNode* constifyEdit(AstNode* nodep); // Only the current node and lower, with special SenTree optimization // Return new node that may have replaced nodep diff --git a/src/V3Expand.cpp b/src/V3Expand.cpp index 5c341db26..c10c3c85f 100644 --- a/src/V3Expand.cpp +++ b/src/V3Expand.cpp @@ -32,6 +32,7 @@ #include "V3Expand.h" #include "V3Stats.h" #include "V3Ast.h" +#include "V3Const.h" #include @@ -160,6 +161,7 @@ private: new AstShiftL{fl, llowp, new AstConst{fl, static_cast(loffset)}, VL_EDATASIZE}}}; + newp = V3Const::constifyEditCpp(newp); } else { newp = llowp; } @@ -520,8 +522,9 @@ private: cleanmask.setMask(VL_BITBIT_E(destp->widthMin())); newp = new AstAnd{lfl, newp, new AstConst{lfl, cleanmask}}; } - - addWordAssign(nodep, w, destp, new AstOr{lfl, oldvalp, newp}); + AstNode* const orp + = V3Const::constifyEditCpp(new AstOr{lfl, oldvalp, newp}); + addWordAssign(nodep, w, destp, orp); } } VL_DO_DANGLING(rhsp->deleteTree(), rhsp); @@ -541,7 +544,8 @@ private: AstNode* const shifted = new AstShiftL{ lfl, rhsp, new AstConst{lfl, static_cast(lsb)}, destp->width()}; AstNode* const cleaned = new AstAnd{lfl, shifted, new AstConst{lfl, cleanmask}}; - AstNode* const newp = new AstAssign{nfl, destp, new AstOr{lfl, oldvalp, cleaned}}; + AstNode* const orp = V3Const::constifyEditCpp(new AstOr{lfl, oldvalp, cleaned}); + AstNode* newp = new AstAssign{nfl, destp, orp}; insertBefore(nodep, newp); } return true; From 6c33e6e889bbc59eda8ae822a8a2db340f2cb8aa Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 2 Aug 2022 16:31:45 +0100 Subject: [PATCH 2/6] Tell clang-tidy .h files are C++ (not C) headers --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index b805f4886..fc9b6b1a9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -334,7 +334,7 @@ clang-tidy: $(CLANGTIDY_DEP) %.cpp.tidy: %.cpp $(CLANGTIDY) $(CLANGTIDY_FLAGS) $< -- $(CLANGTIDY_DEFS) $(CPPCHECK_INC) | 2>&1 tee $@ %.h.tidy: %.h - $(CLANGTIDY) $(CLANGTIDY_FLAGS) $< -- $(CLANGTIDY_DEFS) $(CPPCHECK_INC) | 2>&1 tee $@ + $(CLANGTIDY) $(CLANGTIDY_FLAGS) $< -- $(CLANGTIDY_DEFS) $(CPPCHECK_INC) -x c++-header | 2>&1 tee $@ analyzer-src: -rm -rf src/obj_dbg From bd211c87aa2f5ae30c1c01dd31f582c46c79dfd9 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 2 Aug 2022 16:46:31 +0100 Subject: [PATCH 3/6] astgen: split 'visit' method declarations from definitions Add definitions to V3Ast.cpp, and use static_cast. This fixes a lot of clang-tidy noise. --- src/V3Ast.cpp | 5 +++++ src/V3Ast.h | 6 ++---- src/astgen | 21 ++++++++++++++------- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/V3Ast.cpp b/src/V3Ast.cpp index 9a6e7fca8..de595be61 100644 --- a/src/V3Ast.cpp +++ b/src/V3Ast.cpp @@ -1293,3 +1293,8 @@ void VNDeleter::doDeletes() { for (AstNode* const nodep : m_deleteps) nodep->deleteTree(); m_deleteps.clear(); } + +//###################################################################### +// VNVisitor + +#include "V3Ast__gen_visitor_defns.h" // From ./astgen diff --git a/src/V3Ast.h b/src/V3Ast.h index e93dc4d86..45fe1fc98 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -1297,10 +1297,8 @@ public: /// Return edited nodep; see comments in V3Ast.cpp AstNode* iterateSubtreeReturnEdits(AstNode* nodep); -#include "V3Ast__gen_visitor.h" // From ./astgen - // Things like: - // virtual void visit(AstBreak* nodep) { visit((AstNodeStmt*)(nodep)); } - // virtual void visit(AstNodeStmt* nodep) { visit((AstNode*)(nodep)); } + virtual void visit(AstNode* nodep) = 0; +#include "V3Ast__gen_visitor_decls.h" // From ./astgen }; //###################################################################### diff --git a/src/astgen b/src/astgen index b6071c06b..44529a0e4 100755 --- a/src/astgen +++ b/src/astgen @@ -507,15 +507,21 @@ def write_classes(filename): fh.write("\n") -def write_visitor(filename): +def write_visitor_decls(filename): with open_file(filename) as fh: for typen in sorted(Classes.keys()): - if typen == "Node": - fh.write(" virtual void visit(Ast" + typen + "*) = 0;\n") - else: + if typen != "Node": + fh.write("virtual void visit(Ast" + typen + "*);\n") + + +def write_visitor_defns(filename): + with open_file(filename) as fh: + for typen in sorted(Classes.keys()): + if typen != "Node": base = Classes[typen] - fh.write(" virtual void visit(Ast" + typen + - "* nodep) { visit((Ast" + base + "*)(nodep)); }\n") + fh.write("void VNVisitor::visit(Ast" + typen + + "* nodep) { visit(static_cast(nodep)); }\n") def write_impl(filename): @@ -692,7 +698,8 @@ for filename in source_files: if Args.classes: write_report("V3Ast__gen_report.txt") write_classes("V3Ast__gen_classes.h") - write_visitor("V3Ast__gen_visitor.h") + write_visitor_decls("V3Ast__gen_visitor_decls.h") + write_visitor_defns("V3Ast__gen_visitor_defns.h") write_impl("V3Ast__gen_impl.h") write_types("V3Ast__gen_types.h") write_yystype("V3Ast__gen_yystype.h") From f9f66d787e5b386c89cb9c9714bcf18a55c4090d Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 3 Aug 2022 09:41:30 +0100 Subject: [PATCH 4/6] Fix integer overflow in V3Unroll (#3451) --- src/V3Unroll.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/V3Unroll.cpp b/src/V3Unroll.cpp index 769faf369..469f5656e 100644 --- a/src/V3Unroll.cpp +++ b/src/V3Unroll.cpp @@ -50,6 +50,7 @@ private: bool m_varModeReplace; // Replacing varrefs bool m_varAssignHit; // Assign var hit bool m_generate; // Expand single generate For loop + int m_unrollLimit; // Unrolling limit string m_beginName; // What name to give begin iterations VDouble0 m_statLoops; // Statistic tracking VDouble0 m_statIters; // Statistic tracking @@ -67,10 +68,6 @@ private: return false; } - int unrollCount() const { - return m_generate ? v3Global.opt.unrollCount() * 16 : v3Global.opt.unrollCount(); - } - bool bodySizeOverRecurse(AstNode* nodep, int& bodySize, int bodyLimit) { if (!nodep) return false; bodySize++; @@ -163,7 +160,7 @@ private: // Check whether to we actually want to try and unroll. int loops; - if (!countLoops(initAssp, condp, incp, unrollCount(), loops)) { + if (!countLoops(initAssp, condp, incp, m_unrollLimit, loops)) { return cantUnroll(nodep, "Unable to simulate loop"); } @@ -336,11 +333,11 @@ private: } ++m_statIters; - if (++times > unrollCount() * 3) { + if (++times / 3 > m_unrollLimit) { nodep->v3error( "Loop unrolling took too long;" " probably this is an infinite loop, or set --unroll-count above " - << unrollCount()); + << m_unrollLimit); break; } @@ -485,6 +482,12 @@ public: m_varModeReplace = false; m_varAssignHit = false; m_generate = generate; + m_unrollLimit = v3Global.opt.unrollCount(); + if (generate) { + m_unrollLimit = std::numeric_limits::max() / 16 > m_unrollLimit + ? m_unrollLimit * 16 + : std::numeric_limits::max(); + } m_beginName = beginName; } void process(AstNode* nodep, bool generate, const string& beginName) { From b864f5f5baef23df633b9a3017026c27198c37f1 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 3 Aug 2022 16:33:55 +0100 Subject: [PATCH 5/6] V3Partition: use static_cast with LogicMTaskVertex dynamic_cast is not free, and the mtask graph contains only LogicMTaskVertex vertices, use static_cast instead for some speedup. --- src/V3Partition.cpp | 52 ++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 4b37f53df..3263b8493 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -375,17 +375,17 @@ public: ~CpCostAccessor() = default; // Return cost of this node uint32_t cost(const V3GraphVertex* vxp) const { - const LogicMTask* const mtaskp = dynamic_cast(vxp); + const LogicMTask* const mtaskp = static_cast(vxp); return mtaskp->stepCost(); } // Return stored CP to this node uint32_t critPathCost(const V3GraphVertex* vxp, GraphWay way) const { - const LogicMTask* const mtaskp = dynamic_cast(vxp); + const LogicMTask* const mtaskp = static_cast(vxp); return mtaskp->critPathCost(way); } // Store a new CP to this node void setCritPathCost(V3GraphVertex* vxp, GraphWay way, uint32_t cost) const { - LogicMTask* const mtaskp = dynamic_cast(vxp); + LogicMTask* const mtaskp = static_cast(vxp); mtaskp->setCritPathCost(way, cost); } // Notify vxp that the wayward CP at the throughp-->vxp edge @@ -393,15 +393,15 @@ public: // This is our cue to update vxp's m_edges[!way][throughp]. void notifyEdgeCp(V3GraphVertex* vxp, GraphWay way, V3GraphVertex* throuvhVxp, uint32_t cp) const { - LogicMTask* const updateVxp = dynamic_cast(vxp); - LogicMTask* const lthrouvhVxp = dynamic_cast(throuvhVxp); + LogicMTask* const updateVxp = static_cast(vxp); + LogicMTask* const lthrouvhVxp = static_cast(throuvhVxp); EdgeSet& edges = updateVxp->m_edges[way.invert()]; const uint32_t edgeCp = edges.at(lthrouvhVxp); if (cp > edgeCp) edges.set(lthrouvhVxp, cp); } // Check that CP matches that of the longest edge wayward of vxp. void checkNewCpVersusEdges(V3GraphVertex* vxp, GraphWay way, uint32_t cp) const { - LogicMTask* const mtaskp = dynamic_cast(vxp); + LogicMTask* const mtaskp = static_cast(vxp); const EdgeSet& edges = mtaskp->m_edges[way.invert()]; // This is mtaskp's relative with longest !wayward inclusive CP: const auto edgeIt = edges.rbegin(); @@ -603,7 +603,7 @@ private: for (const V3GraphEdge* followp = fromp->outBeginp(); followp; followp = followp->outNextp()) { if (followp == excludedEdgep) continue; - LogicMTask* const nextp = dynamic_cast(followp->top()); + LogicMTask* const nextp = static_cast(followp->top()); if (pathExistsFromInternal(nextp, top, nullptr, generation)) return true; } return false; @@ -634,7 +634,7 @@ public: const LogicMTask* startp = nullptr; for (const V3GraphVertex* vxp = graphp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - const LogicMTask* const mtaskp = dynamic_cast(vxp); + const LogicMTask* const mtaskp = static_cast(vxp); if (!startp) { startp = mtaskp; continue; @@ -805,10 +805,10 @@ public: } // METHODS LogicMTask* furtherMTaskp(GraphWay way) const { - return dynamic_cast(this->furtherp(way)); + return static_cast(this->furtherp(way)); } - LogicMTask* fromMTaskp() const { return dynamic_cast(fromp()); } - LogicMTask* toMTaskp() const { return dynamic_cast(top()); } + LogicMTask* fromMTaskp() const { return static_cast(fromp()); } + LogicMTask* toMTaskp() const { return static_cast(top()); } bool mergeWouldCreateCycle() const { return LogicMTask::pathExistsFrom(fromMTaskp(), toMTaskp(), this); } @@ -962,7 +962,7 @@ static void partInitHalfCriticalPaths(GraphWay way, V3Graph* mtasksp, bool check GraphStreamUnordered order(mtasksp, way); const GraphWay rev = way.invert(); for (const V3GraphVertex* vertexp; (vertexp = order.nextp());) { - const LogicMTask* const mtaskcp = dynamic_cast(vertexp); + const LogicMTask* const mtaskcp = static_cast(vertexp); LogicMTask* const mtaskp = const_cast(mtaskcp); uint32_t cpCost = 0; #if VL_DEBUG @@ -977,7 +977,7 @@ static void partInitHalfCriticalPaths(GraphWay way, V3Graph* mtasksp, bool check "Should be no redundant edges in mtasks graph"); relatives.insert(edgep->furtherp(rev)); #endif - const LogicMTask* const relativep = dynamic_cast(edgep->furtherp(rev)); + const LogicMTask* const relativep = static_cast(edgep->furtherp(rev)); cpCost = std::max(cpCost, (relativep->critPathCost(way) + static_cast(relativep->stepCost()))); } @@ -1010,7 +1010,7 @@ static void partCheckCriticalPaths(V3Graph* mtasksp) { partInitHalfCriticalPaths(GraphWay::FORWARD, mtasksp, true); partInitHalfCriticalPaths(GraphWay::REVERSE, mtasksp, true); for (V3GraphVertex* vxp = mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - const LogicMTask* const mtaskp = dynamic_cast(vxp); + const LogicMTask* const mtaskp = static_cast(vxp); mtaskp->checkRelativesCp(GraphWay::FORWARD); mtaskp->checkRelativesCp(GraphWay::REVERSE); } @@ -1326,8 +1326,8 @@ private: MTaskEdge* mergeEdgep = mergeCanp->toMTaskEdge(); const SiblingMC* mergeSibsp = nullptr; if (mergeEdgep) { - top = dynamic_cast(mergeEdgep->top()); - fromp = dynamic_cast(mergeEdgep->fromp()); + top = static_cast(mergeEdgep->top()); + fromp = static_cast(mergeEdgep->fromp()); } else { mergeSibsp = mergeCanp->toSiblingMC(); UASSERT(mergeSibsp, "Failed to cast mergeCanp to either MTaskEdge or SiblingMC"); @@ -1437,14 +1437,14 @@ private: siblingPairFromRelatives(GraphWay::FORWARD, recipientp, true); unsigned edges = 0; for (V3GraphEdge* edgep = recipientp->outBeginp(); edgep; edgep = edgep->outNextp()) { - LogicMTask* const postreqp = dynamic_cast(edgep->top()); + LogicMTask* const postreqp = static_cast(edgep->top()); siblingPairFromRelatives(GraphWay::REVERSE, postreqp, false); ++edges; if (edges > PART_SIBLING_EDGE_LIMIT) break; } edges = 0; for (V3GraphEdge* edgep = recipientp->inBeginp(); edgep; edgep = edgep->inNextp()) { - LogicMTask* const prereqp = dynamic_cast(edgep->fromp()); + LogicMTask* const prereqp = static_cast(edgep->fromp()); siblingPairFromRelatives(GraphWay::FORWARD, prereqp, false); ++edges; if (edges > PART_SIBLING_EDGE_LIMIT) break; @@ -1491,8 +1491,8 @@ private: // Score this edge. Lower is better. The score is the new local CP // length if we merge these mtasks. ("Local" means the longest // critical path running through the merged node.) - const LogicMTask* const top = dynamic_cast(edgep->top()); - const LogicMTask* const fromp = dynamic_cast(edgep->fromp()); + const LogicMTask* const top = static_cast(edgep->top()); + const LogicMTask* const fromp = static_cast(edgep->fromp()); const uint32_t mergedCpCostFwd = std::max(fromp->critPathCost(GraphWay::FORWARD), top->critPathCostWithout(GraphWay::FORWARD, edgep)); @@ -1534,7 +1534,7 @@ private: std::vector shortestPrereqs; for (V3GraphEdge* edgep = mtaskp->beginp(way); edgep; edgep = edgep->nextp(way)) { - LogicMTask* const prereqp = dynamic_cast(edgep->furtherp(way)); + LogicMTask* const prereqp = static_cast(edgep->furtherp(way)); shortestPrereqs.push_back(prereqp); // Prevent nodes with huge numbers of edges from massively // slowing down the partitioner: @@ -1932,7 +1932,7 @@ public: OvvSet ovvSetSystemC(ovvOrder); for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - LogicMTask* const mtaskp = dynamic_cast(vxp); + LogicMTask* const mtaskp = static_cast(vxp); // Should be only one MTaskMoveVertex in each mtask at this // stage, but whatever, write it as a loop: for (LogicMTask::VxList::const_iterator it = mtaskp->vertexListp()->begin(); @@ -2042,7 +2042,7 @@ public: TasksByRank tasksByRank; for (V3GraphVertex* vxp = m_mtasksp->verticesBeginp(); vxp; vxp = vxp->verticesNextp()) { - LogicMTask* const mtaskp = dynamic_cast(vxp); + LogicMTask* const mtaskp = static_cast(vxp); if (hasDpiHazard(mtaskp)) tasksByRank[vxp->rank()].insert(mtaskp); } mergeSameRankTasks(&tasksByRank); @@ -2521,7 +2521,7 @@ void V3Partition::hashGraphDebug(const V3Graph* graphp, const char* debugName) { void V3Partition::setupMTaskDeps(V3Graph* mtasksp, const Vx2MTaskMap* vx2mtaskp) { // Look at each mtask for (V3GraphVertex* itp = mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) { - LogicMTask* const mtaskp = dynamic_cast(itp); + LogicMTask* const mtaskp = static_cast(itp); const LogicMTask::VxList* vertexListp = mtaskp->vertexListp(); // For each logic vertex in this mtask, create an mtask-to-mtask @@ -2651,7 +2651,7 @@ void V3Partition::go(V3Graph* mtasksp) { using SortedMTaskSet = std::set; SortedMTaskSet sorted; for (V3GraphVertex* itp = mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) { - LogicMTask* const mtaskp = dynamic_cast(itp); + LogicMTask* const mtaskp = static_cast(itp); sorted.insert(mtaskp); } for (auto it = sorted.begin(); it != sorted.end(); ++it) { @@ -2667,7 +2667,7 @@ void V3Partition::go(V3Graph* mtasksp) { // Set color to indicate an mtaskId on every underlying MTaskMoveVertex. for (V3GraphVertex* itp = mtasksp->verticesBeginp(); itp; itp = itp->verticesNextp()) { - const LogicMTask* const mtaskp = dynamic_cast(itp); + const LogicMTask* const mtaskp = static_cast(itp); for (LogicMTask::VxList::const_iterator it = mtaskp->vertexListp()->begin(); it != mtaskp->vertexListp()->end(); ++it) { MTaskMoveVertex* const mvertexp = *it; From fac8e76923e40c7d20feffb3b058da53a50b09e9 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Wed, 3 Aug 2022 18:59:40 +0100 Subject: [PATCH 6/6] Rework SortByValueMap for better performance Keep a single std::set of key/value pairs, and a single unordered_map from key to iterators into the set. Also improve some of the accessing mechanisms using modern C++. This speeds up multi-threaded ordering by about 10%. --- src/V3Partition.cpp | 56 +++---- src/V3Scoreboard.h | 375 +++++++++++--------------------------------- 2 files changed, 114 insertions(+), 317 deletions(-) diff --git a/src/V3Partition.cpp b/src/V3Partition.cpp index 3263b8493..d9a071a96 100644 --- a/src/V3Partition.cpp +++ b/src/V3Partition.cpp @@ -195,12 +195,9 @@ public: // longest !wayward edge. Schedule that to be resolved. const uint32_t newPendingVal = newInclusiveCp - m_accessp->critPathCost(relativep, m_way); - if (m_pending.has(relativep)) { - if (newPendingVal > m_pending.at(relativep)) { - m_pending.set(relativep, newPendingVal); - } - } else { - m_pending.set(relativep, newPendingVal); + const auto pair = m_pending.emplace(relativep, newPendingVal); + if (!pair.second && (newPendingVal > pair.first->second)) { + m_pending.update(pair.first, newPendingVal); } } } @@ -225,8 +222,8 @@ public: // This generalizes to multiple seed nodes also. while (!m_pending.empty()) { const auto it = m_pending.rbegin(); - V3GraphVertex* const updateMep = (*it).key(); - const uint32_t cpGrowBy = (*it).value(); + V3GraphVertex* const updateMep = it->first; + const uint32_t cpGrowBy = it->second; m_pending.erase(it); // For *updateMep, whose critPathCost was out-of-date with respect @@ -396,8 +393,8 @@ public: LogicMTask* const updateVxp = static_cast(vxp); LogicMTask* const lthrouvhVxp = static_cast(throuvhVxp); EdgeSet& edges = updateVxp->m_edges[way.invert()]; - const uint32_t edgeCp = edges.at(lthrouvhVxp); - if (cp > edgeCp) edges.set(lthrouvhVxp, cp); + const auto it = edges.find(lthrouvhVxp); + if (cp > it->second) edges.update(it, cp); } // Check that CP matches that of the longest edge wayward of vxp. void checkNewCpVersusEdges(V3GraphVertex* vxp, GraphWay way, uint32_t cp) const { @@ -405,7 +402,7 @@ public: const EdgeSet& edges = mtaskp->m_edges[way.invert()]; // This is mtaskp's relative with longest !wayward inclusive CP: const auto edgeIt = edges.rbegin(); - const uint32_t edgeCp = (*edgeIt).value(); + const uint32_t edgeCp = edgeIt->second; UASSERT_OBJ(edgeCp == cp, vxp, "CP doesn't match longest wayward edge"); } @@ -512,26 +509,21 @@ public: } void addRelative(GraphWay way, LogicMTask* relativep) { - EdgeSet& edges = m_edges[way]; - UASSERT(!edges.has(relativep), "Adding existing edge"); // value is !way cp to this edge - edges.set(relativep, relativep->stepCost() + relativep->critPathCost(way.invert())); - } - void removeRelative(GraphWay way, LogicMTask* relativep) { - EdgeSet& edges = m_edges[way]; - edges.erase(relativep); - } - bool hasRelative(GraphWay way, LogicMTask* relativep) { - const EdgeSet& edges = m_edges[way]; - return edges.has(relativep); + const uint32_t cp = relativep->stepCost() + relativep->critPathCost(way.invert()); + VL_ATTR_UNUSED const bool exits = !m_edges[way].emplace(relativep, cp).second; +#if VL_DEBUG + UASSERT(!exits, "Adding existing edge"); +#endif } + void removeRelative(GraphWay way, LogicMTask* relativep) { m_edges[way].erase(relativep); } + bool hasRelative(GraphWay way, LogicMTask* relativep) { return m_edges[way].has(relativep); } void checkRelativesCp(GraphWay way) const { - const EdgeSet& edges = m_edges[way]; - for (const auto& edge : vlstd::reverse_view(edges)) { - const LogicMTask* const relativep = edge.key(); - const uint32_t cachedCp = edge.value(); - partCheckCachedScoreVsActual(cachedCp, relativep->critPathCost(way.invert()) - + relativep->stepCost()); + for (const auto& edge : vlstd::reverse_view(m_edges[way])) { + const LogicMTask* const relativep = edge.first; + const uint32_t cachedCp = edge.second; + const uint32_t cp = relativep->critPathCost(way.invert()) + relativep->stepCost(); + partCheckCachedScoreVsActual(cachedCp, cp); } } @@ -557,11 +549,11 @@ public: const EdgeSet& edges = m_edges[way.invert()]; uint32_t result = 0; for (const auto& edge : vlstd::reverse_view(edges)) { - if (edge.key() != withoutp->furtherp(way.invert())) { + if (edge.first != withoutp->furtherp(way.invert())) { // Use the cached cost. It could be a small overestimate // due to stepping. This is consistent with critPathCost() // which also returns the cached cost. - result = edge.value(); + result = edge.second; break; } } @@ -657,7 +649,7 @@ public: if (it == children.rend()) { nextp = nullptr; } else { - nextp = (*it).key(); + nextp = it->first; } } @@ -1477,6 +1469,7 @@ private: return 0; } + VL_ATTR_NOINLINE static uint32_t siblingScore(const SiblingMC* sibsp) { const LogicMTask* const ap = sibsp->ap(); const LogicMTask* const bp = sibsp->bp(); @@ -1487,6 +1480,7 @@ private: return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(ap->cost() + bp->cost()); } + VL_ATTR_NOINLINE static uint32_t edgeScore(const V3GraphEdge* edgep) { // Score this edge. Lower is better. The score is the new local CP // length if we merge these mtasks. ("Local" means the longest diff --git a/src/V3Scoreboard.h b/src/V3Scoreboard.h index 7130e7284..43d5be804 100644 --- a/src/V3Scoreboard.h +++ b/src/V3Scoreboard.h @@ -34,298 +34,103 @@ #include #include -//###################################################################### -// SortByValueMap +// ###################################################################### +// SortByValueMap -/// A generic key-value map, except it also supports iterating in -/// value-sorted order. Values need not be unique. Uses T_KeyCompare to -/// break ties in the sort when values collide. +// A generic key-value map, except iteration is in *value* sorted order. Values need not be unique. +// Uses T_KeyCompare to break ties in the sort when values collide. Note: Only const iteration is +// possible, as updating mapped values via iterators is not safe. template > class SortByValueMap final { - // TYPES -private: - using KeySet = std::set; - using Val2Keys = std::map; + // Current implementation is a std::set of key/value pairs, plus a std_unordered_map from keys + // to iterators into the set. This keeps most operations fairly cheap and also has the benefit + // of being able to re-use the std::set iterators. + // TYPES + + using Pair = std::pair; + + struct PairCmp final { + bool operator()(const Pair& a, const Pair& b) const { + // First compare values + if (a.second != b.second) return a.second < b.second; + // Then compare keys + return T_KeyCompare{}(a.first, b.first); + } + }; + + using PairSet = std::set; + +public: + using const_iterator = typename PairSet::const_iterator; + using const_reverse_iterator = typename PairSet::const_reverse_iterator; + +private: // MEMBERS - std::unordered_map m_keys; // Map each key to its value. Not sorted. - Val2Keys m_vals; // Map each value to its keys. Sorted. + PairSet m_pairs; // The contents of the map, stored directly as key-value pairs + std::unordered_map m_kiMap; // Key to iterator map + + VL_UNCOPYABLE(SortByValueMap); public: // CONSTRUCTORS SortByValueMap() = default; - class const_iterator VL_NOT_FINAL { - // TYPES - public: - using value_type = const_iterator; - using reference = const_iterator; // See comment on operator*() - using pointer = void; - using difference_type = std::ptrdiff_t; - using iterator_category = std::bidirectional_iterator_tag; + // Only const iteration is possible + const_iterator begin() const { return m_pairs.begin(); } + const_iterator end() const { return m_pairs.end(); } + const_iterator cbegin() const { m_pairs.cbegin(); } + const_iterator cend() const { return m_pairs.cend(); } + const_reverse_iterator rbegin() const { return m_pairs.rbegin(); } + const_reverse_iterator rend() const { return m_pairs.rend(); } + const_reverse_iterator crbegin() const { return m_pairs.crbegin(); } + const_reverse_iterator crend() const { return m_pairs.crend(); } - protected: - friend class SortByValueMap; - - // MEMBERS - typename KeySet::iterator m_keyIt; - typename Val2Keys::iterator m_valIt; - SortByValueMap* const m_sbvmp; - bool m_end = true; // At the end() - - // CONSTRUCTORS - explicit const_iterator(SortByValueMap* sbmvp) // for end() - : m_sbvmp{sbmvp} {} - const_iterator(typename Val2Keys::iterator valIt, typename KeySet::iterator keyIt, - SortByValueMap* sbvmp) - : m_keyIt{keyIt} - , m_valIt{valIt} - , m_sbvmp{sbvmp} - , m_end{false} {} - - // METHODS - void advanceUntilValid() { - ++m_keyIt; - if (m_keyIt != m_valIt->second.end()) { // Valid iterator, done. - return; - } - // Try the next value? - ++m_valIt; - if (m_valIt == m_sbvmp->m_vals.end()) { // No more values - m_end = true; - return; - } - // Should find a value here, as every value bucket is supposed - // to have at least one key, even after keys get removed. - m_keyIt = m_valIt->second.begin(); - UASSERT(m_keyIt != m_valIt->second.end(), "Algorithm should have key left"); - } - void reverseUntilValid() { - if (m_end) { - UASSERT(!m_sbvmp->m_vals.empty(), "Reverse iterator causes underflow"); - m_valIt = m_sbvmp->m_vals.end(); - --m_valIt; - - UASSERT(!m_valIt->second.empty(), "Reverse iterator causes underflow"); - m_keyIt = m_valIt->second.end(); - --m_keyIt; - - m_end = false; - return; - } - if (m_keyIt != m_valIt->second.begin()) { - // Valid iterator, we're done. - --m_keyIt; - return; - } - // Try the previous value? - if (VL_UNCOVERABLE(m_valIt == m_sbvmp->m_vals.begin())) { - // No more values but it's not defined to decrement an - // iterator past the beginning. - v3fatalSrc("Decremented iterator past beginning"); - return; // LCOV_EXCL_LINE - } - --m_valIt; - // Should find a value here, as Every value bucket is supposed - // to have at least one key, even after keys get removed. - UASSERT(!m_valIt->second.empty(), "Value bucket should have key"); - m_keyIt = m_valIt->second.end(); - --m_keyIt; - UASSERT(m_keyIt != m_valIt->second.end(), "Value bucket should have key"); - } - - public: - const T_Key& key() const { return *m_keyIt; } - const T_Value& value() const { return m_valIt->first; } - const_iterator& operator++() { - advanceUntilValid(); - return *this; - } - const_iterator& operator--() { - reverseUntilValid(); - return *this; - } - bool operator==(const const_iterator& other) const { - // It's not legal to compare iterators from different - // sequences. So check m_end before comparing m_valIt, and - // compare m_valIt's before comparing m_keyIt to ensure nothing - // here is undefined. - if (m_end || other.m_end) return m_end && other.m_end; - return ((m_valIt == other.m_valIt) && (m_keyIt == other.m_keyIt)); - } - bool operator!=(const const_iterator& other) const { return (!this->operator==(other)); } - - // WARNING: Cleverness. - // - // The "reference" returned by *it must remain valid after 'it' - // gets destroyed. The reverse_iterator relies on this for its - // operator*(), so it's not just a theoretical requirement, it's a - // real requirement. - // - // To make that work, define the "reference" type to be the - // iterator itself. So clients can do (*it).key() and - // (*it).value(). This is the clever part. - // - // That's mostly useful for a reverse iterator, where *rit returns - // the forward iterator pointing the to same element, so - // (*rit).key() and (*rit).value() work where rit.key() and - // rit.value() cannot. - // - // It would be nice to support it->key() and it->value(), however - // uncertain what would be an appropriate 'pointer' type define - // that makes this work safely through a reverse iterator. So this - // class does not provide an operator->(). - // - // Q) Why not make our value_type be a pair like a - // normal map, and return a reference to that? This could - // return a reference to one of the pairs inside m_keys, that - // would satisfy the constraint above. - // - // A) It would take a lookup to find that pair within m_keys. This - // iterator is designed to minimize the number of hashtable and - // tree lookups. Increment, decrement, key(), value(), erase() - // by iterator, begin(), end() -- none of these require a - // container lookup. That's true for reverse_iterators too. - reference operator*() const { - UASSERT(!m_end, "Dereferencing iterator that is at end()"); - return *this; - } - }; - - class iterator final : public const_iterator { - public: - // TYPES - using value_type = iterator; - using reference = iterator; - // pointer, difference_type, and iterator_category inherit from - // const_iterator - - // CONSTRUCTORS - explicit iterator(SortByValueMap* sbvmp) - : const_iterator{sbvmp} {} - iterator(typename Val2Keys::iterator valIt, typename KeySet::iterator keyIt, - SortByValueMap* sbvmp) - : const_iterator{valIt, keyIt, sbvmp} {} - - // METHODS - iterator& operator++() { - this->advanceUntilValid(); - return *this; - } - iterator& operator--() { - this->reverseUntilValid(); - return *this; - } - reference operator*() const { - UASSERT(!this->m_end, "Dereferencing iterator that is at end()"); - return *this; - } - }; - - using reverse_iterator = std::reverse_iterator; - using const_reverse_iterator = std::reverse_iterator; - - // METHODS -private: - void removeKeyFromOldVal(const T_Key& k, const T_Value& oldVal) { - // The value of 'k' is about to change, or, 'k' is about to be - // removed from the map. - // Clear the m_vals mapping for k. - KeySet& keysAtOldVal = m_vals[oldVal]; - const size_t erased = keysAtOldVal.erase(k); - UASSERT(erased == 1, "removeKeyFromOldVal() removal key not found"); - if (keysAtOldVal.empty()) { - // Don't keep empty sets in the value map. - m_vals.erase(oldVal); - } + const_iterator find(const T_Key& key) const { + const auto kiIt = m_kiMap.find(key); + if (kiIt == m_kiMap.end()) return cend(); + return kiIt->second; } - void removeKeyFromOldVal(iterator it) { - it.m_valIt->second.erase(it.m_keyIt); - if (it.m_valIt->second.empty()) m_vals.erase(it.m_valIt); - } - -public: - iterator begin() { - const auto valIt = m_vals.begin(); - if (valIt == m_vals.end()) return end(); - const auto keyIt = valIt->second.begin(); - return iterator(valIt, keyIt, this); - } - const_iterator begin() const { - SortByValueMap* const mutp = const_cast(this); - const auto valIt = mutp->m_vals.begin(); - if (valIt == mutp->m_vals.end()) return end(); - const auto keyIt = valIt->second.begin(); - return const_iterator(valIt, keyIt, mutp); - } - iterator end() { return iterator(this); } - const_iterator end() const { - // Safe to cast away const; the const_iterator will still enforce - // it. Same for the const begin() below. - return const_iterator(const_cast(this)); - } - reverse_iterator rbegin() { return reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - - iterator find(const T_Key& k) { - const auto kvit = m_keys.find(k); - if (kvit == m_keys.end()) return end(); - - const auto valIt = m_vals.find(kvit->second); - const auto keyIt = valIt->second.find(k); - return iterator(valIt, keyIt, this); - } - const_iterator find(const T_Key& k) const { - SortByValueMap* const mutp = const_cast(this); - const auto kvit = mutp->m_keys.find(k); - if (kvit == mutp->m_keys.end()) return end(); - - const auto valIt = mutp->m_vals.find(kvit->second); - const auto keyIt = valIt->second.find(k); - return const_iterator(valIt, keyIt, mutp); - } - void set(const T_Key& k, const T_Value& v) { - const auto kvit = m_keys.find(k); - if (kvit != m_keys.end()) { - if (kvit->second == v) { - return; // LCOV_EXCL_LINE // Same value already present; stop. - } - // Must remove element from m_vals[oldValue] - removeKeyFromOldVal(k, kvit->second); - } - m_keys[k] = v; - m_vals[v].insert(k); - } - size_t erase(const T_Key& k) { - const auto kvit = m_keys.find(k); - if (kvit == m_keys.end()) return 0; - removeKeyFromOldVal(k, kvit->second); - m_keys.erase(kvit); + size_t erase(const T_Key& key) { + const auto kiIt = m_kiMap.find(key); + if (kiIt == m_kiMap.end()) return 0; + m_pairs.erase(kiIt->second); + m_kiMap.erase(kiIt); return 1; } - void erase(const iterator& it) { - m_keys.erase(it.key()); - removeKeyFromOldVal(it); + void erase(const_iterator it) { + m_kiMap.erase(it->first); + m_pairs.erase(it); } - void erase(const reverse_iterator& it) { - erase(*it); // Dereferencing returns a copy of the forward iterator + void erase(const_reverse_iterator rit) { + m_kiMap.erase(rit->first); + m_pairs.erase(std::next(rit).base()); } - bool has(const T_Key& k) const { return (m_keys.find(k) != m_keys.end()); } - bool empty() const { return m_keys.empty(); } - // Look up a value. Returns a reference for efficiency. Note this must - // be a const reference, otherwise the client could corrupt the sorted - // order of m_byValue by reaching through and changing the value. - const T_Value& at(const T_Key& k) const { - const auto kvit = m_keys.find(k); - UASSERT(kvit != m_keys.end(), "at() lookup key not found"); - return kvit->second; + bool has(const T_Key& key) const { return m_kiMap.count(key); } + bool empty() const { return m_pairs.empty(); } + // Returns const reference. + const T_Value& at(const T_Key& key) const { return m_kiMap.at(key)->second; } + // Note this returns const_iterator + template // + std::pair emplace(const T_Key& key, Args&&... args) { + const auto kiEmp = m_kiMap.emplace(key, end()); + if (kiEmp.second) { + const auto result = m_pairs.emplace(key, std::forward(args)...); +#if VL_DEBUG + UASSERT(result.second, "Should not be in set yet"); +#endif + kiEmp.first->second = result.first; + return result; + } + return {kiEmp.first->second, false}; + } + // Invalidates iterators + void update(const_iterator it, T_Value value) { + const auto kiIt = m_kiMap.find(it->first); + m_pairs.erase(it); + kiIt->second = m_pairs.emplace(kiIt->first, value).first; } - -private: - VL_UNCOPYABLE(SortByValueMap); }; //###################################################################### @@ -333,7 +138,7 @@ private: /// V3Scoreboard takes a set of Elem*'s, each having some score. /// Scores are assigned by a user-supplied scoring function. /// -/// At any time, the V3Scoreboard can return the elem with the "best" score +/// At any time, the V3Scoreboard can return th515e elem with the "best" score /// among those elements whose scores are known. /// /// The best score is the _lowest_ score. This makes sense in contexts @@ -418,9 +223,9 @@ public: // reflected in the result of bestp(). Otherwise, bestp() only // considers elements that aren't pending rescore. const T_Elem* bestp() { - const auto result = m_sorted.begin(); - if (VL_UNLIKELY(result == m_sorted.end())) return nullptr; - return (*result).key(); + const auto it = m_sorted.begin(); + if (VL_UNLIKELY(it == m_sorted.end())) return nullptr; + return it->first; } // Tell the scoreboard that this element's score may have changed. @@ -444,20 +249,18 @@ public: bool needsRescore() { return !m_unknown.empty(); } // False if elp's score is known to V3Scoreboard, // else true if elp's score is unknown until the next rescore(). - bool needsRescore(const T_Elem* elp) { return (m_unknown.find(elp) != m_unknown.end()); } + bool needsRescore(const T_Elem* elp) { return m_unknown.count(elp); } // Retrieve the last known score for an element. - T_Score cachedScore(const T_Elem* elp) { - const auto result = m_sorted.find(elp); - UASSERT(result != m_sorted.end(), "V3Scoreboard::cachedScore() failed to find element"); - return (*result).value(); - } + T_Score cachedScore(const T_Elem* elp) { return m_sorted.at(elp); } // For each element whose score is unknown to V3Scoreboard, // call the client's scoring function to get a new score, // and sort all elements by their current score. void rescore() { for (const T_Elem* elp : m_unknown) { - const T_Score sortScore = m_scoreFnp(elp); - m_sorted.set(elp, sortScore); + VL_ATTR_UNUSED const bool exists = !m_sorted.emplace(elp, m_scoreFnp(elp)).second; +#if VL_DEBUG + UASSERT(!exists, "Should not be in both m_unknown and m_sorted"); +#endif } m_unknown.clear(); }