From 60b41c782212e6522b4254ab672b6ef2b4d73a1b Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sun, 19 Apr 2026 19:48:02 +0100 Subject: [PATCH] Optimize DfgBreakCycles IndependentBits analysis ordering (#7446) Perform the data flow analysis based on reverse post-order numbering of vertices. This speeds up convergence by ~10x. Partial fix for #7201. --- src/V3DfgBreakCycles.cpp | 89 +++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 24 deletions(-) diff --git a/src/V3DfgBreakCycles.cpp b/src/V3DfgBreakCycles.cpp index 4c25bbdc0..7e0cdab17 100644 --- a/src/V3DfgBreakCycles.cpp +++ b/src/V3DfgBreakCycles.cpp @@ -22,8 +22,8 @@ #include "V3Hash.h" #include -#include #include +#include #include #include @@ -581,12 +581,19 @@ public: }; class IndependentBits final : public DfgVisitor { + // TYPES + struct VertexState final { + size_t m_rpoNumber = 0; // Reverse Postorder Number of vertex + bool m_isOnWorkList = false; // Whether the vertex is on the work list + }; + // STATE const Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map // Vertex to current bit mask map. The mask is set for the bits that are independent of the SCC std::unordered_map m_vtxp2Mask; - std::deque m_workList; // Work list for the traversal - std::unordered_map m_onWorkList; // Marks vertices on the work list + // Work list for the traversal (min-queue of vertex RPO numbers) + std::priority_queue, std::greater> m_workQueue; + std::unordered_map m_vtxp2State; // State of each vertex #ifdef VL_DEBUG std::ofstream m_lineCoverageFile; // Line coverage file, just for testing @@ -891,16 +898,30 @@ class IndependentBits final : public DfgVisitor { return false; } // Otherwise just enqueue it - bool& onWorkList = m_onWorkList[&sink]; - if (!onWorkList) { + VertexState& state = m_vtxp2State.at(&sink); + if (!state.m_isOnWorkList) { UINFO(9, "Enqueuing vertex " << &sink << " " << sink.typeName()); - m_workList.emplace_back(&sink); + m_workQueue.push(state.m_rpoNumber); } - onWorkList = true; + state.m_isOnWorkList = true; return false; }); }; + void dfsPostOrder(std::unordered_set& visited, // + std::vector& postOrderEnumeration, // + DfgVertex& vtx) { + // Mark visited, skip if already visited + if (!visited.insert(&vtx).second) return; + // Visit un-visited successors + vtx.foreachSink([&](DfgVertex& snk) { + dfsPostOrder(visited, postOrderEnumeration, snk); + return false; + }); + // Add to post order enumeration + postOrderEnumeration.emplace_back(&vtx); + }; + IndependentBits(DfgGraph& dfg, const Vtx2Scc& vtx2Scc) : m_vtx2Scc{vtx2Scc} { @@ -913,31 +934,51 @@ class IndependentBits final : public DfgVisitor { } #endif + // Compute Reverse-Postorder enumeration of vertices + std::vector rpoEnumeration; + std::unordered_set visited; + dfg.forEachVertex([&](DfgVertex& vtx) { + if (!vtx.nInputs()) dfsPostOrder(visited, rpoEnumeration, vtx); + }); + dfg.forEachVertex([&](DfgVertex& vtx) { // + dfsPostOrder(visited, rpoEnumeration, vtx); + }); + std::reverse(rpoEnumeration.begin(), rpoEnumeration.end()); + UASSERT(rpoEnumeration.size() == dfg.size(), "Inconsistent vertex count"); + + // Initialzie VertexState for each vertex + for (size_t i = 0; i < rpoEnumeration.size(); ++i) { + VertexState& state = m_vtxp2State[rpoEnumeration[i]]; + state.m_rpoNumber = i; + state.m_isOnWorkList = false; + } + // Set up the initial conditions: // - For vertices not in an SCC, mark all bits as independent // - For vertices in an SCC, dispatch to analyse at least once, as some vertices // always assign constants bits, which are always independent (eg Extend/Shift) // Enqueue sinks of all SCC vertices that have at least one independent bit - dfg.forEachVertex([&](DfgVertex& vtx) { - if (!handledDirectly(vtx)) return; - if (m_vtx2Scc.at(&vtx)) return; - mask(vtx).setAllBits1(); - }); - dfg.forEachVertex([&](DfgVertex& vtx) { - if (!handledDirectly(vtx)) return; - if (!m_vtx2Scc.at(&vtx)) return; - iterate(&vtx); - UINFO(9, "Initial independent bits of " << &vtx << " " << vtx.typeName() << " are: " - << mask(vtx).displayed(vtx.fileline(), "%b")); - if (!mask(vtx).isEqZero()) enqueueSinks(vtx); - }); + for (DfgVertex* const vtxp : rpoEnumeration) { + if (!handledDirectly(*vtxp)) continue; + if (m_vtx2Scc.at(vtxp)) continue; + mask(*vtxp).setAllBits1(); + } + for (DfgVertex* const vtxp : rpoEnumeration) { + if (!handledDirectly(*vtxp)) continue; + if (!m_vtx2Scc.at(vtxp)) continue; + iterate(vtxp); + UINFO(9, "Initial independent bits of " + << vtxp << " " << vtxp->typeName() + << " are: " << mask(*vtxp).displayed(vtxp->fileline(), "%b")); + if (!mask(*vtxp).isEqZero()) enqueueSinks(*vtxp); + } // Propagate independent bits until no more changes are made - while (!m_workList.empty()) { + while (!m_workQueue.empty()) { // Grab next item - DfgVertex* const currp = m_workList.front(); - m_workList.pop_front(); - m_onWorkList[currp] = false; + DfgVertex* const currp = rpoEnumeration[m_workQueue.top()]; + m_workQueue.pop(); + m_vtxp2State.at(currp).m_isOnWorkList = false; // Should not enqueue vertices that are not in an SCC UASSERT_OBJ(m_vtx2Scc.at(currp), currp, "Vertex should be in an SCC"); // Should only enqueue packed vertices