Optimize DfgBreakCycles IndependentBits analysis ordering (#7446)

Perform the data flow analysis based on reverse post-order numbering of
vertices. This speeds up convergence by ~10x. Partial fix for #7201.
This commit is contained in:
Geza Lore 2026-04-19 19:48:02 +01:00 committed by GitHub
parent 97454a1bc5
commit 60b41c7822
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 65 additions and 24 deletions

View File

@ -22,8 +22,8 @@
#include "V3Hash.h"
#include <algorithm>
#include <deque>
#include <fstream>
#include <queue>
#include <unordered_map>
#include <vector>
@ -581,12 +581,19 @@ public:
};
class IndependentBits final : public DfgVisitor {
// TYPES
struct VertexState final {
size_t m_rpoNumber = 0; // Reverse Postorder Number of vertex
bool m_isOnWorkList = false; // Whether the vertex is on the work list
};
// STATE
const Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map
// Vertex to current bit mask map. The mask is set for the bits that are independent of the SCC
std::unordered_map<const DfgVertex*, V3Number> m_vtxp2Mask;
std::deque<DfgVertex*> m_workList; // Work list for the traversal
std::unordered_map<DfgVertex*, bool> m_onWorkList; // Marks vertices on the work list
// Work list for the traversal (min-queue of vertex RPO numbers)
std::priority_queue<size_t, std::vector<size_t>, std::greater<size_t>> m_workQueue;
std::unordered_map<const DfgVertex*, VertexState> m_vtxp2State; // State of each vertex
#ifdef VL_DEBUG
std::ofstream m_lineCoverageFile; // Line coverage file, just for testing
@ -891,16 +898,30 @@ class IndependentBits final : public DfgVisitor {
return false;
}
// Otherwise just enqueue it
bool& onWorkList = m_onWorkList[&sink];
if (!onWorkList) {
VertexState& state = m_vtxp2State.at(&sink);
if (!state.m_isOnWorkList) {
UINFO(9, "Enqueuing vertex " << &sink << " " << sink.typeName());
m_workList.emplace_back(&sink);
m_workQueue.push(state.m_rpoNumber);
}
onWorkList = true;
state.m_isOnWorkList = true;
return false;
});
};
void dfsPostOrder(std::unordered_set<DfgVertex*>& visited, //
std::vector<DfgVertex*>& postOrderEnumeration, //
DfgVertex& vtx) {
// Mark visited, skip if already visited
if (!visited.insert(&vtx).second) return;
// Visit un-visited successors
vtx.foreachSink([&](DfgVertex& snk) {
dfsPostOrder(visited, postOrderEnumeration, snk);
return false;
});
// Add to post order enumeration
postOrderEnumeration.emplace_back(&vtx);
};
IndependentBits(DfgGraph& dfg, const Vtx2Scc& vtx2Scc)
: m_vtx2Scc{vtx2Scc} {
@ -913,31 +934,51 @@ class IndependentBits final : public DfgVisitor {
}
#endif
// Compute Reverse-Postorder enumeration of vertices
std::vector<DfgVertex*> rpoEnumeration;
std::unordered_set<DfgVertex*> visited;
dfg.forEachVertex([&](DfgVertex& vtx) {
if (!vtx.nInputs()) dfsPostOrder(visited, rpoEnumeration, vtx);
});
dfg.forEachVertex([&](DfgVertex& vtx) { //
dfsPostOrder(visited, rpoEnumeration, vtx);
});
std::reverse(rpoEnumeration.begin(), rpoEnumeration.end());
UASSERT(rpoEnumeration.size() == dfg.size(), "Inconsistent vertex count");
// Initialzie VertexState for each vertex
for (size_t i = 0; i < rpoEnumeration.size(); ++i) {
VertexState& state = m_vtxp2State[rpoEnumeration[i]];
state.m_rpoNumber = i;
state.m_isOnWorkList = false;
}
// Set up the initial conditions:
// - For vertices not in an SCC, mark all bits as independent
// - For vertices in an SCC, dispatch to analyse at least once, as some vertices
// always assign constants bits, which are always independent (eg Extend/Shift)
// Enqueue sinks of all SCC vertices that have at least one independent bit
dfg.forEachVertex([&](DfgVertex& vtx) {
if (!handledDirectly(vtx)) return;
if (m_vtx2Scc.at(&vtx)) return;
mask(vtx).setAllBits1();
});
dfg.forEachVertex([&](DfgVertex& vtx) {
if (!handledDirectly(vtx)) return;
if (!m_vtx2Scc.at(&vtx)) return;
iterate(&vtx);
UINFO(9, "Initial independent bits of " << &vtx << " " << vtx.typeName() << " are: "
<< mask(vtx).displayed(vtx.fileline(), "%b"));
if (!mask(vtx).isEqZero()) enqueueSinks(vtx);
});
for (DfgVertex* const vtxp : rpoEnumeration) {
if (!handledDirectly(*vtxp)) continue;
if (m_vtx2Scc.at(vtxp)) continue;
mask(*vtxp).setAllBits1();
}
for (DfgVertex* const vtxp : rpoEnumeration) {
if (!handledDirectly(*vtxp)) continue;
if (!m_vtx2Scc.at(vtxp)) continue;
iterate(vtxp);
UINFO(9, "Initial independent bits of "
<< vtxp << " " << vtxp->typeName()
<< " are: " << mask(*vtxp).displayed(vtxp->fileline(), "%b"));
if (!mask(*vtxp).isEqZero()) enqueueSinks(*vtxp);
}
// Propagate independent bits until no more changes are made
while (!m_workList.empty()) {
while (!m_workQueue.empty()) {
// Grab next item
DfgVertex* const currp = m_workList.front();
m_workList.pop_front();
m_onWorkList[currp] = false;
DfgVertex* const currp = rpoEnumeration[m_workQueue.top()];
m_workQueue.pop();
m_vtxp2State.at(currp).m_isOnWorkList = false;
// Should not enqueue vertices that are not in an SCC
UASSERT_OBJ(m_vtx2Scc.at(currp), currp, "Vertex should be in an SCC");
// Should only enqueue packed vertices