verilator/src/V3DfgBreakCycles.cpp

1271 lines
52 KiB
C++
Raw Normal View History

// -*- mode: C++; c-file-style: "cc-mode" -*-
//*************************************************************************
// DESCRIPTION: Verilator: Converting cyclic DFGs into acyclic DFGs
//
// Code available from: https://verilator.org
//
//*************************************************************************
//
// Copyright 2003-2025 by Wilson Snyder. This program is free software; you
// can redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//*************************************************************************
#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT
#include "V3Dfg.h"
#include "V3DfgPasses.h"
#include "V3Error.h"
#include "V3Hash.h"
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
#include <algorithm>
#include <deque>
#include <fstream>
#include <unordered_map>
#include <vector>
VL_DEFINE_DEBUG_FUNCTIONS;
namespace V3DfgBreakCycles {
// Throughout these algorithm, we use the DfgUserMap as a map to the SCC number
using Vtx2Scc = DfgUserMap<uint64_t>;
class TraceDriver final : public DfgVisitor {
// TYPES
// Key for caching the result of a trace
struct CacheKey final {
DfgVertex* m_vtxp;
uint32_t m_lsb;
uint32_t m_msb;
CacheKey() = delete;
CacheKey(DfgVertex* vtxp, uint32_t lsb, uint32_t msb)
: m_vtxp{vtxp}
, m_lsb{lsb}
, m_msb{msb} {}
struct Hash final {
size_t operator()(const CacheKey& item) const {
// cppcheck-suppress unreadVariable
V3Hash hash{item.m_vtxp};
hash += item.m_lsb;
hash += item.m_msb;
return hash.value();
}
};
struct Equal final {
bool operator()(const CacheKey& a, const CacheKey& b) const {
2025-08-05 15:47:51 +02:00
return a.m_vtxp == b.m_vtxp && a.m_lsb == b.m_lsb && a.m_msb == b.m_msb;
}
};
};
// STATE
DfgGraph& m_dfg; // The graph being processed
Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map
// The strongly connected component we are currently trying to escape
uint64_t m_component = 0;
uint32_t m_lsb = 0; // LSB to extract from the currently visited Vertex
uint32_t m_msb = 0; // MSB to extract from the currently visited Vertex
// Result of tracing the currently visited Vertex. Use SET_RESULT below!
DfgVertex* m_resp = nullptr;
DfgVertex* m_defaultp = nullptr; // When tracing a variable, this is its 'defaultp', if any
// Result cache for reusing already traced vertices
std::unordered_map<CacheKey, DfgVertex*, CacheKey::Hash, CacheKey::Equal> m_cache;
#ifdef VL_DEBUG
std::ofstream m_lineCoverageFile; // Line coverage file, just for testing
#endif
// METHODS
// Create and return a new Vertex. Always use this to create new vertices.
// Fileline is taken from 'refp', but 'refp' is otherwise not used.
// Also sets m_vtx2Scc[vtx] to 0, indicating the new vertex is not part of
// a strongly connected component. This should always be true, as all the
// vertices we create here are driven from outside the component we are
// trying to escape, and will sink into that component. Given those are
// separate SCCs, these new vertices must be acyclic.
template <typename Vertex>
Vertex* make(const DfgVertex* refp, uint32_t width) {
static_assert(std::is_base_of<DfgVertex, Vertex>::value //
&& !std::is_base_of<DfgVertexVar, Vertex>::value,
"Should only make operation vertices and constants");
if VL_CONSTEXPR_CXX17 (std::is_same<DfgConst, Vertex>::value) {
DfgConst* const vtxp = new DfgConst{m_dfg, refp->fileline(), width, 0};
m_vtx2Scc[vtxp] = 0;
return reinterpret_cast<Vertex*>(vtxp);
} else {
// TODO: this is a gross hack around lack of C++17 'if constexpr' Vtx is always Vertex
// when this code is actually executed, but needs a fudged type to type check when
// Vertex is DfgConst, in which case this code is unreachable ...
using Vtx = typename std::conditional<std::is_same<DfgConst, Vertex>::value, DfgSel,
Vertex>::type;
Vtx* const vtxp = new Vtx{m_dfg, refp->fileline(), DfgDataType::packed(width)};
m_vtx2Scc[vtxp] = 0;
return reinterpret_cast<Vertex*>(vtxp);
}
}
// Create temporary capable of holding the result of 'vtxp'
DfgVertexVar* createTmp(const char* prefix, DfgVertex* vtxp) {
AstNode* nodep = m_dfg.modulep();
if (!nodep) nodep = v3Global.rootp();
const std::string name = m_dfg.makeUniqueName(prefix, nodep->user2Inc());
FileLine* const flp = vtxp->fileline();
DfgVertex::ScopeCache scopeCache;
AstScope* const scopep = m_dfg.modulep() ? nullptr : vtxp->scopep(scopeCache);
DfgVertexVar* const varp = m_dfg.makeNewVar(flp, name, vtxp->dtype(), scopep);
varp->varp()->isInternal(true);
varp->tmpForp(varp->nodep());
m_vtx2Scc[varp] = 0;
return varp;
}
// Continue tracing drivers of the given vertex, at the given LSB.
// Every visitor should call this to continue the traversal.
DfgVertex* trace(DfgVertex* const vtxp, const uint32_t msb, const uint32_t lsb) {
UASSERT_OBJ(vtxp->isPacked(), vtxp, "Can only trace packed type vertices");
UASSERT_OBJ(vtxp->size() > msb, vtxp, "Traced Vertex too narrow");
// Get the cache entry, which is the resulting driver that is not part of
// the same component as vtxp
DfgVertex*& respr = m_cache
.emplace(std::piecewise_construct, //
std::forward_as_tuple(vtxp, msb, lsb), //
std::forward_as_tuple(nullptr))
.first->second;
// Trace the vertex
if (respr) {
// If already traced this vtxp/msb/lsb, just use the result.
// This is important to avoid combinatorial explosion when the
// same sub-expression is needed multiple times.
} else if (m_vtx2Scc.at(vtxp) != m_component) {
// If the currently traced vertex is in a different component,
// then we found what we were looking for.
respr = vtxp;
// If the result is a splice, we need to insert a temporary for it
// as a splice cannot be fed into arbitray logic
if (DfgVertexSplice* const splicep = respr->cast<DfgVertexSplice>()) {
DfgVertexVar* const tmpp = createTmp("TraceDriver", splicep);
// Note: we can't do 'splicep->replaceWith(tmpp)', as other
// variable sinks of the splice might have a defaultp driver.
tmpp->srcp(splicep);
respr = tmpp;
}
// Apply a Sel to extract the relevant bits if only a part is needed
if (msb != respr->width() - 1 || lsb != 0) {
DfgSel* const selp = make<DfgSel>(respr, msb - lsb + 1);
selp->fromp(respr);
selp->lsb(lsb);
respr = selp;
}
} else {
// Otherwise visit the vertex to trace it
VL_RESTORER(m_msb);
VL_RESTORER(m_lsb);
VL_RESTORER(m_resp);
m_msb = msb;
m_lsb = lsb;
m_resp = nullptr;
iterate(vtxp);
respr = m_resp;
}
// We only ever trace drivers of bits that are known to be independent
// of the cycles, so we should always be able to find an acyclic driver.
UASSERT_OBJ(respr, vtxp, "Tracing driver failed for " << vtxp->typeName());
UASSERT_OBJ(respr->width() == (msb - lsb + 1), vtxp, "Wrong result width");
return respr;
}
template <typename Vertex>
2025-08-15 09:20:36 +02:00
Vertex* traceBitwiseBinary(Vertex* vtxp) {
static_assert(std::is_base_of<DfgVertexBinary, Vertex>::value,
"Should only call on DfgVertexBinary");
Vertex* const resp = make<Vertex>(vtxp, m_msb - m_lsb + 1);
resp->rhsp(trace(vtxp->rhsp(), m_msb, m_lsb));
resp->lhsp(trace(vtxp->lhsp(), m_msb, m_lsb));
return resp;
}
2025-08-15 09:20:36 +02:00
template <typename Vertex>
DfgVertex* traceAddSub(Vertex* vtxp) {
static_assert(std::is_base_of<DfgVertexBinary, Vertex>::value,
"Should only call on DfgVertexBinary");
Vertex* const opp = make<Vertex>(vtxp, m_msb + 1);
opp->rhsp(trace(vtxp->rhsp(), m_msb, 0));
opp->lhsp(trace(vtxp->lhsp(), m_msb, 0));
DfgSel* const selp = make<DfgSel>(vtxp, m_msb - m_lsb + 1);
selp->fromp(opp);
selp->lsb(m_lsb);
return selp;
2025-08-15 09:20:36 +02:00
}
template <typename Vertex>
Vertex* traceReduction(Vertex* vtxp) {
static_assert(std::is_base_of<DfgVertexUnary, Vertex>::value,
"Should only call on DfgVertexUnary");
Vertex* const resp = make<Vertex>(vtxp, 1);
resp->srcp(trace(vtxp->srcp(), vtxp->srcp()->width() - 1, 0));
return resp;
}
2025-08-15 09:20:36 +02:00
template <typename Vertex>
Vertex* traceCmp(Vertex* vtxp) {
static_assert(std::is_base_of<DfgVertexBinary, Vertex>::value,
"Should only call on DfgVertexBinary");
Vertex* const resp = make<Vertex>(vtxp, m_msb - m_lsb + 1);
resp->rhsp(trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0));
resp->lhsp(trace(vtxp->lhsp(), vtxp->lhsp()->width() - 1, 0));
return resp;
}
// Use this macro to set the result in 'visit' methods. This also emits
// a line to m_lineCoverageFile for testing.
// TODO: Use C++20 std::source_location instead of a macro
#ifdef VL_DEBUG
#define SET_RESULT(vtxp) \
do { \
m_resp = vtxp; \
if (VL_UNLIKELY(m_lineCoverageFile.is_open())) m_lineCoverageFile << __LINE__ << '\n'; \
} while (false)
#else
#define SET_RESULT(vtxp) m_resp = vtxp;
#endif
// VISITORS
void visit(DfgVertex* vtxp) override { // LCOV_EXCL_START
vtxp->v3fatalSrc("TraceDriver - Unhandled vertex type: " << vtxp->typeName());
} // LCOV_EXCL_STOP
void visit(DfgSplicePacked* vtxp) override {
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
struct Driver final {
DfgVertex* m_vtxp;
uint32_t m_lsb; // LSB of driven range (internal, not Verilog)
uint32_t m_msb; // MSB of driven range (internal, not Verilog)
Driver() = delete;
Driver(DfgVertex* vtxp, uint32_t lsb, uint32_t msb)
: m_vtxp{vtxp}
, m_lsb{lsb}
, m_msb{msb} {}
};
std::vector<Driver> drivers;
// Look at all the drivers, one might cover the whole range, but also gather all drivers
bool tryWholeDefault = m_defaultp;
const bool done = vtxp->foreachDriver([&](DfgVertex& src, uint32_t lsb) {
const uint32_t msb = lsb + src.width() - 1;
drivers.emplace_back(&src, lsb, msb);
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
// Check if this driver covers any of the bits, then we can't use whole default
if (m_msb >= lsb && msb >= m_lsb) tryWholeDefault = false;
// If it does not cover the whole searched bit range, move on
if (m_lsb < lsb || msb < m_msb) return false;
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
// Driver covers whole search range, trace that and we are done
SET_RESULT(trace(&src, m_msb - lsb, m_lsb - lsb));
return true;
});
if (done) return;
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
// Trace the default driver if no other drivers cover the searched range
if (tryWholeDefault) {
SET_RESULT(trace(m_defaultp, m_msb, m_lsb));
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
return;
}
// Hard case: We need to combine multiple drivers to produce the searched bit range
// Sort drivers (they are non-overlapping)
std::sort(drivers.begin(), drivers.end(), [](const Driver& a, const Driver& b) { //
return a.m_lsb < b.m_lsb;
});
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
// Gather terms
std::vector<DfgVertex*> termps;
for (const Driver& driver : drivers) {
// Driver is below the searched LSB, move on
if (m_lsb > driver.m_msb) continue;
// Driver is above the searched MSB, done
if (driver.m_lsb > m_msb) break;
// Gap below this driver, trace default to fill it
if (driver.m_lsb > m_lsb) {
UASSERT_OBJ(m_defaultp, vtxp, "Should have a default driver if needs tracing");
termps.emplace_back(trace(m_defaultp, driver.m_lsb - 1, m_lsb));
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
m_lsb = driver.m_lsb;
}
// Driver covers searched range, pick the needed/available bits
uint32_t lim = std::min(m_msb, driver.m_msb);
termps.emplace_back(trace(driver.m_vtxp, lim - driver.m_lsb, m_lsb - driver.m_lsb));
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
m_lsb = lim + 1;
}
if (m_msb >= m_lsb) {
UASSERT_OBJ(m_defaultp, vtxp, "Should have a default driver if needs tracing");
termps.emplace_back(trace(m_defaultp, m_msb, m_lsb));
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
}
// The earlier cheks cover the case when either a whole driver or the default covers
// the whole range, so there should be at least 2 terms required here.
UASSERT_OBJ(termps.size() >= 2, vtxp, "Should have returned in special cases");
// Concatenate all terms and set result
DfgVertex* resp = termps.front();
for (size_t i = 1; i < termps.size(); ++i) {
DfgVertex* const termp = termps[i];
DfgConcat* const catp = make<DfgConcat>(termp, resp->width() + termp->width());
catp->rhsp(resp);
catp->lhsp(termp);
resp = catp;
}
SET_RESULT(resp);
}
void visit(DfgVarPacked* vtxp) override {
UASSERT_OBJ(!vtxp->isVolatile(), vtxp, "Should not trace through volatile VarPacked");
VL_RESTORER(m_defaultp);
m_defaultp = vtxp->defaultp();
SET_RESULT(trace(vtxp->srcp(), m_msb, m_lsb));
}
void visit(DfgArraySel* vtxp) override {
// Only constant select
const DfgConst* const idxp = vtxp->bitp()->cast<DfgConst>();
if (!idxp) return;
// From a variable
const DfgVarArray* varp = vtxp->fromp()->cast<DfgVarArray>();
if (!varp) return;
UASSERT_OBJ(!varp->isVolatile(), vtxp, "Should not trace through volatile VarArray");
// Skip through intermediate variables
while (varp->srcp() && varp->srcp()->is<DfgVarArray>()) {
varp = varp->srcp()->as<DfgVarArray>();
UASSERT_OBJ(!varp->isVolatile(), vtxp, "Should not trace through volatile VarArray");
}
// Find driver
const DfgVertex* srcp = varp->srcp();
if (const DfgSpliceArray* const splicep = srcp->cast<DfgSpliceArray>()) {
srcp = splicep->driverAt(idxp->toSizeT());
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
}
// Trace the driver, which at this point must be a UnitArray
SET_RESULT(trace(srcp->as<DfgUnitArray>()->srcp(), m_msb, m_lsb));
}
void visit(DfgConcat* vtxp) override {
DfgVertex* const rhsp = vtxp->rhsp();
DfgVertex* const lhsp = vtxp->lhsp();
const uint32_t rWidth = rhsp->width();
// If the traced bits are wholly in the RHS
if (rWidth > m_msb) {
SET_RESULT(trace(rhsp, m_msb, m_lsb));
return;
}
// If the traced bits are wholly in the LHS
if (m_lsb >= rWidth) {
SET_RESULT(trace(lhsp, m_msb - rWidth, m_lsb - rWidth));
return;
}
// The traced bit spans both sides, trace both
DfgConcat* const resp = make<DfgConcat>(vtxp, m_msb - m_lsb + 1);
resp->rhsp(trace(rhsp, rWidth - 1, m_lsb));
resp->lhsp(trace(lhsp, m_msb - rWidth, 0));
SET_RESULT(resp);
}
void visit(DfgReplicate* vtxp) override {
DfgVertex* const srcp = vtxp->srcp();
const uint32_t sWidth = srcp->width();
// If we need more bits than the source, then we need the whole source
if (m_msb - m_lsb + 1 > sWidth) {
DfgReplicate* const repp = make<DfgReplicate>(vtxp, vtxp->width());
repp->srcp(trace(srcp, sWidth - 1, 0));
repp->countp(vtxp->countp()); // Always a DfgConst
DfgSel* const resp = make<DfgSel>(vtxp, m_msb - m_lsb + 1);
resp->fromp(repp);
resp->lsb(m_lsb);
SET_RESULT(resp);
return;
}
// If the requested bits are within the same repliacted word
if (m_msb / sWidth == m_lsb / sWidth) {
SET_RESULT(trace(srcp, m_msb % sWidth, m_lsb % sWidth));
return;
}
// The requested bits span two replicated words
DfgConcat* const resp = make<DfgConcat>(vtxp, m_msb - m_lsb + 1);
resp->rhsp(trace(srcp, sWidth - 1, m_lsb % sWidth));
resp->lhsp(trace(srcp, m_msb % sWidth, 0));
SET_RESULT(resp);
}
void visit(DfgExtend* vtxp) override {
DfgVertex* const srcp = vtxp->srcp();
const uint32_t sWidth = srcp->width();
// If the traced bits are wholly in the input
if (sWidth > m_msb) {
SET_RESULT(trace(srcp, m_msb, m_lsb));
return;
}
// If the traced bits are wholly in the extension
if (m_lsb >= sWidth) {
SET_RESULT(make<DfgConst>(vtxp, m_msb - m_lsb + 1));
return;
}
// The traced bits span both sides
DfgExtend* const resp = make<DfgExtend>(vtxp, m_msb - m_lsb + 1);
resp->srcp(trace(srcp, sWidth - 1, m_lsb));
SET_RESULT(resp);
}
void visit(DfgExtendS* vtxp) override {
DfgVertex* const srcp = vtxp->srcp();
const uint32_t sWidth = srcp->width();
// If the traced bits are wholly in the input
if (sWidth > m_msb) {
SET_RESULT(trace(srcp, m_msb, m_lsb));
return;
}
// If the traced bits are wholly in the extension
if (m_lsb >= sWidth) {
DfgVertex* const sp = trace(srcp, sWidth - 1, sWidth - 1);
if (m_msb == m_lsb) {
SET_RESULT(sp);
} else {
DfgExtendS* const resp = make<DfgExtendS>(vtxp, m_msb - m_lsb + 1);
resp->srcp(sp);
SET_RESULT(resp);
}
return;
}
// The traced bits span both sides
DfgExtendS* const resp = make<DfgExtendS>(vtxp, m_msb - m_lsb + 1);
resp->srcp(trace(srcp, sWidth - 1, m_lsb));
SET_RESULT(resp);
}
void visit(DfgSel* vtxp) override {
const uint32_t lsb = vtxp->lsb();
SET_RESULT(trace(vtxp->srcp(), m_msb + lsb, m_lsb + lsb));
}
void visit(DfgNot* vtxp) override {
DfgNot* const resp = make<DfgNot>(vtxp, m_msb - m_lsb + 1);
resp->srcp(trace(vtxp->srcp(), m_msb, m_lsb));
SET_RESULT(resp);
}
void visit(DfgAnd* vtxp) override { SET_RESULT(traceBitwiseBinary(vtxp)); }
void visit(DfgOr* vtxp) override { SET_RESULT(traceBitwiseBinary(vtxp)); }
void visit(DfgXor* vtxp) override { SET_RESULT(traceBitwiseBinary(vtxp)); }
void visit(DfgAdd* vtxp) override { SET_RESULT(traceAddSub(vtxp)); }
void visit(DfgSub* vtxp) override { SET_RESULT(traceAddSub(vtxp)); }
void visit(DfgRedAnd* vtxp) override { SET_RESULT(traceReduction(vtxp)); }
void visit(DfgRedOr* vtxp) override { SET_RESULT(traceReduction(vtxp)); }
void visit(DfgRedXor* vtxp) override { SET_RESULT(traceReduction(vtxp)); }
void visit(DfgEq* vtxp) override { SET_RESULT(traceCmp(vtxp)); }
void visit(DfgNeq* vtxp) override { SET_RESULT(traceCmp(vtxp)); }
void visit(DfgLt* vtxp) override { SET_RESULT(traceCmp(vtxp)); }
void visit(DfgLte* vtxp) override { SET_RESULT(traceCmp(vtxp)); }
void visit(DfgGt* vtxp) override { SET_RESULT(traceCmp(vtxp)); }
void visit(DfgGte* vtxp) override { SET_RESULT(traceCmp(vtxp)); }
void visit(DfgShiftR* vtxp) override {
DfgVertex* const lhsp = vtxp->lhsp();
if (const DfgConst* const rConstp = vtxp->rhsp()->cast<DfgConst>()) {
const uint32_t shiftAmnt = rConstp->toU32();
// Width of lower half of result
const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? 0 : vtxp->width() - shiftAmnt;
// If the traced bits are wholly in the input
if (lowerWidth > m_msb) {
SET_RESULT(trace(lhsp, m_msb + shiftAmnt, m_lsb + shiftAmnt));
return;
}
// If the traced bits are wholly in the extension
if (m_lsb >= lowerWidth) {
SET_RESULT(make<DfgConst>(vtxp, m_msb - m_lsb + 1));
return;
}
// The traced bits span both sides
DfgExtend* const resp = make<DfgExtend>(vtxp, m_msb - m_lsb + 1);
resp->srcp(trace(lhsp, lowerWidth - 1 + shiftAmnt, m_lsb + shiftAmnt));
SET_RESULT(resp);
return;
}
DfgShiftR* const shiftrp = make<DfgShiftR>(vtxp, vtxp->lhsp()->width() - m_lsb);
shiftrp->rhsp(trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0));
shiftrp->lhsp(trace(vtxp->lhsp(), vtxp->lhsp()->width() - 1, m_lsb));
if (m_msb == vtxp->lhsp()->width() - 1) {
SET_RESULT(shiftrp);
return;
}
DfgSel* const selp = make<DfgSel>(vtxp, m_msb - m_lsb + 1);
selp->fromp(shiftrp);
selp->lsb(0);
SET_RESULT(selp);
}
void visit(DfgShiftL* vtxp) override {
DfgVertex* const lhsp = vtxp->lhsp();
if (const DfgConst* const rConstp = vtxp->rhsp()->cast<DfgConst>()) {
const uint32_t shiftAmnt = rConstp->toU32();
// Width of lower half of result
const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? vtxp->width() : shiftAmnt;
// If the traced bits are wholly in the input
if (m_lsb >= lowerWidth) {
SET_RESULT(trace(lhsp, m_msb - shiftAmnt, m_lsb - shiftAmnt));
return;
}
// If the traced bits are wholly in the extension
if (lowerWidth > m_msb) {
SET_RESULT(make<DfgConst>(vtxp, m_msb - m_lsb + 1));
return;
}
// The traced bits span both sides
DfgConcat* const resp = make<DfgConcat>(vtxp, m_msb - m_lsb + 1);
resp->rhsp(make<DfgConst>(vtxp, resp->width() - (m_msb - lowerWidth + 1)));
resp->lhsp(trace(lhsp, m_msb - shiftAmnt, lowerWidth - shiftAmnt));
SET_RESULT(resp);
return;
}
DfgShiftL* const shiftlp = make<DfgShiftL>(vtxp, m_msb + 1);
shiftlp->rhsp(trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0));
shiftlp->lhsp(trace(vtxp->lhsp(), m_msb, 0));
if (m_lsb == 0) {
SET_RESULT(shiftlp);
return;
}
DfgSel* const selp = make<DfgSel>(vtxp, m_msb - m_lsb + 1);
selp->fromp(shiftlp);
selp->lsb(m_lsb);
SET_RESULT(selp);
}
void visit(DfgCond* vtxp) override {
DfgCond* const resp = make<DfgCond>(vtxp, m_msb - m_lsb + 1);
resp->condp(trace(vtxp->condp(), vtxp->condp()->width() - 1, 0));
resp->thenp(trace(vtxp->thenp(), m_msb, m_lsb));
resp->elsep(trace(vtxp->elsep(), m_msb, m_lsb));
SET_RESULT(resp);
}
#undef SET_RESULT
public:
// CONSTRUCTOR
TraceDriver(DfgGraph& dfg, Vtx2Scc& vtx2Scc)
: m_dfg{dfg}
, m_vtx2Scc{vtx2Scc} {
#ifdef VL_DEBUG
if (v3Global.opt.debugCheck()) {
m_lineCoverageFile.open( //
v3Global.opt.makeDir() + "/" + v3Global.opt.prefix()
+ "__V3DfgBreakCycles-TraceDriver-line-coverage.txt", //
std::ios_base::out | std::ios_base::app);
}
#endif
}
// Given a Vertex that is part of an SCC, return a vertex that is equivalent
// to 'vtxp[lsb +: width]', but is not part of the same SCC. This should only
// be called if the bit range is known to be independent of the SCC, so the
// trace can always succeed.
DfgVertex* apply(DfgVertex& vtx, uint32_t lsb, uint32_t width) {
VL_RESTORER(m_component);
m_component = m_vtx2Scc.at(&vtx);
return trace(&vtx, lsb + width - 1, lsb);
}
};
class IndependentBits final : public DfgVisitor {
// STATE
const Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map
// Vertex to current bit mask map. The mask is set for the bits that are independent of the SCC
std::unordered_map<const DfgVertex*, V3Number> m_vtxp2Mask;
std::deque<DfgVertex*> m_workList; // Work list for the traversal
std::unordered_map<DfgVertex*, bool> m_onWorkList; // Marks vertices on the work list
#ifdef VL_DEBUG
std::ofstream m_lineCoverageFile; // Line coverage file, just for testing
#endif
// METHODS
// Predicate to check if a vertex should be analysed directly
bool handledDirectly(const DfgVertex& vtx) const {
if (!vtx.isPacked()) return false;
if (vtx.is<DfgSplicePacked>()) return false;
return true;
}
// Retrieve the mask for the given vertex (create it with value 0 if needed)
V3Number& mask(const DfgVertex& vtx) {
UASSERT_OBJ(handledDirectly(vtx), &vtx, "Vertex should not be handled direclty");
// Look up (or create) mask for 'vtxp'
return m_vtxp2Mask
.emplace(std::piecewise_construct, //
std::forward_as_tuple(&vtx), //
std::forward_as_tuple(vtx.fileline(), static_cast<int>(vtx.width()), 0)) //
.first->second;
}
// Use this macro to call 'mask' in 'visit' methods. This also emits
// a line to m_lineCoverageFile for testing.
// TODO: Use C++20 std::source_location instead of a macro
#ifdef VL_DEBUG
#define MASK(vtxp) \
([this](const DfgVertex& vtx) -> V3Number& { \
if (VL_UNLIKELY(m_lineCoverageFile.is_open())) m_lineCoverageFile << __LINE__ << '\n'; \
return mask(vtx); \
}(*vtxp))
#else
#define MASK(vtxp) mask(*vtxp)
#endif
// Clear all bits at or below the most significant clear bit
void floodTowardsLsb(V3Number& num) {
for (int i = num.width() - 1; i >= 0; --i) {
if (num.bitIs1(i)) continue;
num.opSetRange(0, i + 1, '0');
break;
}
}
// Clear all bits at or above the least significant clear bit
void floodTowardsMsb(V3Number& num) {
for (int i = 0; i < num.width(); ++i) {
if (num.bitIs1(i)) continue;
num.opSetRange(i, num.width() - i, '0');
break;
}
}
void propagateFromDriver(V3Number& m, const DfgVertex* srcp) {
// If there is no driver, we are done
if (!srcp) return;
// If it is driven by a splice, we need to combine the masks of the drivers
if (const DfgSplicePacked* const splicep = srcp->cast<DfgSplicePacked>()) {
splicep->foreachDriver([&](const DfgVertex& src, uint32_t lo) {
m.opSelInto(MASK(&src), lo, src.width());
return false;
});
return;
}
// Otherwise, we just use the mask of the single driver
m = MASK(srcp);
}
// VISITORS
void visit(DfgVertex* vtxp) override { // LCOV_EXCL_START
UASSERT_OBJ(handledDirectly(*vtxp), vtxp, "Vertex should be handled direclty");
UINFO(9, "IndependentBits - Unhandled vertex type: " << vtxp->typeName());
// Conservative assumption about all bits being dependent prevails
} // LCOV_EXCL_STOP
void visit(DfgVarPacked* vtxp) override {
// We cannot trace through a volatile variable, so pretend all bits are dependent
if (vtxp->isVolatile()) return;
V3Number& m = MASK(vtxp);
DfgVertex* const srcp = vtxp->srcp();
DfgVertex* const defaultp = vtxp->defaultp();
// If there is a default driver, we start from that
if (defaultp) m = MASK(defaultp);
// Then propagate mask from the driver
propagateFromDriver(m, srcp);
}
void visit(DfgArraySel* vtxp) override {
// Only constant select
const DfgConst* const idxp = vtxp->bitp()->cast<DfgConst>();
if (!idxp) return;
// From a variable
const DfgVarArray* varp = vtxp->fromp()->cast<DfgVarArray>();
if (!varp) return;
// We cannot trace through a volatile variable, so pretend all bits are dependent
if (varp->isVolatile()) return;
// Skip through intermediate variables
while (varp->srcp() && varp->srcp()->is<DfgVarArray>()) {
varp = varp->srcp()->as<DfgVarArray>();
if (varp->isVolatile()) return;
}
// Find driver
const DfgVertex* srcp = varp->srcp();
if (!srcp) return;
if (const DfgSpliceArray* const splicep = srcp->cast<DfgSpliceArray>()) {
srcp = splicep->driverAt(idxp->toSizeT());
if (!srcp) return;
}
const DfgUnitArray* uap = srcp->cast<DfgUnitArray>();
Optimize complex combinational logic in DFG (#6298) This patch adds DfgLogic, which is a vertex that represents a whole, arbitrarily complex combinational AstAlways or AstAssignW in the DfgGraph. Implementing this requires computing the variables live at entry to the AstAlways (variables read by the block), so there is a new ControlFlowGraph data structure and a classical data-flow analysis based live variable analysis to do that at the variable level (as opposed to bit/element level). The actual CFG construction and live variable analysis is best effort, and might fail for currently unhandled constructs or data types. This can be extended later. V3DfgAstToDfg is changed to convert the Ast into an initial DfgGraph containing only DfgLogic, DfgVertexSplice and DfgVertexVar vertices. The DfgLogic are then subsequently synthesized into primitive operations by the new V3DfgSynthesize pass, which is a combination of the old V3DfgAstToDfg conversion and new code to handle AstAlways blocks with complex flow control. V3DfgSynthesize by default will synthesize roughly the same constructs as V3DfgAstToDfg used to handle before, plus any logic that is part of a combinational cycle within the DfgGraph. This enables breaking up these cycles, for which there are extensions to V3DfgBreakCycles in this patch as well. V3DfgSynthesize will then delete all non synthesized or non synthesizable DfgLogic vertices and the rest of the Dfg pipeline is identical, with minor changes to adjust for the changed representation. Because with this change we can now eliminate many more UNOPTFLAT, DFG has been disabled in all the tests that specifically target testing the scheduling and reporting of circular combinational logic.
2025-08-19 16:06:38 +02:00
if (!uap) return;
srcp = uap->srcp();
// Propagate from driver
propagateFromDriver(MASK(vtxp), srcp);
}
void visit(DfgConcat* vtxp) override {
const DfgVertex* const rhsp = vtxp->rhsp();
const DfgVertex* const lhsp = vtxp->lhsp();
V3Number& m = MASK(vtxp);
m.opSelInto(MASK(rhsp), 0, rhsp->width());
m.opSelInto(MASK(lhsp), rhsp->width(), lhsp->width());
}
void visit(DfgReplicate* vtxp) override {
const uint32_t count = vtxp->countp()->as<DfgConst>()->toU32();
const DfgVertex* const srcp = vtxp->srcp();
const uint32_t sWidth = srcp->width();
V3Number& vMask = MASK(vtxp);
V3Number& sMask = MASK(srcp);
for (uint32_t i = 0; i < count; ++i) vMask.opSelInto(sMask, i * sWidth, sWidth);
}
void visit(DfgSel* vtxp) override {
const uint32_t lsb = vtxp->lsb();
const uint32_t msb = lsb + vtxp->width() - 1;
MASK(vtxp).opSel(MASK(vtxp->fromp()), msb, lsb);
}
void visit(DfgExtend* vtxp) override {
const DfgVertex* const srcp = vtxp->srcp();
const uint32_t sWidth = srcp->width();
V3Number& s = MASK(srcp);
V3Number& m = MASK(vtxp);
m.opSelInto(s, 0, sWidth);
m.opSetRange(sWidth, vtxp->width() - sWidth, '1');
}
void visit(DfgExtendS* vtxp) override {
const DfgVertex* const srcp = vtxp->srcp();
const uint32_t sWidth = srcp->width();
V3Number& s = MASK(srcp);
V3Number& m = MASK(vtxp);
m.opSelInto(s, 0, sWidth);
m.opSetRange(sWidth, vtxp->width() - sWidth, s.bitIs0(sWidth - 1) ? '0' : '1');
}
void visit(DfgNot* vtxp) override { //
MASK(vtxp) = MASK(vtxp->srcp());
}
void visit(DfgAnd* vtxp) override { //
MASK(vtxp).opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp()));
}
void visit(DfgOr* vtxp) override { //
MASK(vtxp).opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp()));
}
void visit(DfgXor* vtxp) override { //
MASK(vtxp).opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp()));
}
void visit(DfgAdd* vtxp) override {
V3Number& m = MASK(vtxp);
m.opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp()));
floodTowardsMsb(m);
}
void visit(DfgSub* vtxp) override { // Same as Add: 2's complement (a - b) == (a + ~b + 1)
V3Number& m = MASK(vtxp);
m.opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp()));
floodTowardsMsb(m);
}
void visit(DfgRedAnd* vtxp) override { //
if (MASK(vtxp->lhsp()).isEqAllOnes()) { //
MASK(vtxp).setAllBits1();
}
}
void visit(DfgRedOr* vtxp) override { //
if (MASK(vtxp->lhsp()).isEqAllOnes()) { //
MASK(vtxp).setAllBits1();
}
}
void visit(DfgRedXor* vtxp) override { //
if (MASK(vtxp->lhsp()).isEqAllOnes()) { //
MASK(vtxp).setAllBits1();
}
}
void visit(DfgEq* vtxp) override {
const bool independent
= MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes();
MASK(vtxp).setBit(0, independent ? '1' : '0');
}
void visit(DfgNeq* vtxp) override {
const bool independent
= MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes();
MASK(vtxp).setBit(0, independent ? '1' : '0');
}
void visit(DfgLt* vtxp) override {
const bool independent
= MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes();
MASK(vtxp).setBit(0, independent ? '1' : '0');
}
void visit(DfgLte* vtxp) override {
const bool independent
= MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes();
MASK(vtxp).setBit(0, independent ? '1' : '0');
}
void visit(DfgGt* vtxp) override {
const bool independent
= MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes();
MASK(vtxp).setBit(0, independent ? '1' : '0');
}
void visit(DfgGte* vtxp) override {
const bool independent
= MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes();
MASK(vtxp).setBit(0, independent ? '1' : '0');
}
void visit(DfgShiftR* vtxp) override {
DfgVertex* const rhsp = vtxp->rhsp();
DfgVertex* const lhsp = vtxp->lhsp();
const uint32_t width = vtxp->width();
// Constant shift can be computed precisely
if (DfgConst* const rConstp = rhsp->cast<DfgConst>()) {
const uint32_t shiftAmount = rConstp->toU32();
V3Number& m = MASK(vtxp);
if (shiftAmount >= width) {
m.setAllBits1();
} else {
m.opShiftR(MASK(lhsp), rConstp->num());
m.opSetRange(width - shiftAmount, shiftAmount, '1');
}
return;
}
// Otherwise, as the shift amount is non-negative, all independent
// and consecutive top bits in the lhs yield an independent result
// if the shift amount is independent.
if (MASK(rhsp).isEqAllOnes()) {
V3Number& m = MASK(vtxp);
m = MASK(lhsp);
floodTowardsLsb(m);
}
}
void visit(DfgShiftL* vtxp) override {
DfgVertex* const rhsp = vtxp->rhsp();
DfgVertex* const lhsp = vtxp->lhsp();
const uint32_t width = vtxp->width();
// Constant shift can be computed precisely
if (DfgConst* const rConstp = rhsp->cast<DfgConst>()) {
const uint32_t shiftAmount = rConstp->toU32();
V3Number& m = MASK(vtxp);
if (shiftAmount >= width) {
m.setAllBits1();
} else {
m.opShiftL(MASK(lhsp), rConstp->num());
m.opSetRange(0, shiftAmount, '1');
}
return;
}
// Otherwise, as the shift amount is non-negative, all independent
// and consecutive bottom bits in the lhs yield an independent result
// if the shift amount is independent.
if (MASK(rhsp).isEqAllOnes()) {
V3Number& m = MASK(vtxp);
m = MASK(lhsp);
floodTowardsMsb(m);
}
}
void visit(DfgCond* vtxp) override {
if (MASK(vtxp->condp()).isEqAllOnes()) {
MASK(vtxp).opAnd(MASK(vtxp->thenp()), MASK(vtxp->elsep()));
}
}
#undef MASK
// Enqueue sinks of vertex to work list for traversal - only called from constructor
void enqueueSinks(DfgVertex& vtx) {
vtx.foreachSink([&](DfgVertex& sink) {
// Ignore if sink is not part of an SCC, already has all bits marked independent
if (!m_vtx2Scc.at(sink)) return false;
// If a vertex is not handled directly, recursively enqueue its sinks instead
if (!handledDirectly(sink)) {
enqueueSinks(sink);
return false;
}
// Otherwise just enqueue it
bool& onWorkList = m_onWorkList[&sink];
if (!onWorkList) {
UINFO(9, "Enqueuing vertex " << &sink << " " << sink.typeName());
m_workList.emplace_back(&sink);
}
onWorkList = true;
return false;
});
};
IndependentBits(DfgGraph& dfg, const Vtx2Scc& vtx2Scc)
: m_vtx2Scc{vtx2Scc} {
#ifdef VL_DEBUG
if (v3Global.opt.debugCheck()) {
m_lineCoverageFile.open( //
v3Global.opt.makeDir() + "/" + v3Global.opt.prefix()
+ "__V3DfgBreakCycles-IndependentBits-line-coverage.txt", //
std::ios_base::out | std::ios_base::app);
}
#endif
// Set up the initial conditions:
// - For vertices not in an SCC, mark all bits as independent
// - For vertices in an SCC, dispatch to analyse at least once, as some vertices
// always assign constants bits, which are always independent (eg Extend/Shift)
// Enqueue sinks of all SCC vertices that have at least one independent bit
dfg.forEachVertex([&](DfgVertex& vtx) {
if (!handledDirectly(vtx)) return;
if (m_vtx2Scc.at(&vtx)) return;
mask(vtx).setAllBits1();
});
dfg.forEachVertex([&](DfgVertex& vtx) {
if (!handledDirectly(vtx)) return;
if (!m_vtx2Scc.at(&vtx)) return;
iterate(&vtx);
UINFO(9, "Initial independent bits of " << &vtx << " " << vtx.typeName() << " are: "
<< mask(vtx).displayed(vtx.fileline(), "%b"));
if (!mask(vtx).isEqZero()) enqueueSinks(vtx);
});
// Propagate independent bits until no more changes are made
while (!m_workList.empty()) {
// Grab next item
DfgVertex* const currp = m_workList.front();
m_workList.pop_front();
m_onWorkList[currp] = false;
// Should not enqueue vertices that are not in an SCC
UASSERT_OBJ(m_vtx2Scc.at(currp), currp, "Vertex should be in an SCC");
// Should only enqueue packed vertices
UASSERT_OBJ(handledDirectly(*currp), currp, "Vertex should be handled directly");
// Grab current mask of item
const V3Number& currMask = mask(*currp);
// Remember current mask so we can check if it changed
const V3Number prevMask = currMask;
UINFO(9, "Analyzing independent bits of " << currp << " " << currp->typeName());
// Dispatch it to update the mask in the visit methods
iterate(currp);
// If mask changed, enqueue sinks
if (!prevMask.isCaseEq(currMask)) {
UINFO(9, "Independent bits of " //
<< currp << " " << currp->typeName() << " changed" //
<< "\n form: " << prevMask.displayed(currp->fileline(), "%b")
<< "\n to: " << currMask.displayed(currp->fileline(), "%b"));
enqueueSinks(*currp);
// Check the mask only ever expands (no bit goes 1 -> 0)
if (VL_UNLIKELY(v3Global.opt.debugCheck())) {
V3Number notCurr{currMask};
notCurr.opNot(currMask);
V3Number prevAndNotCurr{currMask};
prevAndNotCurr.opAnd(prevMask, notCurr);
UASSERT_OBJ(prevAndNotCurr.isEqZero(), currp, "Mask should only expand");
}
}
}
}
public:
// Given a DfgGraph, and a map from vertices to the SCCs they reside in,
// returns a map from vertices to a bit mask, where a bit in the mask is
// set if the corresponding bit in that vertex is known to be independent
// of the values of vertices in the same SCC as the vertex resides in.
static std::unordered_map<const DfgVertex*, V3Number> apply(DfgGraph& dfg,
const Vtx2Scc& vtx2Scc) {
return std::move(IndependentBits{dfg, vtx2Scc}.m_vtxp2Mask);
}
};
class FixUp final {
DfgGraph& m_dfg; // The graph being processed
Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map
TraceDriver m_traceDriver{m_dfg, m_vtx2Scc};
// The independent bits map
const std::unordered_map<const DfgVertex*, V3Number> m_independentBits
= IndependentBits::apply(m_dfg, m_vtx2Scc);
size_t m_nImprovements = 0; // Number of improvements mde
// Returns a bitmask set if that bit of 'vtx' is used (has a sink)
static V3Number computeUsedBits(DfgVertex& vtx) {
V3Number result{vtx.fileline(), static_cast<int>(vtx.width()), 0};
vtx.foreachSink([&result](DfgVertex& sink) {
// If used via a Sel, mark the selected bits used
if (const DfgSel* const selp = sink.cast<DfgSel>()) {
result.opSetRange(selp->lsb(), selp->width(), '1');
return false;
}
// Used without a Sel, so all bits are used
result.setAllBits1();
return true;
});
return result;
}
static std::string debugStr(const DfgVertex& vtx) { // LCOV_EXCL_START
if (const DfgArraySel* const aselp = vtx.cast<DfgArraySel>()) {
const size_t i = aselp->bitp()->as<DfgConst>()->toSizeT();
return debugStr(*aselp->fromp()) + "[" + std::to_string(i) + "]";
}
if (const DfgVertexVar* const varp = vtx.cast<DfgVertexVar>()) {
return varp->nodep()->name();
}
vtx.v3fatalSrc("Unhandled node type");
} // LCOV_EXCL_STOP
// Trace drivers of independent bits of 'vtxp' in the range '[hi:lo]'
// append replacement terms to 'termps'.
void gatherTerms(std::vector<DfgVertex*>& termps, DfgVertex& vtx, const V3Number& indpBits,
const uint32_t hi, const uint32_t lo) {
// Iterate through consecutive dependent/non-dependet ranges within [hi:lo]
bool isIndependent = indpBits.bitIs1(lo);
uint32_t lsb = lo;
for (uint32_t msb = lo; msb <= hi; ++msb) {
const bool endRange = msb == hi || isIndependent != indpBits.bitIs1(msb + 1);
if (!endRange) continue;
const uint32_t width = msb - lsb + 1;
// Compute the term to use for this range
if (isIndependent) {
// If the range is independent of the cycles, trace its driver
++m_nImprovements;
termps.emplace_back(m_traceDriver.apply(vtx, lsb, width));
} else {
// If dependent, fall back on using the part of the variable
DfgSel* const selp = new DfgSel{m_dfg, vtx.fileline(), DfgDataType::packed(width)};
// Same component as 'vtxp', as reads 'vtxp' and will replace 'vtxp'
m_vtx2Scc[selp] = m_vtx2Scc.at(vtx);
// Do not connect selp->fromp yet, need to do afer replacing 'vtxp'
selp->lsb(lsb);
termps.emplace_back(selp);
}
// Next iteration
isIndependent = !isIndependent;
lsb = msb + 1;
}
}
void fixUpPacked(DfgVertex& vtx) {
UASSERT_OBJ(vtx.isPacked(), &vtx, "Should be a packed type");
UASSERT_OBJ(!vtx.is<DfgSplicePacked>(), &vtx, "Should not be a splice");
// Get which bits of 'vtxp' are independent of the SCCs
const V3Number& indpBits = m_independentBits.at(&vtx);
UINFO(9, "Independent bits of '" << debugStr(vtx) << "' are "
<< indpBits.displayed(vtx.fileline(), "%b"));
// Can't do anything if all bits are dependent
if (indpBits.isEqZero()) return;
// Figure out which bits of 'vtxp' are used
const V3Number usedBits = computeUsedBits(vtx);
UINFO(9, "Used bits of '" << debugStr(vtx) << "' are "
<< usedBits.displayed(vtx.fileline(), "%b"));
// Nothing to do if no used bits are independen (all used bits are dependent)
V3Number usedAndIndependent{vtx.fileline(), static_cast<int>(vtx.width()), 0};
usedAndIndependent.opAnd(usedBits, indpBits);
if (usedAndIndependent.isEqZero()) return;
// We are computing the terms to concatenate and replace 'vtxp' with
std::vector<DfgVertex*> termps;
// Iterate through consecutive used/unused ranges
FileLine* const flp = vtx.fileline();
const uint32_t width = vtx.width();
bool isUsed = usedBits.bitIs1(0); // Is current range used
uint32_t lsb = 0; // LSB of current range
for (uint32_t msb = 0; msb < width; ++msb) {
const bool endRange = msb == width - 1 || isUsed != usedBits.bitIs1(msb + 1);
if (!endRange) continue;
if (isUsed) {
// The range is used, compute the replacement terms
gatherTerms(termps, vtx, indpBits, msb, lsb);
} else {
// The range is not used, just use constant 0 as a placeholder
DfgConst* const constp = new DfgConst{m_dfg, flp, msb - lsb + 1, 0};
m_vtx2Scc[constp] = 0;
termps.emplace_back(constp);
}
// Next iteration
isUsed = !isUsed;
lsb = msb + 1;
}
2025-08-15 09:20:36 +02:00
// Concatenate all the terms to create the replacement
DfgVertex* replacementp = termps.front();
const uint64_t vComp = m_vtx2Scc.at(vtx);
2025-08-15 09:20:36 +02:00
for (size_t i = 1; i < termps.size(); ++i) {
DfgVertex* const termp = termps[i];
const uint32_t catWidth = replacementp->width() + termp->width();
DfgConcat* const catp = new DfgConcat{m_dfg, flp, DfgDataType::packed(catWidth)};
2025-08-15 09:20:36 +02:00
catp->rhsp(replacementp);
catp->lhsp(termp);
// Need to figure out which component the replacement vertex
// belongs to. If any of the terms are of the original
// component, it's part of that component, otherwise its not
// cyclic (all terms are from outside the original component,
// and feed into the original component).
const uint64_t tComp = m_vtx2Scc.at(termp);
const uint64_t rComp = m_vtx2Scc.at(replacementp);
m_vtx2Scc[catp] = tComp == vComp || rComp == vComp ? vComp : 0;
2025-08-15 09:20:36 +02:00
replacementp = catp;
}
// Replace the vertex
vtx.replaceWith(replacementp);
2025-08-15 09:20:36 +02:00
// Connect the Sel nodes in the replacement
for (DfgVertex* const termp : termps) {
if (DfgSel* const selp = termp->cast<DfgSel>()) {
if (!selp->fromp()) selp->fromp(&vtx);
}
}
}
void main(DfgVertexVar& var) {
UINFO(9, "FixUp of " << var.nodep()->name());
if (var.is<DfgVarPacked>()) {
// For Packed variables, fix up as whole
fixUpPacked(var);
} else if (var.is<DfgVarArray>()) {
// For array variables, fix up element-wise
const uint64_t component = m_vtx2Scc.at(var);
var.foreachSink([&](DfgVertex& sink) {
// Ignore if sink is not part of same cycle
if (m_vtx2Scc.at(sink) != component) return false;
// Only handle ArraySels with constant index
DfgArraySel* const aselp = sink.cast<DfgArraySel>();
if (!aselp) return false;
if (!aselp->bitp()->is<DfgConst>()) return false;
// Fix up the word
fixUpPacked(*aselp);
return false;
});
}
UINFO(9, "FixUp made " << m_nImprovements << " improvements");
}
FixUp(DfgGraph& dfg, Vtx2Scc& vtx2Scc)
: m_dfg{dfg}
, m_vtx2Scc{vtx2Scc} {}
public:
// Compute which bits of vertices are independent of the SCC they reside
// in, and replace rferences to used independent bits with an equivalent
// vertex that is not part of the same SCC.
static size_t apply(DfgGraph& dfg, Vtx2Scc& vtx2Scc) {
FixUp fixUp{dfg, vtx2Scc};
// TODO: Compute minimal feedback vertex set and cut only those.
// Sadly that is a computationally hard problem.
// Fix up cyclic variables
for (DfgVertexVar& vtx : dfg.varVertices()) {
if (vtx2Scc.at(vtx)) fixUp.main(vtx);
}
// Return the number of improvements made
return fixUp.m_nImprovements;
}
};
std::pair<std::unique_ptr<DfgGraph>, bool> //
breakCycles(const DfgGraph& dfg, V3DfgContext& ctx) {
// Shorthand for dumping graph at given dump level
const auto dump = [&](int level, const DfgGraph& dfg, const std::string& name) {
if (dumpDfgLevel() >= level) dfg.dumpDotFilePrefixed(ctx.prefix() + "breakCycles-" + name);
};
// Can't do much with trivial things ('a = a' or 'a[1] = a[0]'), so bail
if (dfg.size() <= 2) {
UINFO(7, "Graph is trivial");
dump(9, dfg, "trivial");
++ctx.m_breakCyclesContext.m_nTrivial;
return {nullptr, false};
}
// AstNetlist/AstNodeModule user2 used as sequence numbers for temporaries
const VNUser2InUse user2InUse;
// Show input for debugging
dump(7, dfg, "input");
// We might fail to make any improvements, so first create a clone of the
// graph. This is what we will be working on, and return if successful.
// Do not touch the input graph.
std::unique_ptr<DfgGraph> resultp = dfg.clone();
// Just shorthand for code below
DfgGraph& res = *resultp;
dump(9, res, "clone");
// How many improvements have we made
size_t nImprovements = 0;
size_t prevNImprovements;
// Iterate while an improvement can be made and the graph is still cyclic
do {
// Color SCCs (populates DfgVertex::user<uint64_t>())
Vtx2Scc vtx2Scc = res.makeUserMap<uint64_t>();
const uint32_t numNonTrivialSCCs
= V3DfgPasses::colorStronglyConnectedComponents(res, vtx2Scc);
// Congrats if it has become acyclic
if (!numNonTrivialSCCs) {
UINFO(7, "Graph became acyclic after " << nImprovements << " improvements");
dump(7, res, "result-acyclic");
++ctx.m_breakCyclesContext.m_nFixed;
return {std::move(resultp), true};
}
// Fix up independent ranges in vertices
UINFO(9, "New iteration after " << nImprovements << " improvements");
prevNImprovements = nImprovements;
const size_t nFixed = FixUp::apply(res, vtx2Scc);
if (nFixed) {
nImprovements += nFixed;
ctx.m_breakCyclesContext.m_nImprovements += nFixed;
dump(9, res, "FixUp");
}
} while (nImprovements != prevNImprovements);
if (dumpDfgLevel() >= 9) {
Vtx2Scc vtx2Scc = res.makeUserMap<uint64_t>();
V3DfgPasses::colorStronglyConnectedComponents(res, vtx2Scc);
res.dumpDotFilePrefixed(ctx.prefix() + "breakCycles-remaining", [&](const DfgVertex& vtx) {
return vtx2Scc[vtx]; //
});
}
// If an improvement was made, return the still cyclic improved graph
if (nImprovements) {
UINFO(7, "Graph was improved " << nImprovements << " times");
dump(7, res, "result-improved");
++ctx.m_breakCyclesContext.m_nImproved;
return {std::move(resultp), false};
}
// No improvement was made
UINFO(7, "Graph NOT improved");
dump(7, res, "result-original");
++ctx.m_breakCyclesContext.m_nUnchanged;
return {nullptr, false};
}
} //namespace V3DfgBreakCycles
std::pair<std::unique_ptr<DfgGraph>, bool> //
V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgContext& ctx) {
auto pair = V3DfgBreakCycles::breakCycles(dfg, ctx);
if (pair.first) {
if (v3Global.opt.debugCheck()) V3DfgPasses::typeCheck(*pair.first);
V3DfgPasses::removeUnused(*pair.first);
}
return pair;
}