Optimize more cycles in DFG (#6173)

Added a second algorithm to break cycles in DFG by identifying which
bits of a circular variable are actually independent of the variable,
then reuse the existing (but extended) driver tracing algorithm to
eliminate them.

This can fix up things like: `assign gray = binary ^ (gray >> 1)`
This commit is contained in:
Geza Lore 2025-07-11 19:19:09 +01:00 committed by GitHub
parent 2fc12d951e
commit 77180c4020
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 709 additions and 71 deletions

View File

@ -20,6 +20,7 @@
#include "V3DfgPasses.h"
#include "V3Hash.h"
#include <deque>
#include <fstream>
#include <limits>
#include <unordered_map>
@ -191,32 +192,56 @@ class TraceDriver final : public DfgVisitor {
DfgGraph& m_dfg; // The graph being processed
// The strongly connected component we are trying to escape
const uint32_t m_component;
const bool m_aggressive; // Trace aggressively, creating intermediate ops
uint32_t m_lsb = 0; // LSB to extract from the currently visited Vertex
uint32_t m_msb = 0; // MSB to extract from the currently visited Vertex
// Result of tracing the currently visited Vertex. Use SET_RESULT below!
DfgVertex* m_resp = nullptr;
std::vector<DfgVertex*> m_newVtxps; // New vertices created during the traversal
std::ofstream m_lineCoverageFile; // Line coverage file, just for testing
std::vector<Visited> m_stack; // Stack of currently visited vertices
// Denotes if a 'Visited' entry appear in m_stack
std::unordered_map<Visited, bool, Visited::Hash, Visited::Equal> m_visited;
std::ofstream m_lineCoverageFile; // Line coverage file, just for testing
// METHODS
// Create and return a new Vertex and add it to m_newVtxps. You should
// Create and return a new Vertex and add it to m_newVtxps. Fileline is
// taken from 'refp', but 'refp' is otherwise not used. You should
// always use this to create new vertices, so unused ones (if a trace
// eventually fails) can be cleaned up at the end.
template <typename Vertex>
Vertex* make(FileLine* flp, uint32_t width) {
Vertex* make(const DfgVertex* refp, uint32_t width) {
static_assert(std::is_base_of<DfgVertex, Vertex>::value //
&& !std::is_base_of<DfgVertexVar, Vertex>::value //
&& !std::is_same<DfgConst, DfgVertex>::value,
"Should only make operation vertices");
AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(width);
Vertex* const vtxp = new Vertex{m_dfg, flp, dtypep};
m_newVtxps.emplace_back(vtxp);
return vtxp;
&& !std::is_base_of<DfgVertexVar, Vertex>::value,
"Should only make operation vertices and constants");
constexpr bool okWithoutAggressive = //
std::is_same<DfgConst, Vertex>::value //
|| std::is_same<DfgSel, Vertex>::value //
|| std::is_same<DfgConcat, Vertex>::value //
|| std::is_same<DfgExtend, Vertex>::value;
UASSERT_OBJ(
okWithoutAggressive || m_aggressive, refp,
"Should only create Const, Sel, Concat, Exend Vertices without aggressive mode");
if VL_CONSTEXPR_CXX17 (std::is_same<DfgConst, Vertex>::value) {
DfgConst* const vtxp = new DfgConst{m_dfg, refp->fileline(), width};
m_newVtxps.emplace_back(vtxp);
return reinterpret_cast<Vertex*>(vtxp);
} else {
// TODO: this is a gross hack around lack of C++17 'if constexpr' Vtx is always Vertex
// when this code is actually executed, but needs a fudged type to type check when
// Vertex is DfgConst, in which case this code is unreachable ...
using Vtx = typename std::conditional<std::is_same<DfgConst, Vertex>::value, DfgSel,
Vertex>::type;
AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(width);
Vtx* const vtxp = new Vtx{m_dfg, refp->fileline(), dtypep};
m_newVtxps.emplace_back(vtxp);
return reinterpret_cast<Vertex*>(vtxp);
}
}
// Continue tracing drivers of the given vertex, at the given LSB. Every
@ -256,7 +281,7 @@ class TraceDriver final : public DfgVisitor {
// then we found what we were looking for.
if (msb != vtxp->width() - 1 || lsb != 0) {
// Apply a Sel to extract the relevant bits if only a part is needed
DfgSel* const selp = make<DfgSel>(vtxp->fileline(), msb - lsb + 1);
DfgSel* const selp = make<DfgSel>(vtxp, msb - lsb + 1);
selp->fromp(vtxp);
selp->lsb(lsb);
m_resp = selp;
@ -280,9 +305,49 @@ class TraceDriver final : public DfgVisitor {
m_stack.pop_back();
// Done
if (!m_resp) {
UINFO(9, "TraceDriver - Failed to trace vertex of type: " << vtxp->typeName());
}
return m_resp;
}
template <typename Vertex>
Vertex* bitwiseBinary(Vertex* vtxp) {
static_assert(std::is_base_of<DfgVertexBinary, Vertex>::value,
"Should only call on DfgVertexBinary");
if (DfgVertex* const rp = trace(vtxp->rhsp(), m_msb, m_lsb)) {
if (DfgVertex* const lp = trace(vtxp->lhsp(), m_msb, m_lsb)) {
Vertex* const resp = make<Vertex>(vtxp, m_msb - m_lsb + 1);
resp->rhsp(rp);
resp->lhsp(lp);
return resp;
}
}
return nullptr;
}
// Predicate to do determine if vtxp[$bits(vtxp)-1:idx] is known to be zero
// Somewhat rudimentary but sufficient for current purposes.
static bool knownToBeZeroAtAndAbove(const DfgVertex* vtxp, uint32_t idx) {
if (const DfgConcat* const catp = vtxp->cast<DfgConcat>()) {
DfgConst* const lConstp = catp->lhsp()->cast<DfgConst>();
return lConstp && idx >= catp->rhsp()->width() && lConstp->isZero();
}
if (const DfgExtend* const extp = vtxp->cast<DfgExtend>()) {
return idx >= extp->srcp()->width();
}
return false;
}
// Like knownToBeZeroAtAndAbove, but checks vtxp[idx:0]
static bool knownToBeZeroAtAndBelow(const DfgVertex* vtxp, uint32_t idx) {
if (const DfgConcat* const catp = vtxp->cast<DfgConcat>()) {
DfgConst* const rConstp = catp->rhsp()->cast<DfgConst>();
return rConstp && idx < rConstp->width() && rConstp->isZero();
}
return false;
}
// Use this macro to set the result in 'visit' methods. This also emits
// a line to m_lineCoverageFile for testing.
// TODO: Use C++20 std::source_location instead of a macro
@ -327,10 +392,10 @@ class TraceDriver final : public DfgVisitor {
SET_RESULT(trace(lhsp, m_msb - rWidth, m_lsb - rWidth));
return;
}
// The traced bit span both sides, attempt to trace both
// The traced bit spans both sides, attempt to trace both
if (DfgVertex* const rp = trace(rhsp, rWidth - 1, m_lsb)) {
if (DfgVertex* const lp = trace(lhsp, m_msb - rWidth, 0)) {
DfgConcat* const resp = make<DfgConcat>(vtxp->fileline(), m_msb - m_lsb + 1);
DfgConcat* const resp = make<DfgConcat>(vtxp, m_msb - m_lsb + 1);
resp->rhsp(rp);
resp->lhsp(lp);
SET_RESULT(resp);
@ -341,10 +406,24 @@ class TraceDriver final : public DfgVisitor {
void visit(DfgExtend* vtxp) override {
DfgVertex* const srcp = vtxp->srcp();
if (srcp->width() > m_msb) {
const uint32_t sWidth = srcp->width();
// If the traced bits are wholly in the input
if (sWidth > m_msb) {
SET_RESULT(trace(srcp, m_msb, m_lsb));
return;
}
// If the traced bits are wholly in the extension
if (m_lsb >= sWidth) {
SET_RESULT(make<DfgConst>(vtxp, m_msb - m_lsb + 1));
return;
}
// The traced bits span both sides
if (DfgVertex* const sp = trace(srcp, sWidth - 1, m_lsb)) {
DfgExtend* const resp = make<DfgExtend>(vtxp, m_msb - m_lsb + 1);
resp->srcp(sp);
SET_RESULT(resp);
return;
}
}
void visit(DfgSel* vtxp) override {
@ -353,12 +432,107 @@ class TraceDriver final : public DfgVisitor {
return;
}
void visit(DfgNot* vtxp) override {
if (!m_aggressive) return;
if (DfgVertex* const sp = trace(vtxp->srcp(), m_msb, m_lsb)) {
DfgNot* const resp = make<DfgNot>(vtxp, m_msb - m_lsb + 1);
resp->srcp(sp);
SET_RESULT(resp);
return;
}
}
void visit(DfgAnd* vtxp) override {
if (!m_aggressive) return;
SET_RESULT(bitwiseBinary(vtxp));
}
void visit(DfgOr* vtxp) override {
if (!m_aggressive) return;
SET_RESULT(bitwiseBinary(vtxp));
}
void visit(DfgXor* vtxp) override {
if (!m_aggressive) return;
SET_RESULT(bitwiseBinary(vtxp));
}
void visit(DfgShiftR* vtxp) override {
DfgVertex* const lhsp = vtxp->lhsp();
if (DfgConst* const rConstp = vtxp->rhsp()->cast<DfgConst>()) {
const uint32_t shiftAmnt = rConstp->toU32();
// Width of lower half of result
const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? 0 : vtxp->width() - shiftAmnt;
// If the traced bits are wholly in the input
if (lowerWidth > m_msb) {
SET_RESULT(trace(lhsp, m_msb + shiftAmnt, m_lsb + shiftAmnt));
return;
}
// If the traced bits are wholly in the extension
if (m_lsb >= lowerWidth) {
SET_RESULT(make<DfgConst>(vtxp, m_msb - m_lsb + 1));
return;
}
// The traced bits span both sides
if (DfgVertex* const sp = trace(lhsp, lowerWidth - 1 + shiftAmnt, m_lsb + shiftAmnt)) {
DfgExtend* const resp = make<DfgExtend>(vtxp, m_msb - m_lsb + 1);
resp->srcp(sp);
SET_RESULT(resp);
return;
}
return;
}
// The shift amount is non-negative, so we can conclude zero if all
// bits at and above the LSB we seek are zeroes
if (knownToBeZeroAtAndAbove(lhsp, m_lsb)) {
SET_RESULT(make<DfgConst>(vtxp, m_msb - m_lsb + 1));
return;
}
}
void visit(DfgShiftL* vtxp) override {
DfgVertex* const lhsp = vtxp->lhsp();
if (DfgConst* const rConstp = vtxp->rhsp()->cast<DfgConst>()) {
const uint32_t shiftAmnt = rConstp->toU32();
// Width of lower half of result
const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? vtxp->width() : shiftAmnt;
// If the traced bits are wholly in the input
if (m_lsb >= lowerWidth) {
SET_RESULT(trace(lhsp, m_msb - shiftAmnt, m_lsb - shiftAmnt));
return;
}
// If the traced bits are wholly in the extension
if (lowerWidth > m_msb) {
SET_RESULT(make<DfgConst>(vtxp, m_msb - m_lsb + 1));
return;
}
// The traced bits span both sides
if (DfgVertex* const sp = trace(lhsp, m_msb - shiftAmnt, lowerWidth - shiftAmnt)) {
DfgConcat* const resp = make<DfgConcat>(vtxp, m_msb - m_lsb + 1);
resp->rhsp(make<DfgConst>(vtxp, resp->width() - sp->width()));
resp->lhsp(sp);
SET_RESULT(resp);
return;
}
return;
}
// The shift amount is non-negative, so we can conclude zero if all
// bits at and below the MSB we seek are zeroes
if (knownToBeZeroAtAndBelow(lhsp, m_msb)) {
SET_RESULT(make<DfgConst>(vtxp, m_msb - m_lsb + 1));
return;
}
}
#undef SET_RESULT
// CONSTRUCTOR
TraceDriver(DfgGraph& dfg, uint32_t component)
TraceDriver(DfgGraph& dfg, uint32_t component, bool aggressive)
: m_dfg{dfg}
, m_component{component} {
, m_component{component}
, m_aggressive{aggressive} {
if (v3Global.opt.debugCheck()) {
m_lineCoverageFile.open( //
v3Global.opt.makeDir() + "/" + v3Global.opt.prefix()
@ -371,9 +545,13 @@ public:
// Given a Vertex that is part of an SCC denoted by vtxp->user<uint32_t>(),
// return a vertex that is equivalent to 'vtxp[lsb +: width]', but is not
// part of the same SCC. Returns nullptr if such a vertex cannot be
// computed. This can add new vertices to the graph.
static DfgVertex* apply(DfgGraph& dfg, DfgVertex* vtxp, uint32_t lsb, uint32_t width) {
TraceDriver traceDriver{dfg, vtxp->user<uint32_t>()};
// computed. This can add new vertices to the graph. The 'aggressive' flag
// enables creating many intermediate operations. This should only be set
// if it is reasonably certain the tracing will succeed, otherwise we can
// waste a lot of compute.
static DfgVertex* apply(DfgGraph& dfg, DfgVertex* vtxp, uint32_t lsb, uint32_t width,
bool aggressive) {
TraceDriver traceDriver{dfg, vtxp->user<uint32_t>(), aggressive};
// Find the out-of-component driver of the given vertex
DfgVertex* const resultp = traceDriver.trace(vtxp, lsb + width - 1, lsb);
// Delete unused newly created vertices (these can be created if a
@ -393,6 +571,385 @@ public:
}
};
class IndependentBits final : public DfgVisitor {
// STATE
DfgVarPacked* const m_varp; // The variable we are computing dependencies for
std::deque<DfgVertex*> m_workList; // Work list for traversal
// Vertex to current bit mask map. The mask is set for the bits that **depend** on 'm_varp'.
std::unordered_map<const DfgVertex*, V3Number> m_vtxp2Mask;
std::ofstream m_lineCoverageFile; // Line coverage file, just for testing
// METHODS
// Retrieve the mask for the given vertex (create it with value 0 if needed)
V3Number& mask(const DfgVertex* vtxp) {
return m_vtxp2Mask
.emplace(std::piecewise_construct, //
std::forward_as_tuple(vtxp), //
std::forward_as_tuple(vtxp->fileline(), static_cast<int>(vtxp->width()), 0))
.first->second;
}
// Use this macro to call 'mask' in 'visit' methods. This also emits
// a line to m_lineCoverageFile for testing.
// TODO: Use C++20 std::source_location instead of a macro
#define MASK(vtxp) \
([this](const DfgVertex* p) -> V3Number& { \
if (VL_UNLIKELY(m_lineCoverageFile.is_open())) m_lineCoverageFile << __LINE__ << '\n'; \
return mask(p); \
}(vtxp))
// VISITORS
void visit(DfgVertex* vtxp) override {
UINFO(9, "Unhandled vertex type " << vtxp->typeName());
// Conservatively assume it depends on the variable...
mask(vtxp).setAllBits1(); // intentionally not using MASK here
}
void visit(DfgVarPacked* vtxp) override {
// The mask of the traced variable is known to be all ones
if (vtxp == m_varp) return;
// Combine the masks of all drivers
V3Number& m = MASK(vtxp);
vtxp->forEachSourceEdge([&](DfgEdge& edge, size_t i) {
const DfgVertex* const srcp = edge.sourcep();
m.opSelInto(MASK(srcp), vtxp->driverLsb(i), srcp->width());
});
}
void visit(DfgConcat* vtxp) override {
const DfgVertex* const rhsp = vtxp->rhsp();
const DfgVertex* const lhsp = vtxp->lhsp();
V3Number& m = MASK(vtxp);
m.opSelInto(MASK(rhsp), 0, rhsp->width());
m.opSelInto(MASK(lhsp), rhsp->width(), lhsp->width());
}
void visit(DfgReplicate* vtxp) override {
const uint32_t count = vtxp->countp()->as<DfgConst>()->toU32();
const DfgVertex* const srcp = vtxp->srcp();
const uint32_t sWidth = srcp->width();
V3Number& vMask = MASK(vtxp);
V3Number& sMask = MASK(srcp);
for (uint32_t i = 0; i < count; ++i) vMask.opSelInto(sMask, i * sWidth, sWidth);
}
void visit(DfgSel* vtxp) override {
const uint32_t lsb = vtxp->lsb();
const uint32_t msb = lsb + vtxp->width() - 1;
MASK(vtxp).opSel(MASK(vtxp->fromp()), msb, lsb);
}
void visit(DfgExtend* vtxp) override {
const DfgVertex* const srcp = vtxp->srcp();
MASK(vtxp).opSelInto(MASK(srcp), 0, srcp->width());
}
void visit(DfgNot* vtxp) override { //
MASK(vtxp) = MASK(vtxp->srcp());
}
void visit(DfgAnd* vtxp) override { //
MASK(vtxp).opOr(MASK(vtxp->lhsp()), MASK(vtxp->rhsp()));
}
void visit(DfgOr* vtxp) override { //
MASK(vtxp).opOr(MASK(vtxp->lhsp()), MASK(vtxp->rhsp()));
}
void visit(DfgXor* vtxp) override { //
MASK(vtxp).opOr(MASK(vtxp->lhsp()), MASK(vtxp->rhsp()));
}
void visit(DfgShiftR* vtxp) override {
DfgVertex* const rhsp = vtxp->rhsp();
DfgVertex* const lhsp = vtxp->lhsp();
const uint32_t width = vtxp->width();
// Constant shift can be computed precisely
if (DfgConst* const rConstp = rhsp->cast<DfgConst>()) {
const uint32_t shiftAmount = rConstp->toU32();
if (shiftAmount >= width) return;
V3Number shiftedMask{lhsp->fileline(), static_cast<int>(width), 0};
shiftedMask.opShiftR(MASK(lhsp), rConstp->num());
MASK(vtxp).opSelInto(shiftedMask, 0, width - shiftAmount);
return;
}
// Otherwise, as the shift amount is non-negative, any bit at or below
// the most significant dependent bit might be dependent
V3Number& lMask = MASK(lhsp);
V3Number& vMask = MASK(vtxp);
int idx = width - 1;
while (idx >= 0 && lMask.bitIs0(idx)) --idx;
while (idx >= 0) vMask.setBit(idx--, '1');
}
void visit(DfgShiftL* vtxp) override {
DfgVertex* const rhsp = vtxp->rhsp();
DfgVertex* const lhsp = vtxp->lhsp();
const uint32_t width = vtxp->width();
// Constant shift can be computed precisely
if (DfgConst* const rConstp = rhsp->cast<DfgConst>()) {
const uint32_t shiftAmount = rConstp->toU32();
if (shiftAmount >= width) return;
V3Number shiftedMask{lhsp->fileline(), static_cast<int>(width), 0};
shiftedMask.opShiftL(MASK(lhsp), rConstp->num());
MASK(vtxp).opSelInto(shiftedMask, shiftAmount, width - shiftAmount);
return;
}
// Otherwise, as the shift amount is non-negative, any bit at or above
// the least significant dependent bit might be dependent
V3Number& lMask = MASK(lhsp);
V3Number& vMask = MASK(vtxp);
uint32_t idx = 0;
while (idx < width && lMask.bitIs0(idx)) ++idx;
while (idx < width) vMask.setBit(idx++, '1');
}
#undef MASK
// CONSTRUCTOR
IndependentBits(DfgVarPacked* varp)
: m_varp{varp} {
if (v3Global.opt.debugCheck()) {
m_lineCoverageFile.open( //
v3Global.opt.makeDir() + "/" + v3Global.opt.prefix()
+ "__V3DfgBreakCycles-IndependentBits-line-coverage.txt", //
std::ios_base::out | std::ios_base::app);
}
// The starting vertex depends on it's own value, duuhh...
mask(varp).setAllBits1();
// Enqueue all sinks
varp->forEachSink([&](DfgVertex& vtx) { m_workList.emplace_back(&vtx); });
// While there is an item on the worklist ..
while (!m_workList.empty()) {
// Grab next item
DfgVertex* const currp = m_workList.front();
m_workList.pop_front();
// Grab current mask of item
const V3Number& maskCurr = mask(currp);
// Remember current mask
const V3Number prevMask = maskCurr;
// Dispatch
iterate(currp);
// If mask changed, enqueue sinks
if (!prevMask.isCaseEq(maskCurr)) {
currp->forEachSink([&](DfgVertex& vtx) { m_workList.emplace_back(&vtx); });
// Check the mask only ever expands (no bit goes 1 -> 0)
if (VL_UNLIKELY(v3Global.opt.debugCheck())) {
V3Number notCurr{maskCurr};
notCurr.opNot(maskCurr);
V3Number prevAndNotCurr{maskCurr};
prevAndNotCurr.opAnd(prevMask, notCurr);
UASSERT_OBJ(prevAndNotCurr.isEqZero(), currp, "Mask should only expand");
}
}
}
}
public:
// Given a variable, compute which bits in this variable are independent of
// the variable itself (simple forward dataflow analysis). Returns a bit
// mask where a set bit indicates that bit is independent of the variable
// itself (logic is not circular). The result is a conservative estimate,
// so bits reported dependent might not actually be, but all bits reported
// independent are known to be so.
static V3Number apply(DfgVarPacked* varp) {
IndependentBits independentBits{varp};
// Combine the masks of all drivers of the variable
V3Number result{varp->fileline(), static_cast<int>(varp->width()), 0};
varp->forEachSourceEdge([&](DfgEdge& edge, size_t i) {
const DfgVertex* const srcp = edge.sourcep();
// The mask represents the dependent bits, so invert it
V3Number inverseMask{srcp->fileline(), static_cast<int>(srcp->width()), 0};
inverseMask.opNot(independentBits.mask(srcp));
result.opSelInto(inverseMask, varp->driverLsb(i), srcp->width());
});
return result;
}
};
class FixUpSelDrivers final {
public:
// Attempt to push Sel form Var through to the driving
// expression of the selected bits. This can fix things like
// 'a[1:0] = foo', 'a[2] = a[1]', which are somewhat common.
// Returns the number of drivers fixed up.
static size_t apply(DfgGraph& dfg, DfgVarPacked* varp) {
UINFO(9, "FixUpSelDrivers of " << varp->nodep()->name());
const uint32_t component = varp->getUser<uint32_t>();
size_t nImprovements = 0;
varp->forEachSink([&](DfgVertex& sink) {
// Ignore if sink is not part of cycle
if (sink.getUser<uint32_t>() != component) return;
// Only Handle Sels now
DfgSel* const selp = sink.cast<DfgSel>();
if (!selp) return;
// Try to find of the driver of the selected bits outside the cycle
DfgVertex* const fixp
= TraceDriver::apply(dfg, varp, selp->lsb(), selp->width(), false);
if (!fixp) return;
// Found an out-of-cycle driver. We can replace this sel with that.
selp->replaceWith(fixp);
selp->unlinkDelete(dfg);
++nImprovements;
});
UINFO(9, "FixUpSelDrivers made " << nImprovements << " improvements");
return nImprovements;
}
};
class FixUpIndependentRanges final {
// Returns a bitmask set if that bit of 'varp' is used (has a sink)
static V3Number computeUsedBits(DfgVarPacked* varp) {
V3Number result{varp->fileline(), static_cast<int>(varp->width()), 0};
varp->forEachSink([&](DfgVertex& sink) {
// If used via a Sel, mark the selected bits used
if (DfgSel* const selp = sink.cast<DfgSel>()) {
uint32_t lsb = selp->lsb();
uint32_t msb = lsb + selp->width() - 1;
for (uint32_t i = lsb; i <= msb; ++i) result.setBit(i, '1');
return;
}
// Used without a Sel, so all bits are used
result.setAllBits1();
});
return result;
}
// Trace drivers of independent bits of 'varp' in the range '[hi:lo]'
// append replacement terms to 'termps'. Returns number of successful
// replacements.
static size_t gatherTerms(std::vector<DfgVertex*>& termps, DfgGraph& dfg,
DfgVarPacked* const varp, const V3Number& indpBits,
const uint32_t hi, const uint32_t lo) {
size_t nImprovements = 0;
// Iterate through consecutive dependent/non-dependet ranges within [hi:lo]
bool isIndependent = indpBits.bitIs1(lo);
uint32_t lsb = lo;
for (uint32_t msb = lo; msb <= hi; ++msb) {
const bool endRange = msb == hi || isIndependent != indpBits.bitIs1(msb + 1);
if (!endRange) continue;
const uint32_t width = msb - lsb + 1;
DfgVertex* termp = nullptr;
// If the range is not self-dependent, attempt to trace its driver
if (isIndependent) {
termp = TraceDriver::apply(dfg, varp, lsb, width, true);
if (termp) {
++nImprovements;
} else {
UINFO(5, "TraceDriver of independent range failed for "
<< varp->nodep()->name() << "[" << msb << ":" << lsb << "]");
}
}
// Fall back on using the part of the variable (if dependent, or trace failed)
if (!termp) {
AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(width);
DfgSel* const selp = new DfgSel{dfg, varp->fileline(), dtypep};
// Do not connect selp->fromp yet, need to do afer replacing 'varp'
selp->lsb(lsb);
termp = selp;
}
// Record the term
termps.emplace_back(termp);
// Next iteration
isIndependent = !isIndependent;
lsb = msb + 1;
}
return nImprovements;
}
public:
// Similar to FixUpSelDrivers, but first comptute which bits of the
// variable are self dependent, and fix up those that are independent
// but used.
static size_t apply(DfgGraph& dfg, DfgVarPacked* varp) {
UINFO(9, "FixUpIndependentRanges of " << varp->nodep()->name());
size_t nImprovements = 0;
// Figure out which bits of 'varp' are used
const V3Number usedBits = computeUsedBits(varp);
UINFO(9, "Used bits of '" << varp->nodep()->name() << "' are "
<< usedBits.displayed(varp->nodep(), "%b"));
// Nothing to do if no bits are used
if (usedBits.isEqZero()) return 0;
// Figure out which bits of 'varp' are dependent on themselves
const V3Number indpBits = IndependentBits::apply(varp);
UINFO(9, "Independent bits of '" << varp->nodep()->name() << "' are "
<< indpBits.displayed(varp->nodep(), "%b"));
// Can't do anything if all bits are dependent
if (indpBits.isEqZero()) return 0;
{
// Nothing to do if no used bits are independen (all used bits are dependent)
V3Number usedAndIndependent{varp->fileline(), static_cast<int>(varp->width()), 0};
usedAndIndependent.opAnd(usedBits, indpBits);
if (usedAndIndependent.isEqZero()) return 0;
}
// We are computing the terms to concatenate and replace 'varp' with
std::vector<DfgVertex*> termps;
// Iterate through consecutive used/unused ranges
FileLine* const flp = varp->fileline();
const uint32_t width = varp->width();
bool isUsed = usedBits.bitIs1(0); // Is current range used
uint32_t lsb = 0; // LSB of current range
for (uint32_t msb = 0; msb < width; ++msb) {
const bool endRange = msb == width - 1 || isUsed != usedBits.bitIs1(msb + 1);
if (!endRange) continue;
if (isUsed) {
// The range is used, compute the replacement terms
nImprovements += gatherTerms(termps, dfg, varp, indpBits, msb, lsb);
} else {
// The range is not used, just use constant 0 as a placeholder
termps.emplace_back(new DfgConst{dfg, flp, msb - lsb + 1});
}
// Next iteration
isUsed = !isUsed;
lsb = msb + 1;
}
// If we managed to imporove something, apply the replacement
if (nImprovements) {
// Concatenate all the terms to create the replacement
DfgVertex* replacementp = termps.front();
for (size_t i = 1; i < termps.size(); ++i) {
DfgVertex* const termp = termps[i];
const uint32_t catWidth = replacementp->width() + termp->width();
AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(catWidth);
DfgConcat* const catp = new DfgConcat{dfg, flp, dtypep};
catp->rhsp(replacementp);
catp->lhsp(termp);
replacementp = catp;
}
// Replace the variable
varp->replaceWith(replacementp);
// Connect the Sel nodes in the replacement
for (DfgVertex* const termp : termps) {
if (DfgSel* const selp = termp->cast<DfgSel>()) {
if (!selp->fromp()) selp->fromp(varp);
}
}
}
UINFO(9, "FixUpIndependentRanges made " << nImprovements << " improvements");
return nImprovements;
}
};
std::pair<std::unique_ptr<DfgGraph>, bool>
V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgOptimizationContext& ctx) {
// Shorthand for dumping graph at given dump level
@ -441,9 +998,7 @@ V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgOptimizationContext& ctx) {
UINFO(9, "New iteration after " << nImprovements << " improvements");
prevNImprovements = nImprovements;
// Method 1. Attempt to push Sel form Var through to the driving
// expression of the selected bits. This can fix things like
// 'a[1:0] = foo', 'a[2] = a[1]', which are somewhat common.
// Method 1: FixUpSelDrivers
for (DfgVertexVar& vtx : res.varVertices()) {
// Only handle DfgVarPacked at this point
DfgVarPacked* const varp = vtx.cast<DfgVarPacked>();
@ -452,24 +1007,32 @@ V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgOptimizationContext& ctx) {
const uint32_t component = varp->getUser<uint32_t>();
if (!component) continue;
UINFO(9, "Attempting to TraceDriver " << varp->nodep()->name());
const size_t nFixed = FixUpSelDrivers::apply(res, varp);
if (nFixed) {
nImprovements += nFixed;
ctx.m_breakCyclesContext.m_nImprovements += nFixed;
dump(9, res, "FixUpSelDrivers");
}
}
varp->forEachSink([&](DfgVertex& sink) {
// Ignore if sink is not part of cycle
if (sink.getUser<uint32_t>() != component) return;
// Only Handle Sels now
DfgSel* const selp = sink.cast<DfgSel>();
if (!selp) return;
// Try to find of the driver of the selected bits outside the cycle
DfgVertex* const fixp = TraceDriver::apply(res, varp, selp->lsb(), selp->width());
if (!fixp) return;
// Found an out-of-cycle driver. We can replace this sel with that.
selp->replaceWith(fixp);
selp->unlinkDelete(res);
++nImprovements;
++ctx.m_breakCyclesContext.m_nImprovements;
dump(9, res, "TraceDriver");
});
// If we managed to fix something, try again with the earlier methods
if (nImprovements != prevNImprovements) continue;
// Method 2. FixUpIndependentRanges
for (DfgVertexVar& vtx : res.varVertices()) {
// Only handle DfgVarPacked at this point
DfgVarPacked* const varp = vtx.cast<DfgVarPacked>();
if (!varp) continue;
// If Variable is not part of a cycle, move on
const uint32_t component = varp->getUser<uint32_t>();
if (!component) continue;
const size_t nFixed = FixUpIndependentRanges::apply(res, varp);
if (nFixed) {
nImprovements += nFixed;
ctx.m_breakCyclesContext.m_nImprovements += nFixed;
dump(9, res, "FixUpIndependentRanges");
}
}
} while (nImprovements != prevNImprovements);

View File

@ -25,6 +25,8 @@ with open(root + "/src/V3DfgBreakCycles.cpp", 'r', encoding="utf8") as fd:
line = line.split("//")[0]
if re.match(r'^[^#]*SET_RESULT', line):
expectedLines.add(lineno)
if re.match(r'^[^#]*MASK', line):
expectedLines.add(lineno)
if not expectedLines:
test.error("Failed to read expected source line numbers")
@ -62,6 +64,8 @@ test.compile(verilator_flags2=[
"--stats",
"--build",
"-fno-dfg-break-cycles",
"-fno-dfg-post-inline",
"-fno-dfg-scoped",
"+incdir+" + test.obj_dir,
"-Mdir", test.obj_dir + "/obj_ref",
"--prefix", "Vref",
@ -76,6 +80,8 @@ test.file_grep(test.obj_dir + "/obj_ref/Vref__stats.txt",
test.compile(verilator_flags2=[
"--stats",
"--build",
"-fno-dfg-post-inline",
"-fno-dfg-scoped",
"--exe",
"+incdir+" + test.obj_dir,
"-Mdir", test.obj_dir + "/obj_opt",
@ -89,11 +95,16 @@ test.compile(verilator_flags2=[
# Check all source lines hit
coveredLines = set()
with open(test.obj_dir + "/obj_opt/Vopt__V3DfgBreakCycles-TraceDriver-line-coverage.txt",
'r',
encoding="utf8") as fd:
for line in fd:
coveredLines.add(int(line.strip()))
def readCovered(fileName):
with open(fileName, 'r', encoding="utf8") as fd:
for line in fd:
coveredLines.add(int(line.strip()))
readCovered(test.obj_dir + "/obj_opt/Vopt__V3DfgBreakCycles-TraceDriver-line-coverage.txt")
readCovered(test.obj_dir + "/obj_opt/Vopt__V3DfgBreakCycles-IndependentBits-line-coverage.txt")
if coveredLines != expectedLines:
for n in sorted(expectedLines - coveredLines):

View File

@ -8,39 +8,103 @@
module t (
`include "portlist.vh" // Boilerplate generated by t_dfg_break_cycles.py
rand_a, rand_b, srand_a, srand_b
);
rand_a, rand_b, srand_a, srand_b
);
`include "portdecl.vh" // Boilerplate generated by t_dfg_break_cycles.py
input rand_a;
input rand_b;
input srand_a;
input srand_b;
wire logic [63:0] rand_a;
wire logic [63:0] rand_b;
wire logic signed [63:0] srand_a;
wire logic signed [63:0] srand_b;
input rand_a;
input rand_b;
input srand_a;
input srand_b;
wire logic [63:0] rand_a;
wire logic [63:0] rand_b;
wire logic signed [63:0] srand_a;
wire logic signed [63:0] srand_b;
`signal(CONCAT_RHS, 2);
assign CONCAT_RHS[0] = rand_a[0];
assign CONCAT_RHS[1] = CONCAT_RHS[0];
//////////////////////////////////////////////////////////////////////////
// Interesting user code to cover
//////////////////////////////////////////////////////////////////////////
`signal(CONCAT_LHS, 2);
assign CONCAT_LHS[0] = CONCAT_LHS[1];
assign CONCAT_LHS[1] = rand_a[1];
`signal(GRAY_SEL, 3);
assign GRAY_SEL = rand_a[2:0] ^ 3'(GRAY_SEL[2:1]);
`signal(CONCAT_MID, 3);
assign CONCAT_MID[0] = |CONCAT_MID[2:1];
assign CONCAT_MID[2:1] = {rand_a[2], ~rand_a[2]};
`signal(GRAY_SHIFT, 3);
assign GRAY_SHIFT = rand_a[2:0] ^ (GRAY_SHIFT >> 1);
`signal(SEL, 3);
assign SEL[0] = rand_a[4];
assign SEL[1] = SEL[0];
assign SEL[2] = SEL[1];
`signal(GRAY_REV_SEL, 3);
assign GRAY_REV_SEL = rand_a[2:0] ^ {GRAY_REV_SEL[1:0], 1'b0};
`signal(GRAY_REV_SHIFT, 3);
assign GRAY_REV_SHIFT = rand_a[2:0] ^ (GRAY_REV_SHIFT << 1);
//////////////////////////////////////////////////////////////////////////
// Fill coverage
//////////////////////////////////////////////////////////////////////////
`signal(CONCAT_RHS, 2);
assign CONCAT_RHS[0] = rand_a[0];
assign CONCAT_RHS[1] = CONCAT_RHS[0];
`signal(CONCAT_LHS, 2);
assign CONCAT_LHS[0] = CONCAT_LHS[1];
assign CONCAT_LHS[1] = rand_a[1];
`signal(CONCAT_MID, 3);
assign CONCAT_MID[0] = |CONCAT_MID[2:1];
assign CONCAT_MID[2:1] = {rand_a[2], ~rand_a[2]};
`signal(SEL, 3);
assign SEL[0] = rand_a[4];
assign SEL[1] = SEL[0];
assign SEL[2] = SEL[1];
`signal(EXTEND, 8);
assign EXTEND[0] = rand_a[3];
assign EXTEND[3:1] = 3'(EXTEND[0]);
assign EXTEND[4] = EXTEND[1];
assign EXTEND[6:5] = EXTEND[2:1];
assign EXTEND[7] = EXTEND[3];
`signal(NOT, 3);
assign NOT = ~(rand_a[2:0] ^ 3'(NOT[2:1]));
`signal(AND, 3);
assign AND = rand_a[2:0] & 3'(AND[2:1]);
`signal(OR, 3);
assign OR = rand_a[2:0] | 3'(OR[2:1]);
`signal(SHIFTR, 14);
assign SHIFTR = {
SHIFTR[6:5], // 13:12
SHIFTR[7:6], // 11:10
SHIFTR[5:4], // 9:8
SHIFTR[3:0] >> 2, // 7:4
rand_a[3:0] // 3:0
};
`signal(SHIFTR_VARIABLE, 2);
assign SHIFTR_VARIABLE = rand_a[1:0] ^ ({1'b0, SHIFTR_VARIABLE[1]} >> rand_b[0]);
`signal(SHIFTL, 14);
assign SHIFTL = {
SHIFTL[6:5], // 13:12
SHIFTL[7:6], // 11:10
SHIFTL[5:4], // 9:8
SHIFTL[3:0] << 2, // 7:4
rand_a[3:0] // 3:0
};
`signal(SHIFTL_VARIABLE, 2);
assign SHIFTL_VARIABLE = rand_a[1:0] ^ ({SHIFTL_VARIABLE[0], 1'b0} << rand_b[0]);
`signal(VAR_A, 2);
wire logic [1:0] VAR_B;
assign VAR_A = {rand_a[0], VAR_B[0]};
assign VAR_B = (VAR_A >> 1) ^ 2'(VAR_B[1]);
`signal(REPLICATE, 4);
assign REPLICATE = rand_a[3:0] ^ ({2{REPLICATE[3:2]}} >> 2);
`signal(EXTEND_SRC, 5);
assign EXTEND_SRC[0] = rand_a[3];
assign EXTEND_SRC[3:1] = 3'(EXTEND_SRC[0]);
assign EXTEND_SRC[4] = EXTEND_SRC[1];
endmodule