diff --git a/src/V3DfgBreakCycles.cpp b/src/V3DfgBreakCycles.cpp index 774db0f76..b85609e04 100644 --- a/src/V3DfgBreakCycles.cpp +++ b/src/V3DfgBreakCycles.cpp @@ -20,6 +20,7 @@ #include "V3DfgPasses.h" #include "V3Hash.h" +#include #include #include #include @@ -191,32 +192,56 @@ class TraceDriver final : public DfgVisitor { DfgGraph& m_dfg; // The graph being processed // The strongly connected component we are trying to escape const uint32_t m_component; + const bool m_aggressive; // Trace aggressively, creating intermediate ops uint32_t m_lsb = 0; // LSB to extract from the currently visited Vertex uint32_t m_msb = 0; // MSB to extract from the currently visited Vertex // Result of tracing the currently visited Vertex. Use SET_RESULT below! DfgVertex* m_resp = nullptr; std::vector m_newVtxps; // New vertices created during the traversal - std::ofstream m_lineCoverageFile; // Line coverage file, just for testing std::vector m_stack; // Stack of currently visited vertices // Denotes if a 'Visited' entry appear in m_stack std::unordered_map m_visited; + std::ofstream m_lineCoverageFile; // Line coverage file, just for testing + // METHODS - // Create and return a new Vertex and add it to m_newVtxps. You should + // Create and return a new Vertex and add it to m_newVtxps. Fileline is + // taken from 'refp', but 'refp' is otherwise not used. You should // always use this to create new vertices, so unused ones (if a trace // eventually fails) can be cleaned up at the end. template - Vertex* make(FileLine* flp, uint32_t width) { + Vertex* make(const DfgVertex* refp, uint32_t width) { static_assert(std::is_base_of::value // - && !std::is_base_of::value // - && !std::is_same::value, - "Should only make operation vertices"); - AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(width); - Vertex* const vtxp = new Vertex{m_dfg, flp, dtypep}; - m_newVtxps.emplace_back(vtxp); - return vtxp; + && !std::is_base_of::value, + "Should only make operation vertices and constants"); + + constexpr bool okWithoutAggressive = // + std::is_same::value // + || std::is_same::value // + || std::is_same::value // + || std::is_same::value; + + UASSERT_OBJ( + okWithoutAggressive || m_aggressive, refp, + "Should only create Const, Sel, Concat, Exend Vertices without aggressive mode"); + + if VL_CONSTEXPR_CXX17 (std::is_same::value) { + DfgConst* const vtxp = new DfgConst{m_dfg, refp->fileline(), width}; + m_newVtxps.emplace_back(vtxp); + return reinterpret_cast(vtxp); + } else { + // TODO: this is a gross hack around lack of C++17 'if constexpr' Vtx is always Vertex + // when this code is actually executed, but needs a fudged type to type check when + // Vertex is DfgConst, in which case this code is unreachable ... + using Vtx = typename std::conditional::value, DfgSel, + Vertex>::type; + AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(width); + Vtx* const vtxp = new Vtx{m_dfg, refp->fileline(), dtypep}; + m_newVtxps.emplace_back(vtxp); + return reinterpret_cast(vtxp); + } } // Continue tracing drivers of the given vertex, at the given LSB. Every @@ -256,7 +281,7 @@ class TraceDriver final : public DfgVisitor { // then we found what we were looking for. if (msb != vtxp->width() - 1 || lsb != 0) { // Apply a Sel to extract the relevant bits if only a part is needed - DfgSel* const selp = make(vtxp->fileline(), msb - lsb + 1); + DfgSel* const selp = make(vtxp, msb - lsb + 1); selp->fromp(vtxp); selp->lsb(lsb); m_resp = selp; @@ -280,9 +305,49 @@ class TraceDriver final : public DfgVisitor { m_stack.pop_back(); // Done + if (!m_resp) { + UINFO(9, "TraceDriver - Failed to trace vertex of type: " << vtxp->typeName()); + } return m_resp; } + template + Vertex* bitwiseBinary(Vertex* vtxp) { + static_assert(std::is_base_of::value, + "Should only call on DfgVertexBinary"); + if (DfgVertex* const rp = trace(vtxp->rhsp(), m_msb, m_lsb)) { + if (DfgVertex* const lp = trace(vtxp->lhsp(), m_msb, m_lsb)) { + Vertex* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->rhsp(rp); + resp->lhsp(lp); + return resp; + } + } + return nullptr; + } + + // Predicate to do determine if vtxp[$bits(vtxp)-1:idx] is known to be zero + // Somewhat rudimentary but sufficient for current purposes. + static bool knownToBeZeroAtAndAbove(const DfgVertex* vtxp, uint32_t idx) { + if (const DfgConcat* const catp = vtxp->cast()) { + DfgConst* const lConstp = catp->lhsp()->cast(); + return lConstp && idx >= catp->rhsp()->width() && lConstp->isZero(); + } + if (const DfgExtend* const extp = vtxp->cast()) { + return idx >= extp->srcp()->width(); + } + return false; + } + + // Like knownToBeZeroAtAndAbove, but checks vtxp[idx:0] + static bool knownToBeZeroAtAndBelow(const DfgVertex* vtxp, uint32_t idx) { + if (const DfgConcat* const catp = vtxp->cast()) { + DfgConst* const rConstp = catp->rhsp()->cast(); + return rConstp && idx < rConstp->width() && rConstp->isZero(); + } + return false; + } + // Use this macro to set the result in 'visit' methods. This also emits // a line to m_lineCoverageFile for testing. // TODO: Use C++20 std::source_location instead of a macro @@ -327,10 +392,10 @@ class TraceDriver final : public DfgVisitor { SET_RESULT(trace(lhsp, m_msb - rWidth, m_lsb - rWidth)); return; } - // The traced bit span both sides, attempt to trace both + // The traced bit spans both sides, attempt to trace both if (DfgVertex* const rp = trace(rhsp, rWidth - 1, m_lsb)) { if (DfgVertex* const lp = trace(lhsp, m_msb - rWidth, 0)) { - DfgConcat* const resp = make(vtxp->fileline(), m_msb - m_lsb + 1); + DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); resp->rhsp(rp); resp->lhsp(lp); SET_RESULT(resp); @@ -341,10 +406,24 @@ class TraceDriver final : public DfgVisitor { void visit(DfgExtend* vtxp) override { DfgVertex* const srcp = vtxp->srcp(); - if (srcp->width() > m_msb) { + const uint32_t sWidth = srcp->width(); + // If the traced bits are wholly in the input + if (sWidth > m_msb) { SET_RESULT(trace(srcp, m_msb, m_lsb)); return; } + // If the traced bits are wholly in the extension + if (m_lsb >= sWidth) { + SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); + return; + } + // The traced bits span both sides + if (DfgVertex* const sp = trace(srcp, sWidth - 1, m_lsb)) { + DfgExtend* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->srcp(sp); + SET_RESULT(resp); + return; + } } void visit(DfgSel* vtxp) override { @@ -353,12 +432,107 @@ class TraceDriver final : public DfgVisitor { return; } + void visit(DfgNot* vtxp) override { + if (!m_aggressive) return; + if (DfgVertex* const sp = trace(vtxp->srcp(), m_msb, m_lsb)) { + DfgNot* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->srcp(sp); + SET_RESULT(resp); + return; + } + } + + void visit(DfgAnd* vtxp) override { + if (!m_aggressive) return; + SET_RESULT(bitwiseBinary(vtxp)); + } + void visit(DfgOr* vtxp) override { + if (!m_aggressive) return; + SET_RESULT(bitwiseBinary(vtxp)); + } + void visit(DfgXor* vtxp) override { + if (!m_aggressive) return; + SET_RESULT(bitwiseBinary(vtxp)); + } + + void visit(DfgShiftR* vtxp) override { + DfgVertex* const lhsp = vtxp->lhsp(); + if (DfgConst* const rConstp = vtxp->rhsp()->cast()) { + const uint32_t shiftAmnt = rConstp->toU32(); + // Width of lower half of result + const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? 0 : vtxp->width() - shiftAmnt; + + // If the traced bits are wholly in the input + if (lowerWidth > m_msb) { + SET_RESULT(trace(lhsp, m_msb + shiftAmnt, m_lsb + shiftAmnt)); + return; + } + // If the traced bits are wholly in the extension + if (m_lsb >= lowerWidth) { + SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); + return; + } + // The traced bits span both sides + if (DfgVertex* const sp = trace(lhsp, lowerWidth - 1 + shiftAmnt, m_lsb + shiftAmnt)) { + DfgExtend* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->srcp(sp); + SET_RESULT(resp); + return; + } + return; + } + + // The shift amount is non-negative, so we can conclude zero if all + // bits at and above the LSB we seek are zeroes + if (knownToBeZeroAtAndAbove(lhsp, m_lsb)) { + SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); + return; + } + } + + void visit(DfgShiftL* vtxp) override { + DfgVertex* const lhsp = vtxp->lhsp(); + if (DfgConst* const rConstp = vtxp->rhsp()->cast()) { + const uint32_t shiftAmnt = rConstp->toU32(); + // Width of lower half of result + const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? vtxp->width() : shiftAmnt; + + // If the traced bits are wholly in the input + if (m_lsb >= lowerWidth) { + SET_RESULT(trace(lhsp, m_msb - shiftAmnt, m_lsb - shiftAmnt)); + return; + } + // If the traced bits are wholly in the extension + if (lowerWidth > m_msb) { + SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); + return; + } + // The traced bits span both sides + if (DfgVertex* const sp = trace(lhsp, m_msb - shiftAmnt, lowerWidth - shiftAmnt)) { + DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->rhsp(make(vtxp, resp->width() - sp->width())); + resp->lhsp(sp); + SET_RESULT(resp); + return; + } + return; + } + + // The shift amount is non-negative, so we can conclude zero if all + // bits at and below the MSB we seek are zeroes + if (knownToBeZeroAtAndBelow(lhsp, m_msb)) { + SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); + return; + } + } + #undef SET_RESULT // CONSTRUCTOR - TraceDriver(DfgGraph& dfg, uint32_t component) + TraceDriver(DfgGraph& dfg, uint32_t component, bool aggressive) : m_dfg{dfg} - , m_component{component} { + , m_component{component} + , m_aggressive{aggressive} { if (v3Global.opt.debugCheck()) { m_lineCoverageFile.open( // v3Global.opt.makeDir() + "/" + v3Global.opt.prefix() @@ -371,9 +545,13 @@ public: // Given a Vertex that is part of an SCC denoted by vtxp->user(), // return a vertex that is equivalent to 'vtxp[lsb +: width]', but is not // part of the same SCC. Returns nullptr if such a vertex cannot be - // computed. This can add new vertices to the graph. - static DfgVertex* apply(DfgGraph& dfg, DfgVertex* vtxp, uint32_t lsb, uint32_t width) { - TraceDriver traceDriver{dfg, vtxp->user()}; + // computed. This can add new vertices to the graph. The 'aggressive' flag + // enables creating many intermediate operations. This should only be set + // if it is reasonably certain the tracing will succeed, otherwise we can + // waste a lot of compute. + static DfgVertex* apply(DfgGraph& dfg, DfgVertex* vtxp, uint32_t lsb, uint32_t width, + bool aggressive) { + TraceDriver traceDriver{dfg, vtxp->user(), aggressive}; // Find the out-of-component driver of the given vertex DfgVertex* const resultp = traceDriver.trace(vtxp, lsb + width - 1, lsb); // Delete unused newly created vertices (these can be created if a @@ -393,6 +571,385 @@ public: } }; +class IndependentBits final : public DfgVisitor { + // STATE + DfgVarPacked* const m_varp; // The variable we are computing dependencies for + std::deque m_workList; // Work list for traversal + // Vertex to current bit mask map. The mask is set for the bits that **depend** on 'm_varp'. + std::unordered_map m_vtxp2Mask; + + std::ofstream m_lineCoverageFile; // Line coverage file, just for testing + + // METHODS + + // Retrieve the mask for the given vertex (create it with value 0 if needed) + V3Number& mask(const DfgVertex* vtxp) { + return m_vtxp2Mask + .emplace(std::piecewise_construct, // + std::forward_as_tuple(vtxp), // + std::forward_as_tuple(vtxp->fileline(), static_cast(vtxp->width()), 0)) + .first->second; + } + + // Use this macro to call 'mask' in 'visit' methods. This also emits + // a line to m_lineCoverageFile for testing. + // TODO: Use C++20 std::source_location instead of a macro +#define MASK(vtxp) \ + ([this](const DfgVertex* p) -> V3Number& { \ + if (VL_UNLIKELY(m_lineCoverageFile.is_open())) m_lineCoverageFile << __LINE__ << '\n'; \ + return mask(p); \ + }(vtxp)) + + // VISITORS + void visit(DfgVertex* vtxp) override { + UINFO(9, "Unhandled vertex type " << vtxp->typeName()); + // Conservatively assume it depends on the variable... + mask(vtxp).setAllBits1(); // intentionally not using MASK here + } + + void visit(DfgVarPacked* vtxp) override { + // The mask of the traced variable is known to be all ones + if (vtxp == m_varp) return; + + // Combine the masks of all drivers + V3Number& m = MASK(vtxp); + vtxp->forEachSourceEdge([&](DfgEdge& edge, size_t i) { + const DfgVertex* const srcp = edge.sourcep(); + m.opSelInto(MASK(srcp), vtxp->driverLsb(i), srcp->width()); + }); + } + + void visit(DfgConcat* vtxp) override { + const DfgVertex* const rhsp = vtxp->rhsp(); + const DfgVertex* const lhsp = vtxp->lhsp(); + V3Number& m = MASK(vtxp); + m.opSelInto(MASK(rhsp), 0, rhsp->width()); + m.opSelInto(MASK(lhsp), rhsp->width(), lhsp->width()); + } + + void visit(DfgReplicate* vtxp) override { + const uint32_t count = vtxp->countp()->as()->toU32(); + const DfgVertex* const srcp = vtxp->srcp(); + const uint32_t sWidth = srcp->width(); + V3Number& vMask = MASK(vtxp); + V3Number& sMask = MASK(srcp); + for (uint32_t i = 0; i < count; ++i) vMask.opSelInto(sMask, i * sWidth, sWidth); + } + + void visit(DfgSel* vtxp) override { + const uint32_t lsb = vtxp->lsb(); + const uint32_t msb = lsb + vtxp->width() - 1; + MASK(vtxp).opSel(MASK(vtxp->fromp()), msb, lsb); + } + + void visit(DfgExtend* vtxp) override { + const DfgVertex* const srcp = vtxp->srcp(); + MASK(vtxp).opSelInto(MASK(srcp), 0, srcp->width()); + } + + void visit(DfgNot* vtxp) override { // + MASK(vtxp) = MASK(vtxp->srcp()); + } + + void visit(DfgAnd* vtxp) override { // + MASK(vtxp).opOr(MASK(vtxp->lhsp()), MASK(vtxp->rhsp())); + } + void visit(DfgOr* vtxp) override { // + MASK(vtxp).opOr(MASK(vtxp->lhsp()), MASK(vtxp->rhsp())); + } + void visit(DfgXor* vtxp) override { // + MASK(vtxp).opOr(MASK(vtxp->lhsp()), MASK(vtxp->rhsp())); + } + + void visit(DfgShiftR* vtxp) override { + DfgVertex* const rhsp = vtxp->rhsp(); + DfgVertex* const lhsp = vtxp->lhsp(); + const uint32_t width = vtxp->width(); + + // Constant shift can be computed precisely + if (DfgConst* const rConstp = rhsp->cast()) { + const uint32_t shiftAmount = rConstp->toU32(); + if (shiftAmount >= width) return; + V3Number shiftedMask{lhsp->fileline(), static_cast(width), 0}; + shiftedMask.opShiftR(MASK(lhsp), rConstp->num()); + MASK(vtxp).opSelInto(shiftedMask, 0, width - shiftAmount); + return; + } + + // Otherwise, as the shift amount is non-negative, any bit at or below + // the most significant dependent bit might be dependent + V3Number& lMask = MASK(lhsp); + V3Number& vMask = MASK(vtxp); + int idx = width - 1; + while (idx >= 0 && lMask.bitIs0(idx)) --idx; + while (idx >= 0) vMask.setBit(idx--, '1'); + } + + void visit(DfgShiftL* vtxp) override { + DfgVertex* const rhsp = vtxp->rhsp(); + DfgVertex* const lhsp = vtxp->lhsp(); + const uint32_t width = vtxp->width(); + + // Constant shift can be computed precisely + if (DfgConst* const rConstp = rhsp->cast()) { + const uint32_t shiftAmount = rConstp->toU32(); + if (shiftAmount >= width) return; + V3Number shiftedMask{lhsp->fileline(), static_cast(width), 0}; + shiftedMask.opShiftL(MASK(lhsp), rConstp->num()); + MASK(vtxp).opSelInto(shiftedMask, shiftAmount, width - shiftAmount); + return; + } + + // Otherwise, as the shift amount is non-negative, any bit at or above + // the least significant dependent bit might be dependent + V3Number& lMask = MASK(lhsp); + V3Number& vMask = MASK(vtxp); + uint32_t idx = 0; + while (idx < width && lMask.bitIs0(idx)) ++idx; + while (idx < width) vMask.setBit(idx++, '1'); + } + +#undef MASK + + // CONSTRUCTOR + IndependentBits(DfgVarPacked* varp) + : m_varp{varp} { + if (v3Global.opt.debugCheck()) { + m_lineCoverageFile.open( // + v3Global.opt.makeDir() + "/" + v3Global.opt.prefix() + + "__V3DfgBreakCycles-IndependentBits-line-coverage.txt", // + std::ios_base::out | std::ios_base::app); + } + + // The starting vertex depends on it's own value, duuhh... + mask(varp).setAllBits1(); + // Enqueue all sinks + varp->forEachSink([&](DfgVertex& vtx) { m_workList.emplace_back(&vtx); }); + + // While there is an item on the worklist .. + while (!m_workList.empty()) { + // Grab next item + DfgVertex* const currp = m_workList.front(); + m_workList.pop_front(); + + // Grab current mask of item + const V3Number& maskCurr = mask(currp); + // Remember current mask + const V3Number prevMask = maskCurr; + + // Dispatch + iterate(currp); + + // If mask changed, enqueue sinks + if (!prevMask.isCaseEq(maskCurr)) { + currp->forEachSink([&](DfgVertex& vtx) { m_workList.emplace_back(&vtx); }); + + // Check the mask only ever expands (no bit goes 1 -> 0) + if (VL_UNLIKELY(v3Global.opt.debugCheck())) { + V3Number notCurr{maskCurr}; + notCurr.opNot(maskCurr); + V3Number prevAndNotCurr{maskCurr}; + prevAndNotCurr.opAnd(prevMask, notCurr); + UASSERT_OBJ(prevAndNotCurr.isEqZero(), currp, "Mask should only expand"); + } + } + } + } + +public: + // Given a variable, compute which bits in this variable are independent of + // the variable itself (simple forward dataflow analysis). Returns a bit + // mask where a set bit indicates that bit is independent of the variable + // itself (logic is not circular). The result is a conservative estimate, + // so bits reported dependent might not actually be, but all bits reported + // independent are known to be so. + static V3Number apply(DfgVarPacked* varp) { + IndependentBits independentBits{varp}; + // Combine the masks of all drivers of the variable + V3Number result{varp->fileline(), static_cast(varp->width()), 0}; + varp->forEachSourceEdge([&](DfgEdge& edge, size_t i) { + const DfgVertex* const srcp = edge.sourcep(); + // The mask represents the dependent bits, so invert it + V3Number inverseMask{srcp->fileline(), static_cast(srcp->width()), 0}; + inverseMask.opNot(independentBits.mask(srcp)); + result.opSelInto(inverseMask, varp->driverLsb(i), srcp->width()); + }); + return result; + } +}; + +class FixUpSelDrivers final { +public: + // Attempt to push Sel form Var through to the driving + // expression of the selected bits. This can fix things like + // 'a[1:0] = foo', 'a[2] = a[1]', which are somewhat common. + // Returns the number of drivers fixed up. + static size_t apply(DfgGraph& dfg, DfgVarPacked* varp) { + UINFO(9, "FixUpSelDrivers of " << varp->nodep()->name()); + const uint32_t component = varp->getUser(); + size_t nImprovements = 0; + varp->forEachSink([&](DfgVertex& sink) { + // Ignore if sink is not part of cycle + if (sink.getUser() != component) return; + // Only Handle Sels now + DfgSel* const selp = sink.cast(); + if (!selp) return; + // Try to find of the driver of the selected bits outside the cycle + DfgVertex* const fixp + = TraceDriver::apply(dfg, varp, selp->lsb(), selp->width(), false); + if (!fixp) return; + // Found an out-of-cycle driver. We can replace this sel with that. + selp->replaceWith(fixp); + selp->unlinkDelete(dfg); + ++nImprovements; + }); + UINFO(9, "FixUpSelDrivers made " << nImprovements << " improvements"); + return nImprovements; + } +}; + +class FixUpIndependentRanges final { + // Returns a bitmask set if that bit of 'varp' is used (has a sink) + static V3Number computeUsedBits(DfgVarPacked* varp) { + V3Number result{varp->fileline(), static_cast(varp->width()), 0}; + varp->forEachSink([&](DfgVertex& sink) { + // If used via a Sel, mark the selected bits used + if (DfgSel* const selp = sink.cast()) { + uint32_t lsb = selp->lsb(); + uint32_t msb = lsb + selp->width() - 1; + for (uint32_t i = lsb; i <= msb; ++i) result.setBit(i, '1'); + return; + } + + // Used without a Sel, so all bits are used + result.setAllBits1(); + }); + return result; + } + + // Trace drivers of independent bits of 'varp' in the range '[hi:lo]' + // append replacement terms to 'termps'. Returns number of successful + // replacements. + static size_t gatherTerms(std::vector& termps, DfgGraph& dfg, + DfgVarPacked* const varp, const V3Number& indpBits, + const uint32_t hi, const uint32_t lo) { + size_t nImprovements = 0; + // Iterate through consecutive dependent/non-dependet ranges within [hi:lo] + bool isIndependent = indpBits.bitIs1(lo); + uint32_t lsb = lo; + for (uint32_t msb = lo; msb <= hi; ++msb) { + const bool endRange = msb == hi || isIndependent != indpBits.bitIs1(msb + 1); + if (!endRange) continue; + const uint32_t width = msb - lsb + 1; + DfgVertex* termp = nullptr; + // If the range is not self-dependent, attempt to trace its driver + if (isIndependent) { + termp = TraceDriver::apply(dfg, varp, lsb, width, true); + if (termp) { + ++nImprovements; + } else { + UINFO(5, "TraceDriver of independent range failed for " + << varp->nodep()->name() << "[" << msb << ":" << lsb << "]"); + } + } + // Fall back on using the part of the variable (if dependent, or trace failed) + if (!termp) { + AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(width); + DfgSel* const selp = new DfgSel{dfg, varp->fileline(), dtypep}; + // Do not connect selp->fromp yet, need to do afer replacing 'varp' + selp->lsb(lsb); + termp = selp; + } + // Record the term + termps.emplace_back(termp); + // Next iteration + isIndependent = !isIndependent; + lsb = msb + 1; + } + return nImprovements; + } + +public: + // Similar to FixUpSelDrivers, but first comptute which bits of the + // variable are self dependent, and fix up those that are independent + // but used. + static size_t apply(DfgGraph& dfg, DfgVarPacked* varp) { + UINFO(9, "FixUpIndependentRanges of " << varp->nodep()->name()); + size_t nImprovements = 0; + + // Figure out which bits of 'varp' are used + const V3Number usedBits = computeUsedBits(varp); + UINFO(9, "Used bits of '" << varp->nodep()->name() << "' are " + << usedBits.displayed(varp->nodep(), "%b")); + // Nothing to do if no bits are used + if (usedBits.isEqZero()) return 0; + + // Figure out which bits of 'varp' are dependent on themselves + const V3Number indpBits = IndependentBits::apply(varp); + UINFO(9, "Independent bits of '" << varp->nodep()->name() << "' are " + << indpBits.displayed(varp->nodep(), "%b")); + // Can't do anything if all bits are dependent + if (indpBits.isEqZero()) return 0; + + { + // Nothing to do if no used bits are independen (all used bits are dependent) + V3Number usedAndIndependent{varp->fileline(), static_cast(varp->width()), 0}; + usedAndIndependent.opAnd(usedBits, indpBits); + if (usedAndIndependent.isEqZero()) return 0; + } + + // We are computing the terms to concatenate and replace 'varp' with + std::vector termps; + + // Iterate through consecutive used/unused ranges + FileLine* const flp = varp->fileline(); + const uint32_t width = varp->width(); + bool isUsed = usedBits.bitIs1(0); // Is current range used + uint32_t lsb = 0; // LSB of current range + for (uint32_t msb = 0; msb < width; ++msb) { + const bool endRange = msb == width - 1 || isUsed != usedBits.bitIs1(msb + 1); + if (!endRange) continue; + if (isUsed) { + // The range is used, compute the replacement terms + nImprovements += gatherTerms(termps, dfg, varp, indpBits, msb, lsb); + } else { + // The range is not used, just use constant 0 as a placeholder + termps.emplace_back(new DfgConst{dfg, flp, msb - lsb + 1}); + } + // Next iteration + isUsed = !isUsed; + lsb = msb + 1; + } + + // If we managed to imporove something, apply the replacement + if (nImprovements) { + // Concatenate all the terms to create the replacement + DfgVertex* replacementp = termps.front(); + for (size_t i = 1; i < termps.size(); ++i) { + DfgVertex* const termp = termps[i]; + const uint32_t catWidth = replacementp->width() + termp->width(); + AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(catWidth); + DfgConcat* const catp = new DfgConcat{dfg, flp, dtypep}; + catp->rhsp(replacementp); + catp->lhsp(termp); + replacementp = catp; + } + + // Replace the variable + varp->replaceWith(replacementp); + // Connect the Sel nodes in the replacement + for (DfgVertex* const termp : termps) { + if (DfgSel* const selp = termp->cast()) { + if (!selp->fromp()) selp->fromp(varp); + } + } + } + + UINFO(9, "FixUpIndependentRanges made " << nImprovements << " improvements"); + return nImprovements; + } +}; + std::pair, bool> V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgOptimizationContext& ctx) { // Shorthand for dumping graph at given dump level @@ -441,9 +998,7 @@ V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgOptimizationContext& ctx) { UINFO(9, "New iteration after " << nImprovements << " improvements"); prevNImprovements = nImprovements; - // Method 1. Attempt to push Sel form Var through to the driving - // expression of the selected bits. This can fix things like - // 'a[1:0] = foo', 'a[2] = a[1]', which are somewhat common. + // Method 1: FixUpSelDrivers for (DfgVertexVar& vtx : res.varVertices()) { // Only handle DfgVarPacked at this point DfgVarPacked* const varp = vtx.cast(); @@ -452,24 +1007,32 @@ V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgOptimizationContext& ctx) { const uint32_t component = varp->getUser(); if (!component) continue; - UINFO(9, "Attempting to TraceDriver " << varp->nodep()->name()); + const size_t nFixed = FixUpSelDrivers::apply(res, varp); + if (nFixed) { + nImprovements += nFixed; + ctx.m_breakCyclesContext.m_nImprovements += nFixed; + dump(9, res, "FixUpSelDrivers"); + } + } - varp->forEachSink([&](DfgVertex& sink) { - // Ignore if sink is not part of cycle - if (sink.getUser() != component) return; - // Only Handle Sels now - DfgSel* const selp = sink.cast(); - if (!selp) return; - // Try to find of the driver of the selected bits outside the cycle - DfgVertex* const fixp = TraceDriver::apply(res, varp, selp->lsb(), selp->width()); - if (!fixp) return; - // Found an out-of-cycle driver. We can replace this sel with that. - selp->replaceWith(fixp); - selp->unlinkDelete(res); - ++nImprovements; - ++ctx.m_breakCyclesContext.m_nImprovements; - dump(9, res, "TraceDriver"); - }); + // If we managed to fix something, try again with the earlier methods + if (nImprovements != prevNImprovements) continue; + + // Method 2. FixUpIndependentRanges + for (DfgVertexVar& vtx : res.varVertices()) { + // Only handle DfgVarPacked at this point + DfgVarPacked* const varp = vtx.cast(); + if (!varp) continue; + // If Variable is not part of a cycle, move on + const uint32_t component = varp->getUser(); + if (!component) continue; + + const size_t nFixed = FixUpIndependentRanges::apply(res, varp); + if (nFixed) { + nImprovements += nFixed; + ctx.m_breakCyclesContext.m_nImprovements += nFixed; + dump(9, res, "FixUpIndependentRanges"); + } } } while (nImprovements != prevNImprovements); diff --git a/test_regress/t/t_dfg_break_cycles.py b/test_regress/t/t_dfg_break_cycles.py index 79ff66fae..19b96d307 100755 --- a/test_regress/t/t_dfg_break_cycles.py +++ b/test_regress/t/t_dfg_break_cycles.py @@ -25,6 +25,8 @@ with open(root + "/src/V3DfgBreakCycles.cpp", 'r', encoding="utf8") as fd: line = line.split("//")[0] if re.match(r'^[^#]*SET_RESULT', line): expectedLines.add(lineno) + if re.match(r'^[^#]*MASK', line): + expectedLines.add(lineno) if not expectedLines: test.error("Failed to read expected source line numbers") @@ -62,6 +64,8 @@ test.compile(verilator_flags2=[ "--stats", "--build", "-fno-dfg-break-cycles", + "-fno-dfg-post-inline", + "-fno-dfg-scoped", "+incdir+" + test.obj_dir, "-Mdir", test.obj_dir + "/obj_ref", "--prefix", "Vref", @@ -76,6 +80,8 @@ test.file_grep(test.obj_dir + "/obj_ref/Vref__stats.txt", test.compile(verilator_flags2=[ "--stats", "--build", + "-fno-dfg-post-inline", + "-fno-dfg-scoped", "--exe", "+incdir+" + test.obj_dir, "-Mdir", test.obj_dir + "/obj_opt", @@ -89,11 +95,16 @@ test.compile(verilator_flags2=[ # Check all source lines hit coveredLines = set() -with open(test.obj_dir + "/obj_opt/Vopt__V3DfgBreakCycles-TraceDriver-line-coverage.txt", - 'r', - encoding="utf8") as fd: - for line in fd: - coveredLines.add(int(line.strip())) + + +def readCovered(fileName): + with open(fileName, 'r', encoding="utf8") as fd: + for line in fd: + coveredLines.add(int(line.strip())) + + +readCovered(test.obj_dir + "/obj_opt/Vopt__V3DfgBreakCycles-TraceDriver-line-coverage.txt") +readCovered(test.obj_dir + "/obj_opt/Vopt__V3DfgBreakCycles-IndependentBits-line-coverage.txt") if coveredLines != expectedLines: for n in sorted(expectedLines - coveredLines): diff --git a/test_regress/t/t_dfg_break_cycles.v b/test_regress/t/t_dfg_break_cycles.v index e083cf7f8..fe46e2055 100644 --- a/test_regress/t/t_dfg_break_cycles.v +++ b/test_regress/t/t_dfg_break_cycles.v @@ -8,39 +8,103 @@ module t ( `include "portlist.vh" // Boilerplate generated by t_dfg_break_cycles.py - rand_a, rand_b, srand_a, srand_b - ); + rand_a, rand_b, srand_a, srand_b +); `include "portdecl.vh" // Boilerplate generated by t_dfg_break_cycles.py - input rand_a; - input rand_b; - input srand_a; - input srand_b; - wire logic [63:0] rand_a; - wire logic [63:0] rand_b; - wire logic signed [63:0] srand_a; - wire logic signed [63:0] srand_b; + input rand_a; + input rand_b; + input srand_a; + input srand_b; + wire logic [63:0] rand_a; + wire logic [63:0] rand_b; + wire logic signed [63:0] srand_a; + wire logic signed [63:0] srand_b; - `signal(CONCAT_RHS, 2); - assign CONCAT_RHS[0] = rand_a[0]; - assign CONCAT_RHS[1] = CONCAT_RHS[0]; + ////////////////////////////////////////////////////////////////////////// + // Interesting user code to cover + ////////////////////////////////////////////////////////////////////////// - `signal(CONCAT_LHS, 2); - assign CONCAT_LHS[0] = CONCAT_LHS[1]; - assign CONCAT_LHS[1] = rand_a[1]; + `signal(GRAY_SEL, 3); + assign GRAY_SEL = rand_a[2:0] ^ 3'(GRAY_SEL[2:1]); - `signal(CONCAT_MID, 3); - assign CONCAT_MID[0] = |CONCAT_MID[2:1]; - assign CONCAT_MID[2:1] = {rand_a[2], ~rand_a[2]}; + `signal(GRAY_SHIFT, 3); + assign GRAY_SHIFT = rand_a[2:0] ^ (GRAY_SHIFT >> 1); - `signal(SEL, 3); - assign SEL[0] = rand_a[4]; - assign SEL[1] = SEL[0]; - assign SEL[2] = SEL[1]; + `signal(GRAY_REV_SEL, 3); + assign GRAY_REV_SEL = rand_a[2:0] ^ {GRAY_REV_SEL[1:0], 1'b0}; + + `signal(GRAY_REV_SHIFT, 3); + assign GRAY_REV_SHIFT = rand_a[2:0] ^ (GRAY_REV_SHIFT << 1); + + ////////////////////////////////////////////////////////////////////////// + // Fill coverage + ////////////////////////////////////////////////////////////////////////// + + `signal(CONCAT_RHS, 2); + assign CONCAT_RHS[0] = rand_a[0]; + assign CONCAT_RHS[1] = CONCAT_RHS[0]; + + `signal(CONCAT_LHS, 2); + assign CONCAT_LHS[0] = CONCAT_LHS[1]; + assign CONCAT_LHS[1] = rand_a[1]; + + `signal(CONCAT_MID, 3); + assign CONCAT_MID[0] = |CONCAT_MID[2:1]; + assign CONCAT_MID[2:1] = {rand_a[2], ~rand_a[2]}; + + `signal(SEL, 3); + assign SEL[0] = rand_a[4]; + assign SEL[1] = SEL[0]; + assign SEL[2] = SEL[1]; + + `signal(EXTEND, 8); + assign EXTEND[0] = rand_a[3]; + assign EXTEND[3:1] = 3'(EXTEND[0]); + assign EXTEND[4] = EXTEND[1]; + assign EXTEND[6:5] = EXTEND[2:1]; + assign EXTEND[7] = EXTEND[3]; + + `signal(NOT, 3); + assign NOT = ~(rand_a[2:0] ^ 3'(NOT[2:1])); + + `signal(AND, 3); + assign AND = rand_a[2:0] & 3'(AND[2:1]); + + `signal(OR, 3); + assign OR = rand_a[2:0] | 3'(OR[2:1]); + + `signal(SHIFTR, 14); + assign SHIFTR = { + SHIFTR[6:5], // 13:12 + SHIFTR[7:6], // 11:10 + SHIFTR[5:4], // 9:8 + SHIFTR[3:0] >> 2, // 7:4 + rand_a[3:0] // 3:0 + }; + + `signal(SHIFTR_VARIABLE, 2); + assign SHIFTR_VARIABLE = rand_a[1:0] ^ ({1'b0, SHIFTR_VARIABLE[1]} >> rand_b[0]); + + `signal(SHIFTL, 14); + assign SHIFTL = { + SHIFTL[6:5], // 13:12 + SHIFTL[7:6], // 11:10 + SHIFTL[5:4], // 9:8 + SHIFTL[3:0] << 2, // 7:4 + rand_a[3:0] // 3:0 + }; + + `signal(SHIFTL_VARIABLE, 2); + assign SHIFTL_VARIABLE = rand_a[1:0] ^ ({SHIFTL_VARIABLE[0], 1'b0} << rand_b[0]); + + `signal(VAR_A, 2); + wire logic [1:0] VAR_B; + assign VAR_A = {rand_a[0], VAR_B[0]}; + assign VAR_B = (VAR_A >> 1) ^ 2'(VAR_B[1]); + + `signal(REPLICATE, 4); + assign REPLICATE = rand_a[3:0] ^ ({2{REPLICATE[3:2]}} >> 2); - `signal(EXTEND_SRC, 5); - assign EXTEND_SRC[0] = rand_a[3]; - assign EXTEND_SRC[3:1] = 3'(EXTEND_SRC[0]); - assign EXTEND_SRC[4] = EXTEND_SRC[1]; endmodule