// -*- mode: C++; c-file-style: "cc-mode" -*- //************************************************************************* // DESCRIPTION: Verilator: Converting cyclic DFGs into acyclic DFGs // // Code available from: https://verilator.org // //************************************************************************* // // Copyright 2003-2025 by Wilson Snyder. This program is free software; you // can redistribute it and/or modify it under the terms of either the GNU // Lesser General Public License Version 3 or the Perl Artistic License // Version 2.0. // SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 // //************************************************************************* #include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT #include "V3Dfg.h" #include "V3DfgPasses.h" #include "V3Error.h" #include "V3Hash.h" #include #include #include #include #include VL_DEFINE_DEBUG_FUNCTIONS; namespace V3DfgBreakCycles { // Throughout these algorithm, we use the DfgUserMap as a map to the SCC number using Vtx2Scc = DfgUserMap; class TraceDriver final : public DfgVisitor { // TYPES // Key for caching the result of a trace struct CacheKey final { DfgVertex* m_vtxp; uint32_t m_lsb; uint32_t m_msb; CacheKey() = delete; CacheKey(DfgVertex* vtxp, uint32_t lsb, uint32_t msb) : m_vtxp{vtxp} , m_lsb{lsb} , m_msb{msb} {} struct Hash final { size_t operator()(const CacheKey& item) const { // cppcheck-suppress unreadVariable V3Hash hash{item.m_vtxp}; hash += item.m_lsb; hash += item.m_msb; return hash.value(); } }; struct Equal final { bool operator()(const CacheKey& a, const CacheKey& b) const { return a.m_vtxp == b.m_vtxp && a.m_lsb == b.m_lsb && a.m_msb == b.m_msb; } }; }; // STATE DfgGraph& m_dfg; // The graph being processed Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map // The strongly connected component we are currently trying to escape uint64_t m_component = 0; uint32_t m_lsb = 0; // LSB to extract from the currently visited Vertex uint32_t m_msb = 0; // MSB to extract from the currently visited Vertex // Result of tracing the currently visited Vertex. Use SET_RESULT below! DfgVertex* m_resp = nullptr; DfgVertex* m_defaultp = nullptr; // When tracing a variable, this is its 'defaultp', if any // Result cache for reusing already traced vertices std::unordered_map m_cache; #ifdef VL_DEBUG std::ofstream m_lineCoverageFile; // Line coverage file, just for testing #endif // METHODS // Create and return a new Vertex. Always use this to create new vertices. // Fileline is taken from 'refp', but 'refp' is otherwise not used. // Also sets m_vtx2Scc[vtx] to 0, indicating the new vertex is not part of // a strongly connected component. This should always be true, as all the // vertices we create here are driven from outside the component we are // trying to escape, and will sink into that component. Given those are // separate SCCs, these new vertices must be acyclic. template Vertex* make(const DfgVertex* refp, uint32_t width) { static_assert(std::is_base_of::value // && !std::is_base_of::value, "Should only make operation vertices and constants"); if VL_CONSTEXPR_CXX17 (std::is_same::value) { DfgConst* const vtxp = new DfgConst{m_dfg, refp->fileline(), width, 0}; m_vtx2Scc[vtxp] = 0; return reinterpret_cast(vtxp); } else { // TODO: this is a gross hack around lack of C++17 'if constexpr' Vtx is always Vertex // when this code is actually executed, but needs a fudged type to type check when // Vertex is DfgConst, in which case this code is unreachable ... using Vtx = typename std::conditional::value, DfgSel, Vertex>::type; Vtx* const vtxp = new Vtx{m_dfg, refp->fileline(), DfgDataType::packed(width)}; m_vtx2Scc[vtxp] = 0; return reinterpret_cast(vtxp); } } // Create temporary capable of holding the result of 'vtxp' DfgVertexVar* createTmp(const char* prefix, DfgVertex* vtxp) { AstNode* nodep = m_dfg.modulep(); if (!nodep) nodep = v3Global.rootp(); const std::string name = m_dfg.makeUniqueName(prefix, nodep->user2Inc()); FileLine* const flp = vtxp->fileline(); DfgVertex::ScopeCache scopeCache; AstScope* const scopep = m_dfg.modulep() ? nullptr : vtxp->scopep(scopeCache); DfgVertexVar* const varp = m_dfg.makeNewVar(flp, name, vtxp->dtype(), scopep); varp->varp()->isInternal(true); varp->tmpForp(varp->nodep()); m_vtx2Scc[varp] = 0; return varp; } // Continue tracing drivers of the given vertex, at the given LSB. // Every visitor should call this to continue the traversal. DfgVertex* trace(DfgVertex* const vtxp, const uint32_t msb, const uint32_t lsb) { UASSERT_OBJ(vtxp->isPacked(), vtxp, "Can only trace packed type vertices"); UASSERT_OBJ(vtxp->size() > msb, vtxp, "Traced Vertex too narrow"); // Get the cache entry, which is the resulting driver that is not part of // the same component as vtxp DfgVertex*& respr = m_cache .emplace(std::piecewise_construct, // std::forward_as_tuple(vtxp, msb, lsb), // std::forward_as_tuple(nullptr)) .first->second; // Trace the vertex if (respr) { // If already traced this vtxp/msb/lsb, just use the result. // This is important to avoid combinatorial explosion when the // same sub-expression is needed multiple times. } else if (m_vtx2Scc.at(vtxp) != m_component) { // If the currently traced vertex is in a different component, // then we found what we were looking for. respr = vtxp; // If the result is a splice, we need to insert a temporary for it // as a splice cannot be fed into arbitray logic if (DfgVertexSplice* const splicep = respr->cast()) { DfgVertexVar* const tmpp = createTmp("TraceDriver", splicep); // Note: we can't do 'splicep->replaceWith(tmpp)', as other // variable sinks of the splice might have a defaultp driver. tmpp->srcp(splicep); respr = tmpp; } // Apply a Sel to extract the relevant bits if only a part is needed if (msb != respr->width() - 1 || lsb != 0) { DfgSel* const selp = make(respr, msb - lsb + 1); selp->fromp(respr); selp->lsb(lsb); respr = selp; } } else { // Otherwise visit the vertex to trace it VL_RESTORER(m_msb); VL_RESTORER(m_lsb); VL_RESTORER(m_resp); m_msb = msb; m_lsb = lsb; m_resp = nullptr; iterate(vtxp); respr = m_resp; } // We only ever trace drivers of bits that are known to be independent // of the cycles, so we should always be able to find an acyclic driver. UASSERT_OBJ(respr, vtxp, "Tracing driver failed for " << vtxp->typeName()); UASSERT_OBJ(respr->width() == (msb - lsb + 1), vtxp, "Wrong result width"); return respr; } template Vertex* traceBitwiseBinary(Vertex* vtxp) { static_assert(std::is_base_of::value, "Should only call on DfgVertexBinary"); Vertex* const resp = make(vtxp, m_msb - m_lsb + 1); resp->rhsp(trace(vtxp->rhsp(), m_msb, m_lsb)); resp->lhsp(trace(vtxp->lhsp(), m_msb, m_lsb)); return resp; } template DfgVertex* traceAddSub(Vertex* vtxp) { static_assert(std::is_base_of::value, "Should only call on DfgVertexBinary"); Vertex* const opp = make(vtxp, m_msb + 1); opp->rhsp(trace(vtxp->rhsp(), m_msb, 0)); opp->lhsp(trace(vtxp->lhsp(), m_msb, 0)); DfgSel* const selp = make(vtxp, m_msb - m_lsb + 1); selp->fromp(opp); selp->lsb(m_lsb); return selp; } template Vertex* traceReduction(Vertex* vtxp) { static_assert(std::is_base_of::value, "Should only call on DfgVertexUnary"); Vertex* const resp = make(vtxp, 1); resp->srcp(trace(vtxp->srcp(), vtxp->srcp()->width() - 1, 0)); return resp; } template Vertex* traceCmp(Vertex* vtxp) { static_assert(std::is_base_of::value, "Should only call on DfgVertexBinary"); Vertex* const resp = make(vtxp, m_msb - m_lsb + 1); resp->rhsp(trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0)); resp->lhsp(trace(vtxp->lhsp(), vtxp->lhsp()->width() - 1, 0)); return resp; } // Use this macro to set the result in 'visit' methods. This also emits // a line to m_lineCoverageFile for testing. // TODO: Use C++20 std::source_location instead of a macro #ifdef VL_DEBUG #define SET_RESULT(vtxp) \ do { \ m_resp = vtxp; \ if (VL_UNLIKELY(m_lineCoverageFile.is_open())) m_lineCoverageFile << __LINE__ << '\n'; \ } while (false) #else #define SET_RESULT(vtxp) m_resp = vtxp; #endif // VISITORS void visit(DfgVertex* vtxp) override { // LCOV_EXCL_START vtxp->v3fatalSrc("TraceDriver - Unhandled vertex type: " << vtxp->typeName()); } // LCOV_EXCL_STOP void visit(DfgSplicePacked* vtxp) override { struct Driver final { DfgVertex* m_vtxp; uint32_t m_lsb; // LSB of driven range (internal, not Verilog) uint32_t m_msb; // MSB of driven range (internal, not Verilog) Driver() = delete; Driver(DfgVertex* vtxp, uint32_t lsb, uint32_t msb) : m_vtxp{vtxp} , m_lsb{lsb} , m_msb{msb} {} }; std::vector drivers; // Look at all the drivers, one might cover the whole range, but also gather all drivers bool tryWholeDefault = m_defaultp; const bool done = vtxp->foreachDriver([&](DfgVertex& src, uint32_t lsb) { const uint32_t msb = lsb + src.width() - 1; drivers.emplace_back(&src, lsb, msb); // Check if this driver covers any of the bits, then we can't use whole default if (m_msb >= lsb && msb >= m_lsb) tryWholeDefault = false; // If it does not cover the whole searched bit range, move on if (m_lsb < lsb || msb < m_msb) return false; // Driver covers whole search range, trace that and we are done SET_RESULT(trace(&src, m_msb - lsb, m_lsb - lsb)); return true; }); if (done) return; // Trace the default driver if no other drivers cover the searched range if (tryWholeDefault) { SET_RESULT(trace(m_defaultp, m_msb, m_lsb)); return; } // Hard case: We need to combine multiple drivers to produce the searched bit range // Sort drivers (they are non-overlapping) std::sort(drivers.begin(), drivers.end(), [](const Driver& a, const Driver& b) { // return a.m_lsb < b.m_lsb; }); // Gather terms std::vector termps; for (const Driver& driver : drivers) { // Driver is below the searched LSB, move on if (m_lsb > driver.m_msb) continue; // Driver is above the searched MSB, done if (driver.m_lsb > m_msb) break; // Gap below this driver, trace default to fill it if (driver.m_lsb > m_lsb) { UASSERT_OBJ(m_defaultp, vtxp, "Should have a default driver if needs tracing"); termps.emplace_back(trace(m_defaultp, driver.m_lsb - 1, m_lsb)); m_lsb = driver.m_lsb; } // Driver covers searched range, pick the needed/available bits uint32_t lim = std::min(m_msb, driver.m_msb); termps.emplace_back(trace(driver.m_vtxp, lim - driver.m_lsb, m_lsb - driver.m_lsb)); m_lsb = lim + 1; } if (m_msb >= m_lsb) { UASSERT_OBJ(m_defaultp, vtxp, "Should have a default driver if needs tracing"); termps.emplace_back(trace(m_defaultp, m_msb, m_lsb)); } // The earlier cheks cover the case when either a whole driver or the default covers // the whole range, so there should be at least 2 terms required here. UASSERT_OBJ(termps.size() >= 2, vtxp, "Should have returned in special cases"); // Concatenate all terms and set result DfgVertex* resp = termps.front(); for (size_t i = 1; i < termps.size(); ++i) { DfgVertex* const termp = termps[i]; DfgConcat* const catp = make(termp, resp->width() + termp->width()); catp->rhsp(resp); catp->lhsp(termp); resp = catp; } SET_RESULT(resp); } void visit(DfgVarPacked* vtxp) override { UASSERT_OBJ(!vtxp->isVolatile(), vtxp, "Should not trace through volatile VarPacked"); VL_RESTORER(m_defaultp); m_defaultp = vtxp->defaultp(); SET_RESULT(trace(vtxp->srcp(), m_msb, m_lsb)); } void visit(DfgArraySel* vtxp) override { // Only constant select const DfgConst* const idxp = vtxp->bitp()->cast(); if (!idxp) return; // From a variable const DfgVarArray* varp = vtxp->fromp()->cast(); if (!varp) return; UASSERT_OBJ(!varp->isVolatile(), vtxp, "Should not trace through volatile VarArray"); // Skip through intermediate variables while (varp->srcp() && varp->srcp()->is()) { varp = varp->srcp()->as(); UASSERT_OBJ(!varp->isVolatile(), vtxp, "Should not trace through volatile VarArray"); } // Find driver const DfgVertex* srcp = varp->srcp(); if (const DfgSpliceArray* const splicep = srcp->cast()) { srcp = splicep->driverAt(idxp->toSizeT()); } // Trace the driver, which at this point must be a UnitArray SET_RESULT(trace(srcp->as()->srcp(), m_msb, m_lsb)); } void visit(DfgConcat* vtxp) override { DfgVertex* const rhsp = vtxp->rhsp(); DfgVertex* const lhsp = vtxp->lhsp(); const uint32_t rWidth = rhsp->width(); // If the traced bits are wholly in the RHS if (rWidth > m_msb) { SET_RESULT(trace(rhsp, m_msb, m_lsb)); return; } // If the traced bits are wholly in the LHS if (m_lsb >= rWidth) { SET_RESULT(trace(lhsp, m_msb - rWidth, m_lsb - rWidth)); return; } // The traced bit spans both sides, trace both DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); resp->rhsp(trace(rhsp, rWidth - 1, m_lsb)); resp->lhsp(trace(lhsp, m_msb - rWidth, 0)); SET_RESULT(resp); } void visit(DfgReplicate* vtxp) override { DfgVertex* const srcp = vtxp->srcp(); const uint32_t sWidth = srcp->width(); // If we need more bits than the source, then we need the whole source if (m_msb - m_lsb + 1 > sWidth) { DfgReplicate* const repp = make(vtxp, vtxp->width()); repp->srcp(trace(srcp, sWidth - 1, 0)); repp->countp(vtxp->countp()); // Always a DfgConst DfgSel* const resp = make(vtxp, m_msb - m_lsb + 1); resp->fromp(repp); resp->lsb(m_lsb); SET_RESULT(resp); return; } // If the requested bits are within the same repliacted word if (m_msb / sWidth == m_lsb / sWidth) { SET_RESULT(trace(srcp, m_msb % sWidth, m_lsb % sWidth)); return; } // The requested bits span two replicated words DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); resp->rhsp(trace(srcp, sWidth - 1, m_lsb % sWidth)); resp->lhsp(trace(srcp, m_msb % sWidth, 0)); SET_RESULT(resp); } void visit(DfgExtend* vtxp) override { DfgVertex* const srcp = vtxp->srcp(); const uint32_t sWidth = srcp->width(); // If the traced bits are wholly in the input if (sWidth > m_msb) { SET_RESULT(trace(srcp, m_msb, m_lsb)); return; } // If the traced bits are wholly in the extension if (m_lsb >= sWidth) { SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); return; } // The traced bits span both sides DfgExtend* const resp = make(vtxp, m_msb - m_lsb + 1); resp->srcp(trace(srcp, sWidth - 1, m_lsb)); SET_RESULT(resp); } void visit(DfgExtendS* vtxp) override { DfgVertex* const srcp = vtxp->srcp(); const uint32_t sWidth = srcp->width(); // If the traced bits are wholly in the input if (sWidth > m_msb) { SET_RESULT(trace(srcp, m_msb, m_lsb)); return; } // If the traced bits are wholly in the extension if (m_lsb >= sWidth) { DfgVertex* const sp = trace(srcp, sWidth - 1, sWidth - 1); if (m_msb == m_lsb) { SET_RESULT(sp); } else { DfgExtendS* const resp = make(vtxp, m_msb - m_lsb + 1); resp->srcp(sp); SET_RESULT(resp); } return; } // The traced bits span both sides DfgExtendS* const resp = make(vtxp, m_msb - m_lsb + 1); resp->srcp(trace(srcp, sWidth - 1, m_lsb)); SET_RESULT(resp); } void visit(DfgSel* vtxp) override { const uint32_t lsb = vtxp->lsb(); SET_RESULT(trace(vtxp->srcp(), m_msb + lsb, m_lsb + lsb)); } void visit(DfgNot* vtxp) override { DfgNot* const resp = make(vtxp, m_msb - m_lsb + 1); resp->srcp(trace(vtxp->srcp(), m_msb, m_lsb)); SET_RESULT(resp); } void visit(DfgAnd* vtxp) override { SET_RESULT(traceBitwiseBinary(vtxp)); } void visit(DfgOr* vtxp) override { SET_RESULT(traceBitwiseBinary(vtxp)); } void visit(DfgXor* vtxp) override { SET_RESULT(traceBitwiseBinary(vtxp)); } void visit(DfgAdd* vtxp) override { SET_RESULT(traceAddSub(vtxp)); } void visit(DfgSub* vtxp) override { SET_RESULT(traceAddSub(vtxp)); } void visit(DfgRedAnd* vtxp) override { SET_RESULT(traceReduction(vtxp)); } void visit(DfgRedOr* vtxp) override { SET_RESULT(traceReduction(vtxp)); } void visit(DfgRedXor* vtxp) override { SET_RESULT(traceReduction(vtxp)); } void visit(DfgEq* vtxp) override { SET_RESULT(traceCmp(vtxp)); } void visit(DfgNeq* vtxp) override { SET_RESULT(traceCmp(vtxp)); } void visit(DfgLt* vtxp) override { SET_RESULT(traceCmp(vtxp)); } void visit(DfgLte* vtxp) override { SET_RESULT(traceCmp(vtxp)); } void visit(DfgGt* vtxp) override { SET_RESULT(traceCmp(vtxp)); } void visit(DfgGte* vtxp) override { SET_RESULT(traceCmp(vtxp)); } void visit(DfgShiftR* vtxp) override { DfgVertex* const lhsp = vtxp->lhsp(); if (const DfgConst* const rConstp = vtxp->rhsp()->cast()) { const uint32_t shiftAmnt = rConstp->toU32(); // Width of lower half of result const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? 0 : vtxp->width() - shiftAmnt; // If the traced bits are wholly in the input if (lowerWidth > m_msb) { SET_RESULT(trace(lhsp, m_msb + shiftAmnt, m_lsb + shiftAmnt)); return; } // If the traced bits are wholly in the extension if (m_lsb >= lowerWidth) { SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); return; } // The traced bits span both sides DfgExtend* const resp = make(vtxp, m_msb - m_lsb + 1); resp->srcp(trace(lhsp, lowerWidth - 1 + shiftAmnt, m_lsb + shiftAmnt)); SET_RESULT(resp); return; } DfgShiftR* const shiftrp = make(vtxp, vtxp->lhsp()->width() - m_lsb); shiftrp->rhsp(trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0)); shiftrp->lhsp(trace(vtxp->lhsp(), vtxp->lhsp()->width() - 1, m_lsb)); if (m_msb == vtxp->lhsp()->width() - 1) { SET_RESULT(shiftrp); return; } DfgSel* const selp = make(vtxp, m_msb - m_lsb + 1); selp->fromp(shiftrp); selp->lsb(0); SET_RESULT(selp); } void visit(DfgShiftL* vtxp) override { DfgVertex* const lhsp = vtxp->lhsp(); if (const DfgConst* const rConstp = vtxp->rhsp()->cast()) { const uint32_t shiftAmnt = rConstp->toU32(); // Width of lower half of result const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? vtxp->width() : shiftAmnt; // If the traced bits are wholly in the input if (m_lsb >= lowerWidth) { SET_RESULT(trace(lhsp, m_msb - shiftAmnt, m_lsb - shiftAmnt)); return; } // If the traced bits are wholly in the extension if (lowerWidth > m_msb) { SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); return; } // The traced bits span both sides DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); resp->rhsp(make(vtxp, resp->width() - (m_msb - lowerWidth + 1))); resp->lhsp(trace(lhsp, m_msb - shiftAmnt, lowerWidth - shiftAmnt)); SET_RESULT(resp); return; } DfgShiftL* const shiftlp = make(vtxp, m_msb + 1); shiftlp->rhsp(trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0)); shiftlp->lhsp(trace(vtxp->lhsp(), m_msb, 0)); if (m_lsb == 0) { SET_RESULT(shiftlp); return; } DfgSel* const selp = make(vtxp, m_msb - m_lsb + 1); selp->fromp(shiftlp); selp->lsb(m_lsb); SET_RESULT(selp); } void visit(DfgCond* vtxp) override { DfgCond* const resp = make(vtxp, m_msb - m_lsb + 1); resp->condp(trace(vtxp->condp(), vtxp->condp()->width() - 1, 0)); resp->thenp(trace(vtxp->thenp(), m_msb, m_lsb)); resp->elsep(trace(vtxp->elsep(), m_msb, m_lsb)); SET_RESULT(resp); } #undef SET_RESULT public: // CONSTRUCTOR TraceDriver(DfgGraph& dfg, Vtx2Scc& vtx2Scc) : m_dfg{dfg} , m_vtx2Scc{vtx2Scc} { #ifdef VL_DEBUG if (v3Global.opt.debugCheck()) { m_lineCoverageFile.open( // v3Global.opt.makeDir() + "/" + v3Global.opt.prefix() + "__V3DfgBreakCycles-TraceDriver-line-coverage.txt", // std::ios_base::out | std::ios_base::app); } #endif } // Given a Vertex that is part of an SCC, return a vertex that is equivalent // to 'vtxp[lsb +: width]', but is not part of the same SCC. This should only // be called if the bit range is known to be independent of the SCC, so the // trace can always succeed. DfgVertex* apply(DfgVertex& vtx, uint32_t lsb, uint32_t width) { VL_RESTORER(m_component); m_component = m_vtx2Scc.at(&vtx); return trace(&vtx, lsb + width - 1, lsb); } }; class IndependentBits final : public DfgVisitor { // STATE const Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map // Vertex to current bit mask map. The mask is set for the bits that are independent of the SCC std::unordered_map m_vtxp2Mask; std::deque m_workList; // Work list for the traversal std::unordered_map m_onWorkList; // Marks vertices on the work list #ifdef VL_DEBUG std::ofstream m_lineCoverageFile; // Line coverage file, just for testing #endif // METHODS // Predicate to check if a vertex should be analysed directly bool handledDirectly(const DfgVertex& vtx) const { if (!vtx.isPacked()) return false; if (vtx.is()) return false; return true; } // Retrieve the mask for the given vertex (create it with value 0 if needed) V3Number& mask(const DfgVertex& vtx) { UASSERT_OBJ(handledDirectly(vtx), &vtx, "Vertex should not be handled direclty"); // Look up (or create) mask for 'vtxp' return m_vtxp2Mask .emplace(std::piecewise_construct, // std::forward_as_tuple(&vtx), // std::forward_as_tuple(vtx.fileline(), static_cast(vtx.width()), 0)) // .first->second; } // Use this macro to call 'mask' in 'visit' methods. This also emits // a line to m_lineCoverageFile for testing. // TODO: Use C++20 std::source_location instead of a macro #ifdef VL_DEBUG #define MASK(vtxp) \ ([this](const DfgVertex& vtx) -> V3Number& { \ if (VL_UNLIKELY(m_lineCoverageFile.is_open())) m_lineCoverageFile << __LINE__ << '\n'; \ return mask(vtx); \ }(*vtxp)) #else #define MASK(vtxp) mask(*vtxp) #endif // Clear all bits at or below the most significant clear bit void floodTowardsLsb(V3Number& num) { for (int i = num.width() - 1; i >= 0; --i) { if (num.bitIs1(i)) continue; num.opSetRange(0, i + 1, '0'); break; } } // Clear all bits at or above the least significant clear bit void floodTowardsMsb(V3Number& num) { for (int i = 0; i < num.width(); ++i) { if (num.bitIs1(i)) continue; num.opSetRange(i, num.width() - i, '0'); break; } } void propagateFromDriver(V3Number& m, const DfgVertex* srcp) { // If there is no driver, we are done if (!srcp) return; // If it is driven by a splice, we need to combine the masks of the drivers if (const DfgSplicePacked* const splicep = srcp->cast()) { splicep->foreachDriver([&](const DfgVertex& src, uint32_t lo) { m.opSelInto(MASK(&src), lo, src.width()); return false; }); return; } // Otherwise, we just use the mask of the single driver m = MASK(srcp); } // VISITORS void visit(DfgVertex* vtxp) override { // LCOV_EXCL_START UASSERT_OBJ(handledDirectly(*vtxp), vtxp, "Vertex should be handled direclty"); UINFO(9, "IndependentBits - Unhandled vertex type: " << vtxp->typeName()); // Conservative assumption about all bits being dependent prevails } // LCOV_EXCL_STOP void visit(DfgVarPacked* vtxp) override { // We cannot trace through a volatile variable, so pretend all bits are dependent if (vtxp->isVolatile()) return; V3Number& m = MASK(vtxp); DfgVertex* const srcp = vtxp->srcp(); DfgVertex* const defaultp = vtxp->defaultp(); // If there is a default driver, we start from that if (defaultp) m = MASK(defaultp); // Then propagate mask from the driver propagateFromDriver(m, srcp); } void visit(DfgArraySel* vtxp) override { // Only constant select const DfgConst* const idxp = vtxp->bitp()->cast(); if (!idxp) return; // From a variable const DfgVarArray* varp = vtxp->fromp()->cast(); if (!varp) return; // We cannot trace through a volatile variable, so pretend all bits are dependent if (varp->isVolatile()) return; // Skip through intermediate variables while (varp->srcp() && varp->srcp()->is()) { varp = varp->srcp()->as(); if (varp->isVolatile()) return; } // Find driver const DfgVertex* srcp = varp->srcp(); if (!srcp) return; if (const DfgSpliceArray* const splicep = srcp->cast()) { srcp = splicep->driverAt(idxp->toSizeT()); if (!srcp) return; } const DfgUnitArray* uap = srcp->cast(); if (!uap) return; srcp = uap->srcp(); // Propagate from driver propagateFromDriver(MASK(vtxp), srcp); } void visit(DfgConcat* vtxp) override { const DfgVertex* const rhsp = vtxp->rhsp(); const DfgVertex* const lhsp = vtxp->lhsp(); V3Number& m = MASK(vtxp); m.opSelInto(MASK(rhsp), 0, rhsp->width()); m.opSelInto(MASK(lhsp), rhsp->width(), lhsp->width()); } void visit(DfgReplicate* vtxp) override { const uint32_t count = vtxp->countp()->as()->toU32(); const DfgVertex* const srcp = vtxp->srcp(); const uint32_t sWidth = srcp->width(); V3Number& vMask = MASK(vtxp); V3Number& sMask = MASK(srcp); for (uint32_t i = 0; i < count; ++i) vMask.opSelInto(sMask, i * sWidth, sWidth); } void visit(DfgSel* vtxp) override { const uint32_t lsb = vtxp->lsb(); const uint32_t msb = lsb + vtxp->width() - 1; MASK(vtxp).opSel(MASK(vtxp->fromp()), msb, lsb); } void visit(DfgExtend* vtxp) override { const DfgVertex* const srcp = vtxp->srcp(); const uint32_t sWidth = srcp->width(); V3Number& s = MASK(srcp); V3Number& m = MASK(vtxp); m.opSelInto(s, 0, sWidth); m.opSetRange(sWidth, vtxp->width() - sWidth, '1'); } void visit(DfgExtendS* vtxp) override { const DfgVertex* const srcp = vtxp->srcp(); const uint32_t sWidth = srcp->width(); V3Number& s = MASK(srcp); V3Number& m = MASK(vtxp); m.opSelInto(s, 0, sWidth); m.opSetRange(sWidth, vtxp->width() - sWidth, s.bitIs0(sWidth - 1) ? '0' : '1'); } void visit(DfgNot* vtxp) override { // MASK(vtxp) = MASK(vtxp->srcp()); } void visit(DfgAnd* vtxp) override { // MASK(vtxp).opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp())); } void visit(DfgOr* vtxp) override { // MASK(vtxp).opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp())); } void visit(DfgXor* vtxp) override { // MASK(vtxp).opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp())); } void visit(DfgAdd* vtxp) override { V3Number& m = MASK(vtxp); m.opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp())); floodTowardsMsb(m); } void visit(DfgSub* vtxp) override { // Same as Add: 2's complement (a - b) == (a + ~b + 1) V3Number& m = MASK(vtxp); m.opAnd(MASK(vtxp->lhsp()), MASK(vtxp->rhsp())); floodTowardsMsb(m); } void visit(DfgRedAnd* vtxp) override { // if (MASK(vtxp->lhsp()).isEqAllOnes()) { // MASK(vtxp).setAllBits1(); } } void visit(DfgRedOr* vtxp) override { // if (MASK(vtxp->lhsp()).isEqAllOnes()) { // MASK(vtxp).setAllBits1(); } } void visit(DfgRedXor* vtxp) override { // if (MASK(vtxp->lhsp()).isEqAllOnes()) { // MASK(vtxp).setAllBits1(); } } void visit(DfgEq* vtxp) override { const bool independent = MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes(); MASK(vtxp).setBit(0, independent ? '1' : '0'); } void visit(DfgNeq* vtxp) override { const bool independent = MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes(); MASK(vtxp).setBit(0, independent ? '1' : '0'); } void visit(DfgLt* vtxp) override { const bool independent = MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes(); MASK(vtxp).setBit(0, independent ? '1' : '0'); } void visit(DfgLte* vtxp) override { const bool independent = MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes(); MASK(vtxp).setBit(0, independent ? '1' : '0'); } void visit(DfgGt* vtxp) override { const bool independent = MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes(); MASK(vtxp).setBit(0, independent ? '1' : '0'); } void visit(DfgGte* vtxp) override { const bool independent = MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->rhsp()).isEqAllOnes(); MASK(vtxp).setBit(0, independent ? '1' : '0'); } void visit(DfgShiftR* vtxp) override { DfgVertex* const rhsp = vtxp->rhsp(); DfgVertex* const lhsp = vtxp->lhsp(); const uint32_t width = vtxp->width(); // Constant shift can be computed precisely if (DfgConst* const rConstp = rhsp->cast()) { const uint32_t shiftAmount = rConstp->toU32(); V3Number& m = MASK(vtxp); if (shiftAmount >= width) { m.setAllBits1(); } else { m.opShiftR(MASK(lhsp), rConstp->num()); m.opSetRange(width - shiftAmount, shiftAmount, '1'); } return; } // Otherwise, as the shift amount is non-negative, all independent // and consecutive top bits in the lhs yield an independent result // if the shift amount is independent. if (MASK(rhsp).isEqAllOnes()) { V3Number& m = MASK(vtxp); m = MASK(lhsp); floodTowardsLsb(m); } } void visit(DfgShiftL* vtxp) override { DfgVertex* const rhsp = vtxp->rhsp(); DfgVertex* const lhsp = vtxp->lhsp(); const uint32_t width = vtxp->width(); // Constant shift can be computed precisely if (DfgConst* const rConstp = rhsp->cast()) { const uint32_t shiftAmount = rConstp->toU32(); V3Number& m = MASK(vtxp); if (shiftAmount >= width) { m.setAllBits1(); } else { m.opShiftL(MASK(lhsp), rConstp->num()); m.opSetRange(0, shiftAmount, '1'); } return; } // Otherwise, as the shift amount is non-negative, all independent // and consecutive bottom bits in the lhs yield an independent result // if the shift amount is independent. if (MASK(rhsp).isEqAllOnes()) { V3Number& m = MASK(vtxp); m = MASK(lhsp); floodTowardsMsb(m); } } void visit(DfgCond* vtxp) override { if (MASK(vtxp->condp()).isEqAllOnes()) { MASK(vtxp).opAnd(MASK(vtxp->thenp()), MASK(vtxp->elsep())); } } #undef MASK // Enqueue sinks of vertex to work list for traversal - only called from constructor void enqueueSinks(DfgVertex& vtx) { vtx.foreachSink([&](DfgVertex& sink) { // Ignore if sink is not part of an SCC, already has all bits marked independent if (!m_vtx2Scc.at(sink)) return false; // If a vertex is not handled directly, recursively enqueue its sinks instead if (!handledDirectly(sink)) { enqueueSinks(sink); return false; } // Otherwise just enqueue it bool& onWorkList = m_onWorkList[&sink]; if (!onWorkList) { UINFO(9, "Enqueuing vertex " << &sink << " " << sink.typeName()); m_workList.emplace_back(&sink); } onWorkList = true; return false; }); }; IndependentBits(DfgGraph& dfg, const Vtx2Scc& vtx2Scc) : m_vtx2Scc{vtx2Scc} { #ifdef VL_DEBUG if (v3Global.opt.debugCheck()) { m_lineCoverageFile.open( // v3Global.opt.makeDir() + "/" + v3Global.opt.prefix() + "__V3DfgBreakCycles-IndependentBits-line-coverage.txt", // std::ios_base::out | std::ios_base::app); } #endif // Set up the initial conditions: // - For vertices not in an SCC, mark all bits as independent // - For vertices in an SCC, dispatch to analyse at least once, as some vertices // always assign constants bits, which are always independent (eg Extend/Shift) // Enqueue sinks of all SCC vertices that have at least one independent bit dfg.forEachVertex([&](DfgVertex& vtx) { if (!handledDirectly(vtx)) return; if (m_vtx2Scc.at(&vtx)) return; mask(vtx).setAllBits1(); }); dfg.forEachVertex([&](DfgVertex& vtx) { if (!handledDirectly(vtx)) return; if (!m_vtx2Scc.at(&vtx)) return; iterate(&vtx); UINFO(9, "Initial independent bits of " << &vtx << " " << vtx.typeName() << " are: " << mask(vtx).displayed(vtx.fileline(), "%b")); if (!mask(vtx).isEqZero()) enqueueSinks(vtx); }); // Propagate independent bits until no more changes are made while (!m_workList.empty()) { // Grab next item DfgVertex* const currp = m_workList.front(); m_workList.pop_front(); m_onWorkList[currp] = false; // Should not enqueue vertices that are not in an SCC UASSERT_OBJ(m_vtx2Scc.at(currp), currp, "Vertex should be in an SCC"); // Should only enqueue packed vertices UASSERT_OBJ(handledDirectly(*currp), currp, "Vertex should be handled directly"); // Grab current mask of item const V3Number& currMask = mask(*currp); // Remember current mask so we can check if it changed const V3Number prevMask = currMask; UINFO(9, "Analyzing independent bits of " << currp << " " << currp->typeName()); // Dispatch it to update the mask in the visit methods iterate(currp); // If mask changed, enqueue sinks if (!prevMask.isCaseEq(currMask)) { UINFO(9, "Independent bits of " // << currp << " " << currp->typeName() << " changed" // << "\n form: " << prevMask.displayed(currp->fileline(), "%b") << "\n to: " << currMask.displayed(currp->fileline(), "%b")); enqueueSinks(*currp); // Check the mask only ever expands (no bit goes 1 -> 0) if (VL_UNLIKELY(v3Global.opt.debugCheck())) { V3Number notCurr{currMask}; notCurr.opNot(currMask); V3Number prevAndNotCurr{currMask}; prevAndNotCurr.opAnd(prevMask, notCurr); UASSERT_OBJ(prevAndNotCurr.isEqZero(), currp, "Mask should only expand"); } } } } public: // Given a DfgGraph, and a map from vertices to the SCCs they reside in, // returns a map from vertices to a bit mask, where a bit in the mask is // set if the corresponding bit in that vertex is known to be independent // of the values of vertices in the same SCC as the vertex resides in. static std::unordered_map apply(DfgGraph& dfg, const Vtx2Scc& vtx2Scc) { return std::move(IndependentBits{dfg, vtx2Scc}.m_vtxp2Mask); } }; class FixUp final { DfgGraph& m_dfg; // The graph being processed Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map TraceDriver m_traceDriver{m_dfg, m_vtx2Scc}; // The independent bits map const std::unordered_map m_independentBits = IndependentBits::apply(m_dfg, m_vtx2Scc); size_t m_nImprovements = 0; // Number of improvements mde // Returns a bitmask set if that bit of 'vtx' is used (has a sink) static V3Number computeUsedBits(DfgVertex& vtx) { V3Number result{vtx.fileline(), static_cast(vtx.width()), 0}; vtx.foreachSink([&result](DfgVertex& sink) { // If used via a Sel, mark the selected bits used if (const DfgSel* const selp = sink.cast()) { result.opSetRange(selp->lsb(), selp->width(), '1'); return false; } // Used without a Sel, so all bits are used result.setAllBits1(); return true; }); return result; } static std::string debugStr(const DfgVertex& vtx) { // LCOV_EXCL_START if (const DfgArraySel* const aselp = vtx.cast()) { const size_t i = aselp->bitp()->as()->toSizeT(); return debugStr(*aselp->fromp()) + "[" + std::to_string(i) + "]"; } if (const DfgVertexVar* const varp = vtx.cast()) { return varp->nodep()->name(); } vtx.v3fatalSrc("Unhandled node type"); } // LCOV_EXCL_STOP // Trace drivers of independent bits of 'vtxp' in the range '[hi:lo]' // append replacement terms to 'termps'. void gatherTerms(std::vector& termps, DfgVertex& vtx, const V3Number& indpBits, const uint32_t hi, const uint32_t lo) { // Iterate through consecutive dependent/non-dependet ranges within [hi:lo] bool isIndependent = indpBits.bitIs1(lo); uint32_t lsb = lo; for (uint32_t msb = lo; msb <= hi; ++msb) { const bool endRange = msb == hi || isIndependent != indpBits.bitIs1(msb + 1); if (!endRange) continue; const uint32_t width = msb - lsb + 1; // Compute the term to use for this range if (isIndependent) { // If the range is independent of the cycles, trace its driver ++m_nImprovements; termps.emplace_back(m_traceDriver.apply(vtx, lsb, width)); } else { // If dependent, fall back on using the part of the variable DfgSel* const selp = new DfgSel{m_dfg, vtx.fileline(), DfgDataType::packed(width)}; // Same component as 'vtxp', as reads 'vtxp' and will replace 'vtxp' m_vtx2Scc[selp] = m_vtx2Scc.at(vtx); // Do not connect selp->fromp yet, need to do afer replacing 'vtxp' selp->lsb(lsb); termps.emplace_back(selp); } // Next iteration isIndependent = !isIndependent; lsb = msb + 1; } } void fixUpPacked(DfgVertex& vtx) { UASSERT_OBJ(vtx.isPacked(), &vtx, "Should be a packed type"); UASSERT_OBJ(!vtx.is(), &vtx, "Should not be a splice"); // Get which bits of 'vtxp' are independent of the SCCs const V3Number& indpBits = m_independentBits.at(&vtx); UINFO(9, "Independent bits of '" << debugStr(vtx) << "' are " << indpBits.displayed(vtx.fileline(), "%b")); // Can't do anything if all bits are dependent if (indpBits.isEqZero()) return; // Figure out which bits of 'vtxp' are used const V3Number usedBits = computeUsedBits(vtx); UINFO(9, "Used bits of '" << debugStr(vtx) << "' are " << usedBits.displayed(vtx.fileline(), "%b")); // Nothing to do if no used bits are independen (all used bits are dependent) V3Number usedAndIndependent{vtx.fileline(), static_cast(vtx.width()), 0}; usedAndIndependent.opAnd(usedBits, indpBits); if (usedAndIndependent.isEqZero()) return; // We are computing the terms to concatenate and replace 'vtxp' with std::vector termps; // Iterate through consecutive used/unused ranges FileLine* const flp = vtx.fileline(); const uint32_t width = vtx.width(); bool isUsed = usedBits.bitIs1(0); // Is current range used uint32_t lsb = 0; // LSB of current range for (uint32_t msb = 0; msb < width; ++msb) { const bool endRange = msb == width - 1 || isUsed != usedBits.bitIs1(msb + 1); if (!endRange) continue; if (isUsed) { // The range is used, compute the replacement terms gatherTerms(termps, vtx, indpBits, msb, lsb); } else { // The range is not used, just use constant 0 as a placeholder DfgConst* const constp = new DfgConst{m_dfg, flp, msb - lsb + 1, 0}; m_vtx2Scc[constp] = 0; termps.emplace_back(constp); } // Next iteration isUsed = !isUsed; lsb = msb + 1; } // Concatenate all the terms to create the replacement DfgVertex* replacementp = termps.front(); const uint64_t vComp = m_vtx2Scc.at(vtx); for (size_t i = 1; i < termps.size(); ++i) { DfgVertex* const termp = termps[i]; const uint32_t catWidth = replacementp->width() + termp->width(); DfgConcat* const catp = new DfgConcat{m_dfg, flp, DfgDataType::packed(catWidth)}; catp->rhsp(replacementp); catp->lhsp(termp); // Need to figure out which component the replacement vertex // belongs to. If any of the terms are of the original // component, it's part of that component, otherwise its not // cyclic (all terms are from outside the original component, // and feed into the original component). const uint64_t tComp = m_vtx2Scc.at(termp); const uint64_t rComp = m_vtx2Scc.at(replacementp); m_vtx2Scc[catp] = tComp == vComp || rComp == vComp ? vComp : 0; replacementp = catp; } // Replace the vertex vtx.replaceWith(replacementp); // Connect the Sel nodes in the replacement for (DfgVertex* const termp : termps) { if (DfgSel* const selp = termp->cast()) { if (!selp->fromp()) selp->fromp(&vtx); } } } void main(DfgVertexVar& var) { UINFO(9, "FixUp of " << var.nodep()->name()); if (var.is()) { // For Packed variables, fix up as whole fixUpPacked(var); } else if (var.is()) { // For array variables, fix up element-wise const uint64_t component = m_vtx2Scc.at(var); var.foreachSink([&](DfgVertex& sink) { // Ignore if sink is not part of same cycle if (m_vtx2Scc.at(sink) != component) return false; // Only handle ArraySels with constant index DfgArraySel* const aselp = sink.cast(); if (!aselp) return false; if (!aselp->bitp()->is()) return false; // Fix up the word fixUpPacked(*aselp); return false; }); } UINFO(9, "FixUp made " << m_nImprovements << " improvements"); } FixUp(DfgGraph& dfg, Vtx2Scc& vtx2Scc) : m_dfg{dfg} , m_vtx2Scc{vtx2Scc} {} public: // Compute which bits of vertices are independent of the SCC they reside // in, and replace rferences to used independent bits with an equivalent // vertex that is not part of the same SCC. static size_t apply(DfgGraph& dfg, Vtx2Scc& vtx2Scc) { FixUp fixUp{dfg, vtx2Scc}; // TODO: Compute minimal feedback vertex set and cut only those. // Sadly that is a computationally hard problem. // Fix up cyclic variables for (DfgVertexVar& vtx : dfg.varVertices()) { if (vtx2Scc.at(vtx)) fixUp.main(vtx); } // Return the number of improvements made return fixUp.m_nImprovements; } }; std::pair, bool> // breakCycles(const DfgGraph& dfg, V3DfgContext& ctx) { // Shorthand for dumping graph at given dump level const auto dump = [&](int level, const DfgGraph& dfg, const std::string& name) { if (dumpDfgLevel() >= level) dfg.dumpDotFilePrefixed(ctx.prefix() + "breakCycles-" + name); }; // Can't do much with trivial things ('a = a' or 'a[1] = a[0]'), so bail if (dfg.size() <= 2) { UINFO(7, "Graph is trivial"); dump(9, dfg, "trivial"); ++ctx.m_breakCyclesContext.m_nTrivial; return {nullptr, false}; } // AstNetlist/AstNodeModule user2 used as sequence numbers for temporaries const VNUser2InUse user2InUse; // Show input for debugging dump(7, dfg, "input"); // We might fail to make any improvements, so first create a clone of the // graph. This is what we will be working on, and return if successful. // Do not touch the input graph. std::unique_ptr resultp = dfg.clone(); // Just shorthand for code below DfgGraph& res = *resultp; dump(9, res, "clone"); // How many improvements have we made size_t nImprovements = 0; size_t prevNImprovements; // Iterate while an improvement can be made and the graph is still cyclic do { // Color SCCs (populates DfgVertex::user()) Vtx2Scc vtx2Scc = res.makeUserMap(); const uint32_t numNonTrivialSCCs = V3DfgPasses::colorStronglyConnectedComponents(res, vtx2Scc); // Congrats if it has become acyclic if (!numNonTrivialSCCs) { UINFO(7, "Graph became acyclic after " << nImprovements << " improvements"); dump(7, res, "result-acyclic"); ++ctx.m_breakCyclesContext.m_nFixed; return {std::move(resultp), true}; } // Fix up independent ranges in vertices UINFO(9, "New iteration after " << nImprovements << " improvements"); prevNImprovements = nImprovements; const size_t nFixed = FixUp::apply(res, vtx2Scc); if (nFixed) { nImprovements += nFixed; ctx.m_breakCyclesContext.m_nImprovements += nFixed; dump(9, res, "FixUp"); } } while (nImprovements != prevNImprovements); if (dumpDfgLevel() >= 9) { Vtx2Scc vtx2Scc = res.makeUserMap(); V3DfgPasses::colorStronglyConnectedComponents(res, vtx2Scc); res.dumpDotFilePrefixed(ctx.prefix() + "breakCycles-remaining", [&](const DfgVertex& vtx) { return vtx2Scc[vtx]; // }); } // If an improvement was made, return the still cyclic improved graph if (nImprovements) { UINFO(7, "Graph was improved " << nImprovements << " times"); dump(7, res, "result-improved"); ++ctx.m_breakCyclesContext.m_nImproved; return {std::move(resultp), false}; } // No improvement was made UINFO(7, "Graph NOT improved"); dump(7, res, "result-original"); ++ctx.m_breakCyclesContext.m_nUnchanged; return {nullptr, false}; } } //namespace V3DfgBreakCycles std::pair, bool> // V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgContext& ctx) { auto pair = V3DfgBreakCycles::breakCycles(dfg, ctx); if (pair.first) { if (v3Global.opt.debugCheck()) V3DfgPasses::typeCheck(*pair.first); V3DfgPasses::removeUnused(*pair.first); } return pair; }