diff --git a/src/V3DfgBreakCycles.cpp b/src/V3DfgBreakCycles.cpp index af313750b..18e46e289 100644 --- a/src/V3DfgBreakCycles.cpp +++ b/src/V3DfgBreakCycles.cpp @@ -29,26 +29,27 @@ VL_DEFINE_DEBUG_FUNCTIONS; +namespace V3DfgBreakCycles { + // Throughout these algorithm, we use the DfgUserMap as a map to the SCC number using Vtx2Scc = DfgUserMap; class TraceDriver final : public DfgVisitor { // TYPES - - // Structure denoting currently visited vertex with the MSB and LSB we are searching for - struct Visited final { + // Key for caching the result of a trace + struct CacheKey final { DfgVertex* m_vtxp; uint32_t m_lsb; uint32_t m_msb; - Visited() = delete; - Visited(DfgVertex* vtxp, uint32_t lsb, uint32_t msb) + CacheKey() = delete; + CacheKey(DfgVertex* vtxp, uint32_t lsb, uint32_t msb) : m_vtxp{vtxp} , m_lsb{lsb} , m_msb{msb} {} struct Hash final { - size_t operator()(const Visited& item) const { + size_t operator()(const CacheKey& item) const { // cppcheck-suppress unreadVariable V3Hash hash{item.m_vtxp}; hash += item.m_lsb; @@ -58,7 +59,7 @@ class TraceDriver final : public DfgVisitor { }; struct Equal final { - bool operator()(const Visited& a, const Visited& b) const { + bool operator()(const CacheKey& a, const CacheKey& b) const { return a.m_vtxp == b.m_vtxp && a.m_lsb == b.m_lsb && a.m_msb == b.m_msb; } }; @@ -67,21 +68,15 @@ class TraceDriver final : public DfgVisitor { // STATE DfgGraph& m_dfg; // The graph being processed Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map - // The strongly connected component we are trying to escape - const uint64_t m_component; - const bool m_aggressive; // Trace aggressively, creating intermediate ops + // The strongly connected component we are currently trying to escape + uint64_t m_component = 0; uint32_t m_lsb = 0; // LSB to extract from the currently visited Vertex uint32_t m_msb = 0; // MSB to extract from the currently visited Vertex - DfgVertex* m_defaultp = nullptr; // When tracing a variable, this is its 'defaultp', if any - // Result cache for reusing already traced vertices - std::unordered_map m_cache; // Result of tracing the currently visited Vertex. Use SET_RESULT below! DfgVertex* m_resp = nullptr; - std::vector m_newVtxps; // New vertices created during the traversal - - std::vector m_stack; // Stack of currently visited vertices - // Denotes if a 'Visited' entry appear in m_stack - std::unordered_map m_visited; + DfgVertex* m_defaultp = nullptr; // When tracing a variable, this is its 'defaultp', if any + // Result cache for reusing already traced vertices + std::unordered_map m_cache; #ifdef VL_DEBUG std::ofstream m_lineCoverageFile; // Line coverage file, just for testing @@ -89,12 +84,10 @@ class TraceDriver final : public DfgVisitor { // METHODS - // Create and return a new Vertex and add it to m_newVtxps. Fileline is - // taken from 'refp', but 'refp' is otherwise not used. You should - // always use this to create new vertices, so unused ones (if a trace - // eventually fails) can be cleaned up at the end. This also sets the - // vertex user to 0, indicating the new vertex is not part of a - // strongly connected component. This should always be true, as all the + // Create and return a new Vertex. Always use this to create new vertices. + // Fileline is taken from 'refp', but 'refp' is otherwise not used. + // Also sets m_vtx2Scc[vtx] to 0, indicating the new vertex is not part of + // a strongly connected component. This should always be true, as all the // vertices we create here are driven from outside the component we are // trying to escape, and will sink into that component. Given those are // separate SCCs, these new vertices must be acyclic. @@ -104,20 +97,9 @@ class TraceDriver final : public DfgVisitor { && !std::is_base_of::value, "Should only make operation vertices and constants"); - constexpr bool okWithoutAggressive = // - std::is_same::value // - || std::is_same::value // - || std::is_same::value // - || std::is_same::value; - - UASSERT_OBJ( - okWithoutAggressive || m_aggressive, refp, - "Should only create Const, Sel, Concat, Exend Vertices without aggressive mode"); - if VL_CONSTEXPR_CXX17 (std::is_same::value) { DfgConst* const vtxp = new DfgConst{m_dfg, refp->fileline(), width, 0}; m_vtx2Scc[vtxp] = 0; - m_newVtxps.emplace_back(vtxp); return reinterpret_cast(vtxp); } else { // TODO: this is a gross hack around lack of C++17 'if constexpr' Vtx is always Vertex @@ -127,7 +109,6 @@ class TraceDriver final : public DfgVisitor { Vertex>::type; Vtx* const vtxp = new Vtx{m_dfg, refp->fileline(), DfgDataType::packed(width)}; m_vtx2Scc[vtxp] = 0; - m_newVtxps.emplace_back(vtxp); return reinterpret_cast(vtxp); } } @@ -143,71 +124,51 @@ class TraceDriver final : public DfgVisitor { DfgVertexVar* const varp = m_dfg.makeNewVar(flp, name, vtxp->dtype(), scopep); varp->varp()->isInternal(true); varp->tmpForp(varp->nodep()); + m_vtx2Scc[varp] = 0; return varp; } - // Continue tracing drivers of the given vertex, at the given LSB. Every - // visitor should call this to continue the traversal, then immediately - // return after the call. 'visit' methods should not call 'iterate', call - // this method instead, which checks for cycles. + // Continue tracing drivers of the given vertex, at the given LSB. + // Every visitor should call this to continue the traversal. DfgVertex* trace(DfgVertex* const vtxp, const uint32_t msb, const uint32_t lsb) { UASSERT_OBJ(vtxp->isPacked(), vtxp, "Can only trace packed type vertices"); UASSERT_OBJ(vtxp->size() > msb, vtxp, "Traced Vertex too narrow"); - // Push to stack - m_stack.emplace_back(vtxp, msb, lsb); - bool& onStackr = m_visited[m_stack.back()]; - // Save onStack before updating - const bool wasOnStack = onStackr; - // We are tracing the vertex now - onStackr = true; - - // The resulting driver that is not part of m_component - DfgVertex* resp = nullptr; - - // Retrieve cache entry - const auto& cachePair = m_cache.emplace(m_stack.back(), nullptr); - DfgVertex*& cachedRespr = cachePair.first->second; + // Get the cache entry, which is the resulting driver that is not part of + // the same component as vtxp + DfgVertex*& respr = m_cache + .emplace(std::piecewise_construct, // + std::forward_as_tuple(vtxp, msb, lsb), // + std::forward_as_tuple(nullptr)) + .first->second; // Trace the vertex - if (wasOnStack) { - // If it was already on the stack, then this is a true - // combinational cycles, terminate the trace. - // Note: could issue a "proper combinational cycle" error here, - // but constructing a legible error message is hard as the Vertex - // Filelines can be very rough after optimizations (could consider - // reporting only the variables involved). Also this pass might - // run mulitple times and report the same error again. There will - // be an UNOPTFLAT issued during scheduling anyway, and the true - // cycle might still settle at run-time. - resp = nullptr; - } else if (!cachePair.second) { + if (respr) { // If already traced this vtxp/msb/lsb, just use the result. // This is important to avoid combinatorial explosion when the // same sub-expression is needed multiple times. - resp = cachedRespr; - } else if (m_vtx2Scc[vtxp] != m_component) { + } else if (m_vtx2Scc.at(vtxp) != m_component) { // If the currently traced vertex is in a different component, // then we found what we were looking for. - resp = vtxp; + respr = vtxp; // If the result is a splice, we need to insert a temporary for it // as a splice cannot be fed into arbitray logic - if (DfgVertexSplice* const splicep = resp->cast()) { + if (DfgVertexSplice* const splicep = respr->cast()) { DfgVertexVar* const tmpp = createTmp("TraceDriver", splicep); // Note: we can't do 'splicep->replaceWith(tmpp)', as other // variable sinks of the splice might have a defaultp driver. tmpp->srcp(splicep); - resp = tmpp; + respr = tmpp; } // Apply a Sel to extract the relevant bits if only a part is needed - if (msb != resp->width() - 1 || lsb != 0) { - DfgSel* const selp = make(resp, msb - lsb + 1); - selp->fromp(resp); + if (msb != respr->width() - 1 || lsb != 0) { + DfgSel* const selp = make(respr, msb - lsb + 1); + selp->fromp(respr); selp->lsb(lsb); - resp = selp; + respr = selp; } } else { - // Otherwise visit the vertex + // Otherwise visit the vertex to trace it VL_RESTORER(m_msb); VL_RESTORER(m_lsb); VL_RESTORER(m_resp); @@ -215,102 +176,55 @@ class TraceDriver final : public DfgVisitor { m_lsb = lsb; m_resp = nullptr; iterate(vtxp); - resp = m_resp; + respr = m_resp; } - UASSERT_OBJ(!resp || resp->width() == (msb - lsb + 1), vtxp, "Wrong result width"); - - // Save to cache - cachedRespr = resp; - - // Pop from stack - onStackr = false; - m_stack.pop_back(); - - // Done - if (!resp) UINFO(9, "TraceDriver - Failed to trace vertex of type: " << vtxp->typeName()); - return resp; + // We only ever trace drivers of bits that are known to be independent + // of the cycles, so we should always be able to find an acyclic driver. + UASSERT_OBJ(respr, vtxp, "Tracing driver failed for " << vtxp->typeName()); + UASSERT_OBJ(respr->width() == (msb - lsb + 1), vtxp, "Wrong result width"); + return respr; } template Vertex* traceBitwiseBinary(Vertex* vtxp) { static_assert(std::is_base_of::value, "Should only call on DfgVertexBinary"); - if (DfgVertex* const rp = trace(vtxp->rhsp(), m_msb, m_lsb)) { - if (DfgVertex* const lp = trace(vtxp->lhsp(), m_msb, m_lsb)) { - Vertex* const resp = make(vtxp, m_msb - m_lsb + 1); - resp->rhsp(rp); - resp->lhsp(lp); - return resp; - } - } - return nullptr; + Vertex* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->rhsp(trace(vtxp->rhsp(), m_msb, m_lsb)); + resp->lhsp(trace(vtxp->lhsp(), m_msb, m_lsb)); + return resp; } template DfgVertex* traceAddSub(Vertex* vtxp) { static_assert(std::is_base_of::value, "Should only call on DfgVertexBinary"); - if (DfgVertex* const rp = trace(vtxp->rhsp(), m_msb, 0)) { - if (DfgVertex* const lp = trace(vtxp->lhsp(), m_msb, 0)) { - Vertex* const opp = make(vtxp, m_msb + 1); - opp->rhsp(rp); - opp->lhsp(lp); - DfgSel* const selp = make(vtxp, m_msb - m_lsb + 1); - selp->fromp(opp); - selp->lsb(m_lsb); - return selp; - } - } - return nullptr; + Vertex* const opp = make(vtxp, m_msb + 1); + opp->rhsp(trace(vtxp->rhsp(), m_msb, 0)); + opp->lhsp(trace(vtxp->lhsp(), m_msb, 0)); + DfgSel* const selp = make(vtxp, m_msb - m_lsb + 1); + selp->fromp(opp); + selp->lsb(m_lsb); + return selp; } template Vertex* traceReduction(Vertex* vtxp) { static_assert(std::is_base_of::value, "Should only call on DfgVertexUnary"); - if (DfgVertex* const sp = trace(vtxp->srcp(), vtxp->srcp()->width() - 1, 0)) { - Vertex* const resp = make(vtxp, 1); - resp->srcp(sp); - return resp; - } - return nullptr; + Vertex* const resp = make(vtxp, 1); + resp->srcp(trace(vtxp->srcp(), vtxp->srcp()->width() - 1, 0)); + return resp; } template Vertex* traceCmp(Vertex* vtxp) { static_assert(std::is_base_of::value, "Should only call on DfgVertexBinary"); - if (DfgVertex* const rp = trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0)) { - if (DfgVertex* const lp = trace(vtxp->lhsp(), vtxp->lhsp()->width() - 1, 0)) { - Vertex* const resp = make(vtxp, m_msb - m_lsb + 1); - resp->rhsp(rp); - resp->lhsp(lp); - return resp; - } - } - return nullptr; - } - - // Predicate to do determine if vtxp[$bits(vtxp)-1:idx] is known to be zero - // Somewhat rudimentary but sufficient for current purposes. - static bool knownToBeZeroAtAndAbove(const DfgVertex* vtxp, uint32_t idx) { - if (const DfgConcat* const catp = vtxp->cast()) { - const DfgConst* const lConstp = catp->lhsp()->cast(); - return lConstp && idx >= catp->rhsp()->width() && lConstp->isZero(); - } - if (const DfgExtend* const extp = vtxp->cast()) { - return idx >= extp->srcp()->width(); - } - return false; - } - - // Like knownToBeZeroAtAndAbove, but checks vtxp[idx:0] - static bool knownToBeZeroAtAndBelow(const DfgVertex* vtxp, uint32_t idx) { - if (const DfgConcat* const catp = vtxp->cast()) { - const DfgConst* const rConstp = catp->rhsp()->cast(); - return rConstp && idx < rConstp->width() && rConstp->isZero(); - } - return false; + Vertex* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->rhsp(trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0)); + resp->lhsp(trace(vtxp->lhsp(), vtxp->lhsp()->width() - 1, 0)); + return resp; } // Use this macro to set the result in 'visit' methods. This also emits @@ -328,8 +242,7 @@ class TraceDriver final : public DfgVisitor { // VISITORS void visit(DfgVertex* vtxp) override { // LCOV_EXCL_START - // Base case: cannot continue ... - UINFO(9, "TraceDriver - Unhandled vertex type: " << vtxp->typeName()); + vtxp->v3fatalSrc("TraceDriver - Unhandled vertex type: " << vtxp->typeName()); } // LCOV_EXCL_STOP void visit(DfgSplicePacked* vtxp) override { @@ -345,7 +258,7 @@ class TraceDriver final : public DfgVisitor { }; std::vector drivers; - // Look at all the drivers, one might cover the whole range, but also gathe all drivers + // Look at all the drivers, one might cover the whole range, but also gather all drivers bool tryWholeDefault = m_defaultp; const bool done = vtxp->foreachDriver([&](DfgVertex& src, uint32_t lsb) { const uint32_t msb = lsb + src.width() - 1; @@ -368,9 +281,10 @@ class TraceDriver final : public DfgVisitor { // Hard case: We need to combine multiple drivers to produce the searched bit range - // Sort ragnes (they are non-overlapping) - std::sort(drivers.begin(), drivers.end(), - [](const Driver& a, const Driver& b) { return a.m_lsb < b.m_lsb; }); + // Sort drivers (they are non-overlapping) + std::sort(drivers.begin(), drivers.end(), [](const Driver& a, const Driver& b) { // + return a.m_lsb < b.m_lsb; + }); // Gather terms std::vector termps; @@ -381,25 +295,18 @@ class TraceDriver final : public DfgVisitor { if (driver.m_lsb > m_msb) break; // Gap below this driver, trace default to fill it if (driver.m_lsb > m_lsb) { - if (!m_defaultp) return; - DfgVertex* const termp = trace(m_defaultp, driver.m_lsb - 1, m_lsb); - if (!termp) return; - termps.emplace_back(termp); + UASSERT_OBJ(m_defaultp, vtxp, "Should have a default driver if needs tracing"); + termps.emplace_back(trace(m_defaultp, driver.m_lsb - 1, m_lsb)); m_lsb = driver.m_lsb; } // Driver covers searched range, pick the needed/available bits uint32_t lim = std::min(m_msb, driver.m_msb); - DfgVertex* const termp - = trace(driver.m_vtxp, lim - driver.m_lsb, m_lsb - driver.m_lsb); - if (!termp) return; - termps.emplace_back(termp); + termps.emplace_back(trace(driver.m_vtxp, lim - driver.m_lsb, m_lsb - driver.m_lsb)); m_lsb = lim + 1; } if (m_msb >= m_lsb) { - if (!m_defaultp) return; - DfgVertex* const termp = trace(m_defaultp, m_msb, m_lsb); - if (!termp) return; - termps.emplace_back(termp); + UASSERT_OBJ(m_defaultp, vtxp, "Should have a default driver if needs tracing"); + termps.emplace_back(trace(m_defaultp, m_msb, m_lsb)); } // The earlier cheks cover the case when either a whole driver or the default covers @@ -419,12 +326,10 @@ class TraceDriver final : public DfgVisitor { } void visit(DfgVarPacked* vtxp) override { + UASSERT_OBJ(!vtxp->isVolatile(), vtxp, "Should not trace through volatile VarPacked"); VL_RESTORER(m_defaultp); m_defaultp = vtxp->defaultp(); - if (DfgVertex* const srcp = vtxp->srcp()) { - SET_RESULT(trace(srcp, m_msb, m_lsb)); - return; - } + SET_RESULT(trace(vtxp->srcp(), m_msb, m_lsb)); } void visit(DfgArraySel* vtxp) override { @@ -434,30 +339,19 @@ class TraceDriver final : public DfgVisitor { // From a variable const DfgVarArray* varp = vtxp->fromp()->cast(); if (!varp) return; + UASSERT_OBJ(!varp->isVolatile(), vtxp, "Should not trace through volatile VarArray"); // Skip through intermediate variables while (varp->srcp() && varp->srcp()->is()) { varp = varp->srcp()->as(); + UASSERT_OBJ(!varp->isVolatile(), vtxp, "Should not trace through volatile VarArray"); } // Find driver - if (!varp->srcp()) return; - - // Driver might be a splice - if (const DfgSpliceArray* const splicep = varp->srcp()->cast()) { - const DfgVertex* const driverp = splicep->driverAt(idxp->toSizeT()); - if (!driverp) return; - const DfgUnitArray* const uap = driverp->cast(); - if (!uap) return; - // Trace the driver - SET_RESULT(trace(uap->srcp(), m_msb, m_lsb)); - return; + const DfgVertex* srcp = varp->srcp(); + if (const DfgSpliceArray* const splicep = srcp->cast()) { + srcp = splicep->driverAt(idxp->toSizeT()); } - - // Or a unit array - const DfgUnitArray* const uap = varp->srcp()->cast(); - if (!uap) return; - // Trace the driver - UASSERT_OBJ(idxp->toSizeT() == 0, vtxp, "Array Index out of range"); - SET_RESULT(trace(uap->srcp(), m_msb, m_lsb)); + // Trace the driver, which at this point must be a UnitArray + SET_RESULT(trace(srcp->as()->srcp(), m_msb, m_lsb)); } void visit(DfgConcat* vtxp) override { @@ -474,16 +368,37 @@ class TraceDriver final : public DfgVisitor { SET_RESULT(trace(lhsp, m_msb - rWidth, m_lsb - rWidth)); return; } - // The traced bit spans both sides, attempt to trace both - if (DfgVertex* const rp = trace(rhsp, rWidth - 1, m_lsb)) { - if (DfgVertex* const lp = trace(lhsp, m_msb - rWidth, 0)) { - DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); - resp->rhsp(rp); - resp->lhsp(lp); - SET_RESULT(resp); - return; - } + // The traced bit spans both sides, trace both + DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->rhsp(trace(rhsp, rWidth - 1, m_lsb)); + resp->lhsp(trace(lhsp, m_msb - rWidth, 0)); + SET_RESULT(resp); + } + + void visit(DfgReplicate* vtxp) override { + DfgVertex* const srcp = vtxp->srcp(); + const uint32_t sWidth = srcp->width(); + // If we need more bits than the source, then we need the whole source + if (m_msb - m_lsb + 1 > sWidth) { + DfgReplicate* const repp = make(vtxp, vtxp->width()); + repp->srcp(trace(srcp, sWidth - 1, 0)); + repp->countp(vtxp->countp()); // Always a DfgConst + DfgSel* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->fromp(repp); + resp->lsb(m_lsb); + SET_RESULT(resp); + return; } + // If the requested bits are within the same repliacted word + if (m_msb / sWidth == m_lsb / sWidth) { + SET_RESULT(trace(srcp, m_msb % sWidth, m_lsb % sWidth)); + return; + } + // The requested bits span two replicated words + DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->rhsp(trace(srcp, sWidth - 1, m_lsb % sWidth)); + resp->lhsp(trace(srcp, m_msb % sWidth, 0)); + SET_RESULT(resp); } void visit(DfgExtend* vtxp) override { @@ -500,12 +415,9 @@ class TraceDriver final : public DfgVisitor { return; } // The traced bits span both sides - if (DfgVertex* const sp = trace(srcp, sWidth - 1, m_lsb)) { - DfgExtend* const resp = make(vtxp, m_msb - m_lsb + 1); - resp->srcp(sp); - SET_RESULT(resp); - return; - } + DfgExtend* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->srcp(trace(srcp, sWidth - 1, m_lsb)); + SET_RESULT(resp); } void visit(DfgExtendS* vtxp) override { @@ -516,18 +428,12 @@ class TraceDriver final : public DfgVisitor { SET_RESULT(trace(srcp, m_msb, m_lsb)); return; } - // If there is a single traced bit, wholly in the extension - if (m_lsb >= sWidth && m_msb == m_lsb) { - if (DfgVertex* const sp = trace(srcp, sWidth - 1, sWidth - 1)) SET_RESULT(sp); - return; - } - - // The rest need a real ExtendS - if (!m_aggressive) return; - // If the traced bits are wholly in the extension if (m_lsb >= sWidth) { - if (DfgVertex* const sp = trace(srcp, sWidth - 1, sWidth - 1)) { + DfgVertex* const sp = trace(srcp, sWidth - 1, sWidth - 1); + if (m_msb == m_lsb) { + SET_RESULT(sp); + } else { DfgExtendS* const resp = make(vtxp, m_msb - m_lsb + 1); resp->srcp(sp); SET_RESULT(resp); @@ -535,86 +441,36 @@ class TraceDriver final : public DfgVisitor { return; } // The traced bits span both sides - if (DfgVertex* const sp = trace(srcp, sWidth - 1, m_lsb)) { - DfgExtendS* const resp = make(vtxp, m_msb - m_lsb + 1); - resp->srcp(sp); - SET_RESULT(resp); - return; - } + DfgExtendS* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->srcp(trace(srcp, sWidth - 1, m_lsb)); + SET_RESULT(resp); } void visit(DfgSel* vtxp) override { const uint32_t lsb = vtxp->lsb(); SET_RESULT(trace(vtxp->srcp(), m_msb + lsb, m_lsb + lsb)); - return; } void visit(DfgNot* vtxp) override { - if (!m_aggressive) return; - if (DfgVertex* const sp = trace(vtxp->srcp(), m_msb, m_lsb)) { - DfgNot* const resp = make(vtxp, m_msb - m_lsb + 1); - resp->srcp(sp); - SET_RESULT(resp); - return; - } + DfgNot* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->srcp(trace(vtxp->srcp(), m_msb, m_lsb)); + SET_RESULT(resp); } - void visit(DfgAnd* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceBitwiseBinary(vtxp)); - } - void visit(DfgOr* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceBitwiseBinary(vtxp)); - } - void visit(DfgXor* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceBitwiseBinary(vtxp)); - } - void visit(DfgAdd* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceAddSub(vtxp)); - } - void visit(DfgSub* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceAddSub(vtxp)); - } - void visit(DfgRedAnd* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceReduction(vtxp)); - } - void visit(DfgRedOr* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceReduction(vtxp)); - } - void visit(DfgRedXor* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceReduction(vtxp)); - } - void visit(DfgEq* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceCmp(vtxp)); - } - void visit(DfgNeq* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceCmp(vtxp)); - } - void visit(DfgLt* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceCmp(vtxp)); - } - void visit(DfgLte* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceCmp(vtxp)); - } - void visit(DfgGt* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceCmp(vtxp)); - } - void visit(DfgGte* vtxp) override { - if (!m_aggressive) return; - SET_RESULT(traceCmp(vtxp)); - } + void visit(DfgAnd* vtxp) override { SET_RESULT(traceBitwiseBinary(vtxp)); } + void visit(DfgOr* vtxp) override { SET_RESULT(traceBitwiseBinary(vtxp)); } + void visit(DfgXor* vtxp) override { SET_RESULT(traceBitwiseBinary(vtxp)); } + void visit(DfgAdd* vtxp) override { SET_RESULT(traceAddSub(vtxp)); } + void visit(DfgSub* vtxp) override { SET_RESULT(traceAddSub(vtxp)); } + void visit(DfgRedAnd* vtxp) override { SET_RESULT(traceReduction(vtxp)); } + void visit(DfgRedOr* vtxp) override { SET_RESULT(traceReduction(vtxp)); } + void visit(DfgRedXor* vtxp) override { SET_RESULT(traceReduction(vtxp)); } + void visit(DfgEq* vtxp) override { SET_RESULT(traceCmp(vtxp)); } + void visit(DfgNeq* vtxp) override { SET_RESULT(traceCmp(vtxp)); } + void visit(DfgLt* vtxp) override { SET_RESULT(traceCmp(vtxp)); } + void visit(DfgLte* vtxp) override { SET_RESULT(traceCmp(vtxp)); } + void visit(DfgGt* vtxp) override { SET_RESULT(traceCmp(vtxp)); } + void visit(DfgGte* vtxp) override { SET_RESULT(traceCmp(vtxp)); } void visit(DfgShiftR* vtxp) override { DfgVertex* const lhsp = vtxp->lhsp(); @@ -622,7 +478,6 @@ class TraceDriver final : public DfgVisitor { const uint32_t shiftAmnt = rConstp->toU32(); // Width of lower half of result const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? 0 : vtxp->width() - shiftAmnt; - // If the traced bits are wholly in the input if (lowerWidth > m_msb) { SET_RESULT(trace(lhsp, m_msb + shiftAmnt, m_lsb + shiftAmnt)); @@ -634,21 +489,23 @@ class TraceDriver final : public DfgVisitor { return; } // The traced bits span both sides - if (DfgVertex* const sp = trace(lhsp, lowerWidth - 1 + shiftAmnt, m_lsb + shiftAmnt)) { - DfgExtend* const resp = make(vtxp, m_msb - m_lsb + 1); - resp->srcp(sp); - SET_RESULT(resp); - return; - } + DfgExtend* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->srcp(trace(lhsp, lowerWidth - 1 + shiftAmnt, m_lsb + shiftAmnt)); + SET_RESULT(resp); return; } - // The shift amount is non-negative, so we can conclude zero if all - // bits at and above the LSB we seek are zeroes - if (knownToBeZeroAtAndAbove(lhsp, m_lsb)) { - SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); + DfgShiftR* const shiftrp = make(vtxp, vtxp->lhsp()->width() - m_lsb); + shiftrp->rhsp(trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0)); + shiftrp->lhsp(trace(vtxp->lhsp(), vtxp->lhsp()->width() - 1, m_lsb)); + if (m_msb == vtxp->lhsp()->width() - 1) { + SET_RESULT(shiftrp); return; } + DfgSel* const selp = make(vtxp, m_msb - m_lsb + 1); + selp->fromp(shiftrp); + selp->lsb(0); + SET_RESULT(selp); } void visit(DfgShiftL* vtxp) override { @@ -657,7 +514,6 @@ class TraceDriver final : public DfgVisitor { const uint32_t shiftAmnt = rConstp->toU32(); // Width of lower half of result const uint32_t lowerWidth = shiftAmnt > vtxp->width() ? vtxp->width() : shiftAmnt; - // If the traced bits are wholly in the input if (m_lsb >= lowerWidth) { SET_RESULT(trace(lhsp, m_msb - shiftAmnt, m_lsb - shiftAmnt)); @@ -669,48 +525,41 @@ class TraceDriver final : public DfgVisitor { return; } // The traced bits span both sides - if (DfgVertex* const sp = trace(lhsp, m_msb - shiftAmnt, lowerWidth - shiftAmnt)) { - DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); - resp->rhsp(make(vtxp, resp->width() - sp->width())); - resp->lhsp(sp); - SET_RESULT(resp); - return; - } + DfgConcat* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->rhsp(make(vtxp, resp->width() - (m_msb - lowerWidth + 1))); + resp->lhsp(trace(lhsp, m_msb - shiftAmnt, lowerWidth - shiftAmnt)); + SET_RESULT(resp); return; } - // The shift amount is non-negative, so we can conclude zero if all - // bits at and below the MSB we seek are zeroes - if (knownToBeZeroAtAndBelow(lhsp, m_msb)) { - SET_RESULT(make(vtxp, m_msb - m_lsb + 1)); + DfgShiftL* const shiftlp = make(vtxp, m_msb + 1); + shiftlp->rhsp(trace(vtxp->rhsp(), vtxp->rhsp()->width() - 1, 0)); + shiftlp->lhsp(trace(vtxp->lhsp(), m_msb, 0)); + if (m_lsb == 0) { + SET_RESULT(shiftlp); return; } + DfgSel* const selp = make(vtxp, m_msb - m_lsb + 1); + selp->fromp(shiftlp); + selp->lsb(m_lsb); + SET_RESULT(selp); } void visit(DfgCond* vtxp) override { - if (!m_aggressive) return; - if (DfgVertex* const condp = trace(vtxp->condp(), 0, 0)) { - if (DfgVertex* const thenp = trace(vtxp->thenp(), m_msb, m_lsb)) { - if (DfgVertex* const elsep = trace(vtxp->elsep(), m_msb, m_lsb)) { - DfgCond* const resp = make(vtxp, m_msb - m_lsb + 1); - resp->condp(condp); - resp->thenp(thenp); - resp->elsep(elsep); - SET_RESULT(resp); - return; - } - } - } + DfgCond* const resp = make(vtxp, m_msb - m_lsb + 1); + resp->condp(trace(vtxp->condp(), vtxp->condp()->width() - 1, 0)); + resp->thenp(trace(vtxp->thenp(), m_msb, m_lsb)); + resp->elsep(trace(vtxp->elsep(), m_msb, m_lsb)); + SET_RESULT(resp); } #undef SET_RESULT +public: // CONSTRUCTOR - TraceDriver(DfgGraph& dfg, Vtx2Scc& vtx2Scc, uint64_t component, bool aggressive) + TraceDriver(DfgGraph& dfg, Vtx2Scc& vtx2Scc) : m_dfg{dfg} - , m_vtx2Scc{vtx2Scc} - , m_component{component} - , m_aggressive{aggressive} { + , m_vtx2Scc{vtx2Scc} { #ifdef VL_DEBUG if (v3Global.opt.debugCheck()) { m_lineCoverageFile.open( // @@ -721,33 +570,14 @@ class TraceDriver final : public DfgVisitor { #endif } -public: - // Given a Vertex that is part of an SCC denoted by vtx2Scc, - // return a vertex that is equivalent to 'vtxp[lsb +: width]', but is not - // part of the same SCC. Returns nullptr if such a vertex cannot be - // computed. This can add new vertices to the graph. The 'aggressive' flag - // enables creating many intermediate operations. This should only be set - // if it is reasonably certain the tracing will succeed, otherwise we can - // waste a lot of compute. - static DfgVertex* apply(DfgGraph& dfg, Vtx2Scc& vtx2Scc, DfgVertex& vtx, uint32_t lsb, - uint32_t width, bool aggressive) { - TraceDriver traceDriver{dfg, vtx2Scc, vtx2Scc[vtx], aggressive}; - // Find the out-of-component driver of the given vertex - DfgVertex* const resultp = traceDriver.trace(&vtx, lsb + width - 1, lsb); - // Delete unused newly created vertices (these can be created if a - // partial trace succeded, but an eventual one falied). Because new - // vertices should be created depth first, it is enough to do a single - // reverse pass over the collectoin - for (DfgVertex* const newp : vlstd::reverse_view(traceDriver.m_newVtxps)) { - // Keep the actual result! - if (newp == resultp) continue; - // Keep used ones! - if (newp->hasSinks()) continue; - // Delete it - VL_DO_DANGLING(newp->unlinkDelete(dfg), newp); - } - // Return the result - return resultp; + // Given a Vertex that is part of an SCC, return a vertex that is equivalent + // to 'vtxp[lsb +: width]', but is not part of the same SCC. This should only + // be called if the bit range is known to be independent of the SCC, so the + // trace can always succeed. + DfgVertex* apply(DfgVertex& vtx, uint32_t lsb, uint32_t width) { + VL_RESTORER(m_component); + m_component = m_vtx2Scc.at(&vtx); + return trace(&vtx, lsb + width - 1, lsb); } }; @@ -758,7 +588,10 @@ class IndependentBits final : public DfgVisitor { std::unordered_map m_vtxp2Mask; std::deque m_workList; // Work list for the traversal std::unordered_map m_onWorkList; // Marks vertices on the work list + +#ifdef VL_DEBUG std::ofstream m_lineCoverageFile; // Line coverage file, just for testing +#endif // METHODS // Predicate to check if a vertex should be analysed directly @@ -810,12 +643,12 @@ class IndependentBits final : public DfgVisitor { } } - void propagateFromDriver(V3Number& m, DfgVertex* srcp) { + void propagateFromDriver(V3Number& m, const DfgVertex* srcp) { // If there is no driver, we are done if (!srcp) return; // If it is driven by a splice, we need to combine the masks of the drivers - if (DfgSplicePacked* const splicep = srcp->cast()) { - splicep->foreachDriver([&](DfgVertex& src, uint32_t lo) { + if (const DfgSplicePacked* const splicep = srcp->cast()) { + splicep->foreachDriver([&](const DfgVertex& src, uint32_t lo) { m.opSelInto(MASK(&src), lo, src.width()); return false; }); @@ -833,6 +666,8 @@ class IndependentBits final : public DfgVisitor { } // LCOV_EXCL_STOP void visit(DfgVarPacked* vtxp) override { + // We cannot trace through a volatile variable, so pretend all bits are dependent + if (vtxp->isVolatile()) return; V3Number& m = MASK(vtxp); DfgVertex* const srcp = vtxp->srcp(); DfgVertex* const defaultp = vtxp->defaultp(); @@ -849,20 +684,25 @@ class IndependentBits final : public DfgVisitor { // From a variable const DfgVarArray* varp = vtxp->fromp()->cast(); if (!varp) return; + // We cannot trace through a volatile variable, so pretend all bits are dependent + if (varp->isVolatile()) return; // Skip through intermediate variables while (varp->srcp() && varp->srcp()->is()) { varp = varp->srcp()->as(); + if (varp->isVolatile()) return; } // Find driver - if (!varp->srcp()) return; - const DfgSpliceArray* const splicep = varp->srcp()->cast(); - if (!splicep) return; - const DfgVertex* const driverp = splicep->driverAt(idxp->toSizeT()); - if (!driverp) return; - const DfgUnitArray* const uap = driverp->cast(); + const DfgVertex* srcp = varp->srcp(); + if (!srcp) return; + if (const DfgSpliceArray* const splicep = srcp->cast()) { + srcp = splicep->driverAt(idxp->toSizeT()); + if (!srcp) return; + } + const DfgUnitArray* uap = srcp->cast(); if (!uap) return; + srcp = uap->srcp(); // Propagate from driver - propagateFromDriver(MASK(vtxp), uap->srcp()); + propagateFromDriver(MASK(vtxp), srcp); } void visit(DfgConcat* vtxp) override { @@ -995,11 +835,14 @@ class IndependentBits final : public DfgVisitor { return; } - // Otherwise, as the shift amount is non-negative, any bit at or below - // the most significant dependent bit might be dependent - V3Number& m = MASK(vtxp); - m = MASK(lhsp); - floodTowardsLsb(m); + // Otherwise, as the shift amount is non-negative, all independent + // and consecutive top bits in the lhs yield an independent result + // if the shift amount is independent. + if (MASK(rhsp).isEqAllOnes()) { + V3Number& m = MASK(vtxp); + m = MASK(lhsp); + floodTowardsLsb(m); + } } void visit(DfgShiftL* vtxp) override { @@ -1020,11 +863,14 @@ class IndependentBits final : public DfgVisitor { return; } - // Otherwise, as the shift amount is non-negative, any bit at or above - // the least significant dependent bit might be dependent - V3Number& m = MASK(vtxp); - m = MASK(lhsp); - floodTowardsMsb(m); + // Otherwise, as the shift amount is non-negative, all independent + // and consecutive bottom bits in the lhs yield an independent result + // if the shift amount is independent. + if (MASK(rhsp).isEqAllOnes()) { + V3Number& m = MASK(vtxp); + m = MASK(lhsp); + floodTowardsMsb(m); + } } void visit(DfgCond* vtxp) override { @@ -1047,7 +893,10 @@ class IndependentBits final : public DfgVisitor { } // Otherwise just enqueue it bool& onWorkList = m_onWorkList[&sink]; - if (!onWorkList) m_workList.emplace_back(&sink); + if (!onWorkList) { + UINFO(9, "Enqueuing vertex " << &sink << " " << sink.typeName()); + m_workList.emplace_back(&sink); + } onWorkList = true; return false; }); @@ -1079,9 +928,9 @@ class IndependentBits final : public DfgVisitor { if (!handledDirectly(vtx)) return; if (!m_vtx2Scc.at(&vtx)) return; iterate(&vtx); - if (!mask(vtx).isEqZero()) enqueueSinks(vtx); UINFO(9, "Initial independent bits of " << &vtx << " " << vtx.typeName() << " are: " << mask(vtx).displayed(vtx.fileline(), "%b")); + if (!mask(vtx).isEqZero()) enqueueSinks(vtx); }); // Propagate independent bits until no more changes are made @@ -1100,11 +949,17 @@ class IndependentBits final : public DfgVisitor { // Remember current mask so we can check if it changed const V3Number prevMask = currMask; + UINFO(9, "Analyzing independent bits of " << currp << " " << currp->typeName()); + // Dispatch it to update the mask in the visit methods iterate(currp); // If mask changed, enqueue sinks if (!prevMask.isCaseEq(currMask)) { + UINFO(9, "Independent bits of " // + << currp << " " << currp->typeName() << " changed" // + << "\n form: " << prevMask.displayed(currp->fileline(), "%b") + << "\n to: " << currMask.displayed(currp->fileline(), "%b")); enqueueSinks(*currp); // Check the mask only ever expands (no bit goes 1 -> 0) if (VL_UNLIKELY(v3Global.opt.debugCheck())) { @@ -1114,10 +969,6 @@ class IndependentBits final : public DfgVisitor { prevAndNotCurr.opAnd(prevMask, notCurr); UASSERT_OBJ(prevAndNotCurr.isEqZero(), currp, "Mask should only expand"); } - UINFO(9, "Independent bits of " // - << currp << " " << currp->typeName() << " changed" // - << "\n form: " << prevMask.displayed(currp->fileline(), "%b") - << "\n to: " << currMask.displayed(currp->fileline(), "%b")); } } } @@ -1133,86 +984,13 @@ public: } }; -class FixUpSelDrivers final { - DfgGraph& m_dfg; // The graph being processed - Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map - size_t m_nImprovements = 0; // Number of improvements mde - - void fixUpSelSinks(DfgVertex& vtx) { - const uint64_t component = m_vtx2Scc[vtx]; - vtx.foreachSink([&](DfgVertex& sink) { - // Ignore if sink is not part of same cycle - if (m_vtx2Scc[sink] != component) return false; - // Only handle Sel - DfgSel* const selp = sink.cast(); - if (!selp) return false; - // Try to find the driver of the selected bits outside the cycle - DfgVertex* const fixp - = TraceDriver::apply(m_dfg, m_vtx2Scc, vtx, selp->lsb(), selp->width(), false); - if (!fixp) return false; - // Found an out-of-cycle driver. We can replace this sel with that. - selp->replaceWith(fixp); - selp->unlinkDelete(m_dfg); - ++m_nImprovements; - return false; - }); - } - - void fixUpArraySelSinks(DfgVertex& vtx) { - const uint64_t component = m_vtx2Scc[vtx]; - vtx.foreachSink([&](DfgVertex& sink) { - // Ignore if sink is not part of same cycle - if (m_vtx2Scc[sink] != component) return false; - // Only handle ArraySels - DfgArraySel* const asp = sink.cast(); - if (!asp) return false; - // First, try to fix up the whole word - DfgVertex* const fixp - = TraceDriver::apply(m_dfg, m_vtx2Scc, *asp, 0, asp->width(), false); - if (fixp) { - // Found an out-of-cycle driver for the whole ArraySel - asp->replaceWith(fixp); - asp->unlinkDelete(m_dfg); - ++m_nImprovements; - return false; - } - // Attempt to fix up piece-wise at Sels applied to the ArraySel - fixUpSelSinks(*asp); - // Remove if became unused - if (!asp->hasSinks()) asp->unlinkDelete(m_dfg); - return false; - }); - } - - FixUpSelDrivers(DfgGraph& dfg, Vtx2Scc& vtx2Scc, DfgVertexVar& var) - : m_dfg{dfg} - , m_vtx2Scc{vtx2Scc} { - UINFO(9, "FixUpSelDrivers of " << var.nodep()->name()); - if (var.isPacked()) { - // For Packed variables, fix up all Sels applied to it - fixUpSelSinks(var); - } else if (var.isArray()) { - // For Array variables, fix up each ArraySel applied to it - fixUpArraySelSinks(var); - } - UINFO(9, "FixUpSelDrivers made " << m_nImprovements << " improvements"); - } - -public: - // Attempt to push Sel form Var through to the driving - // expression of the selected bits. This can fix things like - // 'a[1:0] = foo', 'a[2] = a[1]', which are somewhat common. - // Returns the number of drivers fixed up. - static size_t apply(DfgGraph& dfg, Vtx2Scc& vtx2Scc, DfgVertexVar& var) { - return FixUpSelDrivers{dfg, vtx2Scc, var}.m_nImprovements; - } -}; - -class FixUpIndependentRanges final { +class FixUp final { DfgGraph& m_dfg; // The graph being processed Vtx2Scc& m_vtx2Scc; // The Vertex to SCC map + TraceDriver m_traceDriver{m_dfg, m_vtx2Scc}; // The independent bits map - const std::unordered_map& m_independentBits; + const std::unordered_map m_independentBits + = IndependentBits::apply(m_dfg, m_vtx2Scc); size_t m_nImprovements = 0; // Number of improvements mde // Returns a bitmask set if that bit of 'vtx' is used (has a sink) @@ -1221,19 +999,17 @@ class FixUpIndependentRanges final { vtx.foreachSink([&result](DfgVertex& sink) { // If used via a Sel, mark the selected bits used if (const DfgSel* const selp = sink.cast()) { - uint32_t lsb = selp->lsb(); - uint32_t msb = lsb + selp->width() - 1; - for (uint32_t i = lsb; i <= msb; ++i) result.setBit(i, '1'); + result.opSetRange(selp->lsb(), selp->width(), '1'); return false; } // Used without a Sel, so all bits are used result.setAllBits1(); - return false; + return true; }); return result; } - static std::string debugStr(const DfgVertex& vtx) { + static std::string debugStr(const DfgVertex& vtx) { // LCOV_EXCL_START if (const DfgArraySel* const aselp = vtx.cast()) { const size_t i = aselp->bitp()->as()->toSizeT(); return debugStr(*aselp->fromp()) + "[" + std::to_string(i) + "]"; @@ -1242,11 +1018,10 @@ class FixUpIndependentRanges final { return varp->nodep()->name(); } vtx.v3fatalSrc("Unhandled node type"); - } + } // LCOV_EXCL_STOP // Trace drivers of independent bits of 'vtxp' in the range '[hi:lo]' - // append replacement terms to 'termps'. Returns number of successful - // replacements. + // append replacement terms to 'termps'. void gatherTerms(std::vector& termps, DfgVertex& vtx, const V3Number& indpBits, const uint32_t hi, const uint32_t lo) { // Iterate through consecutive dependent/non-dependet ranges within [hi:lo] @@ -1256,28 +1031,20 @@ class FixUpIndependentRanges final { const bool endRange = msb == hi || isIndependent != indpBits.bitIs1(msb + 1); if (!endRange) continue; const uint32_t width = msb - lsb + 1; - DfgVertex* termp = nullptr; - // If the range is not self-dependent, attempt to trace its driver + // Compute the term to use for this range if (isIndependent) { - termp = TraceDriver::apply(m_dfg, m_vtx2Scc, vtx, lsb, width, true); - if (termp) { - ++m_nImprovements; - } else { - UINFO(5, "TraceDriver of independent range failed for " - << debugStr(vtx) << "[" << msb << ":" << lsb << "]"); - } - } - // Fall back on using the part of the variable (if dependent, or trace failed) - if (!termp) { + // If the range is independent of the cycles, trace its driver + ++m_nImprovements; + termps.emplace_back(m_traceDriver.apply(vtx, lsb, width)); + } else { + // If dependent, fall back on using the part of the variable DfgSel* const selp = new DfgSel{m_dfg, vtx.fileline(), DfgDataType::packed(width)}; // Same component as 'vtxp', as reads 'vtxp' and will replace 'vtxp' m_vtx2Scc[selp] = m_vtx2Scc.at(vtx); // Do not connect selp->fromp yet, need to do afer replacing 'vtxp' selp->lsb(lsb); - termp = selp; + termps.emplace_back(selp); } - // Record the term - termps.emplace_back(termp); // Next iteration isIndependent = !isIndependent; lsb = msb + 1; @@ -1299,15 +1066,11 @@ class FixUpIndependentRanges final { const V3Number usedBits = computeUsedBits(vtx); UINFO(9, "Used bits of '" << debugStr(vtx) << "' are " << usedBits.displayed(vtx.fileline(), "%b")); - // Nothing to do if no bits are used - if (usedBits.isEqZero()) return; - { - // Nothing to do if no used bits are independen (all used bits are dependent) - V3Number usedAndIndependent{vtx.fileline(), static_cast(vtx.width()), 0}; - usedAndIndependent.opAnd(usedBits, indpBits); - if (usedAndIndependent.isEqZero()) return; - } + // Nothing to do if no used bits are independen (all used bits are dependent) + V3Number usedAndIndependent{vtx.fileline(), static_cast(vtx.width()), 0}; + usedAndIndependent.opAnd(usedBits, indpBits); + if (usedAndIndependent.isEqZero()) return; // We are computing the terms to concatenate and replace 'vtxp' with std::vector termps; @@ -1334,14 +1097,6 @@ class FixUpIndependentRanges final { lsb = msb + 1; } - // If no imporovement was possible, delete the terms we just created - if (!m_nImprovements) { - for (DfgVertex* const tp : termps) VL_DO_DANGLING(tp->unlinkDelete(m_dfg), tp); - termps.clear(); - return; - } - - // We managed to imporove something, apply the replacement // Concatenate all the terms to create the replacement DfgVertex* replacementp = termps.front(); const uint64_t vComp = m_vtx2Scc.at(vtx); @@ -1373,7 +1128,7 @@ class FixUpIndependentRanges final { } void main(DfgVertexVar& var) { - UINFO(9, "FixUpIndependentRanges of " << var.nodep()->name()); + UINFO(9, "FixUp of " << var.nodep()->name()); if (var.is()) { // For Packed variables, fix up as whole @@ -1390,37 +1145,38 @@ class FixUpIndependentRanges final { if (!aselp->bitp()->is()) return false; // Fix up the word fixUpPacked(*aselp); - // Remove if became unused - if (!aselp->hasSinks()) aselp->unlinkDelete(m_dfg); return false; }); } - UINFO(9, "FixUpIndependentRanges made " << m_nImprovements << " improvements"); + UINFO(9, "FixUp made " << m_nImprovements << " improvements"); } - FixUpIndependentRanges(DfgGraph& dfg, Vtx2Scc& vtx2Scc, - const std::unordered_map& independentBits, - DfgVertexVar& var) + FixUp(DfgGraph& dfg, Vtx2Scc& vtx2Scc) : m_dfg{dfg} - , m_vtx2Scc{vtx2Scc} - , m_independentBits{independentBits} { - main(var); - } + , m_vtx2Scc{vtx2Scc} {} public: - // Similar to FixUpSelDrivers, but first comptute which bits of the - // variable are self dependent, and fix up those that are independent - // but used. - static size_t apply(DfgGraph& dfg, Vtx2Scc& vtx2Scc, - const std::unordered_map& independentBits, - DfgVertexVar& var) { - return FixUpIndependentRanges{dfg, vtx2Scc, independentBits, var}.m_nImprovements; + // Compute which bits of vertices are independent of the SCC they reside + // in, and replace rferences to used independent bits with an equivalent + // vertex that is not part of the same SCC. + static size_t apply(DfgGraph& dfg, Vtx2Scc& vtx2Scc) { + FixUp fixUp{dfg, vtx2Scc}; + + // TODO: Compute minimal feedback vertex set and cut only those. + // Sadly that is a computationally hard problem. + + // Fix up cyclic variables + for (DfgVertexVar& vtx : dfg.varVertices()) { + if (vtx2Scc.at(vtx)) fixUp.main(vtx); + } + // Return the number of improvements made + return fixUp.m_nImprovements; } }; std::pair, bool> // -V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgContext& ctx) { +breakCycles(const DfgGraph& dfg, V3DfgContext& ctx) { // Shorthand for dumping graph at given dump level const auto dump = [&](int level, const DfgGraph& dfg, const std::string& name) { if (dumpDfgLevel() >= level) dfg.dumpDotFilePrefixed(ctx.prefix() + "breakCycles-" + name); @@ -1467,41 +1223,14 @@ V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgContext& ctx) { return {std::move(resultp), true}; } - // Attempt new improvements + // Fix up independent ranges in vertices UINFO(9, "New iteration after " << nImprovements << " improvements"); prevNImprovements = nImprovements; - - // Method 1: FixUpSelDrivers - for (DfgVertexVar& vtx : res.varVertices()) { - // If Variable is not part of a cycle, move on - if (!vtx2Scc[vtx]) continue; - - const size_t nFixed = FixUpSelDrivers::apply(res, vtx2Scc, vtx); - if (nFixed) { - nImprovements += nFixed; - ctx.m_breakCyclesContext.m_nImprovements += nFixed; - dump(9, res, "FixUpSelDrivers"); - } - } - - // If we managed to fix something, try again with the earlier methods - if (nImprovements != prevNImprovements) continue; - - // Method 2. FixUpIndependentRanges - const std::unordered_map independentBits - = IndependentBits::apply(res, vtx2Scc); - - for (DfgVertexVar& vtx : res.varVertices()) { - // If Variable is not part of a cycle, move on - if (!vtx2Scc[vtx]) continue; - - const size_t nFixed - = FixUpIndependentRanges::apply(res, vtx2Scc, independentBits, vtx); - if (nFixed) { - nImprovements += nFixed; - ctx.m_breakCyclesContext.m_nImprovements += nFixed; - dump(9, res, "FixUpIndependentRanges"); - } + const size_t nFixed = FixUp::apply(res, vtx2Scc); + if (nFixed) { + nImprovements += nFixed; + ctx.m_breakCyclesContext.m_nImprovements += nFixed; + dump(9, res, "FixUp"); } } while (nImprovements != prevNImprovements); @@ -1527,3 +1256,15 @@ V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgContext& ctx) { ++ctx.m_breakCyclesContext.m_nUnchanged; return {nullptr, false}; } + +} //namespace V3DfgBreakCycles + +std::pair, bool> // +V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgContext& ctx) { + auto pair = V3DfgBreakCycles::breakCycles(dfg, ctx); + if (pair.first) { + if (v3Global.opt.debugCheck()) V3DfgPasses::typeCheck(*pair.first); + V3DfgPasses::removeUnused(*pair.first); + } + return pair; +} diff --git a/test_regress/t/t_dfg_break_cycles.py b/test_regress/t/t_dfg_break_cycles.py index c8ac2f4d2..ab57b5b8e 100755 --- a/test_regress/t/t_dfg_break_cycles.py +++ b/test_regress/t/t_dfg_break_cycles.py @@ -44,7 +44,7 @@ with open(rdFile, 'r', encoding="utf8") as rdFh, \ for line in rdFh: line, _, cmt = line.partition("//") cmt, _, _ = cmt.partition("//") - if "UNOPTFLAT" in cmt: + if "UNOPTFLAT" in cmt and "lint_off" not in cmt: nExpectedCycles += 1 m = re.search(r'`signal\((\w+),', line) if not m: @@ -94,6 +94,9 @@ test.compile(verilator_flags2=[ "../../t/" + test.name + ".cpp" ]) # yapf:disable +# Execute test to check equivalence +test.execute(executable=test.obj_dir + "/obj_opt/Vopt") + # Check all source lines hit coveredLines = set() @@ -119,7 +122,4 @@ if coveredLines != expectedLines: test.file_grep_not(test.obj_dir + "/obj_opt/Vopt__stats.txt", r'DFG.*non-representable.*\s[1-9]\d*$') -# Execute test to check equivalence -test.execute(executable=test.obj_dir + "/obj_opt/Vopt") - test.passes() diff --git a/test_regress/t/t_dfg_break_cycles.v b/test_regress/t/t_dfg_break_cycles.v index 0e7524814..a123ca3b9 100644 --- a/test_regress/t/t_dfg_break_cycles.v +++ b/test_regress/t/t_dfg_break_cycles.v @@ -96,9 +96,21 @@ module t ( rand_a[3:0] // 3:0 }; + `signal(SHIFTR_2_A, 10); // UNOPTFLAT + wire logic [9:0] SHIFTR_2_B = SHIFTR_2_A >> 2; + assign SHIFTR_2_A = {rand_a[1:0], SHIFTR_2_B[9:2]}; + `signal(SHIFTR_VARIABLE, 2); // UNOPTFLAT assign SHIFTR_VARIABLE = rand_a[1:0] ^ ({1'b0, SHIFTR_VARIABLE[1]} >> rand_b[0]); + `signal(SHIFTR_VARIABLE_2, 2); // UNOPTFLAT + assign SHIFTR_VARIABLE_2 = rand_a[1:0] ^ ({1'b1, SHIFTR_VARIABLE_2[1]} >> rand_b[0]); + + `signal(SHIFTR_VARIABLE_3_A, 4); // UNOPTFLAT + `signal(SHIFTR_VARIABLE_3_B, 5); + assign SHIFTR_VARIABLE_3_B = {4'b1111, SHIFTR_VARIABLE_3_A[1]} >> rand_b[0]; + assign SHIFTR_VARIABLE_3_A = rand_a[3:0] ^ SHIFTR_VARIABLE_3_B[3:0]; + `signal(SHIFTL, 14); // UNOPTFLAT assign SHIFTL = { SHIFTL[6:5], // 13:12 @@ -108,16 +120,41 @@ module t ( rand_a[3:0] // 3:0 }; + `signal(SHIFTL_2_A, 10); // UNOPTFLAT + wire logic [9:0] SHIFTL_2_B = SHIFTL_2_A << 2; + assign SHIFTL_2_A = {SHIFTL_2_B[9:2], rand_a[1:0]}; + `signal(SHIFTL_VARIABLE, 2); // UNOPTFLAT assign SHIFTL_VARIABLE = rand_a[1:0] ^ ({SHIFTL_VARIABLE[0], 1'b0} << rand_b[0]); + `signal(SHIFTL_VARIABLE_2, 2); // UNOPTFLAT + assign SHIFTL_VARIABLE_2 = rand_a[1:0] ^ ({SHIFTL_VARIABLE_2[0], 1'b1} << rand_b[0]); + + `signal(SHIFTL_VARIABLE_3_A, 4); // UNOPTFLAT + `signal(SHIFTL_VARIABLE_3_B, 5); + assign SHIFTL_VARIABLE_3_B = {SHIFTL_VARIABLE_3_A[0], 4'b1111} << rand_b[0]; + assign SHIFTL_VARIABLE_3_A = rand_a[3:0] ^ SHIFTL_VARIABLE_3_B[4:1]; + `signal(VAR_A, 2); // UNOPTFLAT wire logic [1:0] VAR_B; assign VAR_A = {rand_a[0], VAR_B[0]}; assign VAR_B = (VAR_A >> 1) ^ 2'(VAR_B[1]); - `signal(REPLICATE, 4); // UNOPTFLAT - assign REPLICATE = rand_a[3:0] ^ ({2{REPLICATE[3:2]}} >> 2); + `signal(REPLICATE_1, 4); // UNOPTFLAT + assign REPLICATE_1 = rand_a[3:0] ^ ({2{REPLICATE_1[3:2]}} >> 2); + + `signal(REPLICATE_2_A, 10); // UNOPTFLAT + wire logic [7:0] REPLICATE_2_B; + assign REPLICATE_2_B = {4{REPLICATE_2_A[1:0]}}; + assign REPLICATE_2_A = {^REPLICATE_2_A[8:0], REPLICATE_2_B, rand_a[0]}; + + `signal(REPLICATE_3_A, 9); // UNOPTFLAT + assign REPLICATE_3_A = {{4{REPLICATE_3_A[1:0]}}, rand_a[0]}; + + `signal(REPLICATE_4_A, 4); // UNOPTFLAT + wire logic [3:0] REPLICATE_4_B; + assign REPLICATE_4_B = {2{REPLICATE_4_A[1:0]}}; + assign REPLICATE_4_A = {REPLICATE_4_B[2:1], rand_a[1:0]}; `signal(PARTIAL, 4); // UNOPTFLAT assign PARTIAL[0] = rand_a[0]; @@ -221,6 +258,17 @@ module t ( `signal(COND_COND, 3); // UNOPTFLAT assign COND_COND = {rand_a[0], (COND_COND >> 2) == 3'b001 ? rand_b[3:2] : rand_b[1:0]}; + // verilator lint_off WIDTHTRUNC + `signal(COND_THEN_2, 3); // UNOPTFLAT + assign COND_THEN_2 = {rand_a[0], rand_a[1:0] ? 2'(COND_THEN_2 << 2) : rand_b[1:0]}; + + `signal(COND_ELSE_2, 3); // UNOPTFLAT + assign COND_ELSE_2 = {rand_a[0], rand_a[1:0] ? rand_b[1:0] : 2'(COND_ELSE_2 << 2)}; + + `signal(COND_COND_2, 3); // UNOPTFLAT + assign COND_COND_2 = {rand_a[0], COND_COND_2 >> 2 ? rand_b[3:2] : rand_b[1:0]}; + // verilator lint_on WIDTHTRUNC + // verilator lint_off ALWCOMBORDER logic [3:0] always_0; always_comb begin @@ -288,6 +336,10 @@ module t ( `signal(PACKED_0_LSB, 1); assign PACKED_0_LSB = packed_0_lsb; + ////////////////////////////////////////////////////////////////////////// + // Cases that can't be fixed up currently + ////////////////////////////////////////////////////////////////////////// + // verilator lint_off UNOPTFLAT logic array_5 [0:6]; // Unconnected d[0:3] @@ -298,4 +350,45 @@ module t ( assign ARRAY_5 = array_5[6]; // verilator lint_on + ////////////////////////////////////////////////////////////////////////// + // Volatile variables + ////////////////////////////////////////////////////////////////////////// + + `signal(VOLATILE_PACKED_OUT_OF_CYCLE, 64); // UNOPTFLAT + wire logic [63:0] volatile_packed_out_of_cycle /* verilator forceable */ = rand_a; + assign VOLATILE_PACKED_OUT_OF_CYCLE = volatile_packed_out_of_cycle ^ 64'(VOLATILE_PACKED_OUT_OF_CYCLE[63:1]); + + // verilator lint_off UNOPTFLAT + `signal(VOLATILE_PACKED_IN_CYCLE, 3); + wire logic [2:0] volatile_packed_in_cycle /* verilator forceable */; + assign volatile_packed_in_cycle = rand_a[2:0] ^ 3'(volatile_packed_in_cycle[2:1]); + assign VOLATILE_PACKED_IN_CYCLE = volatile_packed_in_cycle; + // verilator lint_on + + wire [2:0] volatile_array_out_of_cycle_a [2] /* verilator public_flat_rw */; + assign volatile_array_out_of_cycle_a[0] = rand_a[2:0]; + wire [2:0] volatile_array_out_of_cycle_b [2]; + assign volatile_array_out_of_cycle_b[0] = volatile_array_out_of_cycle_a[0]; + assign volatile_array_out_of_cycle_b[1] = volatile_array_out_of_cycle_b[0]; + `signal(VOLATILE_ARRAY_OUT_OF_CYCLE, 3); // UNOPTFLAT + assign VOLATILE_ARRAY_OUT_OF_CYCLE = volatile_array_out_of_cycle_a[1]; + + // verilator lint_off UNOPTFLAT + wire [2:0] volatile_array_in_cycle_0 [2] /* verilator public_flat_rw */; + assign volatile_array_in_cycle_0[0] = rand_a[2:0]; + assign volatile_array_in_cycle_0[1] = volatile_array_in_cycle_0[0]; + `signal(VOLATILE_ARRAY_IN_CYCLE_0, 3); // UNOPTFLAT + assign VOLATILE_ARRAY_IN_CYCLE_0 = volatile_array_in_cycle_0[1]; + // verilator lint_on + + // verilator lint_off UNOPTFLAT + wire [2:0] volatile_array_in_cycle_1a [2] /* verilator public_flat_rw */; + wire [2:0] volatile_array_in_cycle_1b [2] /* verilator public_flat_rw */; + assign volatile_array_in_cycle_1a[0] = rand_a[2:0]; + assign volatile_array_in_cycle_1a[1] = volatile_array_in_cycle_1b[0]; + assign volatile_array_in_cycle_1b = volatile_array_in_cycle_1a; + `signal(VOLATILE_ARRAY_IN_CYCLE_1, 3); // UNOPTFLAT + assign VOLATILE_ARRAY_IN_CYCLE_1 = volatile_array_in_cycle_1a[1]; + // verilator lint_on + endmodule diff --git a/test_regress/t/t_dfg_true_cycle_bad.out b/test_regress/t/t_dfg_true_cycle_bad.out index 58a941186..9ab0466c3 100644 --- a/test_regress/t/t_dfg_true_cycle_bad.out +++ b/test_regress/t/t_dfg_true_cycle_bad.out @@ -1,11 +1,9 @@ -%Warning-UNOPTFLAT: t/t_dfg_true_cycle_bad.v:10:23: Signal unoptimizable: Circular combinational logic: 't.o' +%Warning-UNOPTFLAT: t/t_dfg_true_cycle_bad.v:10:23: Signal unoptimizable: Circular combinational logic: 'o' 10 | output wire [9:0] o | ^ ... For warning description see https://verilator.org/warn/UNOPTFLAT?v=latest ... Use "/* verilator lint_off UNOPTFLAT */" and lint_on around source to disable this message. - t/t_dfg_true_cycle_bad.v:10:23: Example path: t.o - t/t_dfg_true_cycle_bad.v:10:23: Example path: ASSIGNW t/t_dfg_true_cycle_bad.v:10:23: Example path: o - t/t_dfg_true_cycle_bad.v:10:23: Example path: ASSIGNW - t/t_dfg_true_cycle_bad.v:10:23: Example path: t.o + t/t_dfg_true_cycle_bad.v:12:22: Example path: ASSIGNW + t/t_dfg_true_cycle_bad.v:10:23: Example path: o %Error: Exiting due to