diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index a98de32eb..3ed785b6d 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -606,6 +606,10 @@ Summary: optimizer. Alias for :vlopt:`-fno-dfg-pre-inline`, :vlopt:`-fno-dfg-post-inline` and :vlopt:`-fno-dfg-scoped`. +.. option:: -fno-dfg-break-cycles + + Rarely needed. Disable breaking combinational cycles during DFG. + .. option:: -fno-dfg-peephole Rarely needed. Disable the DFG peephole optimizer. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9e53bb0e9..c224960dd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -227,6 +227,7 @@ set(COMMON_SOURCES V3Descope.cpp V3Dfg.cpp V3DfgAstToDfg.cpp + V3DfgBreakCycles.cpp V3DfgCache.cpp V3DfgDecomposition.cpp V3DfgDfgToAst.cpp diff --git a/src/Makefile_obj.in b/src/Makefile_obj.in index d160a3ab8..ee5cb7617 100644 --- a/src/Makefile_obj.in +++ b/src/Makefile_obj.in @@ -240,6 +240,7 @@ RAW_OBJS_PCH_ASTNOMT = \ V3Descope.o \ V3Dfg.o \ V3DfgAstToDfg.o \ + V3DfgBreakCycles.o \ V3DfgCache.o \ V3DfgDecomposition.o \ V3DfgDfgToAst.o \ diff --git a/src/V3Dfg.cpp b/src/V3Dfg.cpp index 3d037be60..16ccb332a 100644 --- a/src/V3Dfg.cpp +++ b/src/V3Dfg.cpp @@ -34,6 +34,131 @@ DfgGraph::~DfgGraph() { forEachVertex([](DfgVertex& vtxp) { delete &vtxp; }); } +std::unique_ptr DfgGraph::clone() const { + const bool scoped = !modulep(); + + DfgGraph* const clonep = new DfgGraph{modulep(), name()}; + + // Map from original vertex to clone + std::unordered_map vtxp2clonep(size() * 2); + + // Clone constVertices + for (const DfgConst& vtx : m_constVertices) { + DfgConst* const cp = new DfgConst{*clonep, vtx.fileline(), vtx.num()}; + vtxp2clonep.emplace(&vtx, cp); + } + // Clone variable vertices + for (const DfgVertexVar& vtx : m_varVertices) { + const DfgVertexVar* const vp = vtx.as(); + DfgVertexVar* cp = nullptr; + + switch (vtx.type()) { + case VDfgType::atVarArray: { + if (scoped) { + cp = new DfgVarArray{*clonep, 
vp->varScopep()}; + } else { + cp = new DfgVarArray{*clonep, vp->varp()}; + } + vtxp2clonep.emplace(&vtx, cp); + break; + } + case VDfgType::atVarPacked: { + if (scoped) { + cp = new DfgVarPacked{*clonep, vp->varScopep()}; + } else { + cp = new DfgVarPacked{*clonep, vp->varp()}; + } + vtxp2clonep.emplace(&vtx, cp); + break; + } + default: { + vtx.v3fatalSrc("Unhandled variable vertex type: " + vtx.typeName()); + VL_UNREACHABLE; + break; + } + } + + if (vp->hasDfgRefs()) cp->setHasDfgRefs(); + if (vp->hasModRefs()) cp->setHasModRefs(); + if (vp->hasExtRefs()) cp->setHasExtRefs(); + } + // Clone operation vertices + for (const DfgVertex& vtx : m_opVertices) { + switch (vtx.type()) { +#include "V3Dfg__gen_clone_cases.h" // From ./astgen + case VDfgType::atSel: { + DfgSel* const cp = new DfgSel{*clonep, vtx.fileline(), vtx.dtypep()}; + cp->lsb(vtx.as()->lsb()); + vtxp2clonep.emplace(&vtx, cp); + break; + } + case VDfgType::atMux: { + DfgMux* const cp = new DfgMux{*clonep, vtx.fileline(), vtx.dtypep()}; + vtxp2clonep.emplace(&vtx, cp); + break; + } + default: { + vtx.v3fatalSrc("Unhandled operation vertex type: " + vtx.typeName()); + VL_UNREACHABLE; + break; + } + } + } + UASSERT(size() == clonep->size(), "Size of clone should be the same"); + + // Constants have no inputs + // Hook up inputs of cloned variables + for (const DfgVertexVar& vtx : m_varVertices) { + switch (vtx.type()) { + case VDfgType::atVarArray: { + const DfgVarArray* const vp = vtx.as(); + DfgVarArray* const cp = vtxp2clonep.at(vp)->as(); + vp->forEachSourceEdge([&](const DfgEdge& edge, size_t i) { + if (DfgVertex* const srcp = edge.sourcep()) { + cp->addDriver(vp->driverFileLine(i), // + vp->driverIndex(i), // + vtxp2clonep.at(srcp)); + } + }); + break; + } + case VDfgType::atVarPacked: { + const DfgVarPacked* const vp = vtx.as(); + DfgVarPacked* const cp = vtxp2clonep.at(vp)->as(); + vp->forEachSourceEdge([&](const DfgEdge& edge, size_t i) { + if (DfgVertex* const srcp = edge.sourcep()) { + 
cp->addDriver(vp->driverFileLine(i), // + vp->driverLsb(i), // + vtxp2clonep.at(srcp)); + } + }); + break; + } + default: { + vtx.v3fatalSrc("Unhandled variable vertex type: " + vtx.typeName()); + VL_UNREACHABLE; + break; + } + } + } + // Hook up inputs of cloned operation vertices + for (const DfgVertex& vtx : m_opVertices) { + DfgVertex* const cp = vtxp2clonep.at(&vtx); + // The code below doesn't work for DfgVertexVariadic, but none of the opVertices are such. + UASSERT_OBJ(!vtx.is(), &vtx, "DfgVertexVariadic not handled"); + const auto oSourceEdges = vtx.sourceEdges(); + auto cSourceEdges = cp->sourceEdges(); + UASSERT_OBJ(oSourceEdges.second == cSourceEdges.second, &vtx, "Mismatched source count"); + for (size_t i = 0; i < oSourceEdges.second; ++i) { + if (DfgVertex* const srcp = oSourceEdges.first[i].sourcep()) { + cSourceEdges.first[i].relinkSource(vtxp2clonep.at(srcp)); + } + } + } + + return std::unique_ptr{clonep}; +} + void DfgGraph::addGraph(DfgGraph& other) { m_size += other.m_size; other.m_size = 0; diff --git a/src/V3Dfg.h b/src/V3Dfg.h index e4df1e06a..4101b3573 100644 --- a/src/V3Dfg.h +++ b/src/V3Dfg.h @@ -706,6 +706,9 @@ public: // 'const' variant of 'forEachVertex'. No mutation allowed. inline void forEachVertex(std::function f) const; + // Return an identical, independent copy of this graph. Vertex and edge order might differ. + std::unique_ptr clone() const VL_MT_DISABLED; + // Add contents of other graph to this graph. Leaves other graph empty. 
void addGraph(DfgGraph& other) VL_MT_DISABLED; diff --git a/src/V3DfgBreakCycles.cpp b/src/V3DfgBreakCycles.cpp new file mode 100644 index 000000000..774db0f76 --- /dev/null +++ b/src/V3DfgBreakCycles.cpp @@ -0,0 +1,489 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +//************************************************************************* +// DESCRIPTION: Verilator: Converting cyclic DFGs into acyclic DFGs +// +// Code available from: https://verilator.org +// +//************************************************************************* +// +// Copyright 2003-2025 by Wilson Snyder. This program is free software; you +// can redistribute it and/or modify it under the terms of either the GNU +// Lesser General Public License Version 3 or the Perl Artistic License +// Version 2.0. +// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 +// +//************************************************************************* + +#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT + +#include "V3Dfg.h" +#include "V3DfgPasses.h" +#include "V3Hash.h" + +#include +#include +#include +#include + +VL_DEFINE_DEBUG_FUNCTIONS; + +// Similar algorithm used in ExtractCyclicComponents. +// This one sets DfgVertex::user(). See the static 'apply' method below. 
+class ColorStronglyConnectedComponents final { + static constexpr uint32_t UNASSIGNED = std::numeric_limits::max(); + + // TYPES + struct VertexState final { + uint32_t component = UNASSIGNED; // Result component number (0 means not in SCC) + uint32_t index = UNASSIGNED; // Used by Pearce's algorithm for detecting SCCs + VertexState() = default; + VertexState(uint32_t i, uint32_t n) + : component{n} + , index{i} {} + }; + + // STATE + DfgGraph& m_dfg; // The input graph + uint32_t m_nonTrivialSCCs = 0; // Number of non-trivial SCCs in the graph + uint32_t m_index = 0; // Visitation index counter + std::vector m_stack; // The stack used by the algorithm + + // METHODS + void visitColorSCCs(DfgVertex& vtx, VertexState& vtxState) { + UDEBUGONLY(UASSERT_OBJ(vtxState.index == UNASSIGNED, &vtx, "Already visited vertex");); + + // Visiting vertex + const size_t rootIndex = vtxState.index = ++m_index; + + // Visit children + vtx.forEachSink([&](DfgVertex& child) { + VertexState& childSatate = child.user(); + // If the child has not yet been visited, then continue traversal + if (childSatate.index == UNASSIGNED) visitColorSCCs(child, childSatate); + // If the child is not in an SCC + if (childSatate.component == UNASSIGNED) { + if (vtxState.index > childSatate.index) vtxState.index = childSatate.index; + } + }); + + if (vtxState.index == rootIndex) { + // This is the 'root' of an SCC + + // A trivial SCC contains only a single vertex + const bool isTrivial = m_stack.empty() // + || m_stack.back()->getUser().index < rootIndex; + // We also need a separate component for vertices that drive themselves (which can + // happen for input like 'assign a = a'), as we want to extract them (they are cyclic). 
+ const bool drivesSelf = vtx.findSink([&vtx](const DfgVertex& sink) { // + return &vtx == &sink; + }); + + if (!isTrivial || drivesSelf) { + // Allocate new component + ++m_nonTrivialSCCs; + vtxState.component = m_nonTrivialSCCs; + while (!m_stack.empty()) { + VertexState& topState = m_stack.back()->getUser(); + // Only higher nodes belong to the same SCC + if (topState.index < rootIndex) break; + m_stack.pop_back(); + topState.component = m_nonTrivialSCCs; + } + } else { + // Trivial SCC (and does not drive itself), so acyclic. Keep it in original graph. + vtxState.component = 0; + } + } else { + // Not the root of an SCC + m_stack.push_back(&vtx); + } + } + + void colorSCCs() { + // Implements Pearce's algorithm to color the strongly connected components. For reference + // see "An Improved Algorithm for Finding the Strongly Connected Components of a Directed + // Graph", David J.Pearce, 2005. + + // We know constant nodes have no input edges, so they cannot be part + // of a non-trivial SCC. Mark them as such without any real traversals. + for (DfgConst& vtx : m_dfg.constVertices()) vtx.setUser(VertexState{0, 0}); + + // Start traversals through variables + for (DfgVertexVar& vtx : m_dfg.varVertices()) { + VertexState& vtxState = vtx.user(); + // If it has no input or no outputs, it cannot be part of a non-trivial SCC. 
+ if (vtx.arity() == 0 || !vtx.hasSinks()) { + UDEBUGONLY(UASSERT_OBJ(vtxState.index == UNASSIGNED || vtxState.component == 0, + &vtx, "Non circular variable must be in a trivial SCC");); + vtxState.index = 0; + vtxState.component = 0; + continue; + } + // If not yet visited, start a traversal + if (vtxState.index == UNASSIGNED) visitColorSCCs(vtx, vtxState); + } + + // Start traversals through operations + for (DfgVertex& vtx : m_dfg.opVertices()) { + VertexState& vtxState = vtx.user(); + // If not yet visited, start a traversal + if (vtxState.index == UNASSIGNED) visitColorSCCs(vtx, vtxState); + } + } + + ColorStronglyConnectedComponents(DfgGraph& dfg) + : m_dfg{dfg} { + UASSERT(dfg.size() < UNASSIGNED, "Graph too big " << dfg.name()); + // Yet another implementation of Pearce's algorithm. + colorSCCs(); + // Re-assign user values + m_dfg.forEachVertex([](DfgVertex& vtx) { + const size_t component = vtx.getUser().component; + vtx.setUser(component); + }); + } + +public: + // Sets DfgVertex::user() for all vertices to: + // - 0, if the vertex is not part of a non-trivial strongly connected component + // and is not part of a self-loop. That is: the Vertex is not part of any cycle. + // - N, if the vertex is part of a non-trivial strongly connected component or self-loop N. + // That is: each set of vertices that are reachable from each other will have the same + // non-zero value assigned. 
+ // Returns the number of non-trivial SCCs (distinct cycles) + static uint32_t apply(DfgGraph& dfg) { + return ColorStronglyConnectedComponents{dfg}.m_nonTrivialSCCs; + } +}; + +class TraceDriver final : public DfgVisitor { + // TYPES + + // Structure denoting currently visited vertex with the MSB and LSB we are searching for + struct Visited final { + DfgVertex* m_vtxp; + uint32_t m_lsb; + uint32_t m_msb; + + Visited() = delete; + Visited(DfgVertex* vtxp, uint32_t lsb, uint32_t msb) + : m_vtxp{vtxp} + , m_lsb{lsb} + , m_msb{msb} {} + + struct Hash final { + size_t operator()(const Visited& item) const { + V3Hash hash{reinterpret_cast(item.m_vtxp)}; + hash += item.m_lsb; + hash += item.m_msb; + return hash.value(); + } + }; + + struct Equal final { + bool operator()(const Visited& a, const Visited& b) const { + return a.m_vtxp == b.m_vtxp && a.m_lsb == b.m_lsb && a.m_msb == b.m_msb; + } + }; + }; + + // STATE + DfgGraph& m_dfg; // The graph being processed + // The strongly connected component we are trying to escape + const uint32_t m_component; + uint32_t m_lsb = 0; // LSB to extract from the currently visited Vertex + uint32_t m_msb = 0; // MSB to extract from the currently visited Vertex + // Result of tracing the currently visited Vertex. Use SET_RESULT below! + DfgVertex* m_resp = nullptr; + std::vector m_newVtxps; // New vertices created during the traversal + std::ofstream m_lineCoverageFile; // Line coverage file, just for testing + + std::vector m_stack; // Stack of currently visited vertices + // Denotes if a 'Visited' entry appears in m_stack + std::unordered_map m_visited; + + // METHODS + + // Create and return a new Vertex and add it to m_newVtxps. You should + // always use this to create new vertices, so unused ones (if a trace + // eventually fails) can be cleaned up at the end. 
+ template + Vertex* make(FileLine* flp, uint32_t width) { + static_assert(std::is_base_of::value // + && !std::is_base_of::value // + && !std::is_same::value, + "Should only make operation vertices"); + AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(width); + Vertex* const vtxp = new Vertex{m_dfg, flp, dtypep}; + m_newVtxps.emplace_back(vtxp); + return vtxp; + } + + // Continue tracing drivers of the given vertex, at the given LSB. Every + // visitor should call this to continue the traversal, then immediately + // return after the call. 'visit' methods should not call 'iterate', call + // this method instead, which checks for cycles. + DfgVertex* trace(DfgVertex* const vtxp, const uint32_t msb, const uint32_t lsb) { + UASSERT_OBJ(!vtxp->is(), vtxp, "Cannot trace array variables"); + UASSERT_OBJ(vtxp->width() > msb, vtxp, "Traced Vertex too narrow"); + + // Push to stack + m_stack.emplace_back(vtxp, msb, lsb); + bool& onStackr = m_visited[m_stack.back()]; + + // Check for true combinational cycles + if (onStackr) { + // Pop from stack + m_stack.pop_back(); + + // Note: could issue a "proper combinational cycle" error here, + // but constructing a legible error message is hard as the Vertex + // Filelines can be very rough after optimizations (could consider + // reporting only the variables involved). Also this pass might + // run mulitple times and report the same error again. There will + // be an UNOPTFLAT issued during scheduling anyway, and the true + // cycle might still settle at run-time. + + // Stop trace + return nullptr; + } + + // Trace the vertex + onStackr = true; + + if (vtxp->user() != m_component) { + // If the currently traced vertex is in a different component, + // then we found what we were looking for. 
+ if (msb != vtxp->width() - 1 || lsb != 0) { + // Apply a Sel to extract the relevant bits if only a part is needed + DfgSel* const selp = make(vtxp->fileline(), msb - lsb + 1); + selp->fromp(vtxp); + selp->lsb(lsb); + m_resp = selp; + } else { + // Otherwise just return the vertex + m_resp = vtxp; + } + } else { + // Otherwise visit the vertex + VL_RESTORER(m_msb); + VL_RESTORER(m_lsb); + m_msb = msb; + m_lsb = lsb; + m_resp = nullptr; + iterate(vtxp); + } + UASSERT_OBJ(!m_resp || m_resp->width() == (msb - lsb + 1), vtxp, "Wrong result width"); + + // Pop from stack + onStackr = false; + m_stack.pop_back(); + + // Done + return m_resp; + } + + // Use this macro to set the result in 'visit' methods. This also emits + // a line to m_lineCoverageFile for testing. + // TODO: Use C++20 std::source_location instead of a macro +#define SET_RESULT(vtxp) \ + do { \ + m_resp = vtxp; \ + if (VL_UNLIKELY(m_lineCoverageFile.is_open())) m_lineCoverageFile << __LINE__ << '\n'; \ + } while (false) + + // VISITORS + void visit(DfgVertex* vtxp) override { + // Base case: cannot continue ... 
+ UINFO(9, "TraceDriver - Unhandled vertex type: " << vtxp->typeName()); + } + + void visit(DfgVarPacked* vtxp) override { + // Proceed with the driver that wholly covers the searched bits + const auto pair = vtxp->sourceEdges(); + for (size_t i = 0; i < pair.second; ++i) { + DfgVertex* const srcp = pair.first[i].sourcep(); + const uint32_t lsb = vtxp->driverLsb(i); + const uint32_t msb = lsb + srcp->width() - 1; + // If it does not cover the searched bit range, move on + if (m_lsb < lsb || msb < m_msb) continue; + // Trace this driver + SET_RESULT(trace(srcp, m_msb - lsb, m_lsb - lsb)); + return; + } + } + + void visit(DfgConcat* vtxp) override { + DfgVertex* const rhsp = vtxp->rhsp(); + DfgVertex* const lhsp = vtxp->lhsp(); + const uint32_t rWidth = rhsp->width(); + // If the traced bits are wholly in the RHS + if (rWidth > m_msb) { + SET_RESULT(trace(rhsp, m_msb, m_lsb)); + return; + } + // If the traced bits are wholly in the LHS + if (m_lsb >= rWidth) { + SET_RESULT(trace(lhsp, m_msb - rWidth, m_lsb - rWidth)); + return; + } + // The traced bits span both sides, attempt to trace both + if (DfgVertex* const rp = trace(rhsp, rWidth - 1, m_lsb)) { + if (DfgVertex* const lp = trace(lhsp, m_msb - rWidth, 0)) { + DfgConcat* const resp = make(vtxp->fileline(), m_msb - m_lsb + 1); + resp->rhsp(rp); + resp->lhsp(lp); + SET_RESULT(resp); + return; + } + } + } + + void visit(DfgExtend* vtxp) override { + DfgVertex* const srcp = vtxp->srcp(); + if (srcp->width() > m_msb) { + SET_RESULT(trace(srcp, m_msb, m_lsb)); + return; + } + } + + void visit(DfgSel* vtxp) override { + const uint32_t lsb = vtxp->lsb(); + SET_RESULT(trace(vtxp->srcp(), m_msb + lsb, m_lsb + lsb)); + return; + } + +#undef SET_RESULT + + // CONSTRUCTOR + TraceDriver(DfgGraph& dfg, uint32_t component) + : m_dfg{dfg} + , m_component{component} { + if (v3Global.opt.debugCheck()) { + m_lineCoverageFile.open( // + v3Global.opt.makeDir() + "/" + v3Global.opt.prefix() + 
"__V3DfgBreakCycles-TraceDriver-line-coverage.txt", // + std::ios_base::out | std::ios_base::app); + } + } + +public: + // Given a Vertex that is part of an SCC denoted by vtxp->user(), + // return a vertex that is equivalent to 'vtxp[lsb +: width]', but is not + // part of the same SCC. Returns nullptr if such a vertex cannot be + // computed. This can add new vertices to the graph. + static DfgVertex* apply(DfgGraph& dfg, DfgVertex* vtxp, uint32_t lsb, uint32_t width) { + TraceDriver traceDriver{dfg, vtxp->user()}; + // Find the out-of-component driver of the given vertex + DfgVertex* const resultp = traceDriver.trace(vtxp, lsb + width - 1, lsb); + // Delete unused newly created vertices (these can be created if a + // partial trace succeeded, but an eventual one failed). Because new + // vertices should be created depth first, it is enough to do a single + // reverse pass over the collection + for (DfgVertex* const vtxp : vlstd::reverse_view(traceDriver.m_newVtxps)) { + // Keep the actual result! + if (vtxp == resultp) continue; + // Keep used ones! + if (vtxp->hasSinks()) continue; + // Delete it + VL_DO_DANGLING(vtxp->unlinkDelete(dfg), vtxp); + } + // Return the result + return resultp; + } +}; + +std::pair, bool> +V3DfgPasses::breakCycles(const DfgGraph& dfg, V3DfgOptimizationContext& ctx) { + // Shorthand for dumping graph at given dump level + const auto dump = [&](int level, const DfgGraph& dfg, const std::string& name) { + if (dumpDfgLevel() >= level) dfg.dumpDotFilePrefixed(ctx.prefix() + "breakCycles-" + name); + }; + + // Can't do much with trivial things ('a = a' or 'a[1] = a[0]'), so bail + if (dfg.size() <= 2) { + UINFO(7, "Graph is trivial"); + dump(9, dfg, "trivial"); + ++ctx.m_breakCyclesContext.m_nTrivial; + return {nullptr, false}; + } + + // Show input for debugging + dump(7, dfg, "input"); + + // We might fail to make any improvements, so first create a clone of the + // graph. This is what we will be working on, and return if successful. 
+ // Do not touch the input graph. + std::unique_ptr resultp = dfg.clone(); + // Just shorthand for code below + DfgGraph& res = *resultp; + dump(9, res, "clone"); + + // How many improvements have we made + size_t nImprovements = 0; + size_t prevNImprovements; + + // Iterate while an improvement can be made and the graph is still cyclic + do { + // Color SCCs (populates DfgVertex::user()) + const auto userDataInUse = res.userDataInUse(); + const uint32_t numNonTrivialSCCs = ColorStronglyConnectedComponents::apply(res); + + // Congrats if it has become acyclic + if (!numNonTrivialSCCs) { + UINFO(7, "Graph became acyclic after " << nImprovements << " improvements"); + dump(7, res, "result-acyclic"); + ++ctx.m_breakCyclesContext.m_nFixed; + return {std::move(resultp), true}; + } + + // Attempt new improvements + UINFO(9, "New iteration after " << nImprovements << " improvements"); + prevNImprovements = nImprovements; + + // Method 1. Attempt to push Sel from Var through to the driving + // expression of the selected bits. This can fix things like + // 'a[1:0] = foo', 'a[2] = a[1]', which are somewhat common. + for (DfgVertexVar& vtx : res.varVertices()) { + // Only handle DfgVarPacked at this point + DfgVarPacked* const varp = vtx.cast(); + if (!varp) continue; + // If Variable is not part of a cycle, move on + const uint32_t component = varp->getUser(); + if (!component) continue; + + UINFO(9, "Attempting to TraceDriver " << varp->nodep()->name()); + + varp->forEachSink([&](DfgVertex& sink) { + // Ignore if sink is not part of cycle + if (sink.getUser() != component) return; + // Only Handle Sels now + DfgSel* const selp = sink.cast(); + if (!selp) return; + // Try to find the driver of the selected bits outside the cycle + DfgVertex* const fixp = TraceDriver::apply(res, varp, selp->lsb(), selp->width()); + if (!fixp) return; + // Found an out-of-cycle driver. We can replace this sel with that. 
+ selp->replaceWith(fixp); + selp->unlinkDelete(res); + ++nImprovements; + ++ctx.m_breakCyclesContext.m_nImprovements; + dump(9, res, "TraceDriver"); + }); + } + } while (nImprovements != prevNImprovements); + + // If an improvement was made, return the still cyclic improved graph + if (nImprovements) { + UINFO(7, "Graph was improved " << nImprovements << " times"); + dump(7, res, "result-improved"); + ++ctx.m_breakCyclesContext.m_nImproved; + return {std::move(resultp), false}; + } + + // No improvement was made + UINFO(7, "Graph NOT improved"); + dump(7, res, "result-original"); + ++ctx.m_breakCyclesContext.m_nUnchanged; + return {nullptr, false}; +} diff --git a/src/V3DfgOptimizer.cpp b/src/V3DfgOptimizer.cpp index 64c03fb80..571b413ac 100644 --- a/src/V3DfgOptimizer.cpp +++ b/src/V3DfgOptimizer.cpp @@ -240,16 +240,34 @@ static void process(DfgGraph& dfg, V3DfgOptimizationContext& ctx) { // Extract the cyclic sub-graphs. We do this because a lot of the optimizations assume a // DAG, and large, mostly acyclic graphs could not be optimized due to the presence of // small cycles. - const std::vector>& cyclicComponents + std::vector> cyclicComponents = dfg.extractCyclicComponents("cyclic"); // Split the remaining acyclic DFG into [weakly] connected components - const std::vector>& acyclicComponents - = dfg.splitIntoComponents("acyclic"); + std::vector> acyclicComponents = dfg.splitIntoComponents("acyclic"); // Quick sanity check UASSERT_OBJ(dfg.size() == 0, dfg.modulep(), "DfgGraph should have become empty"); + // Attempt to convert cyclic components into acyclic ones + if (v3Global.opt.fDfgBreakCyckes()) { + for (auto it = cyclicComponents.begin(); it != cyclicComponents.end();) { + auto result = V3DfgPasses::breakCycles(**it, ctx); + if (!result.first) { + // No improvement, moving on. + ++it; + } else if (!result.second) { + // Improved, but still cyclic. Replace the original cyclic component. 
+ *it = std::move(result.first); + ++it; + } else { + // Result became acyclic. Move to acyclicComponents, delete original. + acyclicComponents.emplace_back(std::move(result.first)); + it = cyclicComponents.erase(it); + } + } + } + // For each acyclic component for (auto& component : acyclicComponents) { if (dumpDfgLevel() >= 7) component->dumpDotFilePrefixed(ctx.prefix() + "source"); diff --git a/src/V3DfgPasses.cpp b/src/V3DfgPasses.cpp index 63130699e..5d1e86795 100644 --- a/src/V3DfgPasses.cpp +++ b/src/V3DfgPasses.cpp @@ -30,6 +30,16 @@ V3DfgBinToOneHotContext::~V3DfgBinToOneHotContext() { m_decodersCreated); } +V3DfgBreakCyclesContext::~V3DfgBreakCyclesContext() { + V3Stats::addStat("Optimizations, DFG " + m_label + " BreakCycles, made acyclic", m_nFixed); + V3Stats::addStat("Optimizations, DFG " + m_label + " BreakCycles, improved", m_nImproved); + V3Stats::addStat("Optimizations, DFG " + m_label + " BreakCycles, left unchanged", + m_nUnchanged); + V3Stats::addStat("Optimizations, DFG " + m_label + " BreakCycles, trivial", m_nTrivial); + V3Stats::addStat("Optimizations, DFG " + m_label + " BreakCycles, changes applied", + m_nImprovements); +} + V3DfgCseContext::~V3DfgCseContext() { V3Stats::addStat("Optimizations, DFG " + m_label + " CSE, expressions eliminated", m_eliminated); diff --git a/src/V3DfgPasses.h b/src/V3DfgPasses.h index 3893c5d67..99cd404dd 100644 --- a/src/V3DfgPasses.h +++ b/src/V3DfgPasses.h @@ -40,6 +40,20 @@ public: ~V3DfgBinToOneHotContext() VL_MT_DISABLED; }; +class V3DfgBreakCyclesContext final { + const std::string m_label; // Label to apply to stats + +public: + VDouble0 m_nFixed; // Number of graphs that became acyclic + VDouble0 m_nImproved; // Number of graphs that were improved but still cyclic + VDouble0 m_nUnchanged; // Number of graphs that were left unchanged + VDouble0 m_nTrivial; // Number of trivial graphs that were skipped + VDouble0 m_nImprovements; // Number of changes made to graphs + explicit V3DfgBreakCyclesContext(const 
std::string& label) + : m_label{label} {} + ~V3DfgBreakCyclesContext() VL_MT_DISABLED; +}; + class V3DfgCseContext final { const std::string m_label; // Label to apply to stats @@ -93,6 +107,7 @@ public: VDouble0 m_resultEquations; // Number of result combinational equations V3DfgBinToOneHotContext m_binToOneHotContext{m_label}; + V3DfgBreakCyclesContext m_breakCyclesContext{m_label}; V3DfgCseContext m_cseContext0{m_label + " 1st"}; V3DfgCseContext m_cseContext1{m_label + " 2nd"}; V3DfgPeepholeContext m_peepholeContext{m_label}; @@ -120,6 +135,16 @@ DfgGraph* astToDfg(AstModule&, V3DfgOptimizationContext&) VL_MT_DISABLED; // Same as above, but for the entire netlist, after V3Scope DfgGraph* astToDfg(AstNetlist&, V3DfgOptimizationContext&) VL_MT_DISABLED; +// Attempt to make the given cyclic graph into an acyclic, or "less cyclic" +// equivalent. If the returned pointer is null, then no improvement was +// possible on the input graph. Otherwise the returned graph is an improvement +// on the input graph, with at least some cycles eliminated. The returned +// graph is always independent of the original. If an improved graph is +// returned, then the returned 'bool' flag indicates if the returned graph is +// acyclic (flag 'true'), or still cyclic (flag 'false'). 
+std::pair, bool> breakCycles(const DfgGraph&, + V3DfgOptimizationContext&) VL_MT_DISABLED; + // Optimize the given DfgGraph void optimize(DfgGraph&, V3DfgOptimizationContext&) VL_MT_DISABLED; diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 4c99cdbba..c4643c115 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1336,6 +1336,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, m_fDfgPostInline = flag; m_fDfgScoped = flag; }); + DECL_OPTION("-fdfg-break-cycles", FOnOff, &m_fDfgBreakCycles); DECL_OPTION("-fdfg-peephole", FOnOff, &m_fDfgPeephole); DECL_OPTION("-fdfg-peephole-", CbPartialMatch, [this](const char* optp) { // m_fDfgPeepholeDisabled.erase(optp); diff --git a/src/V3Options.h b/src/V3Options.h index 2d062fcb9..e77cc1cad 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -420,6 +420,7 @@ private: bool m_fConstBitOpTree; // main switch: -fno-const-bit-op-tree constant bit op tree bool m_fConstEager = true; // main switch: -fno-const-eagerly run V3Const during passes bool m_fDedupe; // main switch: -fno-dedupe: logic deduplication + bool m_fDfgBreakCycles = true; // main switch: -fno-dfg-break-cycles bool m_fDfgPeephole = true; // main switch: -fno-dfg-peephole bool m_fDfgPreInline; // main switch: -fno-dfg-pre-inline and -fno-dfg bool m_fDfgPostInline; // main switch: -fno-dfg-post-inline and -fno-dfg @@ -735,6 +736,7 @@ public: bool fConstBitOpTree() const { return m_fConstBitOpTree; } bool fConstEager() const { return m_fConstEager; } bool fDedupe() const { return m_fDedupe; } + bool fDfgBreakCyckes() const { return m_fDfgBreakCycles; } bool fDfgPeephole() const { return m_fDfgPeephole; } bool fDfgPreInline() const { return m_fDfgPreInline; } bool fDfgPostInline() const { return m_fDfgPostInline; } diff --git a/src/astgen b/src/astgen index 41f311f2d..03493426a 100755 --- a/src/astgen +++ b/src/astgen @@ -1219,6 +1219,29 @@ def write_dfg_auto_classes(filename): fh.write("\n") +def 
write_dfg_clone_cases(filename): + with open_file(filename) as fh: + + def emitBlock(pattern, **fmt): + fh.write(textwrap.dedent(pattern).format(**fmt)) + + for node in DfgVertexList: + # Only generate code for automatically derived leaf nodes + if (node.file is not None) or not node.isLeaf: + continue + + emitBlock('''\ + case VDfgType::at{t}: {{ + Dfg{t}* const cp = new Dfg{t}{{*clonep, vtx.fileline(), vtx.dtypep()}}; + vtxp2clonep.emplace(&vtx, cp); + break; + }} + ''', + t=node.name, + s=node.superClass.name) + fh.write("\n") + + def write_dfg_ast_to_dfg(filename): with open_file(filename) as fh: for node in DfgVertexList: @@ -1408,6 +1431,7 @@ if Args.classes: write_type_tests("Dfg", DfgVertexList) write_dfg_macros("V3Dfg__gen_macros.h") write_dfg_auto_classes("V3Dfg__gen_auto_classes.h") + write_dfg_clone_cases("V3Dfg__gen_clone_cases.h") write_dfg_ast_to_dfg("V3Dfg__gen_ast_to_dfg.h") write_dfg_dfg_to_ast("V3Dfg__gen_dfg_to_ast.h") diff --git a/test_regress/t/t_dfg_break_cycles.cpp b/test_regress/t/t_dfg_break_cycles.cpp new file mode 100644 index 000000000..0664397e1 --- /dev/null +++ b/test_regress/t/t_dfg_break_cycles.cpp @@ -0,0 +1,60 @@ +// +// DESCRIPTION: Verilator: DFG optimizer equivalence testing +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2022 by Geza Lore. 
+// SPDX-License-Identifier: CC0-1.0 +// + +#include +#include + +#include +#include +#include + +void rngUpdate(uint64_t& x) { + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; +} + +int main(int, char**) { + // Create contexts + VerilatedContext ctx; + + // Create models + Vref ref{&ctx}; + Vopt opt{&ctx}; + + uint64_t rand_a = 0x5aef0c8dd70a4497; + uint64_t rand_b = 0xf0c0a8dd75ae4497; + uint64_t srand_a = 0x00fa8dcc7ae4957; + uint64_t srand_b = 0x0fa8dc7ae3c9574; + + for (size_t n = 0; n < 200000; ++n) { + // Update rngs + rngUpdate(rand_a); + rngUpdate(rand_b); + rngUpdate(srand_a); + rngUpdate(srand_b); + + // Assign inputs + ref.rand_a = opt.rand_a = rand_a; + ref.rand_b = opt.rand_b = rand_b; + ref.srand_a = opt.srand_a = srand_a; + ref.srand_b = opt.srand_b = srand_b; + + // Evaluate both models + ref.eval(); + opt.eval(); + + // Check equivalence +#include "checks.h" + + // increment time + ctx.timeInc(1); + } + + std::cout << "*-* All Finished *-*\n"; +} diff --git a/test_regress/t/t_dfg_break_cycles.py b/test_regress/t/t_dfg_break_cycles.py new file mode 100755 index 000000000..79ff66fae --- /dev/null +++ b/test_regress/t/t_dfg_break_cycles.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2025 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt_all') +test.sim_time = 2000000 + +root = ".." 
+ +if not os.path.exists(root + "/.git"): + test.skip("Not in a git repository") + +# Read expected source lines hit +expectedLines = set() + +with open(root + "/src/V3DfgBreakCycles.cpp", 'r', encoding="utf8") as fd: + for lineno, line in enumerate(fd, 1): + line = line.split("//")[0] + if re.match(r'^[^#]*SET_RESULT', line): + expectedLines.add(lineno) + +if not expectedLines: + test.error("Failed to read expected source line numbers") + +# Generate the equivalence checks and declaration boilerplate +rdFile = test.top_filename +plistFile = test.obj_dir + "/portlist.vh" +pdeclFile = test.obj_dir + "/portdecl.vh" +checkFile = test.obj_dir + "/checks.h" +nExpectedCycles = 0 +with open(rdFile, 'r', encoding="utf8") as rdFh, \ + open(plistFile, 'w', encoding="utf8") as plistFh, \ + open(pdeclFile, 'w', encoding="utf8") as pdeclFh, \ + open(checkFile, 'w', encoding="utf8") as checkFh: + for line in rdFh: + line = line.split("//")[0] + m = re.search(r'`signal\((\w+),', line) + if not m: + continue + nExpectedCycles += 1 + sig = m.group(1) + plistFh.write(sig + ",\n") + pdeclFh.write("output " + sig + ";\n") + checkFh.write("if (ref." + sig + " != opt." + sig + ") {\n") + checkFh.write(" std::cout << \"Mismatched " + sig + "\" << std::endl;\n") + checkFh.write(" std::cout << \"Ref: 0x\" << std::hex << (ref." + sig + + " + 0) << std::endl;\n") + checkFh.write(" std::cout << \"Opt: 0x\" << std::hex << (opt." 
+ sig + + " + 0) << std::endl;\n") + checkFh.write(" std::exit(1);\n") + checkFh.write("}\n") + +# Compile un-optimized +test.compile(verilator_flags2=[ + "--stats", + "--build", + "-fno-dfg-break-cycles", + "+incdir+" + test.obj_dir, + "-Mdir", test.obj_dir + "/obj_ref", + "--prefix", "Vref", + "-Wno-UNOPTFLAT" +]) # yapf:disable + +# Check we got the expected number of circular logic warnings +test.file_grep(test.obj_dir + "/obj_ref/Vref__stats.txt", + r'Warnings, Suppressed UNOPTFLAT\s+(\d+)', nExpectedCycles) + +# Compile optimized - also builds executable +test.compile(verilator_flags2=[ + "--stats", + "--build", + "--exe", + "+incdir+" + test.obj_dir, + "-Mdir", test.obj_dir + "/obj_opt", + "--prefix", "Vopt", + "-Werror-UNOPTFLAT", + "--dumpi-V3DfgBreakCycles", "9", # To fill code coverage + "-CFLAGS \"-I .. -I ../obj_ref\"", + "../obj_ref/Vref__ALL.a", + "../../t/" + test.name + ".cpp" +]) # yapf:disable + +# Check all source lines hit +coveredLines = set() +with open(test.obj_dir + "/obj_opt/Vopt__V3DfgBreakCycles-TraceDriver-line-coverage.txt", + 'r', + encoding="utf8") as fd: + for line in fd: + coveredLines.add(int(line.strip())) + +if coveredLines != expectedLines: + for n in sorted(expectedLines - coveredLines): + test.error_keep_going(f"V3DfgBreakCycles.cpp line {n} not covered") + for n in sorted(coveredLines - expectedLines): + test.error_keep_going(f"V3DfgBreakCycles.cpp line {n} covered but not expected") + +# Execute test to check equivalence +test.execute(executable=test.obj_dir + "/obj_opt/Vopt") + +test.passes() diff --git a/test_regress/t/t_dfg_break_cycles.v b/test_regress/t/t_dfg_break_cycles.v new file mode 100644 index 000000000..e083cf7f8 --- /dev/null +++ b/test_regress/t/t_dfg_break_cycles.v @@ -0,0 +1,46 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2025 by Geza Lore. 
+// SPDX-License-Identifier: CC0-1.0 + +`define signal(name, width) wire [width-1:0] name; + +module t ( +`include "portlist.vh" // Boilerplate generated by t_dfg_break_cycles.py + rand_a, rand_b, srand_a, srand_b + ); + +`include "portdecl.vh" // Boilerplate generated by t_dfg_break_cycles.py + + input rand_a; + input rand_b; + input srand_a; + input srand_b; + wire logic [63:0] rand_a; + wire logic [63:0] rand_b; + wire logic signed [63:0] srand_a; + wire logic signed [63:0] srand_b; + + `signal(CONCAT_RHS, 2); + assign CONCAT_RHS[0] = rand_a[0]; + assign CONCAT_RHS[1] = CONCAT_RHS[0]; + + `signal(CONCAT_LHS, 2); + assign CONCAT_LHS[0] = CONCAT_LHS[1]; + assign CONCAT_LHS[1] = rand_a[1]; + + `signal(CONCAT_MID, 3); + assign CONCAT_MID[0] = |CONCAT_MID[2:1]; + assign CONCAT_MID[2:1] = {rand_a[2], ~rand_a[2]}; + + `signal(SEL, 3); + assign SEL[0] = rand_a[4]; + assign SEL[1] = SEL[0]; + assign SEL[2] = SEL[1]; + + `signal(EXTEND_SRC, 5); + assign EXTEND_SRC[0] = rand_a[3]; + assign EXTEND_SRC[3:1] = 3'(EXTEND_SRC[0]); + assign EXTEND_SRC[4] = EXTEND_SRC[1]; +endmodule diff --git a/test_regress/t/t_dfg_true_cycle_bad.out b/test_regress/t/t_dfg_true_cycle_bad.out new file mode 100644 index 000000000..9ab0466c3 --- /dev/null +++ b/test_regress/t/t_dfg_true_cycle_bad.out @@ -0,0 +1,9 @@ +%Warning-UNOPTFLAT: t/t_dfg_true_cycle_bad.v:10:23: Signal unoptimizable: Circular combinational logic: 'o' + 10 | output wire [9:0] o + | ^ + ... For warning description see https://verilator.org/warn/UNOPTFLAT?v=latest + ... Use "/* verilator lint_off UNOPTFLAT */" and lint_on around source to disable this message. 
+ t/t_dfg_true_cycle_bad.v:10:23: Example path: o + t/t_dfg_true_cycle_bad.v:12:22: Example path: ASSIGNW + t/t_dfg_true_cycle_bad.v:10:23: Example path: o +%Error: Exiting due to diff --git a/test_regress/t/t_dfg_true_cycle_bad.py b/test_regress/t/t_dfg_true_cycle_bad.py new file mode 100755 index 000000000..1bf1426f9 --- /dev/null +++ b/test_regress/t/t_dfg_true_cycle_bad.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2025 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.lint(fails=True, expect_filename=test.golden_filename) + +test.passes() diff --git a/test_regress/t/t_dfg_true_cycle_bad.v b/test_regress/t/t_dfg_true_cycle_bad.v new file mode 100644 index 000000000..b9f7e8b56 --- /dev/null +++ b/test_regress/t/t_dfg_true_cycle_bad.v @@ -0,0 +1,16 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2025 by Geza Lore. 
+// SPDX-License-Identifier: CC0-1.0 + +`default_nettype none + +module t( + output wire [9:0] o +); + assign o[1:0] = o[9:8]; + assign o[3:2] = {o[0], o[1]}; + assign o[7:4] = 4'(o[3:2]); + assign o[9:8] = o[5:4]; +endmodule diff --git a/test_regress/t/t_unoptflat_simple_2_bad.py b/test_regress/t/t_unoptflat_simple_2_bad.py index 5c6e4b955..a87556b80 100755 --- a/test_regress/t/t_unoptflat_simple_2_bad.py +++ b/test_regress/t/t_unoptflat_simple_2_bad.py @@ -14,7 +14,7 @@ test.top_filename = "t/t_unoptflat_simple_2.v" # Compile only test.compile(verilator_flags3=[], - verilator_flags2=["--report-unoptflat"], + verilator_flags2=["--report-unoptflat", "-fno-dfg-break-cycles"], fails=True, expect_filename=test.golden_filename)