From bb0e1c8c61248cc02d24c80990b19cd12a1697bc Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sat, 7 Feb 2026 18:06:12 +0000 Subject: [PATCH] Optimize temporary insertion for concatenations in Dfg (#7013) Add a new Dfg pass 'pushDownSel'. This will try to move selects through a tree of concatenations in order to eliminate temporary nodes holding intermediate concatenation results. This can get rid of a lot of variables when packed arrays are assigned in parts (e.g. bit-wise). --- docs/guide/exe_verilator.rst | 4 + src/CMakeLists.txt | 1 + src/Makefile_obj.in | 1 + src/V3DfgContext.h | 17 ++ src/V3DfgPasses.cpp | 4 + src/V3DfgPasses.h | 2 + src/V3DfgPushDownSels.cpp | 395 +++++++++++++++++++++++++++ src/V3Options.cpp | 1 + src/V3Options.h | 2 + test_regress/t/t_dfg_push_sel.py | 26 ++ test_regress/t/t_dfg_push_sel.v | 66 +++++ test_regress/t/t_dfg_push_sel_off.py | 29 ++ 12 files changed, 548 insertions(+) create mode 100644 src/V3DfgPushDownSels.cpp create mode 100755 test_regress/t/t_dfg_push_sel.py create mode 100644 test_regress/t/t_dfg_push_sel.v create mode 100755 test_regress/t/t_dfg_push_sel_off.py diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index 7c21ae284..2ad1164d8 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -662,6 +662,10 @@ Summary: Rarely needed. Do not apply the DFG optimizer before inlining. +.. option:: -fno-dfg-push-down-sels + + Rarely needed. Disable DFG select/concatenation optimization. + .. option:: -fno-dfg-scoped Rarely needed. Do not apply the DFG optimizer across module scopes. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index de37df5db..7efc365d6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -250,6 +250,7 @@ set(COMMON_SOURCES V3DfgOptimizer.cpp V3DfgPasses.cpp V3DfgPeephole.cpp + V3DfgPushDownSels.cpp V3DfgRegularize.cpp V3DfgSynthesize.cpp V3DiagSarif.cpp diff --git a/src/Makefile_obj.in b/src/Makefile_obj.in index ad6f2387f..cb80e1dac 100644 --- a/src/Makefile_obj.in +++ b/src/Makefile_obj.in @@ -266,6 +266,7 @@ RAW_OBJS_PCH_ASTNOMT = \ V3DfgOptimizer.o \ V3DfgPasses.o \ V3DfgPeephole.o \ + V3DfgPushDownSels.o \ V3DfgRegularize.o \ V3DfgSynthesize.o \ V3DiagSarif.o \ diff --git a/src/V3DfgContext.h b/src/V3DfgContext.h index 92255189c..16b427521 100644 --- a/src/V3DfgContext.h +++ b/src/V3DfgContext.h @@ -171,6 +171,22 @@ private: V3DfgPeepholeContext(V3DfgContext& ctx, const std::string& label) VL_MT_DISABLED; ~V3DfgPeepholeContext() VL_MT_DISABLED; }; +class V3DfgPushDownSelsContext final : public V3DfgSubContext { + // Only V3DfgContext can create an instance + friend class V3DfgContext; + +public: + // STATE + size_t m_pushedDown = 0; // Number of selects pushed down through concatenations + size_t m_wouldBeCyclic = 0; // Number of selects not pushed due to cycle +private: + V3DfgPushDownSelsContext(V3DfgContext& ctx, const std::string& label) + : V3DfgSubContext{ctx, label, "PushDownSels"} {} + ~V3DfgPushDownSelsContext() { + addStat("sels pushed down", m_pushedDown); + addStat("would be cyclic", m_wouldBeCyclic); + } +}; class V3DfgRegularizeContext final : public V3DfgSubContext { // Only V3DfgContext can create an instance friend class V3DfgContext; @@ -348,6 +364,7 @@ public: V3DfgCseContext m_cseContext1{*this, m_label + " 2nd"}; V3DfgDfgToAstContext m_dfg2AstContext{*this, m_label}; V3DfgPeepholeContext m_peepholeContext{*this, m_label}; + V3DfgPushDownSelsContext m_pushDownSelsContext{*this, m_label}; V3DfgRegularizeContext m_regularizeContext{*this, m_label}; V3DfgSynthesisContext 
m_synthContext{*this, m_label}; diff --git a/src/V3DfgPasses.cpp b/src/V3DfgPasses.cpp index 5b9475374..3a9613531 100644 --- a/src/V3DfgPasses.cpp +++ b/src/V3DfgPasses.cpp @@ -371,6 +371,10 @@ void V3DfgPasses::optimize(DfgGraph& dfg, V3DfgContext& ctx) { run("cse0 ", dumpLvl >= 4, [&]() { cse(dfg, ctx.m_cseContext0); }); run("binToOneHot ", dumpLvl >= 4, [&]() { binToOneHot(dfg, ctx.m_binToOneHotContext); }); run("peephole ", dumpLvl >= 4, [&]() { peephole(dfg, ctx.m_peepholeContext); }); + // Run only on final scoped DfgGraphs, as otherwise later DfgPeephole would just undo this work + if (!dfg.modulep()) { + run("pushDownSels", dumpLvl >= 4, [&]() { pushDownSels(dfg, ctx.m_pushDownSelsContext); }); + } run("cse1 ", dumpLvl >= 4, [&]() { cse(dfg, ctx.m_cseContext1); }); run("output ", dumpLvl >= 3, [&]() { /* debug dump only */ }); diff --git a/src/V3DfgPasses.h b/src/V3DfgPasses.h index bf745ea6b..bb79b1d4c 100644 --- a/src/V3DfgPasses.h +++ b/src/V3DfgPasses.h @@ -76,6 +76,8 @@ uint32_t colorStronglyConnectedComponents(const DfgGraph&, DfgUserMap& void cse(DfgGraph&, V3DfgCseContext&) VL_MT_DISABLED; // Inline fully driven variables void inlineVars(DfgGraph&) VL_MT_DISABLED; +// Push down selects through concatenations +void pushDownSels(DfgGraph& dfg, V3DfgPushDownSelsContext& ctx) VL_MT_DISABLED; // Peephole optimizations void peephole(DfgGraph&, V3DfgPeepholeContext&) VL_MT_DISABLED; // Regularize graph. This must be run before converting back to Ast. 
diff --git a/src/V3DfgPushDownSels.cpp b/src/V3DfgPushDownSels.cpp
new file mode 100644
index 000000000..a784ad874
--- /dev/null
+++ b/src/V3DfgPushDownSels.cpp
@@ -0,0 +1,395 @@
+// -*- mode: C++; c-file-style: "cc-mode" -*-
+//*************************************************************************
+// DESCRIPTION: Verilator: Push DfgSels through DfgConcat to avoid temporaries
+//
+// Code available from: https://verilator.org
+//
+//*************************************************************************
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of either the GNU Lesser General Public License Version 3
+// or the Perl Artistic License Version 2.0.
+// SPDX-FileCopyrightText: 2003-2026 Wilson Snyder
+// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
+//
+//*************************************************************************
+//
+// If a DfgConcat drives both a DfgSel and a DfgConcat, and would otherwise
+// not need a temporary, then push the DfgSel down to the lower DfgConcat.
+// This avoids having to insert a temporary for many intermediate results.
+//
+// We need to be careful not to create a cycle by pushing down a DfgSel
+// that in turn feeds the concat it is being redirected to. To handle this,
+// we use the Pearce-Kelly algorithm to check if a cycle would be created by
+// adding a new edge. See: "A Dynamic Topological Sort Algorithm for
+// Directed Acyclic Graphs", David J. Pearce, Paul H.J. Kelly, 2007
+//
+//*************************************************************************
+
+#include "V3PchAstNoMT.h"  // VL_MT_DISABLED_CODE_UNIT
+
+#include "V3Dfg.h"
+#include "V3DfgPasses.h"
+#include "V3Error.h"
+
+VL_DEFINE_DEBUG_FUNCTIONS;
+
+class V3DfgPushDownSels final {
+    // TYPES
+
+    // Each vertex has an associated State via DfgUserMap
+    struct State final {
+        // -- For Pearce-Kelly algorithm only
+        // Topological ordering index. For all pairs of vertices (a, b),
+        // ord(a) < ord(b) only if there is no path from b to a in the graph.
+        uint32_t ord = 0;
+        bool visited = false;  // Whether the vertex has been visited during DFS
+        // -- For the actual optimization (worklist management) only
+        bool onWorklist = false;  // Whether the vertex is in m_catps
+    };
+
+    // STATE
+    // The graph being processed - must be acyclic (DAG)
+    DfgGraph& m_dfg;
+    // Context for pass
+    V3DfgPushDownSelsContext& m_ctx;
+    // Map from DfgVertex to State
+    DfgUserMap<State> m_stateMap = m_dfg.makeUserMap<State>();
+
+    // STATE - Temporaries for Pearce-Kelly algorithm - as members to avoid reallocations
+    std::vector<DfgVertex*> m_stack;  // DFS stack for various steps
+    std::vector<DfgVertex*> m_fwdVtxps;  // Vertices found during forward DFS
+    std::vector<DfgVertex*> m_bwdVtxps;  // Vertices found during backward DFS - also work buffer
+    std::vector<uint32_t> m_ords;  // Ordering numbers reassigned in current ordering update
+
+    // STATE - For vertex movement
+    std::vector<DfgConcat*> m_catps;  // DfgConcat vertices that may be optimizable
+
+    // METHODS - Pearce-Kelly algorithm
+    void debugCheck() {
+        if (VL_LIKELY(!v3Global.opt.debugCheck())) return;
+        m_dfg.forEachVertex([&](const DfgVertex& src) {
+            const State& srcState = m_stateMap[src];
+            UASSERT_OBJ(!srcState.visited, &src, "Visit marker not reset");
+            UASSERT_OBJ(srcState.ord > 0, &src, "No ordering assigned");
+            src.foreachSink([&](const DfgVertex& dst) {
+                const State& dstState = m_stateMap[dst];
+                UASSERT_OBJ(srcState.ord < dstState.ord, &src, "Invalid ordering");
+                return false;
+            });
+        });
+    }
+
+    // Find initial topological ordering using reverse post order numbering via DFS
+    void initializeOrdering() {
+        // Start from all vertices with no inputs
+        m_stack.reserve(m_dfg.size());
+        for (DfgVertexVar& vtx : m_dfg.varVertices()) {
+            if (vtx.srcp() || vtx.defaultp()) continue;
+            m_stack.push_back(&vtx);
+        }
+        for (DfgConst& vtx : m_dfg.constVertices()) m_stack.push_back(&vtx);
+
+        // Reverse post order number to assign to next vertex
+        uint32_t rpoNext = m_dfg.size();
+
+        // DFS loop
+        while (!m_stack.empty()) {
+            DfgVertex& vtx = *m_stack.back();
+            State& vtxState = m_stateMap[vtx];
+            // If the ordering already assigned, just pop. It was visited
+            // through another path through a different child.
+            if (vtxState.ord) {
+                UASSERT_OBJ(vtxState.visited, &vtx, "Not visited, but ordering assigned");
+                m_stack.pop_back();
+                continue;
+            }
+            // When exiting a vertex, assign the reverse post order number as ordering
+            if (vtxState.visited) {
+                vtxState.ord = rpoNext--;
+                m_stack.pop_back();
+                continue;
+            }
+            // Entering vertex. Enqueue all unvisited children.
+            vtxState.visited = true;
+            vtx.foreachSink([&](DfgVertex& dst) {
+                const State& dstState = m_stateMap[dst];
+                if (dstState.visited) return false;
+                m_stack.push_back(&dst);
+                return false;
+            });
+        }
+
+        // Should reach exact zero
+        UASSERT(!rpoNext, "All vertics should have been visited exactly once");
+
+        // Reset marks
+        m_dfg.forEachVertex([&](DfgVertex& vtx) { m_stateMap[vtx].visited = false; });
+
+        // Make sure it's valid
+        debugCheck();
+    }
+
+    // Attempt to add an edge to the graph. Returns false if this would create
+    // a cycle, and in that case, no state is modified, so it is safe to then
+    // not add the actual edge. Otherwise returns true and updates state as
+    // if the edge was indeed added, so caller must add the actual edge.
+    bool addEdge(DfgVertex& src, DfgVertex& dst) {
+        UASSERT_OBJ(&src != &dst, &src, "Should be different");
+        State& srcState = m_stateMap[src];
+        State& dstState = m_stateMap[dst];
+        // If 'dst' is after 'src' in the topological ordering,
+        // then ok to add edge and no need to update the ordering.
+        if (dstState.ord > srcState.ord) return true;
+        // Pearce-Kelly discovery step
+        if (pkFwdDfs(src, dst)) return false;
+        pkBwdDfs(src, dst);
+        // Pearce-Kelly update step
+        pkReorder();
+        return true;
+    }
+
+    // Pearce-Kelly forward DFS discovery step. Record visited vertices.
+    // Returns true if a cycle would be created by adding the edge (src, dst).
+    bool pkFwdDfs(DfgVertex& src, DfgVertex& dst) {
+        const uint32_t srcOrd = m_stateMap[src].ord;
+        // DFS forward from dst
+        m_stack.push_back(&dst);
+        while (!m_stack.empty()) {
+            DfgVertex& vtx = *m_stack.back();
+            m_stack.pop_back();
+            State& vtxState = m_stateMap[vtx];
+
+            // Ignore if already visited through another path through different sink
+            if (vtxState.visited) continue;
+
+            // Save vertex, mark visited
+            m_fwdVtxps.push_back(&vtx);
+            vtxState.visited = true;
+
+            // Enqueue unvisited sinks in affected area
+            const bool cyclic = vtx.foreachSink([&](DfgVertex& sink) {
+                State& sinkState = m_stateMap[sink];
+                if (sinkState.ord == srcOrd) return true;  // Stop completely if cyclic
+                if (sinkState.visited) return false;  // Stop search if already visited
+                if (sinkState.ord > srcOrd) return false;  // Stop search if outside critical area
+                m_stack.push_back(&sink);
+                return false;
+            });
+
+            // If would be cyclic, reset state and return true
+            if (cyclic) {
+                for (DfgVertex* const vtxp : m_fwdVtxps) m_stateMap[vtxp].visited = false;
+                m_fwdVtxps.clear();
+                m_stack.clear();
+                return true;
+            }
+        }
+        // Won't be cyclic, return false
+        return false;
+    }
+
+    // Pearce-Kelly backward DFS discovery step. Record visited vertices.
+    void pkBwdDfs(DfgVertex& src, DfgVertex& dst) {
+        const uint32_t dstOrd = m_stateMap[dst].ord;
+        // DFS backward from src
+        m_stack.push_back(&src);
+        while (!m_stack.empty()) {
+            DfgVertex& vtx = *m_stack.back();
+            m_stack.pop_back();
+            State& vtxState = m_stateMap[vtx];
+
+            // Ignore if already visited through another path through different source
+            if (vtxState.visited) continue;
+
+            // Save vertex, mark visited
+            m_bwdVtxps.push_back(&vtx);
+            vtxState.visited = true;
+
+            // Enqueue unvisited sources in affected area
+            vtx.foreachSource([&](DfgVertex& source) {
+                const State& sourceState = m_stateMap[source];
+                if (sourceState.visited) return false;  // Stop search if already visited
+                if (sourceState.ord < dstOrd)
+                    return false;  // Stop search if outside critical area
+                m_stack.push_back(&source);
+                return false;
+            });
+        }
+    }
+
+    // Pearce-Kelly reorder step
+    void pkReorder() {
+        // Sort vertices found during forward and backward search
+        const auto cmp = [this](const DfgVertex* const ap, const DfgVertex* const bp) {
+            return m_stateMap[ap].ord < m_stateMap[bp].ord;
+        };
+        std::sort(m_bwdVtxps.begin(), m_bwdVtxps.end(), cmp);
+        std::sort(m_fwdVtxps.begin(), m_fwdVtxps.end(), cmp);
+        // Will use m_bwdVtxps for processing to avoid copying. Save the size.
+        const size_t bwdSize = m_bwdVtxps.size();
+        // Append forward vertices to the backward list for processing
+        m_bwdVtxps.insert(m_bwdVtxps.end(), m_fwdVtxps.begin(), m_fwdVtxps.end());
+        // Save the current ordering numbers, reset visitation marks
+        for (DfgVertex* const vtxp : m_bwdVtxps) {
+            State& state = m_stateMap[vtxp];
+            state.visited = false;
+            m_ords.push_back(state.ord);
+        }
+        // The current ordering numbers are sorted in the two sub lists, merge them
+        std::inplace_merge(m_ords.begin(), m_ords.begin() + bwdSize, m_ords.end());
+        // Assign new ordering
+        for (size_t i = 0; i < m_ords.size(); ++i) m_stateMap[m_bwdVtxps[i]].ord = m_ords[i];
+        // Reset state
+        m_fwdVtxps.clear();
+        m_bwdVtxps.clear();
+        m_ords.clear();
+        // Make sure it's valid
+        debugCheck();
+    }
+
+    // METHODS - Vertex processing
+
+    static bool ignoredSink(const DfgVertex& sink) {
+        // Ignore non observable variable sinks. These will be eliminated.
+        if (const DfgVarPacked* const varp = sink.cast<DfgVarPacked>()) {
+            if (!varp->hasSinks() && !varp->isObserved()) return true;
+        }
+        return false;
+    }
+
+    // Find all concatenations that feed another concatenation and may be
+    // optimizable. These are the ones that feed a DfgSel, and no other
+    // observable sinks. (If there were other observable sinks, a temporary
+    // would be required anyway.)
+    void findCandidates() {
+        for (DfgVertex& vtx : m_dfg.opVertices()) {
+            // Consider only concatenations ...
+            DfgConcat* const catp = vtx.cast<DfgConcat>();
+            if (!catp) continue;
+
+            // Count the various types of sinks
+            uint32_t nSels = 0;
+            uint32_t nCats = 0;
+            uint32_t nOther = 0;
+            vtx.foreachSink([&](const DfgVertex& sink) {
+                if (sink.is<DfgSel>()) {
+                    ++nSels;
+                } else if (sink.is<DfgConcat>()) {
+                    ++nCats;
+                } else if (!ignoredSink(sink)) {
+                    ++nOther;
+                }
+                return false;
+            });
+
+            // Consider if optimizable
+            if (nSels > 0 && nCats == 1 && nOther == 0) {
+                m_catps.push_back(catp);
+                m_stateMap[catp].onWorklist = true;
+            }
+        }
+    }
+
+    void pushDownSels() {
+        // Selects driven by the current vertex. Outside loop to avoid reallocation.
+        std::vector<DfgSel*> selps;
+        selps.reserve(m_dfg.size());
+        // Consider each concatenation
+        while (!m_catps.empty()) {
+            DfgConcat* const catp = m_catps.back();
+            m_catps.pop_back();
+            m_stateMap[catp].onWorklist = false;
+
+            // Iterate sinks, collect selects, check if should be optimized
+            selps.clear();
+            DfgVertex* sinkp = nullptr;  // The only non DfgSel sink (ignoring some DfgVars)
+            const bool multipleNonSelSinks = catp->foreachSink([&](DfgVertex& sink) {
+                // Collect selects
+                if (DfgSel* const selp = sink.cast<DfgSel>()) {
+                    selps.emplace_back(selp);
+                    return false;
+                }
+                // Skip ignored sinks
+                if (ignoredSink(sink)) return false;
+                // If already found a non DfgSel sink, return true
+                if (sinkp) return true;
+                // Save the non DfgSel sink
+                sinkp = &sink;
+                return false;
+            });
+
+            // If it has multiple non DfgSel sinks, it will need a temporary, so don't bother
+            if (multipleNonSelSinks) continue;
+            // We only add DfgConcats to the work list that drive a select.
+            UASSERT_OBJ(!selps.empty(), catp, "Should have selects");
+            // If no other sink, then nothing to do
+            if (!sinkp) continue;
+            // If the only other sink is not a concatenation, then nothing to do
+            DfgConcat* const sinkCatp = sinkp->cast<DfgConcat>();
+            if (!sinkCatp) continue;
+
+            // Ok, we can try to push the selects down to the sink DfgConcat
+            const uint32_t offset = sinkCatp->rhsp() == catp ? 0 : sinkCatp->rhsp()->width();
+            const uint32_t pushedDownBefore = m_ctx.m_pushedDown;
+            for (DfgSel* const selp : selps) {
+                // Don't do it if it would create a cycle
+                if (!addEdge(*sinkCatp, *selp)) {
+                    ++m_ctx.m_wouldBeCyclic;
+                    continue;
+                }
+                // Otherwise redirect the select
+                ++m_ctx.m_pushedDown;
+                selp->lsb(selp->lsb() + offset);
+                selp->fromp(sinkCatp);
+            }
+            // If we pushed down any selects, then we need to consider the sink concatenation
+            // again
+            State& sinkCatState = m_stateMap[sinkCatp];
+            if (pushedDownBefore != m_ctx.m_pushedDown && !sinkCatState.onWorklist) {
+                m_catps.push_back(sinkCatp);
+                sinkCatState.onWorklist = true;
+            }
+        }
+    }
+
+    // CONSTRUCTOR
+    V3DfgPushDownSels(DfgGraph& dfg, V3DfgPushDownSelsContext& ctx)
+        : m_dfg{dfg}
+        , m_ctx{ctx} {
+
+        // Find optimization candidates
+        m_catps.reserve(m_dfg.size());
+        findCandidates();
+        // Early exit if nothing to do
+        if (m_catps.empty()) return;
+
+        // Pre-allocate storage
+        m_stack.reserve(m_dfg.size());
+        m_fwdVtxps.reserve(m_dfg.size());
+        m_bwdVtxps.reserve(m_dfg.size());
+        m_ords.reserve(m_dfg.size());
+
+        // Initialize topological ordering
+        initializeOrdering();
+
+        // Sort candidates in topological order so we process them the least amount
+        std::sort(m_catps.begin(), m_catps.end(),
+                  [this](const DfgConcat* const ap, const DfgConcat* const bp) {
+                      return m_stateMap[ap].ord < m_stateMap[bp].ord;
+                  });
+
+        // Push selects down to the lowest concatenation
+        pushDownSels();
+    }
+
+public:
+    static void apply(DfgGraph& dfg, V3DfgPushDownSelsContext& ctx) {
+        V3DfgPushDownSels{dfg, ctx};
+    }
+};
+
+void V3DfgPasses::pushDownSels(DfgGraph& dfg, V3DfgPushDownSelsContext& ctx) {
+    if (!v3Global.opt.fDfgPushDownSels()) return;
+    V3DfgPushDownSels::apply(dfg, ctx);
+}
diff --git a/src/V3Options.cpp b/src/V3Options.cpp
index 42108834f..86a703515 100644
--- a/src/V3Options.cpp
+++ b/src/V3Options.cpp
@@ -1464,6 +1464,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int
argc, }); DECL_OPTION("-fdfg-pre-inline", FOnOff, &m_fDfgPreInline); DECL_OPTION("-fdfg-post-inline", FOnOff, &m_fDfgPostInline); + DECL_OPTION("-fdfg-push-down-sels", FOnOff, &m_fDfgPushDownSels); DECL_OPTION("-fdfg-scoped", FOnOff, &m_fDfgScoped); DECL_OPTION("-fdfg-synthesize-all", FOnOff, &m_fDfgSynthesizeAll); DECL_OPTION("-fexpand", FOnOff, &m_fExpand); diff --git a/src/V3Options.h b/src/V3Options.h index 935cdd2d9..c761455d5 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -397,6 +397,7 @@ private: bool m_fDfgPeephole = true; // main switch: -fno-dfg-peephole bool m_fDfgPreInline; // main switch: -fno-dfg-pre-inline and -fno-dfg bool m_fDfgPostInline; // main switch: -fno-dfg-post-inline and -fno-dfg + bool m_fDfgPushDownSels = true; // main switch: -fno-dfg-push-down-sels bool m_fDfgScoped; // main switch: -fno-dfg-scoped and -fno-dfg bool m_fDfgSynthesizeAll = false; // main switch: -fdfg-synthesize-all bool m_fDeadAssigns; // main switch: -fno-dead-assigns: remove dead assigns @@ -711,6 +712,7 @@ public: bool fDfgPeephole() const { return m_fDfgPeephole; } bool fDfgPreInline() const { return m_fDfgPreInline; } bool fDfgPostInline() const { return m_fDfgPostInline; } + bool fDfgPushDownSels() const { return m_fDfgPushDownSels; } bool fDfgScoped() const { return m_fDfgScoped; } bool fDfgSynthesizeAll() const { return m_fDfgSynthesizeAll; } bool fDfgPeepholeEnabled(const std::string& name) const { diff --git a/test_regress/t/t_dfg_push_sel.py b/test_regress/t/t_dfg_push_sel.py new file mode 100755 index 000000000..281112afa --- /dev/null +++ b/test_regress/t/t_dfg_push_sel.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. 
+# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('simulator_st') + +test.compile(verilator_flags2=[ + "--binary", "--stats", "-fno-dfg-pre-inline", "-fno-dfg-post-inline", "-fno-dfg-peephole" +]) + +test.execute() + +if test.vlt: + test.file_grep(test.stats, r'Optimizations, DFG scoped PushDownSels, sels pushed down\s+(\d+)', + 50) + test.file_grep(test.stats, r'Optimizations, DFG scoped PushDownSels, would be cyclic\s+(\d+)', + 1) + +test.passes() diff --git a/test_regress/t/t_dfg_push_sel.v b/test_regress/t/t_dfg_push_sel.v new file mode 100644 index 000000000..2c0ad784c --- /dev/null +++ b/test_regress/t/t_dfg_push_sel.v @@ -0,0 +1,66 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain. +// SPDX-FileCopyrightText: 2026 Wilson Snyder +// SPDX-License-Identifier: CC0-1.0 + +`define stop $stop +`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got=%0x exp=%0x (%s !== %s)\n", `__FILE__,`__LINE__, (gotv), (expv), `"gotv`", `"expv`"); `stop; end while(0); + +module t; + + bit clk = 1'b0; + always #5 clk = ~clk; + + logic [63:0] crc = 64'h5aef0c8d_d70a4497; + + localparam N = 16; + + // Generate variables + for (genvar n = 0; n < N; ++n) begin : vars + logic [n:0] tmp; + logic out; + end + + // Generate logic + for (genvar n = 0; n < N; ++n) begin + if (n == 0) begin + assign vars[n].tmp = ~crc[n]; + assign vars[n].out = vars[n].tmp[n]; + end else begin + assign vars[n].tmp = {~crc[n], vars[n-1].tmp}; + assign vars[n].out = vars[n].tmp[n] ^ vars[n-1].out; + end + end + + // Would create cycle: + wire [3:0] danger_src = {crc[4:3], crc[1:0]}; + wire [1:0] danger_sel = danger_src[2:1]; + wire [5:0] danger_dst = {~danger_sel, danger_src}; + + // Sink has no other sinks + wire [3:0] noother_src = {crc[5:4], crc[2:1]}; + wire [1:0] noother_sel = noother_src[2:1]; + wire 
[7:0] noother_dst = {crc[9:6], noother_src}; // signal intentionally unused + + int cyc; + always @(posedge clk) begin + cyc <= cyc + 1; + crc <= {crc[62:0], crc[63] ^ crc[2] ^ crc[0]}; + //$display("%16b %16b", ~crc[N-1:0], vars[N-1].tmp); + //$display("%16b %16b", ^(~crc[N-1:0]), vars[N-1].out); + // Check halfway through, this prevents pushing sels past this point + `checkh(vars[N/2].tmp, ~crc[N/2:0]); + `checkh(vars[N/2].out, ^(~crc[N/2:0])); + // Check final value + `checkh(vars[N-1].tmp, ~crc[N-1:0]); + `checkh(vars[N-1].out, ^(~crc[N-1:0])); + if (cyc == 10) begin + // Observe danger_dst so it's not eliminated + $display("%0b", danger_dst); + $write("*-* All Finished *-*\n"); + $finish; + end + end + +endmodule diff --git a/test_regress/t/t_dfg_push_sel_off.py b/test_regress/t/t_dfg_push_sel_off.py new file mode 100755 index 000000000..14b54340f --- /dev/null +++ b/test_regress/t/t_dfg_push_sel_off.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. +# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('simulator_st') + +test.top_filename = "t/t_dfg_push_sel.v" + +test.compile(verilator_flags2=[ + "--binary", "--stats", "-fno-dfg-pre-inline", "-fno-dfg-post-inline", "-fno-dfg-peephole", + "-fno-dfg-push-down-sels" +]) + +test.execute() + +if test.vlt: + test.file_grep(test.stats, r'Optimizations, DFG scoped PushDownSels, sels pushed down\s+(\d+)', + 0) + test.file_grep(test.stats, r'Optimizations, DFG scoped PushDownSels, would be cyclic\s+(\d+)', + 0) + +test.passes()