From c7a262b05d05d866255545994ba114544c8b8019 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Thu, 11 Jun 2026 16:00:30 +0100 Subject: [PATCH] Optimize bit select removal earlier in Dfg (#7762) Add a simple Dfg pass that removes redundant bit selects early. This can significantly cut down on downstream work and remove some temporary variables introduced during synthesis. --- src/V3DfgContext.h | 21 +++++++ src/V3DfgOptimizer.cpp | 7 +++ src/V3DfgPasses.cpp | 72 ++++++++++++++++++++++++ src/V3DfgPasses.h | 2 + test_regress/t/t_dfg_break_cycles.py | 13 +++-- test_regress/t/t_dfg_break_cycles.v | 6 +- test_regress/t/t_dfg_break_cycles_off.py | 16 ++++++ 7 files changed, 130 insertions(+), 7 deletions(-) create mode 100755 test_regress/t/t_dfg_break_cycles_off.py diff --git a/src/V3DfgContext.h b/src/V3DfgContext.h index dc1b54666..21e7af31a 100644 --- a/src/V3DfgContext.h +++ b/src/V3DfgContext.h @@ -234,6 +234,26 @@ private: addStat("temporaries introduced", m_temporariesIntroduced); } }; +class V3DfgRemoveSelectsContext final : public V3DfgSubContext { + // Only V3DfgContext can create an instance + friend class V3DfgContext; + +public: + // STATE + VDouble0 m_removedFullWidth; // Number of full width selects removed + VDouble0 m_replacedWithSelFromFull; // Number of selects replaced with sel from full driver + VDouble0 m_replacedWithSelFromPart; // Number of selects replaced with sel from partial driver + VDouble0 m_replacedWithPart; // Number of selects replaced with part of driver +private: + V3DfgRemoveSelectsContext() + : V3DfgSubContext{"RemoveSelects"} {} + ~V3DfgRemoveSelectsContext() { + addStat("full width selects removed", m_removedFullWidth); + addStat("replaced with sel from full driver", m_replacedWithSelFromFull); + addStat("replaced with sel from partial driver", m_replacedWithSelFromPart); + addStat("replaced with partial driver", m_replacedWithPart); + } +}; class V3DfgRemoveUnobservableContext final : public V3DfgSubContext { // Only V3DfgContext 
can create an instance friend class V3DfgContext; @@ -399,6 +419,7 @@ public: V3DfgPeepholeContext m_peepholeContext; V3DfgPushDownSelsContext m_pushDownSelsContext; V3DfgRegularizeContext m_regularizeContext; + V3DfgRemoveSelectsContext m_removeSelectsContext; V3DfgRemoveUnobservableContext m_removeUnobservableContext; V3DfgSynthesisContext m_synthContext; diff --git a/src/V3DfgOptimizer.cpp b/src/V3DfgOptimizer.cpp index b2fa0f9e8..0ad3b8aa9 100644 --- a/src/V3DfgOptimizer.cpp +++ b/src/V3DfgOptimizer.cpp @@ -139,6 +139,13 @@ class DataflowOptimize final { dfg.mergeGraphs(std::move(madeAcyclicComponents)); endOfStage("breakCycles", dfg, cyclicComps); + // Remove redundant selects + V3DfgPasses::removeSelects(dfg, m_ctx.m_removeSelectsContext); + for (std::unique_ptr& compp : cyclicComps) { + V3DfgPasses::removeSelects(*compp, m_ctx.m_removeSelectsContext); + } + endOfStage("removeSelects", dfg, cyclicComps); + // Split the acyclic DFG into [weakly] connected components std::vector> acyclicComps = dfg.splitIntoComponents("acyclic"); UASSERT(dfg.size() == 0, "DfgGraph should have become empty"); diff --git a/src/V3DfgPasses.cpp b/src/V3DfgPasses.cpp index 3e14397c6..afca00589 100644 --- a/src/V3DfgPasses.cpp +++ b/src/V3DfgPasses.cpp @@ -108,6 +108,78 @@ void V3DfgPasses::removeUnobservable(DfgGraph& dfg, V3DfgContext& dfgCtx) { } } +void V3DfgPasses::removeSelects(DfgGraph& dfg, V3DfgRemoveSelectsContext& ctx) { + + std::vector selps; + for (DfgVertex& vtx : dfg.opVertices()) { + DfgSel* const selp = vtx.cast(); + if (!selp) continue; + selps.push_back(selp); + } + + for (DfgSel* const selp : selps) { + FileLine* const flp = selp->fileline(); + const DfgDataType& dtype = selp->dtype(); + + // Remove full width selects + if (selp->fromp()->dtype() == dtype) { + ++ctx.m_removedFullWidth; + selp->replaceWith(selp->fromp()); + continue; + } + + // Push selects through synthesis temporaries only + DfgVarPacked* const varp = selp->fromp()->cast(); + if (!varp || 
!varp->tmpForp()) continue; + DfgVertex* const srcp = varp->srcp(); + if (!srcp) continue; + // Don't inline CReset + if (srcp->is()) continue; + + const uint32_t lsb = selp->lsb(); + const uint32_t msb = lsb + selp->width() - 1; + + // If driven whole, select from the driver + if (!srcp->is()) { + ++ctx.m_replacedWithSelFromFull; + DfgSel* const newSelp = new DfgSel{dfg, flp, dtype}; + newSelp->lsb(lsb); + newSelp->fromp(srcp); + selp->replaceWith(newSelp); + continue; + } + + // Otherwise attempt to select from the partial driver + DfgSplicePacked* const splicep = srcp->as(); + DfgVertex* driverp = nullptr; + uint32_t driverLsb = 0; + splicep->foreachDriver([&](DfgVertex& src, const uint32_t dLsb) { + const uint32_t dMsb = dLsb + src.width() - 1; + // If it does not cover the whole searched bit range, move on + if (lsb < dLsb || dMsb < msb) return false; + // Save the driver + driverp = &src; + driverLsb = dLsb; + return true; + }); + if (!driverp) continue; + + // If partial driver is the whole thing we are looking for, just replace with the driver + if (driverp->dtype() == dtype) { + ++ctx.m_replacedWithPart; + selp->replaceWith(driverp); + continue; + } + + // Otherwise create a new select from the partial driver + ++ctx.m_replacedWithSelFromPart; + DfgSel* const newSelp = new DfgSel{dfg, flp, dtype}; + newSelp->lsb(lsb - driverLsb); + newSelp->fromp(driverp); + selp->replaceWith(newSelp); + } +} + void V3DfgPasses::inlineVars(DfgGraph& dfg) { for (DfgVertexVar& vtx : dfg.varVertices()) { // Nothing to inline it into diff --git a/src/V3DfgPasses.h b/src/V3DfgPasses.h index a7d2b04fb..2984c6bd7 100644 --- a/src/V3DfgPasses.h +++ b/src/V3DfgPasses.h @@ -41,6 +41,8 @@ void removeUnobservable(DfgGraph&, V3DfgContext&) VL_MT_DISABLED; // Synthesize DfgLogic vertices into primitive operations. // Removes all DfgLogic (even those that were not synthesized).
void synthesize(DfgGraph&, V3DfgContext&) VL_MT_DISABLED; +// Remove redundant selects +void removeSelects(DfgGraph& dfg, V3DfgRemoveSelectsContext& ctx) VL_MT_DISABLED; // Attempt to make the given cyclic graph into an acyclic, or "less cyclic" // equivalent. If the returned pointer is null, then no improvement was // possible on the input graph. Otherwise the returned graph is an improvement diff --git a/test_regress/t/t_dfg_break_cycles.py b/test_regress/t/t_dfg_break_cycles.py index 2c35f08f5..ec6c6b339 100755 --- a/test_regress/t/t_dfg_break_cycles.py +++ b/test_regress/t/t_dfg_break_cycles.py @@ -17,6 +17,8 @@ test.sim_time = 2000000 if not os.path.exists(test.root + "/.git"): test.skip("Not in a git repository") +test.top_filename = "t/t_dfg_break_cycles.v" + # Read expected source lines hit expectedLines = set() @@ -65,7 +67,8 @@ with open(rdFile, 'r', encoding="utf8") as rdFh, \ test.compile(verilator_flags2=[ "--stats", "--build", - "-fno-dfg-break-cycles", + "-fno-dfg" if test.name == "t_dfg_break_cycles" else "-fno-dfg-break-cycles", + "-fno-gate", "+incdir+" + test.obj_dir, "-Mdir", test.obj_dir + "/obj_ref", "--prefix", "Vref", @@ -73,8 +76,9 @@ test.compile(verilator_flags2=[ ]) # yapf:disable # Check we got the expected number of circular logic warnings -test.file_grep(test.obj_dir + "/obj_ref/Vref__stats.txt", - r'Warnings, Suppressed UNOPTFLAT\s+(\d+)', nExpectedCycles) +if test.name == "t_dfg_break_cycles": + test.file_grep(test.obj_dir + "/obj_ref/Vref__stats.txt", + r'Warnings, Suppressed UNOPTFLAT\s+(\d+)', nExpectedCycles) # Compile optimized - also builds executable test.compile(verilator_flags2=[ @@ -82,6 +86,7 @@ test.compile(verilator_flags2=[ "--build", "--exe", "-fno-const-before-dfg", + "-fno-gate", "+incdir+" + test.obj_dir, "-Mdir", test.obj_dir + "/obj_opt", "--prefix", "Vopt", @@ -90,7 +95,7 @@ test.compile(verilator_flags2=[ "--debug", "--debugi", "0", "--dumpi-tree", "0", "-CFLAGS \"-I .. 
-I ../obj_ref\"", "../obj_ref/Vref__ALL.a", - "../../t/" + test.name + ".cpp" + "../../t/t_dfg_break_cycles.cpp" ]) # yapf:disable # Execute test to check equivalence diff --git a/test_regress/t/t_dfg_break_cycles.v b/test_regress/t/t_dfg_break_cycles.v index 5de4c628b..80c5aad04 100644 --- a/test_regress/t/t_dfg_break_cycles.v +++ b/test_regress/t/t_dfg_break_cycles.v @@ -219,14 +219,14 @@ module t ( `signal(ARRAY_1, 3); // UNOPTFLAT assign ARRAY_1 = array_1[0]; - wire [2:0] array_2a [2]; - wire [2:0] array_2b [2]; + wire [2:0] array_2a [2]; // UNOPTFLAT + wire [2:0] array_2b [2]; // UNOPTFLAT assign array_2a[0][0] = rand_a[0]; assign array_2a[0][1] = array_2b[1][0]; assign array_2a[0][2] = array_2b[1][1]; assign array_2a[1] = array_2a[0]; assign array_2b = array_2a; - `signal(ARRAY_2, 3); // UNOPTFLAT + `signal(ARRAY_2, 3); assign ARRAY_2 = array_2a[0]; wire [2:0] array_3 [2]; diff --git a/test_regress/t/t_dfg_break_cycles_off.py b/test_regress/t/t_dfg_break_cycles_off.py new file mode 100755 index 000000000..bf6af753c --- /dev/null +++ b/test_regress/t/t_dfg_break_cycles_off.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. +# SPDX-FileCopyrightText: 2025 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +import runpy + +test.scenarios('vlt_all') + +runpy.run_path("t/t_dfg_break_cycles.py", globals())