Optimize bit select removal earlier in Dfg (#7762)
Add a simple Dfg pass that removes redundant bit selects early. This can significantly cut down on downstream work and remove some temporary variables introduced during synthesis.
This commit is contained in:
parent
4c92c035e7
commit
c7a262b05d
|
|
@ -234,6 +234,26 @@ private:
|
|||
addStat("temporaries introduced", m_temporariesIntroduced);
|
||||
}
|
||||
};
|
||||
class V3DfgRemoveSelectsContext final : public V3DfgSubContext {
|
||||
// Only V3DfgContext can create an instance
|
||||
friend class V3DfgContext;
|
||||
|
||||
public:
|
||||
// STATE
|
||||
VDouble0 m_removedFullWidth; // Number of full width selects removed
|
||||
VDouble0 m_replacedWithSelFromFull; // Number of selects replaced with sel from full driver
|
||||
VDouble0 m_replacedWithSelFromPart; // Number of selects replaced with sel from partial driver
|
||||
VDouble0 m_replacedWithPart; // Number of selects replaced with part of driver
|
||||
private:
|
||||
V3DfgRemoveSelectsContext()
|
||||
: V3DfgSubContext{"RemoveSelects"} {}
|
||||
~V3DfgRemoveSelectsContext() {
|
||||
addStat("full width selects removed", m_removedFullWidth);
|
||||
addStat("replaced with sel from full driver", m_replacedWithSelFromFull);
|
||||
addStat("replaced with sel from partial driver", m_replacedWithSelFromPart);
|
||||
addStat("replaced with partial driver", m_replacedWithPart);
|
||||
}
|
||||
};
|
||||
class V3DfgRemoveUnobservableContext final : public V3DfgSubContext {
|
||||
// Only V3DfgContext can create an instance
|
||||
friend class V3DfgContext;
|
||||
|
|
@ -399,6 +419,7 @@ public:
|
|||
V3DfgPeepholeContext m_peepholeContext;
|
||||
V3DfgPushDownSelsContext m_pushDownSelsContext;
|
||||
V3DfgRegularizeContext m_regularizeContext;
|
||||
V3DfgRemoveSelectsContext m_removeSelectsContext;
|
||||
V3DfgRemoveUnobservableContext m_removeUnobservableContext;
|
||||
V3DfgSynthesisContext m_synthContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -139,6 +139,13 @@ class DataflowOptimize final {
|
|||
dfg.mergeGraphs(std::move(madeAcyclicComponents));
|
||||
endOfStage("breakCycles", dfg, cyclicComps);
|
||||
|
||||
// Remove redundant selects
|
||||
V3DfgPasses::removeSelects(dfg, m_ctx.m_removeSelectsContext);
|
||||
for (std::unique_ptr<DfgGraph>& compp : cyclicComps) {
|
||||
V3DfgPasses::removeSelects(*compp, m_ctx.m_removeSelectsContext);
|
||||
}
|
||||
endOfStage("removeSelects", dfg, cyclicComps);
|
||||
|
||||
// Split the acyclic DFG into [weakly] connected components
|
||||
std::vector<std::unique_ptr<DfgGraph>> acyclicComps = dfg.splitIntoComponents("acyclic");
|
||||
UASSERT(dfg.size() == 0, "DfgGraph should have become empty");
|
||||
|
|
|
|||
|
|
@ -108,6 +108,78 @@ void V3DfgPasses::removeUnobservable(DfgGraph& dfg, V3DfgContext& dfgCtx) {
|
|||
}
|
||||
}
|
||||
|
||||
// Remove redundant DfgSel vertices from 'dfg'. Every rewrite performed is
// counted in the matching statistic of 'ctx'. Three rewrites are attempted:
//  - a select with the same data type as its source is replaced by the source
//  - a select from a synthesis temporary driven as a whole is replaced by a
//    select from that driver
//  - a select from a synthesis temporary driven piecewise is replaced by the
//    partial driver covering the selected bits, or by a select from it
void V3DfgPasses::removeSelects(DfgGraph& dfg, V3DfgRemoveSelectsContext& ctx) {
    // Gather the selects up front; the rewrite loop below creates new DfgSel vertices
    std::vector<DfgSel*> candidates;
    for (DfgVertex& vtx : dfg.opVertices()) {
        if (DfgSel* const candp = vtx.cast<DfgSel>()) candidates.push_back(candp);
    }

    for (DfgSel* const oldSelp : candidates) {
        FileLine* const fileLinep = oldSelp->fileline();
        const DfgDataType& selType = oldSelp->dtype();

        // Build a replacement select of the same type, reading 'fromp' starting at bit 'lsb'
        const auto makeSel = [&](DfgVertex* fromp, uint32_t lsb) -> DfgSel* {
            DfgSel* const resp = new DfgSel{dfg, fileLinep, selType};
            resp->lsb(lsb);
            resp->fromp(fromp);
            return resp;
        };

        // Full width select: the source itself can stand in for it
        if (oldSelp->fromp()->dtype() == selType) {
            ++ctx.m_removedFullWidth;
            oldSelp->replaceWith(oldSelp->fromp());
            continue;
        }

        // Selects are only pushed through variables that are synthesis temporaries
        DfgVarPacked* const tmpVarp = oldSelp->fromp()->cast<DfgVarPacked>();
        if (!tmpVarp) continue;
        if (!tmpVarp->tmpForp()) continue;
        // The temporary needs a driver, and a CReset driver is never inlined
        DfgVertex* const drvp = tmpVarp->srcp();
        if (!drvp || drvp->is<DfgCReset>()) continue;

        const uint32_t selLsb = oldSelp->lsb();

        // Temporary driven as a whole: select straight from its driver
        if (!drvp->is<DfgSplicePacked>()) {
            ++ctx.m_replacedWithSelFromFull;
            oldSelp->replaceWith(makeSel(drvp, selLsb));
            continue;
        }

        // Temporary driven piecewise: attempt to find one partial driver that
        // covers every selected bit
        const uint32_t selMsb = selLsb + oldSelp->width() - 1;
        DfgVertex* partp = nullptr;
        uint32_t partLsb = 0;
        drvp->as<DfgSplicePacked>()->foreachDriver([&](DfgVertex& src, const uint32_t dLsb) {
            const uint32_t dMsb = dLsb + src.width() - 1;
            // Accept this driver only if [dMsb:dLsb] contains [selMsb:selLsb]
            const bool covers = dLsb <= selLsb && selMsb <= dMsb;
            if (covers) {
                partp = &src;
                partLsb = dLsb;
            }
            return covers;
        });
        if (!partp) continue;

        // The partial driver is exactly the selected value: use it directly
        if (partp->dtype() == selType) {
            ++ctx.m_replacedWithPart;
            oldSelp->replaceWith(partp);
            continue;
        }

        // Otherwise select from the partial driver, rebasing the LSB onto it
        ++ctx.m_replacedWithSelFromPart;
        oldSelp->replaceWith(makeSel(partp, selLsb - partLsb));
    }
}
|
||||
|
||||
void V3DfgPasses::inlineVars(DfgGraph& dfg) {
|
||||
for (DfgVertexVar& vtx : dfg.varVertices()) {
|
||||
// Nothing to inline it into
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ void removeUnobservable(DfgGraph&, V3DfgContext&) VL_MT_DISABLED;
|
|||
// Synthesize DfgLogic vertices into primitive operations.
|
||||
// Removes all DfgLogic (even those that were not synthesized).
|
||||
void synthesize(DfgGraph&, V3DfgContext&) VL_MT_DISABLED;
|
||||
// Remove redundant selects
|
||||
void removeSelects(DfgGraph& dfg, V3DfgRemoveSelectsContext& ctx) VL_MT_DISABLED;
|
||||
// Attempt to make the given cyclic graph into an acyclic, or "less cyclic"
|
||||
// equivalent. If the returned pointer is null, then no improvement was
|
||||
// possible on the input graph. Otherwise the returned graph is an improvement
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ test.sim_time = 2000000
|
|||
if not os.path.exists(test.root + "/.git"):
|
||||
test.skip("Not in a git repository")
|
||||
|
||||
test.top_filename = "t/t_dfg_break_cycles.v"
|
||||
|
||||
# Read expected source lines hit
|
||||
expectedLines = set()
|
||||
|
||||
|
|
@ -65,7 +67,8 @@ with open(rdFile, 'r', encoding="utf8") as rdFh, \
|
|||
test.compile(verilator_flags2=[
|
||||
"--stats",
|
||||
"--build",
|
||||
"-fno-dfg-break-cycles",
|
||||
"-fno-dfg" if test.name == "t_dfg_break_cycles" else "-fno-dfg-break-cycles",
|
||||
"-fno-gate",
|
||||
"+incdir+" + test.obj_dir,
|
||||
"-Mdir", test.obj_dir + "/obj_ref",
|
||||
"--prefix", "Vref",
|
||||
|
|
@ -73,8 +76,9 @@ test.compile(verilator_flags2=[
|
|||
]) # yapf:disable
|
||||
|
||||
# Check we got the expected number of circular logic warnings
|
||||
test.file_grep(test.obj_dir + "/obj_ref/Vref__stats.txt",
|
||||
r'Warnings, Suppressed UNOPTFLAT\s+(\d+)', nExpectedCycles)
|
||||
if test.name == "t_dfg_break_cycles":
|
||||
test.file_grep(test.obj_dir + "/obj_ref/Vref__stats.txt",
|
||||
r'Warnings, Suppressed UNOPTFLAT\s+(\d+)', nExpectedCycles)
|
||||
|
||||
# Compile optimized - also builds executable
|
||||
test.compile(verilator_flags2=[
|
||||
|
|
@ -82,6 +86,7 @@ test.compile(verilator_flags2=[
|
|||
"--build",
|
||||
"--exe",
|
||||
"-fno-const-before-dfg",
|
||||
"-fno-gate",
|
||||
"+incdir+" + test.obj_dir,
|
||||
"-Mdir", test.obj_dir + "/obj_opt",
|
||||
"--prefix", "Vopt",
|
||||
|
|
@ -90,7 +95,7 @@ test.compile(verilator_flags2=[
|
|||
"--debug", "--debugi", "0", "--dumpi-tree", "0",
|
||||
"-CFLAGS \"-I .. -I ../obj_ref\"",
|
||||
"../obj_ref/Vref__ALL.a",
|
||||
"../../t/" + test.name + ".cpp"
|
||||
"../../t/t_dfg_break_cycles.cpp"
|
||||
]) # yapf:disable
|
||||
|
||||
# Execute test to check equivalence
|
||||
|
|
|
|||
|
|
@ -219,14 +219,14 @@ module t (
|
|||
`signal(ARRAY_1, 3); // UNOPTFLAT
|
||||
assign ARRAY_1 = array_1[0];
|
||||
|
||||
wire [2:0] array_2a [2];
|
||||
wire [2:0] array_2b [2];
|
||||
wire [2:0] array_2a [2]; // UNOPTFLAT
|
||||
wire [2:0] array_2b [2]; // UNOPTFLAT
|
||||
assign array_2a[0][0] = rand_a[0];
|
||||
assign array_2a[0][1] = array_2b[1][0];
|
||||
assign array_2a[0][2] = array_2b[1][1];
|
||||
assign array_2a[1] = array_2a[0];
|
||||
assign array_2b = array_2a;
|
||||
`signal(ARRAY_2, 3); // UNOPTFLAT
|
||||
`signal(ARRAY_2, 3);
|
||||
assign ARRAY_2 = array_2a[0];
|
||||
|
||||
wire [2:0] array_3 [2];
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2025 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
import runpy
|
||||
|
||||
test.scenarios('vlt_all')

# Delegate to the t_dfg_break_cycles driver, executing it with this script's globals
runpy.run_path("t/t_dfg_break_cycles.py", init_globals=globals())
|
||||
Loading…
Reference in New Issue