Optimize V3Gate inlining heuristic (#7716)
V3Gate used to inline too many expensive operations. One particularly
bad example is `{<<{wide}}` (bit-reverse of a wide signal): it is a
single-input node, so it was always inlined, even though it is quite
expensive to compute.
Change the heuristic to inline single-input nodes only if they are not
wide, or are a cheap wide operation; otherwise treat them the same as
multi-input ops and inline them only if they are used no more than once.
This commit is contained in:
parent
0ee5cbf502
commit
0ee25038ac
109
src/V3Gate.cpp
109
src/V3Gate.cpp
|
|
@ -543,89 +543,62 @@ class GateInline final {
|
|||
// Logic block with pending substitutions are stored in this map, together with their ordinal
|
||||
std::unordered_map<AstNode*, size_t> m_hasPending;
|
||||
size_t m_statInlined = 0; // Statistic tracking - signals inlined
|
||||
size_t m_statRefs = 0; // Statistic tracking
|
||||
size_t m_statExcluded = 0; // Statistic tracking
|
||||
size_t m_statNotInlined = 0; // Statistic tracking - signals not inlined due to cost
|
||||
size_t m_statRefs = 0; // Statistic tracking - number of input variable references replaced
|
||||
|
||||
// METHODS
|
||||
static bool isCheapWide(const AstNodeExpr* exprp) {
|
||||
static bool isCheap(const AstNodeExpr* exprp) {
|
||||
// Constant is cheap
|
||||
if (VN_IS(exprp, Const)) return true;
|
||||
// Variable reference is cheap
|
||||
if (VN_IS(exprp, NodeVarRef)) return true;
|
||||
// AstSel is cheap if the fromp is cheap, and not a wide needing bit swizzling
|
||||
if (const AstSel* const selp = VN_CAST(exprp, Sel)) {
|
||||
if (!isCheap(selp->fromp())) return false;
|
||||
if (!selp->isWide()) return true;
|
||||
if (!VN_IS(selp->lsbp(), Const)) return false;
|
||||
if (selp->lsbConst() % VL_EDATASIZE != 0) return false;
|
||||
exprp = selp->fromp();
|
||||
return true;
|
||||
}
|
||||
if (const AstArraySel* const aselp = VN_CAST(exprp, ArraySel)) exprp = aselp->fromp();
|
||||
return VN_IS(exprp, Const) || VN_IS(exprp, NodeVarRef);
|
||||
}
|
||||
static bool excludedWide(GateVarVertex* const vVtxp, const AstNodeExpr* const rhsp) {
|
||||
// Handle wides with logic drivers that are too wide for V3Expand.
|
||||
if (!vVtxp->varScp()->isWide() //
|
||||
|| vVtxp->varScp()->widthWords() <= v3Global.opt.expandLimit() //
|
||||
|| vVtxp->inEmpty() //
|
||||
|| isCheapWide(rhsp))
|
||||
return false;
|
||||
|
||||
const GateLogicVertex* const lVtxp
|
||||
= vVtxp->inEdges().frontp()->fromp()->as<GateLogicVertex>();
|
||||
|
||||
// Exclude from inlining variables READ multiple times.
|
||||
// To decouple actives thus simplifying scheduling, exclude only those
|
||||
// VarRefs that are referenced under the same active as they were assigned.
|
||||
if (const AstActive* const primaryActivep = lVtxp->activep()) {
|
||||
size_t reads = 0;
|
||||
for (const V3GraphEdge& edge : vVtxp->outEdges()) {
|
||||
const GateLogicVertex* const lvp = edge.top()->as<GateLogicVertex>();
|
||||
if (lvp->activep() != primaryActivep) continue;
|
||||
|
||||
reads += edge.weight();
|
||||
if (reads > 1) return true;
|
||||
}
|
||||
// AstArraySel is cheap if the fromp is cheap
|
||||
if (const AstArraySel* const aselp = VN_CAST(exprp, ArraySel)) {
|
||||
return isCheap(aselp->fromp());
|
||||
}
|
||||
// Otherwise it is not cheap
|
||||
return false;
|
||||
}
|
||||
|
||||
bool shouldInline(GateVarVertex* vVtxp, GateLogicVertex* lVtxp, size_t nReads,
|
||||
AstNodeExpr* substp, bool allowMultiIn) {
|
||||
AstVarScope* const vscp = vVtxp->varScp();
|
||||
|
||||
// Always inline constants
|
||||
if (VN_IS(substp, Const)) return true;
|
||||
// Don't inline non-constant static initializers
|
||||
// Don't inline non-constant static initializers - these are scheduled differently
|
||||
if (lVtxp->staticInit()) return false;
|
||||
// Inline simple variable references
|
||||
if (VN_IS(substp, VarRef)) return true;
|
||||
// Only inline arrays if a simple variable or constant
|
||||
if (VN_IS(vscp->dtypep()->skipRefp(), UnpackArrayDType)) return false;
|
||||
// Inline constant array selects
|
||||
if (VN_IS(substp, ArraySel) && nReads <= 1) return true;
|
||||
|
||||
// Don't inline expensive wide operations
|
||||
if (excludedWide(vVtxp, substp)) {
|
||||
++m_statExcluded;
|
||||
UINFO(9, "Gate inline exclude '" << vVtxp->name() << "'");
|
||||
vVtxp->clearReducible("Excluded wide"); // Check once.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nReads == 0) {
|
||||
// Reads no variables, likely unfolded constant expression
|
||||
return true;
|
||||
} else if (nReads == 1) {
|
||||
// Reads one variable
|
||||
return true;
|
||||
} else {
|
||||
// Reads two or more variables
|
||||
if (!allowMultiIn) return false;
|
||||
// Do it if not used, or used only once, ignoring slow code
|
||||
int n = 0;
|
||||
for (V3GraphEdge& edge : vVtxp->outEdges()) {
|
||||
const GateLogicVertex* const dstVtxp = edge.top()->as<GateLogicVertex>();
|
||||
// Ignore slow code, or if the destination is not used
|
||||
if (dstVtxp->slow()) continue;
|
||||
if (dstVtxp->outEmpty() && !dstVtxp->consumed()) continue;
|
||||
n += edge.weight();
|
||||
if (n > 1) return false;
|
||||
if (VN_IS(vVtxp->varScp()->dtypep()->skipRefp(), UnpackArrayDType)) return false;
|
||||
// Inline if reads no variables - unfolded constant expression, nullary builtin e.g.: $time
|
||||
if (nReads == 0) return true;
|
||||
// If it reads one variable, inline if not wide, or if cheap
|
||||
if (nReads == 1 && (!substp->isWide() || isCheap(substp))) return true;
|
||||
// Don't inline on first round if reads more than one variable
|
||||
if (nReads > 1 && !allowMultiIn) return false;
|
||||
// Reads multiple variables, or is expensive to compute.
|
||||
// Inline if used only once, ignoring slow code, or dead code that can be deleted.
|
||||
int n = 0;
|
||||
for (V3GraphEdge& edge : vVtxp->outEdges()) {
|
||||
const GateLogicVertex* const dstVtxp = edge.top()->as<GateLogicVertex>();
|
||||
// Ignore slow code, or if the destination is not used
|
||||
if (dstVtxp->slow()) continue;
|
||||
if (dstVtxp->outEmpty() && !dstVtxp->consumed()) continue;
|
||||
n += edge.weight();
|
||||
if (n > 1) {
|
||||
++m_statNotInlined;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void recordSubstitution(AstVarScope* vscp, AstNodeExpr* substp, AstNode* logicp) {
|
||||
|
|
@ -724,7 +697,7 @@ class GateInline final {
|
|||
if (!okVisitor.varAssigned(vVtxp->varScp())) continue;
|
||||
|
||||
// Expression we are considering to substitute with
|
||||
AstNodeExpr* const substp = okVisitor.substitutionp();
|
||||
AstNodeExpr* const substp = V3Const::constifyEdit(okVisitor.substitutionp());
|
||||
// Number of variables read by the substitution
|
||||
const size_t nReads = okVisitor.readVscps().size();
|
||||
|
||||
|
|
@ -832,9 +805,9 @@ class GateInline final {
|
|||
}
|
||||
|
||||
~GateInline() {
|
||||
V3Stats::addStat("Optimizations, Gate sigs deleted", m_statInlined);
|
||||
V3Stats::addStat("Optimizations, Gate inputs replaced", m_statRefs);
|
||||
V3Stats::addStat("Optimizations, Gate excluded wide expressions", m_statExcluded);
|
||||
V3Stats::addStat("Optimizations, Gate signals inlined", m_statInlined);
|
||||
V3Stats::addStat("Optimizations, Gate signals not inlined due to cost", m_statNotInlined);
|
||||
V3Stats::addStat("Optimizations, Gate reads replaced", m_statRefs);
|
||||
}
|
||||
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -48,6 +48,6 @@ test.compile(
|
|||
test.execute()
|
||||
|
||||
# Must be <<9000 above to prove this worked
|
||||
test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 8550)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 8550)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ test.scenarios('vlt')
|
|||
|
||||
test.lint(verilator_flags2=['--stats', '--expand-limit 5'])
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 2)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 4)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ test.scenarios('vlt')
|
|||
|
||||
test.lint(verilator_flags2=['--stats', '--expand-limit 5', '-fno-dfg'])
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 1)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 1)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ test.scenarios('vlt')
|
|||
|
||||
test.lint(verilator_flags2=['--stats', '--expand-limit 5'])
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 2)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 2)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -1,18 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2024 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
|
||||
test.lint(verilator_flags2=['--stats', '--expand-limit 5'])
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain
|
||||
// SPDX-FileCopyrightText: 2024 Antmicro
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
localparam N = 256; // Wider than expand limit.
|
||||
|
||||
module t (
|
||||
input wire [N-1:0] i,
|
||||
output wire [N-1:0] o
|
||||
);
|
||||
|
||||
// Do not exclude from inlining wides referenced in different scope.
|
||||
wire [N-1:0] wide = N ~^ i;
|
||||
|
||||
sub sub (
|
||||
i,
|
||||
wide,
|
||||
o
|
||||
);
|
||||
endmodule
|
||||
|
||||
module sub (
|
||||
input wire [N-1:0] i,
|
||||
input wire [N-1:0] wide,
|
||||
output logic [N-1:0] o
|
||||
);
|
||||
initial begin
|
||||
for (integer n = 0; n < N; ++n) begin
|
||||
o[n] = i[N-1-n] | wide[N-1-n];
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -13,8 +13,8 @@ test.scenarios('vlt')
|
|||
|
||||
test.lint(verilator_flags2=['--stats', '--expand-limit 5', '-fno-var-split'])
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 1)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 1)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 2)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 1)
|
||||
test.file_grep(test.stats, r'SplitVar, packed variables split automatically\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -1,18 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2024 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
|
||||
test.lint(verilator_flags2=['--stats', '--expand-limit 5'])
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain
|
||||
// SPDX-FileCopyrightText: 2024 Antmicro
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
localparam N = 65; // Wide but narrower than expand limit
|
||||
|
||||
module t (
|
||||
input wire [N-1:0] i,
|
||||
output wire [N-1:0] o
|
||||
);
|
||||
|
||||
// Do not exclude from inlining wides small enough to be handled by
|
||||
// V3Expand.
|
||||
wire [65:0] wide_small = N << i * i / N;
|
||||
|
||||
for (genvar n = 0; n < N; ++n) begin
|
||||
assign o[n] = i[n] ^ wide_small[n];
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -13,7 +13,7 @@ test.scenarios('vlt')
|
|||
|
||||
test.lint(verilator_flags2=['--stats', '--expand-limit 5'])
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
Loading…
Reference in New Issue