diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index 4875ac75c..b96c892f2 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -543,89 +543,62 @@ class GateInline final { // Logic block with pending substitutions are stored in this map, together with their ordinal std::unordered_map m_hasPending; size_t m_statInlined = 0; // Statistic tracking - signals inlined - size_t m_statRefs = 0; // Statistic tracking - size_t m_statExcluded = 0; // Statistic tracking + size_t m_statNotInlined = 0; // Statistic tracking - signals not inlined due to cost + size_t m_statRefs = 0; // Statistic tracking - number of input variable references replaced // METHODS - static bool isCheapWide(const AstNodeExpr* exprp) { + static bool isCheap(const AstNodeExpr* exprp) { + // Constant is cheap + if (VN_IS(exprp, Const)) return true; + // Variable reference is cheap + if (VN_IS(exprp, NodeVarRef)) return true; + // AstSel is cheap if the fromp is cheap, and not a wide needing bit swizzling if (const AstSel* const selp = VN_CAST(exprp, Sel)) { + if (!isCheap(selp->fromp())) return false; + if (!selp->isWide()) return true; + if (!VN_IS(selp->lsbp(), Const)) return false; if (selp->lsbConst() % VL_EDATASIZE != 0) return false; - exprp = selp->fromp(); + return true; } - if (const AstArraySel* const aselp = VN_CAST(exprp, ArraySel)) exprp = aselp->fromp(); - return VN_IS(exprp, Const) || VN_IS(exprp, NodeVarRef); - } - static bool excludedWide(GateVarVertex* const vVtxp, const AstNodeExpr* const rhsp) { - // Handle wides with logic drivers that are too wide for V3Expand. - if (!vVtxp->varScp()->isWide() // - || vVtxp->varScp()->widthWords() <= v3Global.opt.expandLimit() // - || vVtxp->inEmpty() // - || isCheapWide(rhsp)) - return false; - - const GateLogicVertex* const lVtxp - = vVtxp->inEdges().frontp()->fromp()->as<GateLogicVertex>(); - - // Exclude from inlining variables READ multiple times. 
- // To decouple actives thus simplifying scheduling, exclude only those - // VarRefs that are referenced under the same active as they were assigned. - if (const AstActive* const primaryActivep = lVtxp->activep()) { - size_t reads = 0; - for (const V3GraphEdge& edge : vVtxp->outEdges()) { - const GateLogicVertex* const lvp = edge.top()->as<GateLogicVertex>(); - if (lvp->activep() != primaryActivep) continue; - - reads += edge.weight(); - if (reads > 1) return true; - } + // AstArraySel is cheap if the fromp is cheap + if (const AstArraySel* const aselp = VN_CAST(exprp, ArraySel)) { + return isCheap(aselp->fromp()); } + // Otherwise it is not cheap return false; } bool shouldInline(GateVarVertex* vVtxp, GateLogicVertex* lVtxp, size_t nReads, AstNodeExpr* substp, bool allowMultiIn) { - AstVarScope* const vscp = vVtxp->varScp(); - // Always inline constants if (VN_IS(substp, Const)) return true; - // Don't inline non-constant static initializers + // Don't inline non-constant static initializers - these are scheduled differently if (lVtxp->staticInit()) return false; // Inline simple variable references if (VN_IS(substp, VarRef)) return true; // Only inline arrays if a simple variable or constant - if (VN_IS(vscp->dtypep()->skipRefp(), UnpackArrayDType)) return false; - // Inline constant array selects - if (VN_IS(substp, ArraySel) && nReads <= 1) return true; - - // Don't inline expensive wide operations - if (excludedWide(vVtxp, substp)) { - ++m_statExcluded; - UINFO(9, "Gate inline exclude '" << vVtxp->name() << "'"); - vVtxp->clearReducible("Excluded wide"); // Check once. 
- return false; - } - - if (nReads == 0) { - // Reads no variables, likely unfolded constant expression - return true; - } else if (nReads == 1) { - // Reads one variable - return true; - } else { - // Reads more two or more variables - if (!allowMultiIn) return false; - // Do it if not used, or used only once, ignoring slow code - int n = 0; - for (V3GraphEdge& edge : vVtxp->outEdges()) { - const GateLogicVertex* const dstVtxp = edge.top()->as<GateLogicVertex>(); - // Ignore slow code, or if the destination is not used - if (dstVtxp->slow()) continue; - if (dstVtxp->outEmpty() && !dstVtxp->consumed()) continue; - n += edge.weight(); - if (n > 1) return false; + if (VN_IS(vVtxp->varScp()->dtypep()->skipRefp(), UnpackArrayDType)) return false; + // Inline if reads no variables - unfolded constant expression, nullary builtin e.g.: $time + if (nReads == 0) return true; + // If it reads one variable, inline if not wide, or if cheap + if (nReads == 1 && (!substp->isWide() || isCheap(substp))) return true; + // Don't inline on first round if reads more than one variable + if (nReads > 1 && !allowMultiIn) return false; + // Reads multiple variables, or is expensive to compute. + // Inline if used only once, ignoring slow code, or dead code that can be deleted. 
+ int n = 0; + for (V3GraphEdge& edge : vVtxp->outEdges()) { + const GateLogicVertex* const dstVtxp = edge.top()->as<GateLogicVertex>(); + // Ignore slow code, or if the destination is not used + if (dstVtxp->slow()) continue; + if (dstVtxp->outEmpty() && !dstVtxp->consumed()) continue; + n += edge.weight(); + if (n > 1) { + ++m_statNotInlined; + return false; } - return true; } + return true; } void recordSubstitution(AstVarScope* vscp, AstNodeExpr* substp, AstNode* logicp) { @@ -724,7 +697,7 @@ class GateInline final { if (!okVisitor.varAssigned(vVtxp->varScp())) continue; // Expression we are considering to substitute with - AstNodeExpr* const substp = okVisitor.substitutionp(); + AstNodeExpr* const substp = V3Const::constifyEdit(okVisitor.substitutionp()); // Number of variables read by the substitution const size_t nReads = okVisitor.readVscps().size(); @@ -832,9 +805,9 @@ class GateInline final { } ~GateInline() { - V3Stats::addStat("Optimizations, Gate sigs deleted", m_statInlined); - V3Stats::addStat("Optimizations, Gate inputs replaced", m_statRefs); - V3Stats::addStat("Optimizations, Gate excluded wide expressions", m_statExcluded); + V3Stats::addStat("Optimizations, Gate signals inlined", m_statInlined); + V3Stats::addStat("Optimizations, Gate signals not inlined due to cost", m_statNotInlined); + V3Stats::addStat("Optimizations, Gate reads replaced", m_statRefs); } public: diff --git a/test_regress/t/t_gate_chained.py b/test_regress/t/t_gate_chained.py index 7fa18e3a3..ddf8bef62 100755 --- a/test_regress/t/t_gate_chained.py +++ b/test_regress/t/t_gate_chained.py @@ -48,6 +48,6 @@ test.compile( test.execute() # Must be <<9000 above to prove this worked -test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 8550) +test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 8550) test.passes() diff --git a/test_regress/t/t_gate_inline_wide_exclude_multiple.py b/test_regress/t/t_gate_inline_wide_exclude_multiple.py index 76a8991cf..2bf5b1241 
100755 --- a/test_regress/t/t_gate_inline_wide_exclude_multiple.py +++ b/test_regress/t/t_gate_inline_wide_exclude_multiple.py @@ -13,7 +13,7 @@ test.scenarios('vlt') test.lint(verilator_flags2=['--stats', '--expand-limit 5']) -test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 2) -test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 4) +test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 0) test.passes() diff --git a/test_regress/t/t_gate_inline_wide_noexclude_arraysel.py b/test_regress/t/t_gate_inline_wide_noexclude_arraysel.py index 8c99e4b33..15370ae7c 100755 --- a/test_regress/t/t_gate_inline_wide_noexclude_arraysel.py +++ b/test_regress/t/t_gate_inline_wide_noexclude_arraysel.py @@ -13,7 +13,7 @@ test.scenarios('vlt') test.lint(verilator_flags2=['--stats', '--expand-limit 5', '-fno-dfg']) -test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0) -test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 1) +test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 1) test.passes() diff --git a/test_regress/t/t_gate_inline_wide_noexclude_const.py b/test_regress/t/t_gate_inline_wide_noexclude_const.py index 5102631f7..cb6511e21 100755 --- a/test_regress/t/t_gate_inline_wide_noexclude_const.py +++ b/test_regress/t/t_gate_inline_wide_noexclude_const.py @@ -13,7 +13,7 @@ test.scenarios('vlt') test.lint(verilator_flags2=['--stats', '--expand-limit 5']) -test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0) -test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 2) +test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 0) +test.file_grep(test.stats, 
r'Optimizations, Gate signals inlined\s+(\d+)', 2) test.passes() diff --git a/test_regress/t/t_gate_inline_wide_noexclude_other_scope.py b/test_regress/t/t_gate_inline_wide_noexclude_other_scope.py deleted file mode 100755 index c20d324db..000000000 --- a/test_regress/t/t_gate_inline_wide_noexclude_other_scope.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python3 -# DESCRIPTION: Verilator: Verilog Test driver/expect definition -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of either the GNU Lesser General Public License Version 3 -# or the Perl Artistic License Version 2.0. -# SPDX-FileCopyrightText: 2024 Wilson Snyder -# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 - -import vltest_bootstrap - -test.scenarios('vlt') - -test.lint(verilator_flags2=['--stats', '--expand-limit 5']) - -test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0) - -test.passes() diff --git a/test_regress/t/t_gate_inline_wide_noexclude_other_scope.v b/test_regress/t/t_gate_inline_wide_noexclude_other_scope.v deleted file mode 100644 index c70022d24..000000000 --- a/test_regress/t/t_gate_inline_wide_noexclude_other_scope.v +++ /dev/null @@ -1,34 +0,0 @@ -// DESCRIPTION: Verilator: Verilog Test module -// -// This file ONLY is placed under the Creative Commons Public Domain -// SPDX-FileCopyrightText: 2024 Antmicro -// SPDX-License-Identifier: CC0-1.0 - -localparam N = 256; // Wider than expand limit. - -module t ( - input wire [N-1:0] i, - output wire [N-1:0] o -); - - // Do not exclude from inlining wides referenced in different scope. 
- wire [N-1:0] wide = N ~^ i; - - sub sub ( - i, - wide, - o - ); -endmodule - -module sub ( - input wire [N-1:0] i, - input wire [N-1:0] wide, - output logic [N-1:0] o -); - initial begin - for (integer n = 0; n < N; ++n) begin - o[n] = i[N-1-n] | wide[N-1-n]; - end - end -endmodule diff --git a/test_regress/t/t_gate_inline_wide_noexclude_sel.py b/test_regress/t/t_gate_inline_wide_noexclude_sel.py index aa65429ab..41d13c55d 100755 --- a/test_regress/t/t_gate_inline_wide_noexclude_sel.py +++ b/test_regress/t/t_gate_inline_wide_noexclude_sel.py @@ -13,8 +13,8 @@ test.scenarios('vlt') test.lint(verilator_flags2=['--stats', '--expand-limit 5', '-fno-var-split']) -test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 1) -test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 1) +test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 2) +test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 1) test.file_grep(test.stats, r'SplitVar, packed variables split automatically\s+(\d+)', 0) test.passes() diff --git a/test_regress/t/t_gate_inline_wide_noexclude_small_wide.py b/test_regress/t/t_gate_inline_wide_noexclude_small_wide.py deleted file mode 100755 index c20d324db..000000000 --- a/test_regress/t/t_gate_inline_wide_noexclude_small_wide.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python3 -# DESCRIPTION: Verilator: Verilog Test driver/expect definition -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of either the GNU Lesser General Public License Version 3 -# or the Perl Artistic License Version 2.0. 
-# SPDX-FileCopyrightText: 2024 Wilson Snyder -# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 - -import vltest_bootstrap - -test.scenarios('vlt') - -test.lint(verilator_flags2=['--stats', '--expand-limit 5']) - -test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0) - -test.passes() diff --git a/test_regress/t/t_gate_inline_wide_noexclude_small_wide.v b/test_regress/t/t_gate_inline_wide_noexclude_small_wide.v deleted file mode 100644 index 1b1231c4e..000000000 --- a/test_regress/t/t_gate_inline_wide_noexclude_small_wide.v +++ /dev/null @@ -1,21 +0,0 @@ -// DESCRIPTION: Verilator: Verilog Test module -// -// This file ONLY is placed under the Creative Commons Public Domain -// SPDX-FileCopyrightText: 2024 Antmicro -// SPDX-License-Identifier: CC0-1.0 - -localparam N = 65; // Wide but narrower than expand limit - -module t ( - input wire [N-1:0] i, - output wire [N-1:0] o -); - - // Do not exclude from inlining wides small enough to be handled by - // V3Expand. - wire [65:0] wide_small = N << i * i / N; - - for (genvar n = 0; n < N; ++n) begin - assign o[n] = i[n] ^ wide_small[n]; - end -endmodule diff --git a/test_regress/t/t_gate_inline_wide_noexclude_varref.py b/test_regress/t/t_gate_inline_wide_noexclude_varref.py index 1d91e6493..282e88b84 100755 --- a/test_regress/t/t_gate_inline_wide_noexclude_varref.py +++ b/test_regress/t/t_gate_inline_wide_noexclude_varref.py @@ -13,7 +13,7 @@ test.scenarios('vlt') test.lint(verilator_flags2=['--stats', '--expand-limit 5']) -test.file_grep(test.stats, r'Optimizations, Gate excluded wide expressions\s+(\d+)', 0) -test.file_grep(test.stats, r'Optimizations, Gate sigs deleted\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Gate signals not inlined due to cost\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Gate signals inlined\s+(\d+)', 0) test.passes()