Merge pull request #5921 from phsauter/peepopt-shiftpow2

peepopt: add shiftpow2 pattern
This commit is contained in:
nella 2026-06-02 08:07:03 +00:00 committed by GitHub
commit 8125af88d6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 253 additions and 5 deletions

View File

@ -32,6 +32,7 @@ $(eval $(call add_extra_objs,passes/opt/peepopt_pm.h))
PEEPOPT_PATTERN = passes/opt/peepopt_shiftmul_right.pmg
PEEPOPT_PATTERN += passes/opt/peepopt_shiftmul_left.pmg
PEEPOPT_PATTERN += passes/opt/peepopt_shiftadd.pmg
PEEPOPT_PATTERN += passes/opt/peepopt_shiftpow2.pmg
PEEPOPT_PATTERN += passes/opt/peepopt_muldiv.pmg
PEEPOPT_PATTERN += passes/opt/peepopt_muldiv_c.pmg
PEEPOPT_PATTERN += passes/opt/peepopt_formal_clockgateff.pmg

View File

@ -68,6 +68,12 @@ struct PeepoptPass : public Pass {
log(" limits the amount of padding to a multiple of the data, \n");
log(" to avoid high resource usage from large temporary MUX trees.\n");
log("\n");
log(" * shiftpow2 - Replace A>>(B<<K) with a $bmux word multiplexer when\n");
log(" the output width is at most the stride 1<<K. This handles\n");
log(" power-of-two aligned word selects.\n");
log(" Scratchpad: 'peepopt.shiftpow2.max_data_multiple' (default: 2)\n");
log(" limits padding for out-of-range select values.\n");
log("\n");
log("If -formalclk is specified it instead employs the following rules:\n");
log("\n");
log(" * clockgateff - Replace latch based clock gating patterns with a flip-flop\n");
@ -115,6 +121,7 @@ struct PeepoptPass : public Pass {
pm.run_shiftadd();
pm.run_shiftmul_right();
pm.run_shiftmul_left();
pm.run_shiftpow2();
pm.run_muldiv();
pm.run_muldiv_c();
}

View File

@ -0,0 +1,87 @@
pattern shiftpow2
//
// Optimize shifts that result from expressions such as foo[s*W+:W]
// when W is a power of two and the multiply has been folded away.
//
// The shift amount B must have the form (sel << K) with K >= 1, i.e. its K
// lowest bits are constant zero and the remaining bits are not all constant.
// When the output width is at most the stride 1<<K, the cell is replaced by a
// $bmux whose data input holds one Y_WIDTH-bit word per value of sel.
//
// Candidate cells are $shift, $shiftx and $shr with a non-empty B port.
//
match shift
select shift->type.in($shift, $shiftx, $shr)
filter !port(shift, \B).empty()
endmatch
code
{
// make sure the shift amount cannot be negative
// B_SIGNED is only consulted for $shift/$shiftx; an unsigned amount gets an
// explicit zero bit on top so the same MSB test covers both cases.
SigSpec amount = port(shift, \B);
bool b_signed = shift->type.in($shift, $shiftx) && param(shift, \B_SIGNED).as_bool();
if (!b_signed)
amount.append(State::S0);
if (amount.bits().back() != State::S0)
reject;
// drop constant-zero bits from the top, but keep at least one bit
while (GetSize(amount) > 1 && amount.bits().back() == State::S0)
amount.remove(GetSize(amount) - 1);
// low zero bits encode the power-of-two scale
int log2scale = 0;
while (!amount.empty() && amount[0] == State::S0) {
amount.remove(0);
log2scale++;
}
// without at least one constant-zero low bit there is no stride to exploit
if (log2scale < 1)
reject;
// what is left is the word index; it must exist and must not be constant
if (amount.empty() || amount.is_fully_const())
reject;
SigSpec sel = amount;
int sel_width = GetSize(sel);
int width = param(shift, \Y_WIDTH).as_int();
// keep 1 << log2scale representable in a (signed) int
if (log2scale >= 8 * (int)sizeof(int) - 1)
reject;
int stride = 1 << log2scale;
// avoid overlapping selections
// (an output wider than the stride would read bits of the next word)
if (width > stride)
reject;
// hard cap on the select width; the number of ways is 2^sel_width
if (sel_width > 20)
reject;
long long ways = 1LL << sel_width;
SigSpec A = port(shift, \A);
int a_width = GetSize(A);
// A_SIGNED is ignored for $shiftx; for the other cell types a signed A is
// treated as extended with its top bit up to max(a_width, width) bits
bool a_signed = !shift->type.in($shiftx) && param(shift, \A_SIGNED).as_bool();
int extended_a_width = a_signed ? std::max(a_width, width) : a_width;
// limit padding for out-of-range select values
// The $bmux data input has ways*width bits; reject if that exceeds
// max_ratio times the larger of the A width and the output width.
int max_ratio = module->design->scratchpad_get_int("peepopt.shiftpow2.max_data_multiple", 2);
if (ways * (long long)width > (long long)max_ratio * std::max(a_width, width))
reject;
did_something = true;
log("shiftpow2 pattern in %s: shift=%s, index=%s, stride=%d, width=%d, ways=%lld\n",
module, shift, log_signal(sel), stride, width, ways);
// way m holds A[m*stride +: width], way 0 in the LSBs
// Bits past the end of A are filled with x for $shiftx and with 0 otherwise.
State fill = shift->type.in($shiftx) ? State::Sx : State::S0;
SigSpec bmux_a;
for (long long m = 0; m < ways; m++) {
long long base = m * (long long)stride;
for (int b = 0; b < width; b++) {
long long idx = base + b;
// real bit of A, then sign-extension bits (copies of A's top bit),
// then the out-of-range fill value
if (idx < a_width)
bmux_a.append(A[idx]);
else if (idx < extended_a_width)
bmux_a.append(A.back());
else
bmux_a.append(fill);
}
}
// the new $bmux drives the original Y signal; the shift cell is removed
module->addBmux(NEW_ID, bmux_a, sel, port(shift, \Y));
autoremove(shift);
accept;
}
endcode

View File

@ -8,8 +8,8 @@ prep -nokeepdc
equiv_opt -assert peepopt
design -load postopt
clean
select -assert-count 1 t:$shiftx
select -assert-count 0 t:$shiftx t:* %D
select -assert-count 1 t:$bmux
select -assert-count 0 t:$bmux t:* %D
####################
@ -72,9 +72,10 @@ design -import gate -as gate peepopt_shiftmul_3
miter -equiv -make_assert -make_outputs -ignore_gold_x -flatten gold gate miter
sat -verify -show-public -enable_undef -prove-asserts miter
cd gate
select -assert-count 1 t:$shr
select -assert-count 1 t:$mul
select -assert-count 0 t:$shr t:$mul %% t:* %D
clean
select -assert-count 1 t:$bmux
select -assert-count 0 t:$shr
select -assert-count 0 t:$mul
####################
@ -92,3 +93,155 @@ equiv_opt -assert peepopt
design -load postopt
clean
select -assert-count 0 t:*
####################
# shiftpow2: a power-of-two part-select i[s*W+:W] becomes a $bmux word mux
# With the defaults M=8 and W=4 this picks one of eight 4-bit words out of a
# 32-bit input using a 3-bit index.
design -reset
read_verilog <<EOT
module peepopt_shiftpow2_0 #(parameter M=8, parameter W=4) (input [M*W-1:0] i, input [$clog2(M)-1:0] s, output [W-1:0] o);
assign o = i[s*W+:W];
endmodule
EOT
prep -nokeepdc
# equivalence of the design before and after peepopt is asserted here
equiv_opt -assert peepopt
design -load postopt
clean
# exactly one $bmux, and no cell of any other type
select -assert-count 1 t:$bmux
select -assert-count 0 t:$bmux t:* %D
####################
# shiftpow2: explicit aligned right shift D >> (S*8), checked by SAT miter
# Output width (8) equals the stride (8), the widest output the pattern accepts.
design -reset
read_verilog <<EOT
module peepopt_shiftpow2_1 (input [63:0] D, input [2:0] S, output [7:0] Y);
assign Y = D >> (S*8);
endmodule
EOT
prep
# gold is the design before peepopt, gate the design after it
design -save gold
peepopt
design -stash gate
design -import gold -as gold peepopt_shiftpow2_1
design -import gate -as gate peepopt_shiftpow2_1
miter -equiv -make_assert -make_outputs -ignore_gold_x -flatten gold gate miter
sat -verify -show-public -enable_undef -prove-asserts miter
cd gate
clean
# the $shr must have been replaced by a single $bmux
select -assert-count 1 t:$bmux
select -assert-count 0 t:$shr
####################
# shiftpow2: width smaller than stride is non-overlapping
# Y is 4 bits wide while the stride is 8. The 3-bit S can also address bit
# offsets up to 56, beyond the 32-bit D, so out-of-range ways are exercised too.
design -reset
read_verilog <<EOT
module peepopt_shiftpow2_narrow (input [31:0] D, input [2:0] S, output [3:0] Y);
assign Y = D >> (S*8);
endmodule
EOT
prep
# gold is the design before peepopt, gate the design after it
design -save gold
peepopt
design -stash gate
design -import gold -as gold peepopt_shiftpow2_narrow
design -import gate -as gate peepopt_shiftpow2_narrow
miter -equiv -make_assert -make_outputs -ignore_gold_x -flatten gold gate miter
sat -verify -show-public -enable_undef -prove-asserts miter
cd gate
clean
# the $shr must have been replaced by a single $bmux
select -assert-count 1 t:$bmux
select -assert-count 0 t:$shr
####################
# shiftpow2: signed part-select with out-of-range padding
# The 3-bit s reaches bit offset 28, past the end of the 16-bit input, so half
# of the eight selectable words lie entirely outside i.
design -reset
read_verilog <<EOT
module peepopt_shiftpow2_signed (input signed [15:0] i, input [2:0] s, output [3:0] o);
assign o = i[s*4 +: 4];
endmodule
EOT
prep
# drive wreduce to a fixed point before checking for the reduced shift
wreduce
# gold is the design before peepopt, gate the design after it
design -save gold
peepopt
design -stash gate
design -import gold -as gold peepopt_shiftpow2_signed
design -import gate -as gate peepopt_shiftpow2_signed
miter -equiv -make_assert -make_outputs -ignore_gold_x -flatten gold gate miter
sat -verify -show-public -enable_undef -prove-asserts miter
cd gate
clean
# the $shiftx must have been replaced by a single $bmux
select -assert-count 1 t:$bmux
select -assert-count 0 t:$shiftx
####################
# shiftpow2: signed $shr extends A to Y_WIDTH
# D is only 4 bits wide but Y is 8 bits, so the upper bits of way 0 have to
# come from extending D rather than from the zero fill.
# NOTE(review): unlike the other miter checks in this file, this one omits
# -ignore_gold_x and -enable_undef -- presumably because no x values are
# expected for $shr; confirm this is intentional.
design -reset
read_verilog <<EOT
module peepopt_shiftpow2_signed_shr (input signed [3:0] D, input S, output [7:0] Y);
assign Y = D >> (S*8);
endmodule
EOT
prep
# gold is the design before peepopt, gate the design after it
design -save gold
peepopt
design -stash gate
design -import gold -as gold peepopt_shiftpow2_signed_shr
design -import gate -as gate peepopt_shiftpow2_signed_shr
miter -equiv -make_assert -make_outputs -flatten gold gate miter
sat -verify -show-public -prove-asserts miter
cd gate
clean
# the $shr must have been replaced by a single $bmux
select -assert-count 1 t:$bmux
select -assert-count 0 t:$shr
####################
# shiftpow2 must NOT fire for overlapping selections
# Y is 8 bits wide but the stride is only 4, so consecutive select values
# would read overlapping bits of D; the shift has to stay as it is.
design -reset
read_verilog <<EOT
module peepopt_shiftpow2_overlap (input [31:0] D, input [1:0] S, output [7:0] Y);
assign Y = D >> (S*4);
endmodule
EOT
prep -nokeepdc
peepopt
clean
# no $bmux may be created and the original $shr must survive
select -assert-count 0 t:$bmux
select -assert-count 1 t:$shr
####################
# shiftpow2: shiftmul can expose a non-overlapping power-of-two stride
# W=3 is not a power of two, so s*W has no constant-zero low bits to match on.
# NOTE(review): presumably the shiftmul rule first rewrites the shift to a
# padded power-of-two stride, which shiftpow2 then picks up -- confirm.
design -reset
read_verilog <<EOT
module peepopt_shiftpow2_shiftmul #(parameter M=8, parameter W=3) (input [M*W-1:0] i, input [2:0] s, output [W-1:0] o);
assign o = i[s*W+:W];
endmodule
EOT
prep -nokeepdc
# equivalence of the design before and after peepopt is asserted here
equiv_opt -assert peepopt
design -load postopt
clean
# exactly one $bmux, and no cell of any other type
select -assert-count 1 t:$bmux
select -assert-count 0 t:$bmux t:* %D