Optimize arithmetic right shift (>>>) in DfgBreakCycles (#7447)

This commit is contained in:
Geza Lore 2026-04-19 20:28:17 +01:00 committed by GitHub
parent 60b41c7822
commit e82bd52fa3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 120 additions and 4 deletions

View File

@ -471,6 +471,44 @@ class TraceDriver final : public DfgVisitor {
void visit(DfgGt* vtxp) override { SET_RESULT(traceCmp(vtxp)); }
void visit(DfgGte* vtxp) override { SET_RESULT(traceCmp(vtxp)); }
// Trace bits [m_msb:m_lsb] of an arithmetic right shift ('>>>') back through
// its operands, building a replacement vertex that yields just those bits.
void visit(DfgShiftRS* vtxp) override {
    DfgVertex* const operandp = vtxp->lhsp();
    const DfgConst* const amountp = vtxp->rhsp()->cast<DfgConst>();

    if (!amountp) {
        // Variable shift amount. Result bit 'i' can only come from operand
        // bits 'i' and above, so shift the operand bits from m_lsb upwards
        // by the fully traced amount, then pick the low bits if needed.
        DfgVertex* const countp = vtxp->rhsp();
        const auto operandWidth = operandp->width();
        DfgShiftRS* const narrowp = make<DfgShiftRS>(vtxp, operandWidth - m_lsb);
        narrowp->rhsp(trace(countp, countp->width() - 1, 0));
        narrowp->lhsp(trace(operandp, operandWidth - 1, m_lsb));
        if (m_msb != operandWidth - 1) {
            // Only part of the narrowed shift is wanted, select it
            DfgSel* const slicep = make<DfgSel>(vtxp, m_msb - m_lsb + 1);
            slicep->fromp(narrowp);
            slicep->lsb(0);
            SET_RESULT(slicep);
        } else {
            // The narrowed shift is exactly the traced range
            SET_RESULT(narrowp);
        }
        return;
    }

    // Constant shift amount
    const uint32_t shift = amountp->toU32();
    const auto resultWidth = vtxp->width();
    // Number of low result bits that are taken directly from the operand,
    // the result bits above these are copies of the operand sign bit
    const uint32_t passWidth = shift > resultWidth ? 0 : resultWidth - shift;

    // Traced range lies entirely in the bits taken from the operand
    if (m_msb < passWidth) {
        SET_RESULT(trace(operandp, m_msb + shift, m_lsb + shift));
        return;
    }

    // Otherwise the traced range includes sign fill, so the result is a sign
    // extension of some top part of the operand.
    DfgExtendS* const extp = make<DfgExtendS>(vtxp, m_msb - m_lsb + 1);
    if (m_lsb >= passWidth) {
        // Entirely sign fill: extend the operand sign bit alone.
        // NOTE(review): when a single bit is traced, this extension has the
        // same width as its 1-bit source - confirm DfgExtendS permits that.
        const auto signIdx = operandp->width() - 1;
        extp->srcp(trace(operandp, signIdx, signIdx));
    } else {
        // Straddles both: extend the operand bits from the shifted lsb up to
        // the top of the operand
        extp->srcp(trace(operandp, passWidth - 1 + shift, m_lsb + shift));
    }
    SET_RESULT(extp);
}
void visit(DfgShiftR* vtxp) override {
DfgVertex* const lhsp = vtxp->lhsp();
if (const DfgConst* const rConstp = vtxp->rhsp()->cast<DfgConst>()) {
@ -823,6 +861,38 @@ class IndependentBits final : public DfgVisitor {
MASK(vtxp).setBit(0, independent ? '1' : '0');
}
// Compute the independence mask of an arithmetic right shift ('>>>').
// A '1' bit in a mask marks the corresponding vertex bit as independent.
void visit(DfgShiftRS* vtxp) override {
    DfgVertex* const rhsp = vtxp->rhsp();
    DfgVertex* const lhsp = vtxp->lhsp();
    const uint32_t width = vtxp->width();
    // Constant shift can be computed precisely
    if (DfgConst* const rConstp = rhsp->cast<DfgConst>()) {
        const uint32_t shiftAmount = rConstp->toU32();
        if (shiftAmount >= width) {
            // Every result bit is a copy of the lhs sign bit, so the whole
            // result is exactly as independent as that one bit
            if (MASK(lhsp).bitIs0(width - 1)) {
                MASK(vtxp).setAllBits0();
            } else {
                MASK(vtxp).setAllBits1();
            }
        } else {
            // Result bit 'i' is lhs bit 'i + shiftAmount' for the low bits,
            // and a copy of the lhs sign bit for the top 'shiftAmount' bits.
            // Arithmetic-shifting the lhs mask gives exactly this mapping
            // (relies on opShiftRS filling from bit 'width - 1'). Unlike the
            // logical shifts, the filled bits must NOT be forced to '1': they
            // are not constant, they depend on the sign bit.
            MASK(vtxp).opShiftRS(MASK(lhsp), rConstp->num(), width);
        }
        return;
    }
    // Otherwise, as the shift amount is non-negative, all independent
    // and consecutive top bits in the lhs yield an independent result
    // if the shift amount is independent. If the shift amount is not fully
    // independent, the mask of this vertex is left untouched.
    if (MASK(rhsp).isEqAllOnes()) {
        V3Number& m = MASK(vtxp);
        m = MASK(lhsp);
        // Presumably keeps only the run of '1's starting at the msb
        floodTowardsLsb(m);
    }
}
void visit(DfgShiftR* vtxp) override {
DfgVertex* const rhsp = vtxp->rhsp();
DfgVertex* const lhsp = vtxp->lhsp();
@ -831,10 +901,10 @@ class IndependentBits final : public DfgVisitor {
// Constant shift can be computed precisely
if (DfgConst* const rConstp = rhsp->cast<DfgConst>()) {
const uint32_t shiftAmount = rConstp->toU32();
V3Number& m = MASK(vtxp);
if (shiftAmount >= width) {
m.setAllBits1();
MASK(vtxp).setAllBits1();
} else {
V3Number& m = MASK(vtxp);
m.opShiftR(MASK(lhsp), rConstp->num());
m.opSetRange(width - shiftAmount, shiftAmount, '1');
}
@ -859,10 +929,10 @@ class IndependentBits final : public DfgVisitor {
// Constant shift can be computed precisely
if (DfgConst* const rConstp = rhsp->cast<DfgConst>()) {
const uint32_t shiftAmount = rConstp->toU32();
V3Number& m = MASK(vtxp);
if (shiftAmount >= width) {
m.setAllBits1();
MASK(vtxp).setAllBits1();
} else {
V3Number& m = MASK(vtxp);
m.opShiftL(MASK(lhsp), rConstp->num());
m.opSetRange(0, shiftAmount, '1');
}

View File

@ -81,6 +81,7 @@ test.compile(verilator_flags2=[
"--stats",
"--build",
"--exe",
"-fno-const-before-dfg",
"+incdir+" + test.obj_dir,
"-Mdir", test.obj_dir + "/obj_opt",
"--prefix", "Vopt",

View File

@ -87,6 +87,43 @@ module t (
`signal(OR, 3); // UNOPTFLAT
assign OR = rand_a[2:0] | 3'(OR[2:1]);
// Constant arithmetic right shift of the low nibble (driven by rand_a),
// with the upper bits fed back from lower bits of the same signal
`signal(SHIFTRS, 14); // UNOPTFLAT
assign SHIFTRS = {
SHIFTRS[6:5], // 13:12
SHIFTRS[7:6], // 11:10
SHIFTRS[5:4], // 9:8
signed'(SHIFTRS[3:0]) >>> 2, // 7:4
rand_a[3:0] // 3:0
};
// Constant shift by 2: the fed back bits [9:2] of the shift result cover
// both the sign fill (bits 9:8) and bits taken from the shifted operand
`signal(SHIFTRS_2_A, 10); // UNOPTFLAT
wire logic [9:0] SHIFTRS_2_B = signed'(SHIFTRS_2_A) >>> 2;
assign SHIFTRS_2_A = {rand_a[1:0], SHIFTRS_2_B[9:2]};
// Constant shift by the full width (10): every result bit is sign fill
`signal(SHIFTRS_3_A, 10); // UNOPTFLAT
wire logic [9:0] SHIFTRS_3_B = signed'(SHIFTRS_3_A) >>> 10;
assign SHIFTRS_3_A = {rand_a[1:0], SHIFTRS_3_B[9:2]};
// Constant shift by 2: the fed back bits [7:2] of the shift result are all
// taken from the shifted operand, none from the sign fill
`signal(SHIFTRS_4_A, 10); // UNOPTFLAT
wire logic [9:0] SHIFTRS_4_B = signed'(SHIFTRS_4_A) >>> 2;
assign SHIFTRS_4_A = {rand_a[3:0], SHIFTRS_4_B[7:2]};
// Variable shift amount (rand_b[0]), top bit of the shifted operand is 0.
// NOTE(review): signed'() wraps the whole 'rand_a ^ (... >>> ...)' expression
// here, so the left operand of '>>>' is an unsigned concatenation and this
// looks like it is evaluated as a logical shift. Compare SHIFTR_VARIABLE, and
// confirm whether "signed'({...}) >>> rand_b[0]" inside the XOR was intended.
`signal(SHIFTRS_VARIABLE, 2); // UNOPTFLAT
assign SHIFTRS_VARIABLE = signed'(rand_a[1:0] ^ ({1'b0, SHIFTRS_VARIABLE[1]}) >>> rand_b[0]);
// As above, but top bit of the shifted operand is 1 (same NOTE(review) applies)
`signal(SHIFTRS_VARIABLE_2, 2); // UNOPTFLAT
assign SHIFTRS_VARIABLE_2 = signed'(rand_a[1:0] ^ ({1'b1, SHIFTRS_VARIABLE_2[1]}) >>> rand_b[0]);
// Variable shift of a signed operand whose top 4 bits are constant ones and
// whose bit 0 is fed back; the low 4 bits of the shift result are used
`signal(SHIFTRS_VARIABLE_3_A, 4); // UNOPTFLAT
`signal(SHIFTRS_VARIABLE_3_B, 5);
assign SHIFTRS_VARIABLE_3_B = signed'({4'b1111, SHIFTRS_VARIABLE_3_A[1]}) >>> rand_b[0];
assign SHIFTRS_VARIABLE_3_A = rand_a[3:0] ^ SHIFTRS_VARIABLE_3_B[3:0];
// Same as the previous case, but the top 4 bits of the shift result are used
`signal(SHIFTRS_VARIABLE_4_A, 4); // UNOPTFLAT
`signal(SHIFTRS_VARIABLE_4_B, 5);
assign SHIFTRS_VARIABLE_4_B = signed'({4'b1111, SHIFTRS_VARIABLE_4_A[1]}) >>> rand_b[0];
assign SHIFTRS_VARIABLE_4_A = rand_a[3:0] ^ SHIFTRS_VARIABLE_4_B[4:1];
`signal(SHIFTR, 14); // UNOPTFLAT
assign SHIFTR = {
SHIFTR[6:5], // 13:12
@ -100,6 +137,10 @@ module t (
wire logic [9:0] SHIFTR_2_B = SHIFTR_2_A >> 2;
assign SHIFTR_2_A = {rand_a[1:0], SHIFTR_2_B[9:2]};
`signal(SHIFTR_3_A, 10);
wire logic [9:0] SHIFTR_3_B = SHIFTR_3_A >> 10;
assign SHIFTR_3_A = {rand_a[1:0], SHIFTR_3_B[9:2]};
`signal(SHIFTR_VARIABLE, 2); // UNOPTFLAT
assign SHIFTR_VARIABLE = rand_a[1:0] ^ ({1'b0, SHIFTR_VARIABLE[1]} >> rand_b[0]);
@ -124,6 +165,10 @@ module t (
wire logic [9:0] SHIFTL_2_B = SHIFTL_2_A << 2;
assign SHIFTL_2_A = {SHIFTL_2_B[9:2], rand_a[1:0]};
`signal(SHIFTL_3_A, 10);
wire logic [9:0] SHIFTL_3_B = SHIFTL_3_A << 10;
assign SHIFTL_3_A = {SHIFTL_3_B[9:2], rand_a[1:0]};
`signal(SHIFTL_VARIABLE, 2); // UNOPTFLAT
assign SHIFTL_VARIABLE = rand_a[1:0] ^ ({SHIFTL_VARIABLE[0], 1'b0} << rand_b[0]);