Add 15 new DFG peephole optimizations (#7553)

- FOLD_SELF_SUB: x - x -> 0
- REMOVE_DIV_ONE / REMOVE_DIVS_ONE: x / 1 -> x (unsigned and signed)
- REMOVE_MUL_ZERO / REMOVE_MUL_ONE / REMOVE_MULS_ZERO / REMOVE_MULS_ONE:
  0 * x -> 0 and 1 * x -> x (unsigned and signed)
- REPLACE_NOT_LT/GTE/GT/LTE and their signed counterparts: eliminate a
  Not vertex by flipping the comparison operator

Test signals for the NOT-of-comparison GT/GTE variants use unique shift
amounts to prevent intra-pass CSE from merging their DfgGt/DfgGte vertices
with those produced when the complementary LT/LTE patterns fire. Such a
merge would give the source vertex multiple sinks, so the
!hasMultipleSinks() guard would block the GT/GTE patterns from applying.
This commit is contained in:
Varun Koyyalagunta 2026-05-08 12:39:33 -05:00 committed by GitHub
parent aa68ae953e
commit a3003a2d0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 153 additions and 2 deletions

View File

@ -1270,6 +1270,58 @@ class V3DfgPeephole final : public DfgVisitor {
return;
}
}
// Not of unsigned comparisons
if (DfgLt* const p = vtxp->srcp()->cast<DfgLt>()) {
APPLYING(REPLACE_NOT_LT) {
replace(make<DfgGte>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
return;
}
}
if (DfgGte* const p = vtxp->srcp()->cast<DfgGte>()) {
APPLYING(REPLACE_NOT_GTE) {
replace(make<DfgLt>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
return;
}
}
if (DfgGt* const p = vtxp->srcp()->cast<DfgGt>()) {
APPLYING(REPLACE_NOT_GT) {
replace(make<DfgLte>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
return;
}
}
if (DfgLte* const p = vtxp->srcp()->cast<DfgLte>()) {
APPLYING(REPLACE_NOT_LTE) {
replace(make<DfgGt>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
return;
}
}
// Not of signed comparisons
if (DfgLtS* const p = vtxp->srcp()->cast<DfgLtS>()) {
APPLYING(REPLACE_NOT_LTS) {
replace(make<DfgGteS>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
return;
}
}
if (DfgGteS* const p = vtxp->srcp()->cast<DfgGteS>()) {
APPLYING(REPLACE_NOT_GTES) {
replace(make<DfgLtS>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
return;
}
}
if (DfgGtS* const p = vtxp->srcp()->cast<DfgGtS>()) {
APPLYING(REPLACE_NOT_GTS) {
replace(make<DfgLteS>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
return;
}
}
if (DfgLteS* const p = vtxp->srcp()->cast<DfgLteS>()) {
APPLYING(REPLACE_NOT_LTES) {
replace(make<DfgGtS>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
return;
}
}
}
}
@ -2007,10 +2059,28 @@ class V3DfgPeephole final : public DfgVisitor {
// Peephole patterns for unsigned division.
void visit(DfgDiv* const vtxp) override {
    if (binary(vtxp)) return;

    // The only pattern here needs a constant divisor equal to one
    DfgConst* const divisorp = vtxp->rhsp()->cast<DfgConst>();
    if (!divisorp || !isEqOne(divisorp)) return;

    // x / 1 -> x
    APPLYING(REMOVE_DIV_ONE) {
        replace(vtxp->lhsp());
        return;
    }
}
// Peephole patterns for signed division.
void visit(DfgDivS* const vtxp) override {
    if (binary(vtxp)) return;

    // The only pattern here needs a constant divisor equal to one
    DfgConst* const divisorp = vtxp->rhsp()->cast<DfgConst>();
    if (!divisorp || !isEqOne(divisorp)) return;

    // x / 1 -> x
    APPLYING(REMOVE_DIVS_ONE) {
        replace(vtxp->lhsp());
        return;
    }
}
void visit(DfgGt* const vtxp) override {
@ -2179,10 +2249,48 @@ class V3DfgPeephole final : public DfgVisitor {
// Peephole patterns for unsigned multiplication.
void visit(DfgMul* const vtxp) override {
    if (binary(vtxp)) return;

    // Both patterns below require a constant left-hand operand
    DfgConst* const factorp = vtxp->lhsp()->cast<DfgConst>();
    if (!factorp) return;

    // 0 * x -> 0
    if (isZero(factorp)) {
        APPLYING(REMOVE_MUL_ZERO) {
            replace(makeZero(vtxp->fileline(), vtxp->dtype()));
            return;
        }
    }

    // 1 * x -> x
    if (isEqOne(factorp)) {
        APPLYING(REMOVE_MUL_ONE) {
            replace(vtxp->rhsp());
            return;
        }
    }
}
// Peephole patterns for signed multiplication.
void visit(DfgMulS* const vtxp) override {
    if (binary(vtxp)) return;

    // Both patterns below require a constant left-hand operand
    DfgConst* const factorp = vtxp->lhsp()->cast<DfgConst>();
    if (!factorp) return;

    // 0 * x -> 0
    if (isZero(factorp)) {
        APPLYING(REMOVE_MULS_ZERO) {
            replace(makeZero(vtxp->fileline(), vtxp->dtype()));
            return;
        }
    }

    // 1 * x -> x
    if (isEqOne(factorp)) {
        APPLYING(REMOVE_MULS_ONE) {
            replace(vtxp->rhsp());
            return;
        }
    }
}
void visit(DfgEq* const vtxp) override {
@ -2493,6 +2601,14 @@ class V3DfgPeephole final : public DfgVisitor {
DfgVertex* const lhsp = vtxp->lhsp();
DfgVertex* const rhsp = vtxp->rhsp();
FileLine* const flp = vtxp->fileline();
if (isSame(lhsp, rhsp)) {
APPLYING(FOLD_SELF_SUB) {
replace(makeZero(flp, vtxp->dtype()));
return;
}
}
if (DfgConst* const rConstp = rhsp->cast<DfgConst>()) {
if (rConstp->isZero()) {

View File

@ -44,6 +44,7 @@
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_LTES) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_LTS) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_NEQ) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_SUB) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_UNARY) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_ARRAYSEL_SPLICE) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_ARRAYSEL_UNIT) \
@ -77,9 +78,15 @@
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_COND_WITH_BRANCHES_SAME) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_COND_WITH_FALSE_CONDITION) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_COND_WITH_TRUE_CONDITION) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_DIVS_ONE) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_DIV_ONE) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_EQ_BIT_1) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_FULL_WIDTH_SEL) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_IDEMPOTENT_BINARY) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MULS_ONE) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MULS_ZERO) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MUL_ONE) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MUL_ZERO) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_NEQ_BIT_0) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_NOT_NOT) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_OR_WITH_ZERO) \
@ -138,6 +145,14 @@
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_EQ) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GT) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GTE) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GTES) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GTS) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LT) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LTE) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LTES) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LTS) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_NEQ) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_LHS_ZERO_AND_CONCAT_ZERO_RHS) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_ZERO_LHS_AND_CONCAT_RHS_ZERO) \

View File

@ -68,6 +68,8 @@ module t (
`signal(FOLD_BINARY_Concat, {const_a, const_b});
`signal(FOLD_BINARY_Div, const_a / 64'd3);
`signal(FOLD_BINARY_DivS, sconst_a / 64'sd3);
`signal(REMOVE_DIV_ONE, rand_a / 64'd1);
`signal(REMOVE_DIVS_ONE, srand_a / 64'sd1);
`signal(FOLD_BINARY_Eq, const_a == const_b);
`signal(FOLD_BINARY_Gt, const_a > const_b);
`signal(FOLD_BINARY_GtS, sconst_a > sconst_b);
@ -89,6 +91,10 @@ module t (
`signal(FOLD_BINARY_ModDivS, sconst_a % 64'sd3);
`signal(FOLD_BINARY_Mul, const_a * 64'd3);
`signal(FOLD_BINARY_MulS, sconst_a * 64'sd3);
`signal(REMOVE_MUL_ZERO, rand_a * 64'd0);
`signal(REMOVE_MUL_ONE, rand_a * 64'd1);
`signal(REMOVE_MULS_ZERO, srand_a * 64'sd0);
`signal(REMOVE_MULS_ONE, srand_a * 64'sd1);
`signal(FOLD_BINARY_Neq, const_a != const_b);
`signal(FOLD_BINARY_Or, const_a | const_b);
`signal(FOLD_BINARY_Pow, const_a ** 64'd2);
@ -134,8 +140,21 @@ module t (
`signal(REPLACE_EXTEND, 4'(rand_a[0]));
`signal(PUSH_NOT_THROUGH_COND, ~(rand_a[0] ? rand_a[4:0] : 5'hb));
`signal(REMOVE_NOT_NOT, ~~rand_a);
`signal(REPLACE_NOT_NEQ, ~(rand_a != rand_b));
`signal(REPLACE_NOT_EQ, ~(srand_a == srand_b));
`signal(REPLACE_NOT_NEQ, ~(rand_a != rand_b));
`signal(REPLACE_NOT_EQ, ~(srand_a == srand_b));
// GT/GTE variants use shifted operands to avoid CSE interference. The peephole's
// REPLACE_NOT_LT fires first and emits a new DfgGte(rand_a, rand_b); the intra-pass
// CSE then merges it with the existing DfgGte(rand_a, rand_b) that is the source of
// REPLACE_NOT_GTE, giving that vertex two sinks and causing its !hasMultipleSinks()
// guard to fail. Unique shift amounts break the CSE match, so all eight patterns fire.
`signal(REPLACE_NOT_GT, ~((rand_a >> 18) > (rand_b >> 18)));
`signal(REPLACE_NOT_GTE, ~((rand_a >> 19) >= (rand_b >> 19)));
`signal(REPLACE_NOT_GTES, ~((srand_a >>> 20) >= (srand_b >>> 20)));
`signal(REPLACE_NOT_GTS, ~((srand_a >>> 21) > (srand_b >>> 21)));
`signal(REPLACE_NOT_LT, ~(rand_a < rand_b));
`signal(REPLACE_NOT_LTE, ~(rand_a <= rand_b));
`signal(REPLACE_NOT_LTES, ~(srand_a <= srand_b));
`signal(REPLACE_NOT_LTS, ~(srand_a < srand_b));
`signal(REPLACE_NOT_OF_CONST, ~4'd0);
`signal(REPLACE_DISTRIBUTIVE_AND_OR_ABAC, ((rand_a >> 10) | (rand_b >> 10)) & ((rand_a >> 10) | (srand_b >> 10)));
`signal(REPLACE_DISTRIBUTIVE_AND_OR_ABCA, ((rand_a >> 11) | (rand_b >> 11)) & ((srand_b >> 11) | (rand_a >> 11)));
@ -164,6 +183,7 @@ module t (
`signal(REMOVE_OR_WITH_ZERO, 64'd0 | rand_a);
`signal(REPLACE_TAUTOLOGICAL_OR, rand_a | ~rand_a);
`signal(REPLACE_TAUTOLOGICAL_OR_3, ~(rand_a + 1) | ((rand_a + 1) | rand_b));
`signal(FOLD_SELF_SUB, rand_a - rand_a);
`signal(REMOVE_SUB_ZERO, rand_a - 64'd0);
`signal(REPLACE_SUB_WITH_NOT, rand_a[0] - 1'b1);
`signal(REMOVE_REDUNDANT_ZEXT_ON_RHS_OF_SHIFT, rand_a << {2'b0, rand_a[2:0]});