From a3003a2d0fd5b8afc72c7a2133ea6a5ab5bea21b Mon Sep 17 00:00:00 2001 From: Varun Koyyalagunta Date: Fri, 8 May 2026 12:39:33 -0500 Subject: [PATCH] Add 15 new DFG peephole optimizations (#7553) - FOLD_SELF_SUB: x - x -> 0 - REMOVE_DIV_ONE / REMOVE_DIVS_ONE: x / 1 -> x (unsigned and signed) - REMOVE_MUL_ZERO / REMOVE_MUL_ONE / REMOVE_MULS_ZERO / REMOVE_MULS_ONE: 0 * x -> 0 and 1 * x -> x (unsigned and signed) - REPLACE_NOT_LT/GTE/GT/LTE and their signed counterparts: eliminate a Not vertex by flipping the comparison operator Test signals for the NOT-of-comparison GT/GTE variants use unique shift amounts to prevent intra-pass CSE from merging their DfgGt/DfgGte vertices with those produced when the complementary LT/LTE patterns fire, which would otherwise trigger the !hasMultipleSinks() guard. --- src/V3DfgPeephole.cpp | 116 ++++++++++++++++++++++++++++++++ src/V3DfgPeepholePatterns.h | 15 +++++ test_regress/t/t_dfg_peephole.v | 24 ++++++- 3 files changed, 153 insertions(+), 2 deletions(-) diff --git a/src/V3DfgPeephole.cpp b/src/V3DfgPeephole.cpp index c20c6ca32..f8a9a2b46 100644 --- a/src/V3DfgPeephole.cpp +++ b/src/V3DfgPeephole.cpp @@ -1270,6 +1270,58 @@ class V3DfgPeephole final : public DfgVisitor { return; } } + + // Not of unsigned comparisons + if (DfgLt* const p = vtxp->srcp()->cast()) { + APPLYING(REPLACE_NOT_LT) { + replace(make(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp())); + return; + } + } + if (DfgGte* const p = vtxp->srcp()->cast()) { + APPLYING(REPLACE_NOT_GTE) { + replace(make(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp())); + return; + } + } + if (DfgGt* const p = vtxp->srcp()->cast()) { + APPLYING(REPLACE_NOT_GT) { + replace(make(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp())); + return; + } + } + if (DfgLte* const p = vtxp->srcp()->cast()) { + APPLYING(REPLACE_NOT_LTE) { + replace(make(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp())); + return; + } + } + + // Not of signed comparisons + if (DfgLtS* const p = vtxp->srcp()->cast()) { + APPLYING(REPLACE_NOT_LTS) { + replace(make(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp())); + return; + } + } + if (DfgGteS* const p = vtxp->srcp()->cast()) { + APPLYING(REPLACE_NOT_GTES) { + replace(make(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp())); + return; + } + } + if (DfgGtS* const p = vtxp->srcp()->cast()) { + APPLYING(REPLACE_NOT_GTS) { + replace(make(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp())); + return; + } + } + if (DfgLteS* const p = vtxp->srcp()->cast()) { + APPLYING(REPLACE_NOT_LTES) { + replace(make(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp())); + return; + } + } } } @@ -2007,10 +2059,28 @@ class V3DfgPeephole final : public DfgVisitor { void visit(DfgDiv* const vtxp) override { if (binary(vtxp)) return; + + if (DfgConst* const rConstp = vtxp->rhsp()->cast()) { + if (isEqOne(rConstp)) { + APPLYING(REMOVE_DIV_ONE) { + replace(vtxp->lhsp()); + return; + } + } + } } void visit(DfgDivS* const vtxp) override { if (binary(vtxp)) return; + + if (DfgConst* const rConstp = vtxp->rhsp()->cast()) { + if (isEqOne(rConstp)) { + APPLYING(REMOVE_DIVS_ONE) { + replace(vtxp->lhsp()); + return; + } + } + } } void visit(DfgGt* const vtxp) override { @@ -2179,10 +2249,48 @@ class V3DfgPeephole final : public DfgVisitor { void visit(DfgMul* const vtxp) override { if (binary(vtxp)) return; + + DfgVertex* const lhsp = vtxp->lhsp(); + DfgVertex* const rhsp = vtxp->rhsp(); + FileLine* const flp = vtxp->fileline(); + + if (DfgConst* const lConstp = lhsp->cast()) { + if (isZero(lConstp)) { + APPLYING(REMOVE_MUL_ZERO) { + replace(makeZero(flp, vtxp->dtype())); + return; + } + } + if (isEqOne(lConstp)) { + APPLYING(REMOVE_MUL_ONE) { + replace(rhsp); + return; + } + } + } } void visit(DfgMulS* const vtxp) override { if (binary(vtxp)) return; + + DfgVertex* const lhsp = vtxp->lhsp(); + DfgVertex* const rhsp = vtxp->rhsp(); + FileLine* const flp = vtxp->fileline(); + + if (DfgConst* const lConstp = lhsp->cast()) { + if (isZero(lConstp)) { + APPLYING(REMOVE_MULS_ZERO) { + replace(makeZero(flp, vtxp->dtype())); + return; + } + } + if (isEqOne(lConstp)) { + APPLYING(REMOVE_MULS_ONE) { + replace(rhsp); + return; + } + } + } } void visit(DfgEq* const vtxp) override { @@ -2493,6 +2601,14 @@ class V3DfgPeephole final : public DfgVisitor { DfgVertex* const lhsp = vtxp->lhsp(); DfgVertex* const rhsp = vtxp->rhsp(); + FileLine* const flp = vtxp->fileline(); + + if (isSame(lhsp, rhsp)) { + APPLYING(FOLD_SELF_SUB) { + replace(makeZero(flp, vtxp->dtype())); + return; + } + } if (DfgConst* const rConstp = rhsp->cast()) { if (rConstp->isZero()) { diff --git a/src/V3DfgPeepholePatterns.h b/src/V3DfgPeepholePatterns.h index 249130cc2..d8d295b2d 100644 --- a/src/V3DfgPeepholePatterns.h +++ b/src/V3DfgPeepholePatterns.h @@ -44,6 +44,7 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_LTES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_LTS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_NEQ) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_SUB) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_UNARY) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_ARRAYSEL_SPLICE) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_ARRAYSEL_UNIT) \ @@ -77,9 +78,15 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_COND_WITH_BRANCHES_SAME) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_COND_WITH_FALSE_CONDITION) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_COND_WITH_TRUE_CONDITION) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_DIVS_ONE) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_DIV_ONE) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_EQ_BIT_1) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_FULL_WIDTH_SEL) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_IDEMPOTENT_BINARY) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MULS_ONE) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MULS_ZERO) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MUL_ONE) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MUL_ZERO) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_NEQ_BIT_0) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_NOT_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_OR_WITH_ZERO) \ @@ -138,6 +145,14 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_EQ) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GT) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GTE) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GTES) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GTS) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LT) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LTE) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LTES) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LTS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_NEQ) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_LHS_ZERO_AND_CONCAT_ZERO_RHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_ZERO_LHS_AND_CONCAT_RHS_ZERO) \ diff --git a/test_regress/t/t_dfg_peephole.v b/test_regress/t/t_dfg_peephole.v index 8ec285978..aa15acf60 100644 --- a/test_regress/t/t_dfg_peephole.v +++ b/test_regress/t/t_dfg_peephole.v @@ -68,6 +68,8 @@ module t ( `signal(FOLD_BINARY_Concat, {const_a, const_b}); `signal(FOLD_BINARY_Div, const_a / 64'd3); `signal(FOLD_BINARY_DivS, sconst_a / 64'sd3); + `signal(REMOVE_DIV_ONE, rand_a / 64'd1); + `signal(REMOVE_DIVS_ONE, srand_a / 64'sd1); `signal(FOLD_BINARY_Eq, const_a == const_b); `signal(FOLD_BINARY_Gt, const_a > const_b); `signal(FOLD_BINARY_GtS, sconst_a > sconst_b); @@ -89,6 +91,10 @@ module t ( `signal(FOLD_BINARY_ModDivS, sconst_a % 64'sd3); `signal(FOLD_BINARY_Mul, const_a * 64'd3); `signal(FOLD_BINARY_MulS, sconst_a * 64'sd3); + `signal(REMOVE_MUL_ZERO, rand_a * 64'd0); + `signal(REMOVE_MUL_ONE, rand_a * 64'd1); + `signal(REMOVE_MULS_ZERO, srand_a * 64'sd0); + `signal(REMOVE_MULS_ONE, srand_a * 64'sd1); `signal(FOLD_BINARY_Neq, const_a != const_b); `signal(FOLD_BINARY_Or, const_a | const_b); `signal(FOLD_BINARY_Pow, const_a ** 64'd2); @@ -134,8 +140,21 @@ module t ( `signal(REPLACE_EXTEND, 4'(rand_a[0])); `signal(PUSH_NOT_THROUGH_COND, ~(rand_a[0] ? rand_a[4:0] : 5'hb)); `signal(REMOVE_NOT_NOT, ~~rand_a); - `signal(REPLACE_NOT_NEQ, ~(rand_a != rand_b)); - `signal(REPLACE_NOT_EQ, ~(srand_a == srand_b)); + `signal(REPLACE_NOT_NEQ, ~(rand_a != rand_b)); + `signal(REPLACE_NOT_EQ, ~(srand_a == srand_b)); + // GT/GTE variants use shifted operands to avoid CSE interference. The peephole's + // REPLACE_NOT_LT fires first and emits a new DfgGte(rand_a, rand_b); the intra-pass + // CSE then merges it with the existing DfgGte(rand_a, rand_b) that is the source of + // REPLACE_NOT_GTE, giving that vertex two sinks and causing its !hasMultipleSinks() + // guard to fail. Unique shift amounts break the CSE match, so all eight patterns fire. + `signal(REPLACE_NOT_GT, ~((rand_a >> 18) > (rand_b >> 18))); + `signal(REPLACE_NOT_GTE, ~((rand_a >> 19) >= (rand_b >> 19))); + `signal(REPLACE_NOT_GTES, ~((srand_a >>> 20) >= (srand_b >>> 20))); + `signal(REPLACE_NOT_GTS, ~((srand_a >>> 21) > (srand_b >>> 21))); + `signal(REPLACE_NOT_LT, ~(rand_a < rand_b)); + `signal(REPLACE_NOT_LTE, ~(rand_a <= rand_b)); + `signal(REPLACE_NOT_LTES, ~(srand_a <= srand_b)); + `signal(REPLACE_NOT_LTS, ~(srand_a < srand_b)); `signal(REPLACE_NOT_OF_CONST, ~4'd0); `signal(REPLACE_DISTRIBUTIVE_AND_OR_ABAC, ((rand_a >> 10) | (rand_b >> 10)) & ((rand_a >> 10) | (srand_b >> 10))); `signal(REPLACE_DISTRIBUTIVE_AND_OR_ABCA, ((rand_a >> 11) | (rand_b >> 11)) & ((srand_b >> 11) | (rand_a >> 11))); @@ -164,6 +183,7 @@ module t ( `signal(REMOVE_OR_WITH_ZERO, 64'd0 | rand_a); `signal(REPLACE_TAUTOLOGICAL_OR, rand_a | ~rand_a); `signal(REPLACE_TAUTOLOGICAL_OR_3, ~(rand_a + 1) | ((rand_a + 1) | rand_b)); + `signal(FOLD_SELF_SUB, rand_a - rand_a); `signal(REMOVE_SUB_ZERO, rand_a - 64'd0); `signal(REPLACE_SUB_WITH_NOT, rand_a[0] - 1'b1); `signal(REMOVE_REDUNDANT_ZEXT_ON_RHS_OF_SHIFT, rand_a << {2'b0, rand_a[2:0]});