Add 15 new DFG peephole optimizations (#7553)
New patterns:
- FOLD_SELF_SUB: x - x -> 0
- REMOVE_DIV_ONE / REMOVE_DIVS_ONE: x / 1 -> x (unsigned and signed)
- REMOVE_MUL_ZERO / REMOVE_MUL_ONE / REMOVE_MULS_ZERO / REMOVE_MULS_ONE: 0 * x -> 0 and 1 * x -> x (unsigned and signed)
- REPLACE_NOT_LT / REPLACE_NOT_GTE / REPLACE_NOT_GT / REPLACE_NOT_LTE and their signed counterparts (REPLACE_NOT_LTS / GTES / GTS / LTES): eliminate a Not vertex by flipping the comparison operator

The test signals for the NOT-of-comparison GT/GTE variants use unique shift amounts on their operands. Without them, intra-pass CSE would merge their DfgGt/DfgGte vertices with the identical vertices produced when the complementary LT/LTE patterns fire; the merged vertex would then have multiple sinks, the !hasMultipleSinks() guard would fail, and the GT/GTE patterns would never be applied.
This commit is contained in:
parent
aa68ae953e
commit
a3003a2d0f
|
|
@ -1270,6 +1270,58 @@ class V3DfgPeephole final : public DfgVisitor {
|
|||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Not of unsigned comparisons
|
||||
if (DfgLt* const p = vtxp->srcp()->cast<DfgLt>()) {
|
||||
APPLYING(REPLACE_NOT_LT) {
|
||||
replace(make<DfgGte>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (DfgGte* const p = vtxp->srcp()->cast<DfgGte>()) {
|
||||
APPLYING(REPLACE_NOT_GTE) {
|
||||
replace(make<DfgLt>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (DfgGt* const p = vtxp->srcp()->cast<DfgGt>()) {
|
||||
APPLYING(REPLACE_NOT_GT) {
|
||||
replace(make<DfgLte>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (DfgLte* const p = vtxp->srcp()->cast<DfgLte>()) {
|
||||
APPLYING(REPLACE_NOT_LTE) {
|
||||
replace(make<DfgGt>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Not of signed comparisons
|
||||
if (DfgLtS* const p = vtxp->srcp()->cast<DfgLtS>()) {
|
||||
APPLYING(REPLACE_NOT_LTS) {
|
||||
replace(make<DfgGteS>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (DfgGteS* const p = vtxp->srcp()->cast<DfgGteS>()) {
|
||||
APPLYING(REPLACE_NOT_GTES) {
|
||||
replace(make<DfgLtS>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (DfgGtS* const p = vtxp->srcp()->cast<DfgGtS>()) {
|
||||
APPLYING(REPLACE_NOT_GTS) {
|
||||
replace(make<DfgLteS>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (DfgLteS* const p = vtxp->srcp()->cast<DfgLteS>()) {
|
||||
APPLYING(REPLACE_NOT_LTES) {
|
||||
replace(make<DfgGtS>(p->fileline(), vtxp->dtype(), p->lhsp(), p->rhsp()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2007,10 +2059,28 @@ class V3DfgPeephole final : public DfgVisitor {
|
|||
|
||||
// Peephole patterns for unsigned division.
void visit(DfgDiv* const vtxp) override {
    // Nothing more to do if the shared binary-operator handling already
    // took care of this vertex (binary() is defined elsewhere in the class)
    if (binary(vtxp)) return;

    // x / 1 -> x: dividing by the constant one yields the dividend unchanged
    DfgConst* const divisorp = vtxp->rhsp()->cast<DfgConst>();
    if (divisorp && isEqOne(divisorp)) {
        APPLYING(REMOVE_DIV_ONE) {
            replace(vtxp->lhsp());
            return;
        }
    }
}
|
||||
|
||||
// Peephole patterns for signed division.
void visit(DfgDivS* const vtxp) override {
    // Nothing more to do if the shared binary-operator handling already
    // took care of this vertex (binary() is defined elsewhere in the class)
    if (binary(vtxp)) return;

    // x / 1 -> x: dividing by the constant one yields the dividend unchanged
    DfgConst* const divisorp = vtxp->rhsp()->cast<DfgConst>();
    if (divisorp && isEqOne(divisorp)) {
        APPLYING(REMOVE_DIVS_ONE) {
            replace(vtxp->lhsp());
            return;
        }
    }
}
|
||||
|
||||
void visit(DfgGt* const vtxp) override {
|
||||
|
|
@ -2179,10 +2249,48 @@ class V3DfgPeephole final : public DfgVisitor {
|
|||
|
||||
// Peephole patterns for unsigned multiplication.
void visit(DfgMul* const vtxp) override {
    // Nothing more to do if the shared binary-operator handling already
    // took care of this vertex (binary() is defined elsewhere in the class)
    if (binary(vtxp)) return;

    // Both patterns below need a constant as the left-hand operand.
    // NOTE(review): a constant on the right-hand side is not matched here;
    // presumably constants are canonicalised to the left beforehand - confirm.
    DfgConst* const factorp = vtxp->lhsp()->cast<DfgConst>();
    if (!factorp) return;

    // 0 * x -> 0
    if (isZero(factorp)) {
        APPLYING(REMOVE_MUL_ZERO) {
            replace(makeZero(vtxp->fileline(), vtxp->dtype()));
            return;
        }
    }

    // 1 * x -> x
    if (isEqOne(factorp)) {
        APPLYING(REMOVE_MUL_ONE) {
            replace(vtxp->rhsp());
            return;
        }
    }
}
|
||||
|
||||
// Peephole patterns for signed multiplication.
void visit(DfgMulS* const vtxp) override {
    // Nothing more to do if the shared binary-operator handling already
    // took care of this vertex (binary() is defined elsewhere in the class)
    if (binary(vtxp)) return;

    // Both patterns below need a constant as the left-hand operand.
    // NOTE(review): a constant on the right-hand side is not matched here;
    // presumably constants are canonicalised to the left beforehand - confirm.
    DfgConst* const factorp = vtxp->lhsp()->cast<DfgConst>();
    if (!factorp) return;

    // 0 * x -> 0
    if (isZero(factorp)) {
        APPLYING(REMOVE_MULS_ZERO) {
            replace(makeZero(vtxp->fileline(), vtxp->dtype()));
            return;
        }
    }

    // 1 * x -> x
    if (isEqOne(factorp)) {
        APPLYING(REMOVE_MULS_ONE) {
            replace(vtxp->rhsp());
            return;
        }
    }
}
|
||||
|
||||
void visit(DfgEq* const vtxp) override {
|
||||
|
|
@ -2493,6 +2601,14 @@ class V3DfgPeephole final : public DfgVisitor {
|
|||
|
||||
DfgVertex* const lhsp = vtxp->lhsp();
|
||||
DfgVertex* const rhsp = vtxp->rhsp();
|
||||
FileLine* const flp = vtxp->fileline();
|
||||
|
||||
if (isSame(lhsp, rhsp)) {
|
||||
APPLYING(FOLD_SELF_SUB) {
|
||||
replace(makeZero(flp, vtxp->dtype()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (DfgConst* const rConstp = rhsp->cast<DfgConst>()) {
|
||||
if (rConstp->isZero()) {
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@
|
|||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_LTES) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_LTS) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_NEQ) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SELF_SUB) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_UNARY) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_ARRAYSEL_SPLICE) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_ARRAYSEL_UNIT) \
|
||||
|
|
@ -77,9 +78,15 @@
|
|||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_COND_WITH_BRANCHES_SAME) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_COND_WITH_FALSE_CONDITION) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_COND_WITH_TRUE_CONDITION) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_DIVS_ONE) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_DIV_ONE) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_EQ_BIT_1) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_FULL_WIDTH_SEL) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_IDEMPOTENT_BINARY) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MULS_ONE) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MULS_ZERO) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MUL_ONE) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_MUL_ZERO) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_NEQ_BIT_0) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_NOT_NOT) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_OR_WITH_ZERO) \
|
||||
|
|
@ -138,6 +145,14 @@
|
|||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_EQ) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GT) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GTE) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GTES) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_GTS) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LT) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LTE) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LTES) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_LTS) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_NEQ) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_LHS_ZERO_AND_CONCAT_ZERO_RHS) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_ZERO_LHS_AND_CONCAT_RHS_ZERO) \
|
||||
|
|
|
|||
|
|
@ -68,6 +68,8 @@ module t (
|
|||
`signal(FOLD_BINARY_Concat, {const_a, const_b});
|
||||
`signal(FOLD_BINARY_Div, const_a / 64'd3);
|
||||
`signal(FOLD_BINARY_DivS, sconst_a / 64'sd3);
|
||||
`signal(REMOVE_DIV_ONE, rand_a / 64'd1);
|
||||
`signal(REMOVE_DIVS_ONE, srand_a / 64'sd1);
|
||||
`signal(FOLD_BINARY_Eq, const_a == const_b);
|
||||
`signal(FOLD_BINARY_Gt, const_a > const_b);
|
||||
`signal(FOLD_BINARY_GtS, sconst_a > sconst_b);
|
||||
|
|
@ -89,6 +91,10 @@ module t (
|
|||
`signal(FOLD_BINARY_ModDivS, sconst_a % 64'sd3);
|
||||
`signal(FOLD_BINARY_Mul, const_a * 64'd3);
|
||||
`signal(FOLD_BINARY_MulS, sconst_a * 64'sd3);
|
||||
`signal(REMOVE_MUL_ZERO, rand_a * 64'd0);
|
||||
`signal(REMOVE_MUL_ONE, rand_a * 64'd1);
|
||||
`signal(REMOVE_MULS_ZERO, srand_a * 64'sd0);
|
||||
`signal(REMOVE_MULS_ONE, srand_a * 64'sd1);
|
||||
`signal(FOLD_BINARY_Neq, const_a != const_b);
|
||||
`signal(FOLD_BINARY_Or, const_a | const_b);
|
||||
`signal(FOLD_BINARY_Pow, const_a ** 64'd2);
|
||||
|
|
@ -134,8 +140,21 @@ module t (
|
|||
`signal(REPLACE_EXTEND, 4'(rand_a[0]));
|
||||
`signal(PUSH_NOT_THROUGH_COND, ~(rand_a[0] ? rand_a[4:0] : 5'hb));
|
||||
`signal(REMOVE_NOT_NOT, ~~rand_a);
|
||||
`signal(REPLACE_NOT_NEQ, ~(rand_a != rand_b));
|
||||
`signal(REPLACE_NOT_EQ, ~(srand_a == srand_b));
|
||||
`signal(REPLACE_NOT_NEQ, ~(rand_a != rand_b));
|
||||
`signal(REPLACE_NOT_EQ, ~(srand_a == srand_b));
|
||||
// GT/GTE variants use shifted operands to avoid CSE interference. The peephole's
|
||||
// REPLACE_NOT_LT fires first and emits a new DfgGte(rand_a, rand_b); the intra-pass
|
||||
// CSE then merges it with the existing DfgGte(rand_a, rand_b) that is the source of
|
||||
// REPLACE_NOT_GTE, giving that vertex two sinks and causing its !hasMultipleSinks()
|
||||
// guard to fail. Unique shift amounts break the CSE match, so all eight patterns fire.
|
||||
`signal(REPLACE_NOT_GT, ~((rand_a >> 18) > (rand_b >> 18)));
|
||||
`signal(REPLACE_NOT_GTE, ~((rand_a >> 19) >= (rand_b >> 19)));
|
||||
`signal(REPLACE_NOT_GTES, ~((srand_a >>> 20) >= (srand_b >>> 20)));
|
||||
`signal(REPLACE_NOT_GTS, ~((srand_a >>> 21) > (srand_b >>> 21)));
|
||||
`signal(REPLACE_NOT_LT, ~(rand_a < rand_b));
|
||||
`signal(REPLACE_NOT_LTE, ~(rand_a <= rand_b));
|
||||
`signal(REPLACE_NOT_LTES, ~(srand_a <= srand_b));
|
||||
`signal(REPLACE_NOT_LTS, ~(srand_a < srand_b));
|
||||
`signal(REPLACE_NOT_OF_CONST, ~4'd0);
|
||||
`signal(REPLACE_DISTRIBUTIVE_AND_OR_ABAC, ((rand_a >> 10) | (rand_b >> 10)) & ((rand_a >> 10) | (srand_b >> 10)));
|
||||
`signal(REPLACE_DISTRIBUTIVE_AND_OR_ABCA, ((rand_a >> 11) | (rand_b >> 11)) & ((srand_b >> 11) | (rand_a >> 11)));
|
||||
|
|
@ -164,6 +183,7 @@ module t (
|
|||
`signal(REMOVE_OR_WITH_ZERO, 64'd0 | rand_a);
|
||||
`signal(REPLACE_TAUTOLOGICAL_OR, rand_a | ~rand_a);
|
||||
`signal(REPLACE_TAUTOLOGICAL_OR_3, ~(rand_a + 1) | ((rand_a + 1) | rand_b));
|
||||
`signal(FOLD_SELF_SUB, rand_a - rand_a);
|
||||
`signal(REMOVE_SUB_ZERO, rand_a - 64'd0);
|
||||
`signal(REPLACE_SUB_WITH_NOT, rand_a[0] - 1'b1);
|
||||
`signal(REMOVE_REDUNDANT_ZEXT_ON_RHS_OF_SHIFT, rand_a << {2'b0, rand_a[2:0]});
|
||||
|
|
|
|||
Loading…
Reference in New Issue