From f87fe4c3b4123a4c828fbf4c2874bad1ce149f28 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 4 Oct 2022 21:18:39 +0100 Subject: [PATCH] DfgPeephole: add constant folding for all integer types Also added a testing only -fno-const-before-dfg option, as otherwise V3Const eats up a lot of the simple inputs. A lot of the things V3Const swallows in the simple cases can make it to DFG in complex cases, or DFG itself can create them during optimization. In any case to save complexity of testing DFG constant folding, we use this option to turn off V3Const prior to the DFG passes in the relevant test. --- docs/guide/exe_verilator.rst | 6 + src/V3DfgPeephole.cpp | 841 +++++++++++++++++++------------ src/V3DfgPeephole.h | 12 +- src/V3Options.cpp | 1 + src/V3Options.h | 2 + src/Verilator.cpp | 4 +- test_regress/t/t_dfg_peephole.pl | 1 + test_regress/t/t_dfg_peephole.v | 129 +++-- 8 files changed, 636 insertions(+), 360 deletions(-) diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index d06d8ae12..4b1e0a458 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -501,6 +501,12 @@ Summary: .. option:: -fno-const +.. option:: -fno-const-before-dfg + + Do not apply any global expression folding prior to the DFG pass. This + option is solely for the purpose of DFG testing and should not be used + otherwise. + .. option:: -fno-const-bit-op-tree .. 
option:: -fno-dedup diff --git a/src/V3DfgPeephole.cpp b/src/V3DfgPeephole.cpp index 9e8a761ed..ca8d5b95e 100644 --- a/src/V3DfgPeephole.cpp +++ b/src/V3DfgPeephole.cpp @@ -78,6 +78,58 @@ template <> struct BitwiseToReductionImpl { using type = DfgRedOr; }; template <> struct BitwiseToReductionImpl { using type = DfgRedXor; }; template using BitwiseToReduction = typename BitwiseToReductionImpl::type; + +namespace { +template void foldOp(V3Number& out, const V3Number& src); +template <> void foldOp (V3Number& out, const V3Number& src) { out.opCLog2(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opCountOnes(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opAssign(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opExtendS(src, src.width()); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opLogNot(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opNegate(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opNot(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opOneHot(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opOneHot0(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opRedAnd(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opRedOr(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opRedXor(src); } + +template void foldOp(V3Number& out, const V3Number& lhs, const V3Number& rhs); +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opAdd(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opAnd(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opConcat(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& 
rhs) { out.opDiv(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opDivS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opEq(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opGt(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opGtS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opGte(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opGteS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLogAnd(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLogEq(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLogIf(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLogOr(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLt(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLtS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLte(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLteS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opModDiv(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opModDivS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opMul(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opMulS(lhs, rhs); } +template 
<> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opNeq(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opOr(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opPow(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opPowSS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opPowSU(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opPowUS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opRepl(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opShiftL(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opShiftR(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opShiftRS(lhs, rhs, lhs.width()); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opSub(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opXor(lhs, rhs); } +} // clang-format on class V3DfgPeephole final : public DfgVisitor { @@ -114,6 +166,40 @@ class V3DfgPeephole final : public DfgVisitor { // Create a new DfgConst vertex with the given width and value zero DfgConst* makeZero(FileLine* flp, uint32_t width) { return makeConst(flp, width, 0); } + // Constant fold unary vertex, return true if folded + template + bool foldUnary(Vertex* vtxp) { + static_assert(std::is_base_of::value, "Must invoke on unary"); + static_assert(std::is_final::value, "Must invoke on final class"); + if (DfgConst* const srcp = vtxp->srcp()->template cast()) { + APPLYING(FOLD_UNARY) { + DfgConst* const resultp = makeZero(vtxp->fileline(), vtxp->width()); 
+ foldOp(resultp->num(), srcp->num()); + vtxp->replaceWith(resultp); + return true; + } + } + return false; + } + + // Constant fold binary vertex, return true if folded + template + bool foldBinary(Vertex* vtxp) { + static_assert(std::is_base_of::value, "Must invoke on binary"); + static_assert(std::is_final::value, "Must invoke on final class"); + if (DfgConst* const lhsp = vtxp->lhsp()->template cast()) { + if (DfgConst* const rhsp = vtxp->rhsp()->template cast()) { + APPLYING(FOLD_BINARY) { + DfgConst* const resultp = makeZero(vtxp->fileline(), vtxp->width()); + foldOp(resultp->num(), lhsp->num(), rhsp->num()); + vtxp->replaceWith(resultp); + return true; + } + } + } + return false; + } + // Transformations that apply to all commutative binary vertices void commutativeBinary(DfgVertexBinary* vtxp) { DfgVertex* const lhsp = vtxp->source<0>(); @@ -291,6 +377,8 @@ class V3DfgPeephole final : public DfgVisitor { void optimizeReduction(Reduction* vtxp) { using Bitwise = ReductionToBitwise; + if (foldUnary(vtxp)) return; + DfgVertex* const srcp = vtxp->srcp(); FileLine* const flp = vtxp->fileline(); @@ -348,21 +436,6 @@ class V3DfgPeephole final : public DfgVisitor { } } } - - if (DfgConst* const constp = srcp->cast()) { - APPLYING(REPLACE_REDUCTION_OF_CONST) { - DfgConst* const replacementp = makeZero(flp, 1); - if VL_CONSTEXPR_CXX17 (std::is_same::value) { - replacementp->num().opRedAnd(constp->num()); - } else if VL_CONSTEXPR_CXX17 (std::is_same::value) { - replacementp->num().opRedOr(constp->num()); - } else { - replacementp->num().opRedXor(constp->num()); - } - vtxp->replaceWith(replacementp); - return; - } - } } void optimizeShiftRHS(DfgVertexBinary* vtxp) { @@ -379,26 +452,57 @@ class V3DfgPeephole final : public DfgVisitor { void visit(DfgVertex*) override {} + //========================================================================= + // DfgVertexUnary + //========================================================================= + + void visit(DfgCLog2* 
vtxp) override { + if (foldUnary(vtxp)) return; + } + + void visit(DfgCountOnes* vtxp) override { + if (foldUnary(vtxp)) return; + } + void visit(DfgExtend* vtxp) override { - const uint32_t extension = vtxp->width() - vtxp->srcp()->width(); - UASSERT_OBJ(extension > 0, vtxp, "Useless Extend"); + UASSERT_OBJ(vtxp->width() > vtxp->srcp()->width(), vtxp, "Invalid zero extend"); - FileLine* const flp = vtxp->fileline(); + if (foldUnary(vtxp)) return; - // Convert Extend into Concat with zeros. This simplifies other patterns as they only + // Convert all Extend into Concat with zeros. This simplifies other patterns as they only // need to handle Concat, which is more generic, and don't need special cases for // Extend. APPLYING(REPLACE_EXTEND) { + FileLine* const flp = vtxp->fileline(); DfgConcat* const replacementp = new DfgConcat{m_dfg, flp, vtxp->dtypep()}; - replacementp->lhsp(makeZero(flp, extension)); + replacementp->lhsp(makeZero(flp, vtxp->width() - vtxp->srcp()->width())); replacementp->rhsp(vtxp->srcp()); vtxp->replaceWith(replacementp); } } + void visit(DfgExtendS* vtxp) override { + UASSERT_OBJ(vtxp->width() > vtxp->srcp()->width(), vtxp, "Invalid sign extend"); + + if (foldUnary(vtxp)) return; + } + + void visit(DfgLogNot* vtxp) override { + UASSERT_OBJ(vtxp->width() == 1, vtxp, "Incorrect width"); + + if (foldUnary(vtxp)) return; + } + + void visit(DfgNegate* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->srcp()->width(), vtxp, "Mismatched width"); + + if (foldUnary(vtxp)) return; + } + void visit(DfgNot* vtxp) override { - UASSERT_OBJ(vtxp->width() == vtxp->srcp()->width(), vtxp, - "Mismatched width: " << vtxp->width() << " != " << vtxp->srcp()->width()); + UASSERT_OBJ(vtxp->width() == vtxp->srcp()->width(), vtxp, "Mismatched width"); + + if (foldUnary(vtxp)) return; // Not of Cond if (DfgCond* const condp = vtxp->srcp()->cast()) { @@ -460,22 +564,32 @@ class V3DfgPeephole final : public DfgVisitor { } } } - - // Not of Const - if (DfgConst* const 
constp = vtxp->srcp()->cast()) { - APPLYING(REPLACE_NOT_OF_CONST) { - DfgConst* const replacementp = makeZero(vtxp->fileline(), vtxp->width()); - replacementp->num().opNot(constp->num()); - vtxp->replaceWith(replacementp); - return; - } - } } + void visit(DfgOneHot* vtxp) override { + if (foldUnary(vtxp)) return; + } + + void visit(DfgOneHot0* vtxp) override { + if (foldUnary(vtxp)) return; + } + + void visit(DfgRedOr* vtxp) override { optimizeReduction(vtxp); } + + void visit(DfgRedAnd* vtxp) override { optimizeReduction(vtxp); } + + void visit(DfgRedXor* vtxp) override { optimizeReduction(vtxp); } + + //========================================================================= + // DfgVertexBinary - bitwise + //========================================================================= + void visit(DfgAnd* vtxp) override { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + if (foldBinary(vtxp)) return; + commutativeBinary(vtxp); associativeBinary(vtxp); @@ -515,15 +629,6 @@ class V3DfgPeephole final : public DfgVisitor { } if (DfgConst* const lhsConstp = lhsp->cast()) { - if (DfgConst* const rhsConstp = rhsp->cast()) { - APPLYING(REPLACE_AND_OF_CONST_AND_CONST) { - DfgConst* const replacementp = makeZero(flp, vtxp->width()); - replacementp->num().opAnd(lhsConstp->num(), rhsConstp->num()); - vtxp->replaceWith(replacementp); - return; - } - } - if (lhsConstp->isZero()) { APPLYING(REPLACE_AND_WITH_ZERO) { vtxp->replaceWith(lhsConstp); @@ -561,6 +666,8 @@ class V3DfgPeephole final : public DfgVisitor { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + if (foldBinary(vtxp)) return; + commutativeBinary(vtxp); associativeBinary(vtxp); @@ -627,15 +734,6 @@ class V3DfgPeephole final : public DfgVisitor { } if (DfgConst* const lhsConstp = 
lhsp->cast()) { - if (DfgConst* const rhsConstp = rhsp->cast()) { - APPLYING(REPLACE_OR_OF_CONST_AND_CONST) { - DfgConst* const replacementp = makeZero(flp, vtxp->width()); - replacementp->num().opOr(lhsConstp->num(), rhsConstp->num()); - vtxp->replaceWith(replacementp); - return; - } - } - if (lhsConstp->isZero()) { APPLYING(REMOVE_OR_WITH_ZERO) { vtxp->replaceWith(rhsp); @@ -674,6 +772,8 @@ class V3DfgPeephole final : public DfgVisitor { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + if (foldBinary(vtxp)) return; + commutativeBinary(vtxp); associativeBinary(vtxp); @@ -700,266 +800,44 @@ class V3DfgPeephole final : public DfgVisitor { tryPushBitwiseOpThroughConcat(vtxp, lConstp, rConcatp); return; } - if (DfgConst* const rConstp = rhsp->cast()) { - APPLYING(REPLACE_XOR_OF_CONST_AND_CONST) { - DfgConst* const replacementp = makeZero(flp, vtxp->width()); - replacementp->num().opXor(lConstp->num(), rConstp->num()); - vtxp->replaceWith(replacementp); - return; - } - } } if (tryPushBitwiseOpThroughReductions(vtxp)) return; } + //========================================================================= + // DfgVertexBinary - other + //========================================================================= + void visit(DfgAdd* vtxp) override { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + if (foldBinary(vtxp)) return; + commutativeBinary(vtxp); associativeBinary(vtxp); } - void visit(DfgSub* vtxp) override { - DfgVertex* const lhsp = vtxp->lhsp(); - DfgVertex* const rhsp = vtxp->rhsp(); - UASSERT_OBJ(lhsp->width() == rhsp->width(), vtxp, "Width mismatch"); - UASSERT_OBJ(lhsp->width() == vtxp->width(), vtxp, "Width mismatch"); - if (DfgConst* const rConstp = rhsp->cast()) { - if (rConstp->isZero()) { - APPLYING(REMOVE_SUB_ZERO) { - 
vtxp->replaceWith(lhsp); - return; - } - } - if (vtxp->width() == 1 && rConstp->toU32() == 1) { - APPLYING(REPLACE_SUB_WITH_NOT) { - DfgNot* const replacementp = new DfgNot{m_dfg, vtxp->fileline(), m_bitDType}; - replacementp->srcp(lhsp); - vtxp->replaceWith(replacementp); - return; - } - } - } - } - - void visit(DfgShiftL* vtxp) override { optimizeShiftRHS(vtxp); } - void visit(DfgShiftR* vtxp) override { optimizeShiftRHS(vtxp); } - void visit(DfgShiftRS* vtxp) override { optimizeShiftRHS(vtxp); } - - void visit(DfgEq* vtxp) override { - commutativeBinary(vtxp); - - DfgVertex* const lhsp = vtxp->lhsp(); - DfgVertex* const rhsp = vtxp->rhsp(); - - if (DfgConst* const lhsConstp = lhsp->cast()) { - if (DfgConst* const rhsConstp = rhsp->cast()) { - APPLYING(REPLACE_EQ_OF_CONST_AND_CONST) { - DfgConst* const replacementp = makeZero(vtxp->fileline(), 1); - replacementp->num().opEq(lhsConstp->num(), rhsConstp->num()); - vtxp->replaceWith(replacementp); - return; - } - } - - if (DfgConcat* const rhsConcatp = rhsp->cast()) { - if (tryPushCompareOpThroughConcat(vtxp, lhsConstp, rhsConcatp)) return; - } - } - } - - void visit(DfgSel* vtxp) override { - DfgVertex* const fromp = vtxp->fromp(); - DfgConst* const lsbp = vtxp->lsbp()->cast(); - DfgConst* const widthp = vtxp->widthp()->cast(); - if (!lsbp || !widthp) return; - - FileLine* const flp = vtxp->fileline(); - - UASSERT_OBJ(lsbp->toI32() >= 0, vtxp, "Negative LSB in Sel"); - - const uint32_t lsb = lsbp->toU32(); - const uint32_t width = widthp->toU32(); - const uint32_t msb = lsb + width - 1; - - UASSERT_OBJ(width == vtxp->width(), vtxp, "Incorrect Sel width"); - - // Full width select, replace with the source. 
- if (fromp->width() == width) { - UASSERT_OBJ(lsb == 0, fromp, "OOPS"); - APPLYING(REMOVE_FULL_WIDTH_SEL) { - vtxp->replaceWith(fromp); - return; - } - } - - // Sel from Concat - if (DfgConcat* const concatp = fromp->cast()) { - DfgVertex* const lhsp = concatp->lhsp(); - DfgVertex* const rhsp = concatp->rhsp(); - - if (msb < rhsp->width()) { - // If the select is entirely from rhs, then replace with sel from rhs - APPLYING(REMOVE_SEL_FROM_RHS_OF_CONCAT) { // - vtxp->fromp(rhsp); - } - } else if (lsb >= rhsp->width()) { - // If the select is entirely from the lhs, then replace with sel from lhs - APPLYING(REMOVE_SEL_FROM_LHS_OF_CONCAT) { - vtxp->fromp(lhsp); - vtxp->lsbp(makeI32(flp, lsb - rhsp->width())); - } - } else if (lsb == 0 || msb == concatp->width() - 1 // - || lhsp->is() || rhsp->is() // - || !concatp->hasMultipleSinks()) { - // If the select straddles both sides, but at least one of the sides is wholly - // selected, or at least one of the sides is a Const, or this concat has no other - // use, then push the Sel past the Concat - APPLYING(PUSH_SEL_THROUGH_CONCAT) { - const uint32_t rSelWidth = rhsp->width() - lsb; - const uint32_t lSelWidth = width - rSelWidth; - - // The new Lhs vertex - DfgSel* const newLhsp = new DfgSel{m_dfg, flp, dtypeForWidth(lSelWidth)}; - newLhsp->fromp(lhsp); - newLhsp->lsbp(makeI32(lsbp->fileline(), 0)); - newLhsp->widthp(makeI32(widthp->fileline(), lSelWidth)); - - // The new Rhs vertex - DfgSel* const newRhsp = new DfgSel{m_dfg, flp, dtypeForWidth(rSelWidth)}; - newRhsp->fromp(rhsp); - newRhsp->lsbp(makeI32(lsbp->fileline(), lsb)); - newRhsp->widthp(makeI32(widthp->fileline(), rSelWidth)); - - // The replacement Concat vertex - DfgConcat* const newConcat - = new DfgConcat{m_dfg, concatp->fileline(), vtxp->dtypep()}; - newConcat->lhsp(newLhsp); - newConcat->rhsp(newRhsp); - - // Replace this vertex - vtxp->replaceWith(newConcat); - return; - } - } - } - - if (DfgReplicate* const repp = fromp->cast()) { - // If the Sel is 
wholly into the source of the Replicate, push the Sel through the - // Replicate and apply it directly to the source of the Replicate. - const uint32_t srcWidth = repp->srcp()->width(); - if (width <= srcWidth) { - const uint32_t newLsb = lsb % srcWidth; - if (newLsb + width <= srcWidth) { - APPLYING(PUSH_SEL_THROUGH_REPLICATE) { - vtxp->fromp(repp->srcp()); - vtxp->lsbp(makeI32(flp, newLsb)); + void visit(DfgArraySel* vtxp) override { + if (DfgConst* const idxp = vtxp->bitp()->cast()) { + if (DfgVarArray* const varp = vtxp->fromp()->cast()) { + const uint32_t idx = idxp->toU32(); + if (DfgVertex* const driverp = varp->driverAt(idx)) { + APPLYING(INLINE_ARRAYSEL) { + vtxp->replaceWith(driverp); + return; } } } } - - // Sel from Not - if (DfgNot* const notp = fromp->cast()) { - // Replace "Sel from Not" with "Not of Sel" - if (!notp->hasMultipleSinks()) { - UASSERT_OBJ(notp->srcp()->width() == notp->width(), notp, "Mismatched widths"); - APPLYING(PUSH_SEL_THROUGH_NOT) { - // Make Sel select from source of Not - vtxp->fromp(notp->srcp()); - // Add Not after Sel - DfgNot* const replacementp - = new DfgNot{m_dfg, notp->fileline(), vtxp->dtypep()}; - vtxp->replaceWith(replacementp); - replacementp->srcp(vtxp); - } - } - } - - // Sel from Sel - if (DfgSel* const selp = fromp->cast()) { - UASSERT_OBJ(widthp->toU32() <= selp->width(), vtxp, "Out of bound Sel"); - if (DfgConst* const sourceLsbp = selp->lsbp()->cast()) { - UASSERT_OBJ(sourceLsbp->toI32() >= 0, selp, "negative"); - UASSERT_OBJ(selp->widthp()->as()->toU32() >= widthp->toU32(), selp, - "negative"); - APPLYING(REPLACE_SEL_FROM_SEL) { - // Make this Sel select from the source of the source Sel - vtxp->fromp(selp->fromp()); - // Adjust LSB - vtxp->lsbp(makeI32(flp, lsb + sourceLsbp->toU32())); - } - } - } - - // Sel from Cond - if (DfgCond* const condp = fromp->cast()) { - // If at least one of the branches are a constant, push the select past the cond - if (condp->thenp()->is() || condp->elsep()->is()) { - 
APPLYING(PUSH_SEL_THROUGH_COND) { - // The new 'then' vertex - DfgSel* const newThenp = new DfgSel{m_dfg, flp, vtxp->dtypep()}; - newThenp->fromp(condp->thenp()); - newThenp->lsbp(makeI32(lsbp->fileline(), lsb)); - newThenp->widthp(makeI32(widthp->fileline(), width)); - - // The new 'else' vertex - DfgSel* const newElsep = new DfgSel{m_dfg, flp, vtxp->dtypep()}; - newElsep->fromp(condp->elsep()); - newElsep->lsbp(makeI32(lsbp->fileline(), lsb)); - newElsep->widthp(makeI32(widthp->fileline(), width)); - - // The replacement Cond vertex - DfgCond* const newCondp - = new DfgCond{m_dfg, condp->fileline(), vtxp->dtypep()}; - newCondp->condp(condp->condp()); - newCondp->thenp(newThenp); - newCondp->elsep(newElsep); - - // Replace this vertex - vtxp->replaceWith(newCondp); - return; - } - } - } - - // Sel from ShiftL - if (DfgShiftL* const shiftLp = fromp->cast()) { - // If selecting bottom bits of left shift, push the Sel before the shift - if (lsb == 0) { - UASSERT_OBJ(shiftLp->lhsp()->width() >= width, vtxp, "input of shift narrow"); - APPLYING(PUSH_SEL_THROUGH_SHIFTL) { - vtxp->fromp(shiftLp->lhsp()); - DfgShiftL* const newShiftLp - = new DfgShiftL{m_dfg, shiftLp->fileline(), vtxp->dtypep()}; - vtxp->replaceWith(newShiftLp); - newShiftLp->lhsp(vtxp); - newShiftLp->rhsp(shiftLp->rhsp()); - } - } - } - - // Sel from Const - if (DfgConst* const constp = fromp->cast()) { - APPLYING(REPLACE_SEL_FROM_CONST) { - DfgConst* const replacementp = makeZero(flp, width); - replacementp->num().opSel(constp->num(), msb, lsb); - vtxp->replaceWith(replacementp); - return; - } - } } - void visit(DfgRedOr* vtxp) override { optimizeReduction(vtxp); } - void visit(DfgRedAnd* vtxp) override { optimizeReduction(vtxp); } - void visit(DfgRedXor* vtxp) override { optimizeReduction(vtxp); } - void visit(DfgConcat* vtxp) override { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width() + vtxp->rhsp()->width(), vtxp, - "Incorrect Concat width: " << vtxp->width() << " != " << vtxp->lhsp()->width() - << 
" + " << vtxp->rhsp()->width()); + "Inconsisend Concat"); + + if (foldBinary(vtxp)) return; DfgVertex* const lhsp = vtxp->lhsp(); DfgVertex* const rhsp = vtxp->rhsp(); @@ -995,13 +873,6 @@ class V3DfgPeephole final : public DfgVisitor { DfgConst* const lConstp = lhsp->cast(); DfgConst* const rConstp = rhsp->cast(); - if (lConstp && rConstp) { - APPLYING(REPLACE_CONCAT_OF_CONSTS) { - vtxp->replaceWith(joinConsts(lConstp, rConstp, flp)); - return; - } - } - if (lConstp) { if (DfgConcat* const rConcatp = rhsp->cast()) { if (DfgConst* const rlConstp = rConcatp->lhsp()->cast()) { @@ -1162,17 +1033,371 @@ class V3DfgPeephole final : public DfgVisitor { } } + void visit(DfgDiv* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgDivS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgEq* vtxp) override { + if (foldBinary(vtxp)) return; + + commutativeBinary(vtxp); + + DfgVertex* const lhsp = vtxp->lhsp(); + DfgVertex* const rhsp = vtxp->rhsp(); + + if (DfgConst* const lhsConstp = lhsp->cast()) { + if (DfgConcat* const rhsConcatp = rhsp->cast()) { + if (tryPushCompareOpThroughConcat(vtxp, lhsConstp, rhsConcatp)) return; + } + } + } + + void visit(DfgGt* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgGtS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgGte* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgGteS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLogAnd* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLogEq* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLogIf* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLogOr* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLt* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLtS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLte* vtxp) override { + if 
(foldBinary(vtxp)) return; + } + + void visit(DfgLteS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgModDiv* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgModDivS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgMul* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgMulS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgNeq* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgPow* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgPowSS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgPowSU* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgPowUS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgReplicate* vtxp) override { + if (vtxp->width() == vtxp->srcp()->width()) { + APPLYING(REMOVE_REPLICATE_ONCE) { + vtxp->replaceWith(vtxp->srcp()); + return; + } + } + + if (foldBinary(vtxp)) return; + } + + void visit(DfgShiftL* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched width"); + + if (foldBinary(vtxp)) return; + + optimizeShiftRHS(vtxp); + } + + void visit(DfgShiftR* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched width"); + + if (foldBinary(vtxp)) return; + + optimizeShiftRHS(vtxp); + } + + void visit(DfgShiftRS* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched width"); + + if (foldBinary(vtxp)) return; + + optimizeShiftRHS(vtxp); + } + + void visit(DfgSub* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); + UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + + if (foldBinary(vtxp)) return; + + DfgVertex* const lhsp = vtxp->lhsp(); + DfgVertex* const rhsp = vtxp->rhsp(); + + if (DfgConst* const rConstp = rhsp->cast()) { + if (rConstp->isZero()) 
{ + APPLYING(REMOVE_SUB_ZERO) { + vtxp->replaceWith(lhsp); + return; + } + } + if (vtxp->width() == 1 && rConstp->toU32() == 1) { + APPLYING(REPLACE_SUB_WITH_NOT) { + DfgNot* const replacementp = new DfgNot{m_dfg, vtxp->fileline(), m_bitDType}; + replacementp->srcp(lhsp); + vtxp->replaceWith(replacementp); + return; + } + } + } + } + + //========================================================================= + // DfgVertexTernary + //========================================================================= + + void visit(DfgSel* vtxp) override { + DfgVertex* const fromp = vtxp->fromp(); + DfgConst* const lsbp = vtxp->lsbp()->cast(); + DfgConst* const widthp = vtxp->widthp()->cast(); + if (!lsbp || !widthp) return; + + FileLine* const flp = vtxp->fileline(); + + UASSERT_OBJ(lsbp->toI32() >= 0, vtxp, "Negative LSB in Sel"); + + const uint32_t lsb = lsbp->toU32(); + const uint32_t width = widthp->toU32(); + const uint32_t msb = lsb + width - 1; + + UASSERT_OBJ(width == vtxp->width(), vtxp, "Incorrect Sel width"); + + if (DfgConst* const constp = fromp->cast()) { + APPLYING(FOLD_SEL) { + DfgConst* const replacementp = makeZero(flp, width); + replacementp->num().opSel(constp->num(), msb, lsb); + vtxp->replaceWith(replacementp); + return; + } + } + + // Full width select, replace with the source. 
+ if (fromp->width() == width) { + UASSERT_OBJ(lsb == 0, fromp, "OOPS"); + APPLYING(REMOVE_FULL_WIDTH_SEL) { + vtxp->replaceWith(fromp); + return; + } + } + + // Sel from Concat + if (DfgConcat* const concatp = fromp->cast()) { + DfgVertex* const lhsp = concatp->lhsp(); + DfgVertex* const rhsp = concatp->rhsp(); + + if (msb < rhsp->width()) { + // If the select is entirely from rhs, then replace with sel from rhs + APPLYING(REMOVE_SEL_FROM_RHS_OF_CONCAT) { // + vtxp->fromp(rhsp); + } + } else if (lsb >= rhsp->width()) { + // If the select is entirely from the lhs, then replace with sel from lhs + APPLYING(REMOVE_SEL_FROM_LHS_OF_CONCAT) { + vtxp->fromp(lhsp); + vtxp->lsbp(makeI32(flp, lsb - rhsp->width())); + } + } else if (lsb == 0 || msb == concatp->width() - 1 // + || lhsp->is() || rhsp->is() // + || !concatp->hasMultipleSinks()) { + // If the select straddles both sides, but at least one of the sides is wholly + // selected, or at least one of the sides is a Const, or this concat has no other + // use, then push the Sel past the Concat + APPLYING(PUSH_SEL_THROUGH_CONCAT) { + const uint32_t rSelWidth = rhsp->width() - lsb; + const uint32_t lSelWidth = width - rSelWidth; + + // The new Lhs vertex + DfgSel* const newLhsp = new DfgSel{m_dfg, flp, dtypeForWidth(lSelWidth)}; + newLhsp->fromp(lhsp); + newLhsp->lsbp(makeI32(lsbp->fileline(), 0)); + newLhsp->widthp(makeI32(widthp->fileline(), lSelWidth)); + + // The new Rhs vertex + DfgSel* const newRhsp = new DfgSel{m_dfg, flp, dtypeForWidth(rSelWidth)}; + newRhsp->fromp(rhsp); + newRhsp->lsbp(makeI32(lsbp->fileline(), lsb)); + newRhsp->widthp(makeI32(widthp->fileline(), rSelWidth)); + + // The replacement Concat vertex + DfgConcat* const newConcat + = new DfgConcat{m_dfg, concatp->fileline(), vtxp->dtypep()}; + newConcat->lhsp(newLhsp); + newConcat->rhsp(newRhsp); + + // Replace this vertex + vtxp->replaceWith(newConcat); + return; + } + } + } + + if (DfgReplicate* const repp = fromp->cast()) { + // If the Sel is 
wholly into the source of the Replicate, push the Sel through the + // Replicate and apply it directly to the source of the Replicate. + const uint32_t srcWidth = repp->srcp()->width(); + if (width <= srcWidth) { + const uint32_t newLsb = lsb % srcWidth; + if (newLsb + width <= srcWidth) { + APPLYING(PUSH_SEL_THROUGH_REPLICATE) { + vtxp->fromp(repp->srcp()); + vtxp->lsbp(makeI32(flp, newLsb)); + } + } + } + } + + // Sel from Not + if (DfgNot* const notp = fromp->cast()) { + // Replace "Sel from Not" with "Not of Sel" + if (!notp->hasMultipleSinks()) { + UASSERT_OBJ(notp->srcp()->width() == notp->width(), notp, "Mismatched widths"); + APPLYING(PUSH_SEL_THROUGH_NOT) { + // Make Sel select from source of Not + vtxp->fromp(notp->srcp()); + // Add Not after Sel + DfgNot* const replacementp + = new DfgNot{m_dfg, notp->fileline(), vtxp->dtypep()}; + vtxp->replaceWith(replacementp); + replacementp->srcp(vtxp); + } + } + } + + // Sel from Sel + if (DfgSel* const selp = fromp->cast()) { + UASSERT_OBJ(widthp->toU32() <= selp->width(), vtxp, "Out of bound Sel"); + if (DfgConst* const sourceLsbp = selp->lsbp()->cast()) { + UASSERT_OBJ(sourceLsbp->toI32() >= 0, selp, "negative"); + UASSERT_OBJ(selp->widthp()->as()->toU32() >= widthp->toU32(), selp, + "negative"); + APPLYING(REPLACE_SEL_FROM_SEL) { + // Make this Sel select from the source of the source Sel + vtxp->fromp(selp->fromp()); + // Adjust LSB + vtxp->lsbp(makeI32(flp, lsb + sourceLsbp->toU32())); + } + } + } + + // Sel from Cond + if (DfgCond* const condp = fromp->cast()) { + // If at least one of the branches are a constant, push the select past the cond + if (condp->thenp()->is() || condp->elsep()->is()) { + APPLYING(PUSH_SEL_THROUGH_COND) { + // The new 'then' vertex + DfgSel* const newThenp = new DfgSel{m_dfg, flp, vtxp->dtypep()}; + newThenp->fromp(condp->thenp()); + newThenp->lsbp(makeI32(lsbp->fileline(), lsb)); + newThenp->widthp(makeI32(widthp->fileline(), width)); + + // The new 'else' vertex + DfgSel* const 
newElsep = new DfgSel{m_dfg, flp, vtxp->dtypep()}; + newElsep->fromp(condp->elsep()); + newElsep->lsbp(makeI32(lsbp->fileline(), lsb)); + newElsep->widthp(makeI32(widthp->fileline(), width)); + + // The replacement Cond vertex + DfgCond* const newCondp + = new DfgCond{m_dfg, condp->fileline(), vtxp->dtypep()}; + newCondp->condp(condp->condp()); + newCondp->thenp(newThenp); + newCondp->elsep(newElsep); + + // Replace this vertex + vtxp->replaceWith(newCondp); + return; + } + } + } + + // Sel from ShiftL + if (DfgShiftL* const shiftLp = fromp->cast()) { + // If selecting bottom bits of left shift, push the Sel before the shift + if (lsb == 0) { + UASSERT_OBJ(shiftLp->lhsp()->width() >= width, vtxp, "input of shift narrow"); + APPLYING(PUSH_SEL_THROUGH_SHIFTL) { + vtxp->fromp(shiftLp->lhsp()); + DfgShiftL* const newShiftLp + = new DfgShiftL{m_dfg, shiftLp->fileline(), vtxp->dtypep()}; + vtxp->replaceWith(newShiftLp); + newShiftLp->lhsp(vtxp); + newShiftLp->rhsp(shiftLp->rhsp()); + } + } + } + } + void visit(DfgCond* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->thenp()->width(), vtxp, "Width mismatch"); + UASSERT_OBJ(vtxp->width() == vtxp->elsep()->width(), vtxp, "Width mismatch"); + DfgVertex* const condp = vtxp->condp(); DfgVertex* const thenp = vtxp->thenp(); DfgVertex* const elsep = vtxp->elsep(); - UASSERT_OBJ(vtxp->width() == thenp->width(), vtxp, "Width mismatch"); - UASSERT_OBJ(vtxp->width() == elsep->width(), vtxp, "Width mismatch"); + FileLine* const flp = vtxp->fileline(); if (condp->width() != 1) return; - FileLine* const flp = vtxp->fileline(); - if (condp->isOnes()) { APPLYING(REMOVE_COND_WITH_TRUE_CONDITION) { vtxp->replaceWith(thenp); @@ -1319,19 +1544,9 @@ class V3DfgPeephole final : public DfgVisitor { } } - void visit(DfgArraySel* vtxp) override { - if (DfgConst* const idxp = vtxp->bitp()->cast()) { - if (DfgVarArray* const varp = vtxp->fromp()->cast()) { - const uint32_t idx = idxp->toU32(); - if (DfgVertex* const driverp = 
varp->driverAt(idx)) { - APPLYING(INLINE_ARRAYSEL) { - vtxp->replaceWith(driverp); - return; - } - } - } - } - } + //========================================================================= + // DfgVertexVar + //========================================================================= void visit(DfgVarPacked* vtxp) override { // Inline variables fully driven by the logic represented by the DFG diff --git a/src/V3DfgPeephole.h b/src/V3DfgPeephole.h index 5d346456d..bec8a5c8f 100644 --- a/src/V3DfgPeephole.h +++ b/src/V3DfgPeephole.h @@ -26,6 +26,9 @@ // Enumeration of each peephole optimization. Must be kept in sorted order (enforced by tests). // clang-format off #define FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION(macro) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_BINARY) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SEL) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_UNARY) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_ARRAYSEL) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_VAR) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PULL_NOTS_THROUGH_COND) \ @@ -49,16 +52,15 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_NOT_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_OR_WITH_ZERO) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_REDUNDANT_ZEXT_ON_RHS_OF_SHIFT) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_REPLICATE_ONCE) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_SEL_FROM_LHS_OF_CONCAT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_SEL_FROM_RHS_OF_CONCAT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_SUB_ZERO) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_WIDTH_ONE_REDUCTION) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_XOR_WITH_ZERO) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_CONST_AND_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NEQ) \ 
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_WITH_ZERO) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_CONCAT_OF_CONSTS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_CONCAT_SEL_BOTTOM_AND_ZERO_WITH_SHIFTL) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_CONCAT_ZERO_AND_SEL_TOP_WITH_SHIFTR) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_COND_DEC) \ @@ -68,7 +70,6 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_COND_WITH_THEN_BRANCH_ONES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_COND_WITH_THEN_BRANCH_ZERO) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_CONTRADICTORY_AND) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_EQ_OF_CONST_AND_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_EXTEND) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS) \ @@ -76,19 +77,14 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_CONSTS_ON_RHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_EQ) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_NEQ) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_OF_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_LHS_ZERO_AND_CONCAT_ZERO_RHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_ZERO_LHS_AND_CONCAT_RHS_ZERO) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONST_AND_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_NOT_AND_NEQ) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_NOT_AND_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_WITH_ONES) \ - 
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_REDUCTION_OF_CONST) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_SEL_FROM_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_SEL_FROM_SEL) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_SUB_WITH_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_TAUTOLOGICAL_OR) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_OF_CONST_AND_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_ONES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, RIGHT_LEANING_ASSOC) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, RIGHT_LEANING_CONCAT) \ diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 08e5902db..a13c15c48 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1132,6 +1132,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char DECL_OPTION("-fcase", FOnOff, &m_fCase); DECL_OPTION("-fcombine", FOnOff, &m_fCombine); DECL_OPTION("-fconst", FOnOff, &m_fConst); + DECL_OPTION("-fconst-before-dfg", FOnOff, &m_fConstBeforeDfg); DECL_OPTION("-fconst-bit-op-tree", FOnOff, &m_fConstBitOpTree); DECL_OPTION("-fdedup", FOnOff, &m_fDedupe); DECL_OPTION("-fdfg", CbFOnOff, [this](bool flag) { diff --git a/src/V3Options.h b/src/V3Options.h index 8eeff57b4..7503408ad 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -349,6 +349,7 @@ private: bool m_fCase; // main switch: -fno-case: case tree conversion bool m_fCombine; // main switch: -fno-combine: common icode packing bool m_fConst; // main switch: -fno-const: constant folding + bool m_fConstBeforeDfg = true; // main switch: -fno-const-before-dfg for testing only! 
bool m_fConstBitOpTree; // main switch: -fno-const-bit-op-tree constant bit op tree bool m_fDedupe; // main switch: -fno-dedupe: logic deduplication bool m_fDfgPeephole = true; // main switch: -fno-dfg-peephole @@ -598,6 +599,7 @@ public: bool fCase() const { return m_fCase; } bool fCombine() const { return m_fCombine; } bool fConst() const { return m_fConst; } + bool fConstBeforeDfg() const { return m_fConstBeforeDfg; } bool fConstBitOpTree() const { return m_fConstBitOpTree; } bool fDedupe() const { return m_fDedupe; } bool fDfgPeephole() const { return m_fDfgPeephole; } diff --git a/src/Verilator.cpp b/src/Verilator.cpp index 2bd4798a7..aaf666ea0 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -189,7 +189,7 @@ static void process() { // Push constants, but only true constants preserving liveness // so V3Undriven sees variables to be eliminated, ie "if (0 && foo) ..." - V3Const::constifyAllLive(v3Global.rootp()); + if (v3Global.opt.fConstBeforeDfg()) V3Const::constifyAllLive(v3Global.rootp()); // Signal based lint checks, no change to structures // Must be before first constification pass drops dead code @@ -209,7 +209,7 @@ static void process() { } // Propagate constants into expressions - V3Const::constifyAllLint(v3Global.rootp()); + if (v3Global.opt.fConstBeforeDfg()) V3Const::constifyAllLint(v3Global.rootp()); if (!(v3Global.opt.xmlOnly() && !v3Global.opt.flatten())) { // Split packed variables into multiple pieces to resolve UNOPTFLAT. diff --git a/test_regress/t/t_dfg_peephole.pl b/test_regress/t/t_dfg_peephole.pl index c09227ce8..952898fbb 100755 --- a/test_regress/t/t_dfg_peephole.pl +++ b/test_regress/t/t_dfg_peephole.pl @@ -67,6 +67,7 @@ compile( compile( verilator_flags2 => ["--stats", "--build", "--exe", "+incdir+$Self->{obj_dir}", "-Mdir", "$Self->{obj_dir}/obj_opt", "--prefix", "Vopt", + "-fno-const-before-dfg", # Otherwise V3Const makes testing painful "--dump-dfg", # To fill code coverage "-CFLAGS \"-I .. 
-I ../obj_ref\"", "../obj_ref/Vref__ALL.a", diff --git a/test_regress/t/t_dfg_peephole.v b/test_regress/t/t_dfg_peephole.v index bc8251345..bf7a1a721 100644 --- a/test_regress/t/t_dfg_peephole.v +++ b/test_regress/t/t_dfg_peephole.v @@ -4,7 +4,7 @@ // any use, without warranty, 2022 by Geza Lore. // SPDX-License-Identifier: CC0-1.0 -`define signal(name, expr) wire [$bits(expr)-1:0] ``name = expr; +`define signal(name, expr) wire [$bits(expr)-1:0] ``name = expr module t ( `include "portlist.vh" // Boilerplate generated by t_dfg_peephole.pl @@ -23,16 +23,69 @@ module t ( wire logic [127:0] rand_aa = {2{rand_a}}; wire logic [63:0] const_a; wire logic [63:0] const_b; + wire logic signed [63:0] sconst_a; + wire logic signed [63:0] sconst_b; wire logic [63:0] array [3:0]; assign array[0] = (rand_a << 32) | (rand_a >> 32); assign array[1] = (rand_a << 16) | (rand_a >> 48); - // 64 bit all 0 but don't tell V3Const -`define ZERO (const_a & ~const_a) - // 64 bit all 1 but don't tell V3Const -`define ONES (const_a | ~const_a) - // x, but in a way only DFG understands -`define DFG(x) ((|`ONES) ? (x) : (~x)) + // x, but with evaluation slightly delayed in DfgPeephole +`define DFG(x) (&16'hffff ? 
(x) : (~x)) + + `signal(FOLD_UNARY_CLog2, $clog2(const_a)); + `signal(FOLD_UNARY_CountOnes, $countones(const_a)); + `signal(FOLD_UNARY_IsUnknown, $isunknown(const_a)); + `signal(FOLD_UNARY_LogNot, !const_a[0]); + `signal(FOLD_UNARY_Negate, -const_a); + `signal(FOLD_UNARY_Not, ~const_a); + `signal(FOLD_UNARY_OneHot, $onehot(const_a)); + `signal(FOLD_UNARY_OneHot0, $onehot0(const_a)); + `signal(FOLD_UNARY_RedAnd, &const_a); + `signal(FOLD_UNARY_RedOr, |const_a); + `signal(FOLD_UNARY_RedXor, ^const_a); + // verilator lint_off WIDTH + wire logic [79:0] tmp_FOLD_UNARY_Extend = const_a; + wire logic signed [79:0] tmp_FOLD_UNARY_ExtendS = sconst_a; + //verilator lint_on WIDTH + `signal(FOLD_UNARY_Extend, tmp_FOLD_UNARY_Extend); + `signal(FOLD_UNARY_ExtendS, tmp_FOLD_UNARY_ExtendS); + + `signal(FOLD_BINARY_Add, const_a + const_b); + `signal(FOLD_BINARY_And, const_a & const_b); + `signal(FOLD_BINARY_Concat, {const_a, const_b}); + `signal(FOLD_BINARY_Div, const_a / 64'd3); + `signal(FOLD_BINARY_DivS, sconst_a / 64'sd3); + `signal(FOLD_BINARY_Eq, const_a == const_b); + `signal(FOLD_BINARY_Gt, const_a > const_b); + `signal(FOLD_BINARY_GtS, sconst_a > sconst_b); + `signal(FOLD_BINARY_Gte, const_a >= const_b); + `signal(FOLD_BINARY_GteS, sconst_a >= sconst_b); + `signal(FOLD_BINARY_LogAnd, const_a[0] && const_b[0]); + `signal(FOLD_BINARY_LogEq, const_a[0] <-> const_b[0]); + `signal(FOLD_BINARY_LogIf, const_a[0] -> const_b[0]); + `signal(FOLD_BINARY_LogOr, const_a[0] || const_b[0]); + `signal(FOLD_BINARY_Lt, const_a < const_b); + `signal(FOLD_BINARY_LtS, sconst_a < sconst_b); + `signal(FOLD_BINARY_Lte, const_a <= const_b); + `signal(FOLD_BINARY_LteS, sconst_a <= sconst_b); + `signal(FOLD_BINARY_ModDiv, const_a % 64'd3); + `signal(FOLD_BINARY_ModDivS, sconst_a % 64'sd3); + `signal(FOLD_BINARY_Mul, const_a * 64'd3); + `signal(FOLD_BINARY_MulS, sconst_a * 64'sd3); + `signal(FOLD_BINARY_Neq, const_a != const_b); + `signal(FOLD_BINARY_Or, const_a | const_b); + `signal(FOLD_BINARY_Pow, 
const_a ** 64'd2); + `signal(FOLD_BINARY_PowSS, sconst_a ** 64'sd2); + `signal(FOLD_BINARY_PowSU, sconst_a ** 64'd2); + `signal(FOLD_BINARY_PowUS, const_a ** 64'sd2); + `signal(FOLD_BINARY_Replicate, {2{const_a}}); + `signal(FOLD_BINARY_ShiftL, const_a << 2); + `signal(FOLD_BINARY_ShiftR, const_a >> 2); + `signal(FOLD_BINARY_ShiftRS, sconst_a >>> 2); + `signal(FOLD_BINARY_Sub, const_a - const_b); + `signal(FOLD_BINARY_Xor, const_a ^ const_b); + + `signal(FOLD_SEL, const_a[3:1]); `signal(SWAP_CONST_IN_COMMUTATIVE_BINARY, rand_a + const_a); `signal(SWAP_NOT_IN_COMMUTATIVE_BINARY, rand_a + ~rand_a); @@ -46,25 +99,25 @@ module t ( `signal(REPLACE_REDUCTION_OF_CONST_XOR, ^const_a); `signal(REPLACE_EXTEND, 4'(rand_a[0])); `signal(PUSH_NOT_THROUGH_COND, ~(rand_a[0] ? rand_a[4:0] : 5'hb)); - `signal(REMOVE_NOT_NOT, ~`DFG(~`DFG(rand_a))); - `signal(REPLACE_NOT_NEQ, ~`DFG(rand_a != rand_b)); - `signal(REPLACE_NOT_EQ, ~`DFG(rand_a == rand_b)); + `signal(REMOVE_NOT_NOT, ~~rand_a); + `signal(REPLACE_NOT_NEQ, ~(rand_a != rand_b)); + `signal(REPLACE_NOT_EQ, ~(rand_a == rand_b)); `signal(REPLACE_NOT_OF_CONST, ~4'd0); `signal(REPLACE_AND_OF_NOT_AND_NOT, ~rand_a[1] & ~rand_b[1]); `signal(REPLACE_AND_OF_NOT_AND_NEQ, ~rand_a[2] & (rand_b != 64'd2)); `signal(REPLACE_AND_OF_CONST_AND_CONST, const_a & const_b); - `signal(REPLACE_AND_WITH_ZERO, `ZERO & rand_a); - `signal(REMOVE_AND_WITH_ONES, `ONES & rand_a); + `signal(REPLACE_AND_WITH_ZERO, 64'd0 & rand_a); + `signal(REMOVE_AND_WITH_ONES, -64'd1 & rand_a); `signal(REPLACE_CONTRADICTORY_AND, rand_a & ~rand_a); `signal(REPLACE_OR_OF_NOT_AND_NOT, ~rand_a[3] | ~rand_b[3]); `signal(REPLACE_OR_OF_NOT_AND_NEQ, ~rand_a[4] | (rand_b != 64'd3)); `signal(REPLACE_OR_OF_CONCAT_ZERO_LHS_AND_CONCAT_RHS_ZERO, {2'd0, rand_a[1:0]} | {rand_b[1:0], 2'd0}); `signal(REPLACE_OR_OF_CONCAT_LHS_ZERO_AND_CONCAT_ZERO_RHS, {rand_a[1:0], 2'd0} | {2'd0, rand_b[1:0]}); `signal(REPLACE_OR_OF_CONST_AND_CONST, const_a | const_b); - `signal(REMOVE_OR_WITH_ZERO, `ZERO | 
rand_a); - `signal(REPLACE_OR_WITH_ONES, `ONES | rand_a); + `signal(REMOVE_OR_WITH_ZERO, 64'd0 | rand_a); + `signal(REPLACE_OR_WITH_ONES, -64'd1 | rand_a); `signal(REPLACE_TAUTOLOGICAL_OR, rand_a | ~rand_a); - `signal(REMOVE_SUB_ZERO, rand_a - `ZERO); + `signal(REMOVE_SUB_ZERO, rand_a - 64'd0); `signal(REPLACE_SUB_WITH_NOT, rand_a[0] - 1'b1); `signal(REMOVE_REDUNDANT_ZEXT_ON_RHS_OF_SHIFT, rand_a << {2'b0, rand_a[2:0]}); `signal(REPLACE_EQ_OF_CONST_AND_CONST, 4'd0 == 4'd1); @@ -80,30 +133,30 @@ module t ( `signal(REPLACE_CONCAT_ZERO_AND_SEL_TOP_WITH_SHIFTR, {62'd0, rand_a[63:62]}); `signal(REPLACE_CONCAT_SEL_BOTTOM_AND_ZERO_WITH_SHIFTL, {rand_a[1:0], 62'd0}); `signal(PUSH_CONCAT_THROUGH_NOTS, {~(rand_a+64'd101), ~(rand_b+64'd101)} ); - `signal(REMOVE_CONCAT_OF_ADJOINING_SELS, {`DFG(rand_a[10:3]), `DFG(rand_a[2:1])}); - `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:3], {rand_a[2:1], rand_b}}); + `signal(REMOVE_CONCAT_OF_ADJOINING_SELS, {rand_a[10:3], rand_a[2:1]}); + `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:3], `DFG({rand_a[2:1], rand_b})}); `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {`DFG({rand_b, rand_a[10:3]}), rand_a[2:1]}); - `signal(REMOVE_COND_WITH_FALSE_CONDITION, &`ZERO ? rand_a : rand_b); - `signal(REMOVE_COND_WITH_TRUE_CONDITION, |`ONES ? rand_a : rand_b); - `signal(SWAP_COND_WITH_NOT_CONDITION, (~rand_a[0] & |`ONES) ? rand_a : rand_b); + `signal(REMOVE_COND_WITH_FALSE_CONDITION, 1'd0 ? rand_a : rand_b); + `signal(REMOVE_COND_WITH_TRUE_CONDITION, 1'd1 ? rand_a : rand_b); + `signal(SWAP_COND_WITH_NOT_CONDITION, (~rand_a[0] & 1'd1) ? rand_a : rand_b); `signal(SWAP_COND_WITH_NEQ_CONDITION, rand_b != rand_a ? rand_a : rand_b); `signal(PULL_NOTS_THROUGH_COND, rand_a[0] ? ~rand_a[4:0] : ~rand_b[4:0]); - `signal(REPLACE_COND_WITH_THEN_BRANCH_ZERO, rand_a[0] ? |`ZERO : rand_a[1]); - `signal(REPLACE_COND_WITH_THEN_BRANCH_ONES, rand_a[0] ? 
|`ONES : rand_a[1]); - `signal(REPLACE_COND_WITH_ELSE_BRANCH_ZERO, rand_a[0] ? rand_a[1] : |`ZERO); - `signal(REPLACE_COND_WITH_ELSE_BRANCH_ONES, rand_a[0] ? rand_a[1] : |`ONES); + `signal(REPLACE_COND_WITH_THEN_BRANCH_ZERO, rand_a[0] ? 1'd0 : rand_a[1]); + `signal(REPLACE_COND_WITH_THEN_BRANCH_ONES, rand_a[0] ? 1'd1 : rand_a[1]); + `signal(REPLACE_COND_WITH_ELSE_BRANCH_ZERO, rand_a[0] ? rand_a[1] : 1'd0); + `signal(REPLACE_COND_WITH_ELSE_BRANCH_ONES, rand_a[0] ? rand_a[1] : 1'd1); `signal(INLINE_ARRAYSEL, array[0]); - `signal(PUSH_BITWISE_THROUGH_REDUCTION_AND, (&rand_a) & (&rand_b)); - `signal(PUSH_BITWISE_THROUGH_REDUCTION_OR, (|rand_a) | (|rand_b)); - `signal(PUSH_BITWISE_THROUGH_REDUCTION_XOR, (^rand_a) ^ (^rand_b)); - `signal(PUSH_REDUCTION_THROUGH_CONCAT_AND, &`DFG({(rand_a + 64'd102), rand_b})); - `signal(PUSH_REDUCTION_THROUGH_CONCAT_OR, |`DFG({(rand_a + 64'd103), rand_b})); - `signal(PUSH_REDUCTION_THROUGH_CONCAT_XOR, ^`DFG({(rand_a + 64'd104), rand_b})); - `signal(REMOVE_WIDTH_ONE_REDUCTION_AND, &`DFG({randbit_a ^ rand_a[0], rand_b})); - `signal(REMOVE_WIDTH_ONE_REDUCTION_OR, |`DFG({randbit_a ^ rand_a[1], rand_b})); - `signal(REMOVE_WIDTH_ONE_REDUCTION_XOR, ^`DFG({randbit_a ^ rand_a[2], rand_b})); - `signal(REMOVE_XOR_WITH_ZERO, `ZERO ^ rand_a); - `signal(REMOVE_XOR_WITH_ONES, `ONES ^ rand_a); + `signal(PUSH_BITWISE_THROUGH_REDUCTION_AND, (&(rand_a + 64'd105)) & (&(rand_b + 64'd108))); + `signal(PUSH_BITWISE_THROUGH_REDUCTION_OR, (|(rand_a + 64'd106)) | (|(rand_b + 64'd109))); + `signal(PUSH_BITWISE_THROUGH_REDUCTION_XOR, (^(rand_a + 64'd107)) ^ (^(rand_b + 64'd110))); + `signal(PUSH_REDUCTION_THROUGH_CONCAT_AND, &{(rand_a + 64'd102), rand_b}); + `signal(PUSH_REDUCTION_THROUGH_CONCAT_OR, |{(rand_a + 64'd103), rand_b}); + `signal(PUSH_REDUCTION_THROUGH_CONCAT_XOR, ^{(rand_a + 64'd104), rand_b}); + `signal(REMOVE_WIDTH_ONE_REDUCTION_AND, &{randbit_a ^ rand_a[0], rand_b}); + `signal(REMOVE_WIDTH_ONE_REDUCTION_OR, |{randbit_a ^ rand_a[1], rand_b}); + 
`signal(REMOVE_WIDTH_ONE_REDUCTION_XOR, ^{randbit_a ^ rand_a[2], rand_b}); + `signal(REMOVE_XOR_WITH_ZERO, 64'd0 ^ rand_a); + `signal(REMOVE_XOR_WITH_ONES, -64'd1 ^ rand_a); `signal(REPLACE_COND_DEC, randbit_a ? rand_b - 64'b1 : rand_b); `signal(REPLACE_COND_INC, randbit_a ? rand_b + 64'b1 : rand_b); `signal(RIGHT_LEANING_ASSOC, (((rand_a + rand_b) + rand_a) + rand_b)); @@ -125,6 +178,8 @@ module t ( always @(posedge randbit_a) if ($c(0)) $display(sel_from_not); // Do not remove signal // Assigned at the end to avoid inlining by other passes - assign const_a = (rand_a | ~rand_a) & 64'h0123456789abcdef; - assign const_b = ~(rand_a & ~rand_a) & 64'h98badefc10325647; + assign const_a = 64'h0123456789abcdef; + assign const_b = 64'h98badefc10325647; + assign sconst_a = 64'hfedcba9876543210; + assign sconst_b = 64'hba0123456789cdef; endmodule