diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index d06d8ae12..4b1e0a458 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -501,6 +501,12 @@ Summary: .. option:: -fno-const +.. option:: -fno-const-before-dfg + + Do not apply any global expression folding prior to the DFG pass. This + option is solely for the purpose of DFG testing and should not be used + otherwise. + .. option:: -fno-const-bit-op-tree .. option:: -fno-dedup diff --git a/src/V3DfgPeephole.cpp b/src/V3DfgPeephole.cpp index 9e8a761ed..ca8d5b95e 100644 --- a/src/V3DfgPeephole.cpp +++ b/src/V3DfgPeephole.cpp @@ -78,6 +78,58 @@ template <> struct BitwiseToReductionImpl { using type = DfgRedOr; }; template <> struct BitwiseToReductionImpl { using type = DfgRedXor; }; template using BitwiseToReduction = typename BitwiseToReductionImpl::type; + +namespace { +template void foldOp(V3Number& out, const V3Number& src); +template <> void foldOp (V3Number& out, const V3Number& src) { out.opCLog2(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opCountOnes(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opAssign(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opExtendS(src, src.width()); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opLogNot(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opNegate(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opNot(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opOneHot(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opOneHot0(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opRedAnd(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opRedOr(src); } +template <> void foldOp (V3Number& out, const V3Number& src) { out.opRedXor(src); } + +template void 
foldOp(V3Number& out, const V3Number& lhs, const V3Number& rhs); +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opAdd(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opAnd(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opConcat(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opDiv(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opDivS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opEq(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opGt(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opGtS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opGte(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opGteS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLogAnd(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLogEq(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLogIf(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLogOr(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLt(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLtS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opLte(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { 
out.opLteS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opModDiv(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opModDivS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opMul(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opMulS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opNeq(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opOr(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opPow(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opPowSS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opPowSU(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opPowUS(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opRepl(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opShiftL(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opShiftR(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opShiftRS(lhs, rhs, lhs.width()); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opSub(lhs, rhs); } +template <> void foldOp (V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opXor(lhs, rhs); } +} // clang-format on class V3DfgPeephole final : public DfgVisitor { @@ -114,6 +166,40 @@ class V3DfgPeephole final : public DfgVisitor { // Create a new DfgConst vertex with the given width and value zero
DfgConst* makeZero(FileLine* flp, uint32_t width) { return makeConst(flp, width, 0); } + // Constant fold unary vertex, return true if folded + template + bool foldUnary(Vertex* vtxp) { + static_assert(std::is_base_of::value, "Must invoke on unary"); + static_assert(std::is_final::value, "Must invoke on final class"); + if (DfgConst* const srcp = vtxp->srcp()->template cast()) { + APPLYING(FOLD_UNARY) { + DfgConst* const resultp = makeZero(vtxp->fileline(), vtxp->width()); + foldOp(resultp->num(), srcp->num()); + vtxp->replaceWith(resultp); + return true; + } + } + return false; + } + + // Constant fold binary vertex, return true if folded + template + bool foldBinary(Vertex* vtxp) { + static_assert(std::is_base_of::value, "Must invoke on binary"); + static_assert(std::is_final::value, "Must invoke on final class"); + if (DfgConst* const lhsp = vtxp->lhsp()->template cast()) { + if (DfgConst* const rhsp = vtxp->rhsp()->template cast()) { + APPLYING(FOLD_BINARY) { + DfgConst* const resultp = makeZero(vtxp->fileline(), vtxp->width()); + foldOp(resultp->num(), lhsp->num(), rhsp->num()); + vtxp->replaceWith(resultp); + return true; + } + } + } + return false; + } + // Transformations that apply to all commutative binary vertices void commutativeBinary(DfgVertexBinary* vtxp) { DfgVertex* const lhsp = vtxp->source<0>(); @@ -291,6 +377,8 @@ class V3DfgPeephole final : public DfgVisitor { void optimizeReduction(Reduction* vtxp) { using Bitwise = ReductionToBitwise; + if (foldUnary(vtxp)) return; + DfgVertex* const srcp = vtxp->srcp(); FileLine* const flp = vtxp->fileline(); @@ -348,21 +436,6 @@ class V3DfgPeephole final : public DfgVisitor { } } } - - if (DfgConst* const constp = srcp->cast()) { - APPLYING(REPLACE_REDUCTION_OF_CONST) { - DfgConst* const replacementp = makeZero(flp, 1); - if VL_CONSTEXPR_CXX17 (std::is_same::value) { - replacementp->num().opRedAnd(constp->num()); - } else if VL_CONSTEXPR_CXX17 (std::is_same::value) { - 
replacementp->num().opRedOr(constp->num()); - } else { - replacementp->num().opRedXor(constp->num()); - } - vtxp->replaceWith(replacementp); - return; - } - } } void optimizeShiftRHS(DfgVertexBinary* vtxp) { @@ -379,26 +452,57 @@ class V3DfgPeephole final : public DfgVisitor { void visit(DfgVertex*) override {} + //========================================================================= + // DfgVertexUnary + //========================================================================= + + void visit(DfgCLog2* vtxp) override { + if (foldUnary(vtxp)) return; + } + + void visit(DfgCountOnes* vtxp) override { + if (foldUnary(vtxp)) return; + } + void visit(DfgExtend* vtxp) override { - const uint32_t extension = vtxp->width() - vtxp->srcp()->width(); - UASSERT_OBJ(extension > 0, vtxp, "Useless Extend"); + UASSERT_OBJ(vtxp->width() > vtxp->srcp()->width(), vtxp, "Invalid zero extend"); - FileLine* const flp = vtxp->fileline(); + if (foldUnary(vtxp)) return; - // Convert Extend into Concat with zeros. This simplifies other patterns as they only + // Convert all Extend into Concat with zeros. This simplifies other patterns as they only // need to handle Concat, which is more generic, and don't need special cases for // Extend. 
APPLYING(REPLACE_EXTEND) { + FileLine* const flp = vtxp->fileline(); DfgConcat* const replacementp = new DfgConcat{m_dfg, flp, vtxp->dtypep()}; - replacementp->lhsp(makeZero(flp, extension)); + replacementp->lhsp(makeZero(flp, vtxp->width() - vtxp->srcp()->width())); replacementp->rhsp(vtxp->srcp()); vtxp->replaceWith(replacementp); } } + void visit(DfgExtendS* vtxp) override { + UASSERT_OBJ(vtxp->width() > vtxp->srcp()->width(), vtxp, "Invalid sign extend"); + + if (foldUnary(vtxp)) return; + } + + void visit(DfgLogNot* vtxp) override { + UASSERT_OBJ(vtxp->width() == 1, vtxp, "Incorrect width"); + + if (foldUnary(vtxp)) return; + } + + void visit(DfgNegate* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->srcp()->width(), vtxp, "Mismatched width"); + + if (foldUnary(vtxp)) return; + } + void visit(DfgNot* vtxp) override { - UASSERT_OBJ(vtxp->width() == vtxp->srcp()->width(), vtxp, - "Mismatched width: " << vtxp->width() << " != " << vtxp->srcp()->width()); + UASSERT_OBJ(vtxp->width() == vtxp->srcp()->width(), vtxp, "Mismatched width"); + + if (foldUnary(vtxp)) return; // Not of Cond if (DfgCond* const condp = vtxp->srcp()->cast()) { @@ -460,22 +564,32 @@ class V3DfgPeephole final : public DfgVisitor { } } } - - // Not of Const - if (DfgConst* const constp = vtxp->srcp()->cast()) { - APPLYING(REPLACE_NOT_OF_CONST) { - DfgConst* const replacementp = makeZero(vtxp->fileline(), vtxp->width()); - replacementp->num().opNot(constp->num()); - vtxp->replaceWith(replacementp); - return; - } - } } + void visit(DfgOneHot* vtxp) override { + if (foldUnary(vtxp)) return; + } + + void visit(DfgOneHot0* vtxp) override { + if (foldUnary(vtxp)) return; + } + + void visit(DfgRedOr* vtxp) override { optimizeReduction(vtxp); } + + void visit(DfgRedAnd* vtxp) override { optimizeReduction(vtxp); } + + void visit(DfgRedXor* vtxp) override { optimizeReduction(vtxp); } + + //========================================================================= + // DfgVertexBinary - bitwise + 
//========================================================================= + void visit(DfgAnd* vtxp) override { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + if (foldBinary(vtxp)) return; + commutativeBinary(vtxp); associativeBinary(vtxp); @@ -515,15 +629,6 @@ class V3DfgPeephole final : public DfgVisitor { } if (DfgConst* const lhsConstp = lhsp->cast()) { - if (DfgConst* const rhsConstp = rhsp->cast()) { - APPLYING(REPLACE_AND_OF_CONST_AND_CONST) { - DfgConst* const replacementp = makeZero(flp, vtxp->width()); - replacementp->num().opAnd(lhsConstp->num(), rhsConstp->num()); - vtxp->replaceWith(replacementp); - return; - } - } - if (lhsConstp->isZero()) { APPLYING(REPLACE_AND_WITH_ZERO) { vtxp->replaceWith(lhsConstp); @@ -561,6 +666,8 @@ class V3DfgPeephole final : public DfgVisitor { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + if (foldBinary(vtxp)) return; + commutativeBinary(vtxp); associativeBinary(vtxp); @@ -627,15 +734,6 @@ class V3DfgPeephole final : public DfgVisitor { } if (DfgConst* const lhsConstp = lhsp->cast()) { - if (DfgConst* const rhsConstp = rhsp->cast()) { - APPLYING(REPLACE_OR_OF_CONST_AND_CONST) { - DfgConst* const replacementp = makeZero(flp, vtxp->width()); - replacementp->num().opOr(lhsConstp->num(), rhsConstp->num()); - vtxp->replaceWith(replacementp); - return; - } - } - if (lhsConstp->isZero()) { APPLYING(REMOVE_OR_WITH_ZERO) { vtxp->replaceWith(rhsp); @@ -674,6 +772,8 @@ class V3DfgPeephole final : public DfgVisitor { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + if (foldBinary(vtxp)) return; + commutativeBinary(vtxp); associativeBinary(vtxp); @@ -700,266 +800,44 @@ class 
V3DfgPeephole final : public DfgVisitor { tryPushBitwiseOpThroughConcat(vtxp, lConstp, rConcatp); return; } - if (DfgConst* const rConstp = rhsp->cast()) { - APPLYING(REPLACE_XOR_OF_CONST_AND_CONST) { - DfgConst* const replacementp = makeZero(flp, vtxp->width()); - replacementp->num().opXor(lConstp->num(), rConstp->num()); - vtxp->replaceWith(replacementp); - return; - } - } } if (tryPushBitwiseOpThroughReductions(vtxp)) return; } + //========================================================================= + // DfgVertexBinary - other + //========================================================================= + void visit(DfgAdd* vtxp) override { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + if (foldBinary(vtxp)) return; + commutativeBinary(vtxp); associativeBinary(vtxp); } - void visit(DfgSub* vtxp) override { - DfgVertex* const lhsp = vtxp->lhsp(); - DfgVertex* const rhsp = vtxp->rhsp(); - UASSERT_OBJ(lhsp->width() == rhsp->width(), vtxp, "Width mismatch"); - UASSERT_OBJ(lhsp->width() == vtxp->width(), vtxp, "Width mismatch"); - if (DfgConst* const rConstp = rhsp->cast()) { - if (rConstp->isZero()) { - APPLYING(REMOVE_SUB_ZERO) { - vtxp->replaceWith(lhsp); - return; - } - } - if (vtxp->width() == 1 && rConstp->toU32() == 1) { - APPLYING(REPLACE_SUB_WITH_NOT) { - DfgNot* const replacementp = new DfgNot{m_dfg, vtxp->fileline(), m_bitDType}; - replacementp->srcp(lhsp); - vtxp->replaceWith(replacementp); - return; - } - } - } - } - - void visit(DfgShiftL* vtxp) override { optimizeShiftRHS(vtxp); } - void visit(DfgShiftR* vtxp) override { optimizeShiftRHS(vtxp); } - void visit(DfgShiftRS* vtxp) override { optimizeShiftRHS(vtxp); } - - void visit(DfgEq* vtxp) override { - commutativeBinary(vtxp); - - DfgVertex* const lhsp = vtxp->lhsp(); - DfgVertex* const rhsp = vtxp->rhsp(); - - if (DfgConst* const lhsConstp = lhsp->cast()) { - if (DfgConst* 
const rhsConstp = rhsp->cast()) { - APPLYING(REPLACE_EQ_OF_CONST_AND_CONST) { - DfgConst* const replacementp = makeZero(vtxp->fileline(), 1); - replacementp->num().opEq(lhsConstp->num(), rhsConstp->num()); - vtxp->replaceWith(replacementp); - return; - } - } - - if (DfgConcat* const rhsConcatp = rhsp->cast()) { - if (tryPushCompareOpThroughConcat(vtxp, lhsConstp, rhsConcatp)) return; - } - } - } - - void visit(DfgSel* vtxp) override { - DfgVertex* const fromp = vtxp->fromp(); - DfgConst* const lsbp = vtxp->lsbp()->cast(); - DfgConst* const widthp = vtxp->widthp()->cast(); - if (!lsbp || !widthp) return; - - FileLine* const flp = vtxp->fileline(); - - UASSERT_OBJ(lsbp->toI32() >= 0, vtxp, "Negative LSB in Sel"); - - const uint32_t lsb = lsbp->toU32(); - const uint32_t width = widthp->toU32(); - const uint32_t msb = lsb + width - 1; - - UASSERT_OBJ(width == vtxp->width(), vtxp, "Incorrect Sel width"); - - // Full width select, replace with the source. - if (fromp->width() == width) { - UASSERT_OBJ(lsb == 0, fromp, "OOPS"); - APPLYING(REMOVE_FULL_WIDTH_SEL) { - vtxp->replaceWith(fromp); - return; - } - } - - // Sel from Concat - if (DfgConcat* const concatp = fromp->cast()) { - DfgVertex* const lhsp = concatp->lhsp(); - DfgVertex* const rhsp = concatp->rhsp(); - - if (msb < rhsp->width()) { - // If the select is entirely from rhs, then replace with sel from rhs - APPLYING(REMOVE_SEL_FROM_RHS_OF_CONCAT) { // - vtxp->fromp(rhsp); - } - } else if (lsb >= rhsp->width()) { - // If the select is entirely from the lhs, then replace with sel from lhs - APPLYING(REMOVE_SEL_FROM_LHS_OF_CONCAT) { - vtxp->fromp(lhsp); - vtxp->lsbp(makeI32(flp, lsb - rhsp->width())); - } - } else if (lsb == 0 || msb == concatp->width() - 1 // - || lhsp->is() || rhsp->is() // - || !concatp->hasMultipleSinks()) { - // If the select straddles both sides, but at least one of the sides is wholly - // selected, or at least one of the sides is a Const, or this concat has no other - // use, then push the 
Sel past the Concat - APPLYING(PUSH_SEL_THROUGH_CONCAT) { - const uint32_t rSelWidth = rhsp->width() - lsb; - const uint32_t lSelWidth = width - rSelWidth; - - // The new Lhs vertex - DfgSel* const newLhsp = new DfgSel{m_dfg, flp, dtypeForWidth(lSelWidth)}; - newLhsp->fromp(lhsp); - newLhsp->lsbp(makeI32(lsbp->fileline(), 0)); - newLhsp->widthp(makeI32(widthp->fileline(), lSelWidth)); - - // The new Rhs vertex - DfgSel* const newRhsp = new DfgSel{m_dfg, flp, dtypeForWidth(rSelWidth)}; - newRhsp->fromp(rhsp); - newRhsp->lsbp(makeI32(lsbp->fileline(), lsb)); - newRhsp->widthp(makeI32(widthp->fileline(), rSelWidth)); - - // The replacement Concat vertex - DfgConcat* const newConcat - = new DfgConcat{m_dfg, concatp->fileline(), vtxp->dtypep()}; - newConcat->lhsp(newLhsp); - newConcat->rhsp(newRhsp); - - // Replace this vertex - vtxp->replaceWith(newConcat); - return; - } - } - } - - if (DfgReplicate* const repp = fromp->cast()) { - // If the Sel is wholly into the source of the Replicate, push the Sel through the - // Replicate and apply it directly to the source of the Replicate. 
- const uint32_t srcWidth = repp->srcp()->width(); - if (width <= srcWidth) { - const uint32_t newLsb = lsb % srcWidth; - if (newLsb + width <= srcWidth) { - APPLYING(PUSH_SEL_THROUGH_REPLICATE) { - vtxp->fromp(repp->srcp()); - vtxp->lsbp(makeI32(flp, newLsb)); + void visit(DfgArraySel* vtxp) override { + if (DfgConst* const idxp = vtxp->bitp()->cast()) { + if (DfgVarArray* const varp = vtxp->fromp()->cast()) { + const uint32_t idx = idxp->toU32(); + if (DfgVertex* const driverp = varp->driverAt(idx)) { + APPLYING(INLINE_ARRAYSEL) { + vtxp->replaceWith(driverp); + return; } } } } - - // Sel from Not - if (DfgNot* const notp = fromp->cast()) { - // Replace "Sel from Not" with "Not of Sel" - if (!notp->hasMultipleSinks()) { - UASSERT_OBJ(notp->srcp()->width() == notp->width(), notp, "Mismatched widths"); - APPLYING(PUSH_SEL_THROUGH_NOT) { - // Make Sel select from source of Not - vtxp->fromp(notp->srcp()); - // Add Not after Sel - DfgNot* const replacementp - = new DfgNot{m_dfg, notp->fileline(), vtxp->dtypep()}; - vtxp->replaceWith(replacementp); - replacementp->srcp(vtxp); - } - } - } - - // Sel from Sel - if (DfgSel* const selp = fromp->cast()) { - UASSERT_OBJ(widthp->toU32() <= selp->width(), vtxp, "Out of bound Sel"); - if (DfgConst* const sourceLsbp = selp->lsbp()->cast()) { - UASSERT_OBJ(sourceLsbp->toI32() >= 0, selp, "negative"); - UASSERT_OBJ(selp->widthp()->as()->toU32() >= widthp->toU32(), selp, - "negative"); - APPLYING(REPLACE_SEL_FROM_SEL) { - // Make this Sel select from the source of the source Sel - vtxp->fromp(selp->fromp()); - // Adjust LSB - vtxp->lsbp(makeI32(flp, lsb + sourceLsbp->toU32())); - } - } - } - - // Sel from Cond - if (DfgCond* const condp = fromp->cast()) { - // If at least one of the branches are a constant, push the select past the cond - if (condp->thenp()->is() || condp->elsep()->is()) { - APPLYING(PUSH_SEL_THROUGH_COND) { - // The new 'then' vertex - DfgSel* const newThenp = new DfgSel{m_dfg, flp, vtxp->dtypep()}; - 
newThenp->fromp(condp->thenp()); - newThenp->lsbp(makeI32(lsbp->fileline(), lsb)); - newThenp->widthp(makeI32(widthp->fileline(), width)); - - // The new 'else' vertex - DfgSel* const newElsep = new DfgSel{m_dfg, flp, vtxp->dtypep()}; - newElsep->fromp(condp->elsep()); - newElsep->lsbp(makeI32(lsbp->fileline(), lsb)); - newElsep->widthp(makeI32(widthp->fileline(), width)); - - // The replacement Cond vertex - DfgCond* const newCondp - = new DfgCond{m_dfg, condp->fileline(), vtxp->dtypep()}; - newCondp->condp(condp->condp()); - newCondp->thenp(newThenp); - newCondp->elsep(newElsep); - - // Replace this vertex - vtxp->replaceWith(newCondp); - return; - } - } - } - - // Sel from ShiftL - if (DfgShiftL* const shiftLp = fromp->cast()) { - // If selecting bottom bits of left shift, push the Sel before the shift - if (lsb == 0) { - UASSERT_OBJ(shiftLp->lhsp()->width() >= width, vtxp, "input of shift narrow"); - APPLYING(PUSH_SEL_THROUGH_SHIFTL) { - vtxp->fromp(shiftLp->lhsp()); - DfgShiftL* const newShiftLp - = new DfgShiftL{m_dfg, shiftLp->fileline(), vtxp->dtypep()}; - vtxp->replaceWith(newShiftLp); - newShiftLp->lhsp(vtxp); - newShiftLp->rhsp(shiftLp->rhsp()); - } - } - } - - // Sel from Const - if (DfgConst* const constp = fromp->cast()) { - APPLYING(REPLACE_SEL_FROM_CONST) { - DfgConst* const replacementp = makeZero(flp, width); - replacementp->num().opSel(constp->num(), msb, lsb); - vtxp->replaceWith(replacementp); - return; - } - } } - void visit(DfgRedOr* vtxp) override { optimizeReduction(vtxp); } - void visit(DfgRedAnd* vtxp) override { optimizeReduction(vtxp); } - void visit(DfgRedXor* vtxp) override { optimizeReduction(vtxp); } - void visit(DfgConcat* vtxp) override { UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width() + vtxp->rhsp()->width(), vtxp, - "Incorrect Concat width: " << vtxp->width() << " != " << vtxp->lhsp()->width() - << " + " << vtxp->rhsp()->width()); + "Inconsistent Concat"); + + if (foldBinary(vtxp)) return; DfgVertex* const lhsp = vtxp->lhsp(); 
DfgVertex* const rhsp = vtxp->rhsp(); @@ -995,13 +873,6 @@ class V3DfgPeephole final : public DfgVisitor { DfgConst* const lConstp = lhsp->cast(); DfgConst* const rConstp = rhsp->cast(); - if (lConstp && rConstp) { - APPLYING(REPLACE_CONCAT_OF_CONSTS) { - vtxp->replaceWith(joinConsts(lConstp, rConstp, flp)); - return; - } - } - if (lConstp) { if (DfgConcat* const rConcatp = rhsp->cast()) { if (DfgConst* const rlConstp = rConcatp->lhsp()->cast()) { @@ -1162,17 +1033,371 @@ class V3DfgPeephole final : public DfgVisitor { } } + void visit(DfgDiv* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgDivS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgEq* vtxp) override { + if (foldBinary(vtxp)) return; + + commutativeBinary(vtxp); + + DfgVertex* const lhsp = vtxp->lhsp(); + DfgVertex* const rhsp = vtxp->rhsp(); + + if (DfgConst* const lhsConstp = lhsp->cast()) { + if (DfgConcat* const rhsConcatp = rhsp->cast()) { + if (tryPushCompareOpThroughConcat(vtxp, lhsConstp, rhsConcatp)) return; + } + } + } + + void visit(DfgGt* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgGtS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgGte* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgGteS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLogAnd* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLogEq* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLogIf* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLogOr* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLt* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLtS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLte* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgLteS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgModDiv* 
vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgModDivS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgMul* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgMulS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgNeq* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgPow* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgPowSS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgPowSU* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgPowUS* vtxp) override { + if (foldBinary(vtxp)) return; + } + + void visit(DfgReplicate* vtxp) override { + if (vtxp->width() == vtxp->srcp()->width()) { + APPLYING(REMOVE_REPLICATE_ONCE) { + vtxp->replaceWith(vtxp->srcp()); + return; + } + } + + if (foldBinary(vtxp)) return; + } + + void visit(DfgShiftL* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched width"); + + if (foldBinary(vtxp)) return; + + optimizeShiftRHS(vtxp); + } + + void visit(DfgShiftR* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched width"); + + if (foldBinary(vtxp)) return; + + optimizeShiftRHS(vtxp); + } + + void visit(DfgShiftRS* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched width"); + + if (foldBinary(vtxp)) return; + + optimizeShiftRHS(vtxp); + } + + void visit(DfgSub* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->lhsp()->width(), vtxp, "Mismatched LHS width"); + UASSERT_OBJ(vtxp->width() == vtxp->rhsp()->width(), vtxp, "Mismatched RHS width"); + + if (foldBinary(vtxp)) return; + + DfgVertex* const lhsp = vtxp->lhsp(); + DfgVertex* const rhsp = vtxp->rhsp(); + + if (DfgConst* const rConstp = rhsp->cast()) { + if (rConstp->isZero()) { + APPLYING(REMOVE_SUB_ZERO) { + vtxp->replaceWith(lhsp); + return; + } + } + if (vtxp->width() == 1 && rConstp->toU32() == 1) { + 
APPLYING(REPLACE_SUB_WITH_NOT) { + DfgNot* const replacementp = new DfgNot{m_dfg, vtxp->fileline(), m_bitDType}; + replacementp->srcp(lhsp); + vtxp->replaceWith(replacementp); + return; + } + } + } + } + + //========================================================================= + // DfgVertexTernary + //========================================================================= + + void visit(DfgSel* vtxp) override { + DfgVertex* const fromp = vtxp->fromp(); + DfgConst* const lsbp = vtxp->lsbp()->cast(); + DfgConst* const widthp = vtxp->widthp()->cast(); + if (!lsbp || !widthp) return; + + FileLine* const flp = vtxp->fileline(); + + UASSERT_OBJ(lsbp->toI32() >= 0, vtxp, "Negative LSB in Sel"); + + const uint32_t lsb = lsbp->toU32(); + const uint32_t width = widthp->toU32(); + const uint32_t msb = lsb + width - 1; + + UASSERT_OBJ(width == vtxp->width(), vtxp, "Incorrect Sel width"); + + if (DfgConst* const constp = fromp->cast()) { + APPLYING(FOLD_SEL) { + DfgConst* const replacementp = makeZero(flp, width); + replacementp->num().opSel(constp->num(), msb, lsb); + vtxp->replaceWith(replacementp); + return; + } + } + + // Full width select, replace with the source. 
+ if (fromp->width() == width) { + UASSERT_OBJ(lsb == 0, fromp, "OOPS"); + APPLYING(REMOVE_FULL_WIDTH_SEL) { + vtxp->replaceWith(fromp); + return; + } + } + + // Sel from Concat + if (DfgConcat* const concatp = fromp->cast()) { + DfgVertex* const lhsp = concatp->lhsp(); + DfgVertex* const rhsp = concatp->rhsp(); + + if (msb < rhsp->width()) { + // If the select is entirely from rhs, then replace with sel from rhs + APPLYING(REMOVE_SEL_FROM_RHS_OF_CONCAT) { // + vtxp->fromp(rhsp); + } + } else if (lsb >= rhsp->width()) { + // If the select is entirely from the lhs, then replace with sel from lhs + APPLYING(REMOVE_SEL_FROM_LHS_OF_CONCAT) { + vtxp->fromp(lhsp); + vtxp->lsbp(makeI32(flp, lsb - rhsp->width())); + } + } else if (lsb == 0 || msb == concatp->width() - 1 // + || lhsp->is() || rhsp->is() // + || !concatp->hasMultipleSinks()) { + // If the select straddles both sides, but at least one of the sides is wholly + // selected, or at least one of the sides is a Const, or this concat has no other + // use, then push the Sel past the Concat + APPLYING(PUSH_SEL_THROUGH_CONCAT) { + const uint32_t rSelWidth = rhsp->width() - lsb; + const uint32_t lSelWidth = width - rSelWidth; + + // The new Lhs vertex + DfgSel* const newLhsp = new DfgSel{m_dfg, flp, dtypeForWidth(lSelWidth)}; + newLhsp->fromp(lhsp); + newLhsp->lsbp(makeI32(lsbp->fileline(), 0)); + newLhsp->widthp(makeI32(widthp->fileline(), lSelWidth)); + + // The new Rhs vertex + DfgSel* const newRhsp = new DfgSel{m_dfg, flp, dtypeForWidth(rSelWidth)}; + newRhsp->fromp(rhsp); + newRhsp->lsbp(makeI32(lsbp->fileline(), lsb)); + newRhsp->widthp(makeI32(widthp->fileline(), rSelWidth)); + + // The replacement Concat vertex + DfgConcat* const newConcat + = new DfgConcat{m_dfg, concatp->fileline(), vtxp->dtypep()}; + newConcat->lhsp(newLhsp); + newConcat->rhsp(newRhsp); + + // Replace this vertex + vtxp->replaceWith(newConcat); + return; + } + } + } + + if (DfgReplicate* const repp = fromp->cast()) { + // If the Sel is 
wholly into the source of the Replicate, push the Sel through the + // Replicate and apply it directly to the source of the Replicate. + const uint32_t srcWidth = repp->srcp()->width(); + if (width <= srcWidth) { + const uint32_t newLsb = lsb % srcWidth; + if (newLsb + width <= srcWidth) { + APPLYING(PUSH_SEL_THROUGH_REPLICATE) { + vtxp->fromp(repp->srcp()); + vtxp->lsbp(makeI32(flp, newLsb)); + } + } + } + } + + // Sel from Not + if (DfgNot* const notp = fromp->cast()) { + // Replace "Sel from Not" with "Not of Sel" + if (!notp->hasMultipleSinks()) { + UASSERT_OBJ(notp->srcp()->width() == notp->width(), notp, "Mismatched widths"); + APPLYING(PUSH_SEL_THROUGH_NOT) { + // Make Sel select from source of Not + vtxp->fromp(notp->srcp()); + // Add Not after Sel + DfgNot* const replacementp + = new DfgNot{m_dfg, notp->fileline(), vtxp->dtypep()}; + vtxp->replaceWith(replacementp); + replacementp->srcp(vtxp); + } + } + } + + // Sel from Sel + if (DfgSel* const selp = fromp->cast()) { + UASSERT_OBJ(widthp->toU32() <= selp->width(), vtxp, "Out of bound Sel"); + if (DfgConst* const sourceLsbp = selp->lsbp()->cast()) { + UASSERT_OBJ(sourceLsbp->toI32() >= 0, selp, "negative"); + UASSERT_OBJ(selp->widthp()->as()->toU32() >= widthp->toU32(), selp, + "negative"); + APPLYING(REPLACE_SEL_FROM_SEL) { + // Make this Sel select from the source of the source Sel + vtxp->fromp(selp->fromp()); + // Adjust LSB + vtxp->lsbp(makeI32(flp, lsb + sourceLsbp->toU32())); + } + } + } + + // Sel from Cond + if (DfgCond* const condp = fromp->cast()) { + // If at least one of the branches are a constant, push the select past the cond + if (condp->thenp()->is() || condp->elsep()->is()) { + APPLYING(PUSH_SEL_THROUGH_COND) { + // The new 'then' vertex + DfgSel* const newThenp = new DfgSel{m_dfg, flp, vtxp->dtypep()}; + newThenp->fromp(condp->thenp()); + newThenp->lsbp(makeI32(lsbp->fileline(), lsb)); + newThenp->widthp(makeI32(widthp->fileline(), width)); + + // The new 'else' vertex + DfgSel* const 
newElsep = new DfgSel{m_dfg, flp, vtxp->dtypep()}; + newElsep->fromp(condp->elsep()); + newElsep->lsbp(makeI32(lsbp->fileline(), lsb)); + newElsep->widthp(makeI32(widthp->fileline(), width)); + + // The replacement Cond vertex + DfgCond* const newCondp + = new DfgCond{m_dfg, condp->fileline(), vtxp->dtypep()}; + newCondp->condp(condp->condp()); + newCondp->thenp(newThenp); + newCondp->elsep(newElsep); + + // Replace this vertex + vtxp->replaceWith(newCondp); + return; + } + } + } + + // Sel from ShiftL + if (DfgShiftL* const shiftLp = fromp->cast()) { + // If selecting bottom bits of left shift, push the Sel before the shift + if (lsb == 0) { + UASSERT_OBJ(shiftLp->lhsp()->width() >= width, vtxp, "input of shift narrow"); + APPLYING(PUSH_SEL_THROUGH_SHIFTL) { + vtxp->fromp(shiftLp->lhsp()); + DfgShiftL* const newShiftLp + = new DfgShiftL{m_dfg, shiftLp->fileline(), vtxp->dtypep()}; + vtxp->replaceWith(newShiftLp); + newShiftLp->lhsp(vtxp); + newShiftLp->rhsp(shiftLp->rhsp()); + } + } + } + } + void visit(DfgCond* vtxp) override { + UASSERT_OBJ(vtxp->width() == vtxp->thenp()->width(), vtxp, "Width mismatch"); + UASSERT_OBJ(vtxp->width() == vtxp->elsep()->width(), vtxp, "Width mismatch"); + DfgVertex* const condp = vtxp->condp(); DfgVertex* const thenp = vtxp->thenp(); DfgVertex* const elsep = vtxp->elsep(); - UASSERT_OBJ(vtxp->width() == thenp->width(), vtxp, "Width mismatch"); - UASSERT_OBJ(vtxp->width() == elsep->width(), vtxp, "Width mismatch"); + FileLine* const flp = vtxp->fileline(); if (condp->width() != 1) return; - FileLine* const flp = vtxp->fileline(); - if (condp->isOnes()) { APPLYING(REMOVE_COND_WITH_TRUE_CONDITION) { vtxp->replaceWith(thenp); @@ -1319,19 +1544,9 @@ class V3DfgPeephole final : public DfgVisitor { } } - void visit(DfgArraySel* vtxp) override { - if (DfgConst* const idxp = vtxp->bitp()->cast()) { - if (DfgVarArray* const varp = vtxp->fromp()->cast()) { - const uint32_t idx = idxp->toU32(); - if (DfgVertex* const driverp = 
varp->driverAt(idx)) { - APPLYING(INLINE_ARRAYSEL) { - vtxp->replaceWith(driverp); - return; - } - } - } - } - } + //========================================================================= + // DfgVertexVar + //========================================================================= void visit(DfgVarPacked* vtxp) override { // Inline variables fully driven by the logic represented by the DFG diff --git a/src/V3DfgPeephole.h b/src/V3DfgPeephole.h index 5d346456d..bec8a5c8f 100644 --- a/src/V3DfgPeephole.h +++ b/src/V3DfgPeephole.h @@ -26,6 +26,9 @@ // Enumeration of each peephole optimization. Must be kept in sorted order (enforced by tests). // clang-format off #define FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION(macro) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_BINARY) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_SEL) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_UNARY) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_ARRAYSEL) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, INLINE_VAR) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PULL_NOTS_THROUGH_COND) \ @@ -49,16 +52,15 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_NOT_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_OR_WITH_ZERO) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_REDUNDANT_ZEXT_ON_RHS_OF_SHIFT) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_REPLICATE_ONCE) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_SEL_FROM_LHS_OF_CONCAT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_SEL_FROM_RHS_OF_CONCAT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_SUB_ZERO) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_WIDTH_ONE_REDUCTION) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_XOR_WITH_ZERO) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_CONST_AND_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NEQ) \ 
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_WITH_ZERO) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_CONCAT_OF_CONSTS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_CONCAT_SEL_BOTTOM_AND_ZERO_WITH_SHIFTL) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_CONCAT_ZERO_AND_SEL_TOP_WITH_SHIFTR) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_COND_DEC) \ @@ -68,7 +70,6 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_COND_WITH_THEN_BRANCH_ONES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_COND_WITH_THEN_BRANCH_ZERO) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_CONTRADICTORY_AND) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_EQ_OF_CONST_AND_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_EXTEND) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS) \ @@ -76,19 +77,14 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NESTED_CONCAT_OF_CONSTS_ON_RHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_EQ) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_NEQ) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_NOT_OF_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_LHS_ZERO_AND_CONCAT_ZERO_RHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONCAT_ZERO_LHS_AND_CONCAT_RHS_ZERO) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_CONST_AND_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_NOT_AND_NEQ) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_OF_NOT_AND_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_OR_WITH_ONES) \ - 
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_REDUCTION_OF_CONST) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_SEL_FROM_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_SEL_FROM_SEL) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_SUB_WITH_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_TAUTOLOGICAL_OR) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_OF_CONST_AND_CONST) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_ONES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, RIGHT_LEANING_ASSOC) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, RIGHT_LEANING_CONCAT) \ diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 08e5902db..a13c15c48 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1132,6 +1132,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char DECL_OPTION("-fcase", FOnOff, &m_fCase); DECL_OPTION("-fcombine", FOnOff, &m_fCombine); DECL_OPTION("-fconst", FOnOff, &m_fConst); + DECL_OPTION("-fconst-before-dfg", FOnOff, &m_fConstBeforeDfg); DECL_OPTION("-fconst-bit-op-tree", FOnOff, &m_fConstBitOpTree); DECL_OPTION("-fdedup", FOnOff, &m_fDedupe); DECL_OPTION("-fdfg", CbFOnOff, [this](bool flag) { diff --git a/src/V3Options.h b/src/V3Options.h index 8eeff57b4..7503408ad 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -349,6 +349,7 @@ private: bool m_fCase; // main switch: -fno-case: case tree conversion bool m_fCombine; // main switch: -fno-combine: common icode packing bool m_fConst; // main switch: -fno-const: constant folding + bool m_fConstBeforeDfg = true; // main switch: -fno-const-before-dfg for testing only! 
bool m_fConstBitOpTree; // main switch: -fno-const-bit-op-tree constant bit op tree bool m_fDedupe; // main switch: -fno-dedupe: logic deduplication bool m_fDfgPeephole = true; // main switch: -fno-dfg-peephole @@ -598,6 +599,7 @@ public: bool fCase() const { return m_fCase; } bool fCombine() const { return m_fCombine; } bool fConst() const { return m_fConst; } + bool fConstBeforeDfg() const { return m_fConstBeforeDfg; } bool fConstBitOpTree() const { return m_fConstBitOpTree; } bool fDedupe() const { return m_fDedupe; } bool fDfgPeephole() const { return m_fDfgPeephole; } diff --git a/src/Verilator.cpp b/src/Verilator.cpp index 2bd4798a7..aaf666ea0 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -189,7 +189,7 @@ static void process() { // Push constants, but only true constants preserving liveness // so V3Undriven sees variables to be eliminated, ie "if (0 && foo) ..." - V3Const::constifyAllLive(v3Global.rootp()); + if (v3Global.opt.fConstBeforeDfg()) V3Const::constifyAllLive(v3Global.rootp()); // Signal based lint checks, no change to structures // Must be before first constification pass drops dead code @@ -209,7 +209,7 @@ static void process() { } // Propagate constants into expressions - V3Const::constifyAllLint(v3Global.rootp()); + if (v3Global.opt.fConstBeforeDfg()) V3Const::constifyAllLint(v3Global.rootp()); if (!(v3Global.opt.xmlOnly() && !v3Global.opt.flatten())) { // Split packed variables into multiple pieces to resolve UNOPTFLAT. diff --git a/test_regress/t/t_dfg_peephole.pl b/test_regress/t/t_dfg_peephole.pl index c09227ce8..952898fbb 100755 --- a/test_regress/t/t_dfg_peephole.pl +++ b/test_regress/t/t_dfg_peephole.pl @@ -67,6 +67,7 @@ compile( compile( verilator_flags2 => ["--stats", "--build", "--exe", "+incdir+$Self->{obj_dir}", "-Mdir", "$Self->{obj_dir}/obj_opt", "--prefix", "Vopt", + "-fno-const-before-dfg", # Otherwise V3Const makes testing painful "--dump-dfg", # To fill code coverage "-CFLAGS \"-I .. 
-I ../obj_ref\"", "../obj_ref/Vref__ALL.a", diff --git a/test_regress/t/t_dfg_peephole.v b/test_regress/t/t_dfg_peephole.v index bc8251345..bf7a1a721 100644 --- a/test_regress/t/t_dfg_peephole.v +++ b/test_regress/t/t_dfg_peephole.v @@ -4,7 +4,7 @@ // any use, without warranty, 2022 by Geza Lore. // SPDX-License-Identifier: CC0-1.0 -`define signal(name, expr) wire [$bits(expr)-1:0] ``name = expr; +`define signal(name, expr) wire [$bits(expr)-1:0] ``name = expr module t ( `include "portlist.vh" // Boilerplate generated by t_dfg_peephole.pl @@ -23,16 +23,69 @@ module t ( wire logic [127:0] rand_aa = {2{rand_a}}; wire logic [63:0] const_a; wire logic [63:0] const_b; + wire logic signed [63:0] sconst_a; + wire logic signed [63:0] sconst_b; wire logic [63:0] array [3:0]; assign array[0] = (rand_a << 32) | (rand_a >> 32); assign array[1] = (rand_a << 16) | (rand_a >> 48); - // 64 bit all 0 but don't tell V3Const -`define ZERO (const_a & ~const_a) - // 64 bit all 1 but don't tell V3Const -`define ONES (const_a | ~const_a) - // x, but in a way only DFG understands -`define DFG(x) ((|`ONES) ? (x) : (~x)) + // x, but with evaluation slightly delayed in DfgPeephole +`define DFG(x) (&16'hffff ? 
(x) : (~x)) + + `signal(FOLD_UNARY_CLog2, $clog2(const_a)); + `signal(FOLD_UNARY_CountOnes, $countones(const_a)); + `signal(FOLD_UNARY_IsUnknown, $isunknown(const_a)); + `signal(FOLD_UNARY_LogNot, !const_a[0]); + `signal(FOLD_UNARY_Negate, -const_a); + `signal(FOLD_UNARY_Not, ~const_a); + `signal(FOLD_UNARY_OneHot, $onehot(const_a)); + `signal(FOLD_UNARY_OneHot0, $onehot0(const_a)); + `signal(FOLD_UNARY_RedAnd, &const_a); + `signal(FOLD_UNARY_RedOr, |const_a); + `signal(FOLD_UNARY_RedXor, ^const_a); + // verilator lint_off WIDTH + wire logic [79:0] tmp_FOLD_UNARY_Extend = const_a; + wire logic signed [79:0] tmp_FOLD_UNARY_ExtendS = sconst_a; + //verilator lint_on WIDTH + `signal(FOLD_UNARY_Extend, tmp_FOLD_UNARY_Extend); + `signal(FOLD_UNARY_ExtendS, tmp_FOLD_UNARY_ExtendS); + + `signal(FOLD_BINARY_Add, const_a + const_b); + `signal(FOLD_BINARY_And, const_a & const_b); + `signal(FOLD_BINARY_Concat, {const_a, const_b}); + `signal(FOLD_BINARY_Div, const_a / 64'd3); + `signal(FOLD_BINARY_DivS, sconst_a / 64'sd3); + `signal(FOLD_BINARY_Eq, const_a == const_b); + `signal(FOLD_BINARY_Gt, const_a > const_b); + `signal(FOLD_BINARY_GtS, sconst_a > sconst_b); + `signal(FOLD_BINARY_Gte, const_a >= const_b); + `signal(FOLD_BINARY_GteS, sconst_a >= sconst_b); + `signal(FOLD_BINARY_LogAnd, const_a[0] && const_b[0]); + `signal(FOLD_BINARY_LogEq, const_a[0] <-> const_b[0]); + `signal(FOLD_BINARY_LogIf, const_a[0] -> const_b[0]); + `signal(FOLD_BINARY_LogOr, const_a[0] || const_b[0]); + `signal(FOLD_BINARY_Lt, const_a < const_b); + `signal(FOLD_BINARY_LtS, sconst_a < sconst_b); + `signal(FOLD_BINARY_Lte, const_a <= const_b); + `signal(FOLD_BINARY_LteS, sconst_a <= sconst_b); + `signal(FOLD_BINARY_ModDiv, const_a % 64'd3); + `signal(FOLD_BINARY_ModDivS, sconst_a % 64'sd3); + `signal(FOLD_BINARY_Mul, const_a * 64'd3); + `signal(FOLD_BINARY_MulS, sconst_a * 64'sd3); + `signal(FOLD_BINARY_Neq, const_a != const_b); + `signal(FOLD_BINARY_Or, const_a | const_b); + `signal(FOLD_BINARY_Pow, 
const_a ** 64'd2); + `signal(FOLD_BINARY_PowSS, sconst_a ** 64'sd2); + `signal(FOLD_BINARY_PowSU, sconst_a ** 64'd2); + `signal(FOLD_BINARY_PowUS, const_a ** 64'sd2); + `signal(FOLD_BINARY_Replicate, {2{const_a}}); + `signal(FOLD_BINARY_ShiftL, const_a << 2); + `signal(FOLD_BINARY_ShiftR, const_a >> 2); + `signal(FOLD_BINARY_ShiftRS, sconst_a >>> 2); + `signal(FOLD_BINARY_Sub, const_a - const_b); + `signal(FOLD_BINARY_Xor, const_a ^ const_b); + + `signal(FOLD_SEL, const_a[3:1]); `signal(SWAP_CONST_IN_COMMUTATIVE_BINARY, rand_a + const_a); `signal(SWAP_NOT_IN_COMMUTATIVE_BINARY, rand_a + ~rand_a); @@ -46,25 +99,25 @@ module t ( `signal(REPLACE_REDUCTION_OF_CONST_XOR, ^const_a); `signal(REPLACE_EXTEND, 4'(rand_a[0])); `signal(PUSH_NOT_THROUGH_COND, ~(rand_a[0] ? rand_a[4:0] : 5'hb)); - `signal(REMOVE_NOT_NOT, ~`DFG(~`DFG(rand_a))); - `signal(REPLACE_NOT_NEQ, ~`DFG(rand_a != rand_b)); - `signal(REPLACE_NOT_EQ, ~`DFG(rand_a == rand_b)); + `signal(REMOVE_NOT_NOT, ~~rand_a); + `signal(REPLACE_NOT_NEQ, ~(rand_a != rand_b)); + `signal(REPLACE_NOT_EQ, ~(rand_a == rand_b)); `signal(REPLACE_NOT_OF_CONST, ~4'd0); `signal(REPLACE_AND_OF_NOT_AND_NOT, ~rand_a[1] & ~rand_b[1]); `signal(REPLACE_AND_OF_NOT_AND_NEQ, ~rand_a[2] & (rand_b != 64'd2)); `signal(REPLACE_AND_OF_CONST_AND_CONST, const_a & const_b); - `signal(REPLACE_AND_WITH_ZERO, `ZERO & rand_a); - `signal(REMOVE_AND_WITH_ONES, `ONES & rand_a); + `signal(REPLACE_AND_WITH_ZERO, 64'd0 & rand_a); + `signal(REMOVE_AND_WITH_ONES, -64'd1 & rand_a); `signal(REPLACE_CONTRADICTORY_AND, rand_a & ~rand_a); `signal(REPLACE_OR_OF_NOT_AND_NOT, ~rand_a[3] | ~rand_b[3]); `signal(REPLACE_OR_OF_NOT_AND_NEQ, ~rand_a[4] | (rand_b != 64'd3)); `signal(REPLACE_OR_OF_CONCAT_ZERO_LHS_AND_CONCAT_RHS_ZERO, {2'd0, rand_a[1:0]} | {rand_b[1:0], 2'd0}); `signal(REPLACE_OR_OF_CONCAT_LHS_ZERO_AND_CONCAT_ZERO_RHS, {rand_a[1:0], 2'd0} | {2'd0, rand_b[1:0]}); `signal(REPLACE_OR_OF_CONST_AND_CONST, const_a | const_b); - `signal(REMOVE_OR_WITH_ZERO, `ZERO | 
rand_a); - `signal(REPLACE_OR_WITH_ONES, `ONES | rand_a); + `signal(REMOVE_OR_WITH_ZERO, 64'd0 | rand_a); + `signal(REPLACE_OR_WITH_ONES, -64'd1 | rand_a); `signal(REPLACE_TAUTOLOGICAL_OR, rand_a | ~rand_a); - `signal(REMOVE_SUB_ZERO, rand_a - `ZERO); + `signal(REMOVE_SUB_ZERO, rand_a - 64'd0); `signal(REPLACE_SUB_WITH_NOT, rand_a[0] - 1'b1); `signal(REMOVE_REDUNDANT_ZEXT_ON_RHS_OF_SHIFT, rand_a << {2'b0, rand_a[2:0]}); `signal(REPLACE_EQ_OF_CONST_AND_CONST, 4'd0 == 4'd1); @@ -80,30 +133,30 @@ module t ( `signal(REPLACE_CONCAT_ZERO_AND_SEL_TOP_WITH_SHIFTR, {62'd0, rand_a[63:62]}); `signal(REPLACE_CONCAT_SEL_BOTTOM_AND_ZERO_WITH_SHIFTL, {rand_a[1:0], 62'd0}); `signal(PUSH_CONCAT_THROUGH_NOTS, {~(rand_a+64'd101), ~(rand_b+64'd101)} ); - `signal(REMOVE_CONCAT_OF_ADJOINING_SELS, {`DFG(rand_a[10:3]), `DFG(rand_a[2:1])}); - `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:3], {rand_a[2:1], rand_b}}); + `signal(REMOVE_CONCAT_OF_ADJOINING_SELS, {rand_a[10:3], rand_a[2:1]}); + `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:3], `DFG({rand_a[2:1], rand_b})}); `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {`DFG({rand_b, rand_a[10:3]}), rand_a[2:1]}); - `signal(REMOVE_COND_WITH_FALSE_CONDITION, &`ZERO ? rand_a : rand_b); - `signal(REMOVE_COND_WITH_TRUE_CONDITION, |`ONES ? rand_a : rand_b); - `signal(SWAP_COND_WITH_NOT_CONDITION, (~rand_a[0] & |`ONES) ? rand_a : rand_b); + `signal(REMOVE_COND_WITH_FALSE_CONDITION, 1'd0 ? rand_a : rand_b); + `signal(REMOVE_COND_WITH_TRUE_CONDITION, 1'd1 ? rand_a : rand_b); + `signal(SWAP_COND_WITH_NOT_CONDITION, (~rand_a[0] & 1'd1) ? rand_a : rand_b); `signal(SWAP_COND_WITH_NEQ_CONDITION, rand_b != rand_a ? rand_a : rand_b); `signal(PULL_NOTS_THROUGH_COND, rand_a[0] ? ~rand_a[4:0] : ~rand_b[4:0]); - `signal(REPLACE_COND_WITH_THEN_BRANCH_ZERO, rand_a[0] ? |`ZERO : rand_a[1]); - `signal(REPLACE_COND_WITH_THEN_BRANCH_ONES, rand_a[0] ? 
|`ONES : rand_a[1]); - `signal(REPLACE_COND_WITH_ELSE_BRANCH_ZERO, rand_a[0] ? rand_a[1] : |`ZERO); - `signal(REPLACE_COND_WITH_ELSE_BRANCH_ONES, rand_a[0] ? rand_a[1] : |`ONES); + `signal(REPLACE_COND_WITH_THEN_BRANCH_ZERO, rand_a[0] ? 1'd0 : rand_a[1]); + `signal(REPLACE_COND_WITH_THEN_BRANCH_ONES, rand_a[0] ? 1'd1 : rand_a[1]); + `signal(REPLACE_COND_WITH_ELSE_BRANCH_ZERO, rand_a[0] ? rand_a[1] : 1'd0); + `signal(REPLACE_COND_WITH_ELSE_BRANCH_ONES, rand_a[0] ? rand_a[1] : 1'd1); `signal(INLINE_ARRAYSEL, array[0]); - `signal(PUSH_BITWISE_THROUGH_REDUCTION_AND, (&rand_a) & (&rand_b)); - `signal(PUSH_BITWISE_THROUGH_REDUCTION_OR, (|rand_a) | (|rand_b)); - `signal(PUSH_BITWISE_THROUGH_REDUCTION_XOR, (^rand_a) ^ (^rand_b)); - `signal(PUSH_REDUCTION_THROUGH_CONCAT_AND, &`DFG({(rand_a + 64'd102), rand_b})); - `signal(PUSH_REDUCTION_THROUGH_CONCAT_OR, |`DFG({(rand_a + 64'd103), rand_b})); - `signal(PUSH_REDUCTION_THROUGH_CONCAT_XOR, ^`DFG({(rand_a + 64'd104), rand_b})); - `signal(REMOVE_WIDTH_ONE_REDUCTION_AND, &`DFG({randbit_a ^ rand_a[0], rand_b})); - `signal(REMOVE_WIDTH_ONE_REDUCTION_OR, |`DFG({randbit_a ^ rand_a[1], rand_b})); - `signal(REMOVE_WIDTH_ONE_REDUCTION_XOR, ^`DFG({randbit_a ^ rand_a[2], rand_b})); - `signal(REMOVE_XOR_WITH_ZERO, `ZERO ^ rand_a); - `signal(REMOVE_XOR_WITH_ONES, `ONES ^ rand_a); + `signal(PUSH_BITWISE_THROUGH_REDUCTION_AND, (&(rand_a + 64'd105)) & (&(rand_b + 64'd108))); + `signal(PUSH_BITWISE_THROUGH_REDUCTION_OR, (|(rand_a + 64'd106)) | (|(rand_b + 64'd109))); + `signal(PUSH_BITWISE_THROUGH_REDUCTION_XOR, (^(rand_a + 64'd107)) ^ (^(rand_b + 64'd110))); + `signal(PUSH_REDUCTION_THROUGH_CONCAT_AND, &{(rand_a + 64'd102), rand_b}); + `signal(PUSH_REDUCTION_THROUGH_CONCAT_OR, |{(rand_a + 64'd103), rand_b}); + `signal(PUSH_REDUCTION_THROUGH_CONCAT_XOR, ^{(rand_a + 64'd104), rand_b}); + `signal(REMOVE_WIDTH_ONE_REDUCTION_AND, &{randbit_a ^ rand_a[0], rand_b}); + `signal(REMOVE_WIDTH_ONE_REDUCTION_OR, |{randbit_a ^ rand_a[1], rand_b}); + 
`signal(REMOVE_WIDTH_ONE_REDUCTION_XOR, ^{randbit_a ^ rand_a[2], rand_b}); + `signal(REMOVE_XOR_WITH_ZERO, 64'd0 ^ rand_a); + `signal(REMOVE_XOR_WITH_ONES, -64'd1 ^ rand_a); `signal(REPLACE_COND_DEC, randbit_a ? rand_b - 64'b1 : rand_b); `signal(REPLACE_COND_INC, randbit_a ? rand_b + 64'b1 : rand_b); `signal(RIGHT_LEANING_ASSOC, (((rand_a + rand_b) + rand_a) + rand_b)); @@ -125,6 +178,8 @@ module t ( always @(posedge randbit_a) if ($c(0)) $display(sel_from_not); // Do not remove signal // Assigned at the end to avoid inlining by other passes - assign const_a = (rand_a | ~rand_a) & 64'h0123456789abcdef; - assign const_b = ~(rand_a & ~rand_a) & 64'h98badefc10325647; + assign const_a = 64'h0123456789abcdef; + assign const_b = 64'h98badefc10325647; + assign sconst_a = 64'hfedcba9876543210; + assign sconst_b = 64'hba0123456789cdef; endmodule