From e0fc69a2f0e64f02582e0e4a93f7ccf936614387 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sun, 22 Mar 2026 13:34:33 +0000 Subject: [PATCH] Optimize CountOnes in Dfg --- src/V3Dfg.cpp | 8 +++ src/V3DfgCse.cpp | 2 + src/V3DfgDfgToAst.cpp | 8 +++ src/V3DfgPeephole.cpp | 114 ++++++++++++++++++++++++++++++++ src/V3DfgPeepholePatterns.h | 2 + src/astgen | 1 - test_regress/t/t_dfg_peephole.v | 5 ++ 7 files changed, 139 insertions(+), 1 deletion(-) diff --git a/src/V3Dfg.cpp b/src/V3Dfg.cpp index 05172e348..76676790e 100644 --- a/src/V3Dfg.cpp +++ b/src/V3Dfg.cpp @@ -722,6 +722,14 @@ void DfgVertex::typeCheck(const DfgGraph& dfg) const { return; } + case VDfgType::CountOnes: { + const DfgCountOnes& v = *as(); + CHECK(v.isPacked(), "Should be Packed type"); + CHECK(v.srcp()->isPacked(), "Source should be Packed type"); + CHECK(v.size() == 32U, "Should yield a 32-bit result"); + return; + } + case VDfgType::Pow: case VDfgType::PowSS: case VDfgType::PowSU: diff --git a/src/V3DfgCse.cpp b/src/V3DfgCse.cpp index d1831b752..b72707494 100644 --- a/src/V3DfgCse.cpp +++ b/src/V3DfgCse.cpp @@ -80,6 +80,7 @@ class V3DfgCse final { case VDfgType::BufIf1: case VDfgType::Concat: case VDfgType::Cond: + case VDfgType::CountOnes: case VDfgType::Div: case VDfgType::DivS: case VDfgType::Eq: @@ -200,6 +201,7 @@ class V3DfgCse final { case VDfgType::BufIf1: case VDfgType::Concat: case VDfgType::Cond: + case VDfgType::CountOnes: case VDfgType::Div: case VDfgType::DivS: case VDfgType::Eq: diff --git a/src/V3DfgDfgToAst.cpp b/src/V3DfgDfgToAst.cpp index 2aadb02fb..dd9d08f22 100644 --- a/src/V3DfgDfgToAst.cpp +++ b/src/V3DfgDfgToAst.cpp @@ -52,6 +52,14 @@ T_Node* makeNode(const T_Vertex* vtxp, Ops... 
ops) { //====================================================================== // Vertices needing special conversion +template <> +AstCountOnes* makeNode( // + const DfgCountOnes* vtxp, AstNodeExpr* op1) { + AstCountOnes* const nodep = new AstCountOnes{vtxp->fileline(), op1}; + nodep->dtypeSetLogicSized(vtxp->width(), VSigning::UNSIGNED); + return nodep; +} + template <> AstExtend* makeNode( // const DfgExtend* vtxp, AstNodeExpr* op1) { diff --git a/src/V3DfgPeephole.cpp b/src/V3DfgPeephole.cpp index 7c09632a6..2cafbf3f0 100644 --- a/src/V3DfgPeephole.cpp +++ b/src/V3DfgPeephole.cpp @@ -964,6 +964,61 @@ class V3DfgPeephole final : public DfgVisitor { return false; } + // Given an operand of an Add, return the term that could be used for converting to CountOnes + // Result is a tuple of (Vertex, Lsb, Width); Vertex is nullptr if not applicable + std::tuple addToCountOnesTerm(DfgVertex* vtxp) { + if (DfgConcat* const oCatp = vtxp->cast()) { + if (isZero(oCatp->lhsp())) { + if (DfgCountOnes* const countOnesp = oCatp->rhsp()->cast()) { + // Zero extended count ones + if (DfgSel* const selp = countOnesp->srcp()->cast()) { + return {selp->fromp(), selp->lsb(), selp->width()}; + } + } else if (DfgSel* const selp = oCatp->rhsp()->cast()) { + // Zero extended single bit select + if (selp->dtype() == m_bitDType) { // + return {selp->fromp(), selp->lsb(), selp->width()}; + } + } + } + return {nullptr, 0, 0}; + } + if (DfgCountOnes* const countOnesp = vtxp->cast()) { + // Simple count ones + if (DfgSel* const selp = countOnesp->srcp()->cast()) { + return {selp->fromp(), selp->lsb(), selp->width()}; + } + return {nullptr, 0, 0}; + } + if (DfgSel* const oSelp = vtxp->cast()) { + if (oSelp->lsb() == 0) { + // Truncated count ones + if (DfgCountOnes* const countOnesp = oSelp->fromp()->cast()) { + // Count ones of a select, truncated to its low bits + if (DfgSel* const selp = countOnesp->srcp()->cast()) { + return {selp->fromp(), selp->lsb(), selp->width()}; + } + } + } + // Single bit select + if (oSelp->dtype() == m_bitDType) { // + return 
{oSelp->fromp(), oSelp->lsb(), 1}; + } + return {nullptr, 0, 0}; + } + // Altered form of extended MSB + if (DfgShiftR* const shiftrp = vtxp->cast()) { + if (DfgConst* const rConstp = shiftrp->rhsp()->cast()) { + if (rConstp->toU32() == shiftrp->width() - 1) { + return {shiftrp->lhsp(), shiftrp->width() - 1, 1}; + } + } + return {nullptr, 0, 0}; + } + // Not applicable + return {nullptr, 0, 0}; + } + // VISIT methods void visit(DfgVertex*) override {} @@ -1461,6 +1516,65 @@ class V3DfgPeephole final : public DfgVisitor { if (associativeBinary(vtxp)) return; if (commutativeBinary(vtxp)) return; + + DfgVertex* const lhsp = vtxp->lhsp(); + DfgVertex* const rhsp = vtxp->rhsp(); + FileLine* const flp = vtxp->fileline(); + + if (isZero(lhsp)) { + APPLYING(REMOVE_ADD_ZERO) { + replace(rhsp); + return; + } + } + + const std::tuple lTerm = addToCountOnesTerm(lhsp); + if (DfgVertex* const lVtxp = std::get<0>(lTerm)) { + std::tuple rTerm = addToCountOnesTerm(rhsp); + DfgVertex* extrap = nullptr; + if (!std::get<0>(rTerm)) { + if (DfgAdd* const rAddp = rhsp->cast()) { + rTerm = addToCountOnesTerm(rAddp->lhsp()); + extrap = rAddp->rhsp(); + } + } + + if (DfgVertex* const rVtxp = std::get<0>(rTerm)) { + if (isSame(lVtxp, rVtxp)) { + const uint32_t lLsb = std::get<1>(lTerm); + const uint32_t rLsb = std::get<1>(rTerm); + const uint32_t lWidth = std::get<2>(lTerm); + const uint32_t rWidth = std::get<2>(rTerm); + bool adjoined = true; + uint32_t lsb = 0; + if (lLsb + lWidth == rLsb) { + lsb = lLsb; + } else if (lLsb == rLsb + rWidth) { + lsb = rLsb; + } else { + adjoined = false; + } + if (adjoined) { + APPLYING(REPLACE_ADD_WITH_COUNT_ONES) { + DfgSel* const selp + = make(vtxp->fileline(), + DfgDataType::packed(lWidth + rWidth), lVtxp, lsb); + DfgVertex* resp + = make(flp, DfgDataType::packed(32), selp); + if (vtxp->width() > 32U) { + resp = make(vtxp, makeZero(flp, vtxp->width() - 32U), + resp); + } else if (vtxp->width() < 32U) { + resp = make(vtxp, resp, 0U); + } + if (extrap) 
resp = make(vtxp, resp, extrap); + replace(resp); + return; + } + } + } + } + } } void visit(DfgArraySel* const vtxp) override { diff --git a/src/V3DfgPeepholePatterns.h b/src/V3DfgPeepholePatterns.h index 9eed0d456..522d5da9a 100644 --- a/src/V3DfgPeepholePatterns.h +++ b/src/V3DfgPeepholePatterns.h @@ -56,6 +56,7 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PUSH_SEL_THROUGH_SPLICE) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PUSH_SHIFTL_THROUGH_COND) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PUSH_SHIFTR_THROUGH_COND) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_ADD_ZERO) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_AND_WITH_ONES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_AND_WITH_SELF) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_CONCAT_OF_ADJOINING_SELS) \ @@ -76,6 +77,7 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_VAR) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_WIDTH_ONE_REDUCTION) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_XOR_WITH_ZERO) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_ADD_WITH_COUNT_ONES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NEQ) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NOT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_WITH_ZERO) \ diff --git a/src/astgen b/src/astgen index 03fc303d7..b9a9fed51 100755 --- a/src/astgen +++ b/src/astgen @@ -1505,7 +1505,6 @@ DfgIgnored = ( "CAwait", "CCast", "CLog2", - "CountOnes", "IsUnknown", "NullCheck", "OneHot", diff --git a/test_regress/t/t_dfg_peephole.v b/test_regress/t/t_dfg_peephole.v index d703f0886..af36586c1 100644 --- a/test_regress/t/t_dfg_peephole.v +++ b/test_regress/t/t_dfg_peephole.v @@ -290,6 +290,11 @@ module t ( `signal(REPLACE_SHIFTL_SHIFTL, rand_a << 2 << 3); `signal(REPLACE_SHIFTR_SHIFTR, rand_a >> 2 >> 3); `signal(PUSH_COMMUTATIVE_BINARY_THROUGH_COND, 58'h1 + 
(rand_a[0] ? rand_b[1 +: 58] : ~rand_b[1 +: 58])); + `signal(REMOVE_ADD_ZERO, rand_a + '0); + `signal(REPLACE_ADD_WITH_COUNT_ONES_A, 4'(rand_a[63]) + 4'(rand_a[62]) + 4'(rand_a[61])); + `signal(REPLACE_ADD_WITH_COUNT_ONES_B, 32'(rand_a[63]) + 32'(rand_a[62]) + 32'(rand_a[61])); + `signal(REPLACE_ADD_WITH_COUNT_ONES_C, 200'(rand_a[63]) + 200'(rand_a[62]) + 200'(rand_a[61])); + `signal(REPLACE_ADD_WITH_COUNT_ONES_D, 1'(rand_a[63]) + 1'(rand_a[62]) + 1'(rand_a[61])); // Operators that should work wiht mismatched widths `signal(MISMATCHED_ShiftL,const_a << 4'd2);