Optimize CountOnes in Dfg

This commit is contained in:
Geza Lore 2026-03-22 13:34:33 +00:00
parent 382f5003ca
commit e0fc69a2f0
7 changed files with 139 additions and 1 deletions

View File

@ -722,6 +722,14 @@ void DfgVertex::typeCheck(const DfgGraph& dfg) const {
return;
}
case VDfgType::CountOnes: {
const DfgCountOnes& v = *as<DfgCountOnes>();
CHECK(v.isPacked(), "Should be Packed type");
CHECK(v.srcp()->isPacked(), "Source should be Packed type");
CHECK(v.size() == 32U, "Should yield a 32-bit result");
return;
}
case VDfgType::Pow:
case VDfgType::PowSS:
case VDfgType::PowSU:

View File

@ -80,6 +80,7 @@ class V3DfgCse final {
case VDfgType::BufIf1:
case VDfgType::Concat:
case VDfgType::Cond:
case VDfgType::CountOnes:
case VDfgType::Div:
case VDfgType::DivS:
case VDfgType::Eq:
@ -200,6 +201,7 @@ class V3DfgCse final {
case VDfgType::BufIf1:
case VDfgType::Concat:
case VDfgType::Cond:
case VDfgType::CountOnes:
case VDfgType::Div:
case VDfgType::DivS:
case VDfgType::Eq:

View File

@ -52,6 +52,14 @@ T_Node* makeNode(const T_Vertex* vtxp, Ops... ops) {
//======================================================================
// Vertices needing special conversion
template <>
AstCountOnes* makeNode<AstCountOnes, DfgCountOnes, AstNodeExpr*>( //
    const DfgCountOnes* vtxp, AstNodeExpr* op1) {
    // Build the AstCountOnes at the vertex's source location, over the given operand
    FileLine* const flp = vtxp->fileline();
    AstCountOnes* const countOnesp = new AstCountOnes{flp, op1};
    // The node gets an unsigned logic data type as wide as the vertex itself
    countOnesp->dtypeSetLogicSized(vtxp->width(), VSigning::UNSIGNED);
    return countOnesp;
}
template <>
AstExtend* makeNode<AstExtend, DfgExtend, AstNodeExpr*>( //
const DfgExtend* vtxp, AstNodeExpr* op1) {

View File

@ -964,6 +964,61 @@ class V3DfgPeephole final : public DfgVisitor {
return false;
}
// Given an operand of an Add, return the term that could be used for converting the Add
// to a CountOnes. The result is a tuple of (Vertex, Lsb, Width) naming the bit range
// [Lsb +: Width] of Vertex that the operand counts, or (nullptr, 0, 0) if the operand
// has none of the forms recognized below.
std::tuple<DfgVertex*, uint32_t, uint32_t> addToCountOnesTerm(DfgVertex* vtxp) {
    using Term = std::tuple<DfgVertex*, uint32_t, uint32_t>;
    const Term none{nullptr, 0, 0};

    // If 'candp' is a CountOnes whose source is a Sel, yield that Sel, otherwise nullptr
    const auto countedSel = [](DfgVertex* const candp) -> DfgSel* {
        DfgCountOnes* const cntp = candp->cast<DfgCountOnes>();
        return cntp ? cntp->srcp()->cast<DfgSel>() : nullptr;
    };
    // The range selected by a Sel, expressed as a term
    const auto rangeOf = [](DfgSel* const selp) -> Term {
        return {selp->fromp(), selp->lsb(), selp->width()};
    };

    // Concatenation: only a zero upper part (i.e. a zero extension) is of interest
    if (DfgConcat* const catp = vtxp->cast<DfgConcat>()) {
        if (!isZero(catp->lhsp())) return none;
        DfgVertex* const lowp = catp->rhsp();
        // Zero extended count ones
        if (DfgSel* const srcp = countedSel(lowp)) return rangeOf(srcp);
        // Zero extended single bit select
        if (DfgSel* const bitp = lowp->cast<DfgSel>()) {
            if (bitp->dtype() == m_bitDType) return rangeOf(bitp);
        }
        return none;
    }

    // Simple count ones
    if (vtxp->cast<DfgCountOnes>()) {
        if (DfgSel* const srcp = countedSel(vtxp)) return rangeOf(srcp);
        return none;
    }

    // Select: either the low bits of a count ones, or a single bit of anything
    if (DfgSel* const outerp = vtxp->cast<DfgSel>()) {
        // Truncated count ones
        if (outerp->lsb() == 0) {
            if (DfgSel* const srcp = countedSel(outerp->fromp())) return rangeOf(srcp);
        }
        // Single bit select
        if (outerp->dtype() == m_bitDType) return {outerp->fromp(), outerp->lsb(), 1};
        return none;
    }

    // Altered form of extended MSB: right shift by the constant 'width - 1'
    if (DfgShiftR* const shrp = vtxp->cast<DfgShiftR>()) {
        DfgConst* const amountp = shrp->rhsp()->cast<DfgConst>();
        if (!amountp) return none;
        const uint32_t msb = shrp->width() - 1;
        if (amountp->toU32() != msb) return none;
        return {shrp->lhsp(), msb, 1};
    }

    // Not applicable
    return none;
}
// VISIT methods
// No-op overload taking a generic DfgVertex: the body is empty, so nothing is done here
void visit(DfgVertex*) override {}
@ -1461,6 +1516,65 @@ class V3DfgPeephole final : public DfgVisitor {
if (associativeBinary(vtxp)) return;
if (commutativeBinary(vtxp)) return;
DfgVertex* const lhsp = vtxp->lhsp();
DfgVertex* const rhsp = vtxp->rhsp();
FileLine* const flp = vtxp->fileline();
if (isZero(lhsp)) {
APPLYING(REMOVE_ADD_ZERO) {
replace(rhsp);
return;
}
}
const std::tuple<DfgVertex*, uint32_t, uint32_t> lTerm = addToCountOnesTerm(lhsp);
if (DfgVertex* const lVtxp = std::get<0>(lTerm)) {
std::tuple<DfgVertex*, uint32_t, uint32_t> rTerm = addToCountOnesTerm(rhsp);
DfgVertex* extrap = nullptr;
if (!std::get<0>(rTerm)) {
if (DfgAdd* const rAddp = rhsp->cast<DfgAdd>()) {
rTerm = addToCountOnesTerm(rAddp->lhsp());
extrap = rAddp->rhsp();
}
}
if (DfgVertex* const rVtxp = std::get<0>(rTerm)) {
if (isSame(lVtxp, rVtxp)) {
const uint32_t lLsb = std::get<1>(lTerm);
const uint32_t rLsb = std::get<1>(rTerm);
const uint32_t lWidth = std::get<2>(lTerm);
const uint32_t rWidth = std::get<2>(rTerm);
bool adjoined = true;
uint32_t lsb = 0;
if (lLsb + lWidth == rLsb) {
lsb = lLsb;
} else if (lLsb == rLsb + rWidth) {
lsb = rLsb;
} else {
adjoined = false;
}
if (adjoined) {
APPLYING(REPLACE_ADD_WITH_COUNT_ONES) {
DfgSel* const selp
= make<DfgSel>(vtxp->fileline(),
DfgDataType::packed(lWidth + rWidth), lVtxp, lsb);
DfgVertex* resp
= make<DfgCountOnes>(flp, DfgDataType::packed(32), selp);
if (vtxp->width() > 32U) {
resp = make<DfgConcat>(vtxp, makeZero(flp, vtxp->width() - 32U),
resp);
} else if (vtxp->width() < 32U) {
resp = make<DfgSel>(vtxp, resp, 0U);
}
if (extrap) resp = make<DfgAdd>(vtxp, resp, extrap);
replace(resp);
return;
}
}
}
}
}
}
void visit(DfgArraySel* const vtxp) override {

View File

@ -56,6 +56,7 @@
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PUSH_SEL_THROUGH_SPLICE) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PUSH_SHIFTL_THROUGH_COND) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PUSH_SHIFTR_THROUGH_COND) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_ADD_ZERO) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_AND_WITH_ONES) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_AND_WITH_SELF) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_CONCAT_OF_ADJOINING_SELS) \
@ -76,6 +77,7 @@
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_VAR) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_WIDTH_ONE_REDUCTION) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_XOR_WITH_ZERO) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_ADD_WITH_COUNT_ONES) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NEQ) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NOT) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_WITH_ZERO) \

View File

@ -1505,7 +1505,6 @@ DfgIgnored = (
"CAwait",
"CCast",
"CLog2",
"CountOnes",
"IsUnknown",
"NullCheck",
"OneHot",

View File

@ -290,6 +290,11 @@ module t (
`signal(REPLACE_SHIFTL_SHIFTL, rand_a << 2 << 3);
`signal(REPLACE_SHIFTR_SHIFTR, rand_a >> 2 >> 3);
`signal(PUSH_COMMUTATIVE_BINARY_THROUGH_COND, 58'h1 + (rand_a[0] ? rand_b[1 +: 58] : ~rand_b[1 +: 58]));
`signal(REMOVE_ADD_ZERO, rand_a + '0);
`signal(REPLACE_ADD_WITH_COUNT_ONES_A, 4'(rand_a[63]) + 4'(rand_a[62]) + 4'(rand_a[61]));
`signal(REPLACE_ADD_WITH_COUNT_ONES_B, 32'(rand_a[63]) + 32'(rand_a[62]) + 32'(rand_a[61]));
`signal(REPLACE_ADD_WITH_COUNT_ONES_C, 200'(rand_a[63]) + 200'(rand_a[62]) + 200'(rand_a[61]));
`signal(REPLACE_ADD_WITH_COUNT_ONES_D, 1'(rand_a[63]) + 1'(rand_a[62]) + 1'(rand_a[61]));
// Operators that should work with mismatched widths
`signal(MISMATCHED_ShiftL,const_a << 4'd2);