Optimize CountOnes in Dfg
This commit is contained in:
parent
382f5003ca
commit
e0fc69a2f0
|
|
@ -722,6 +722,14 @@ void DfgVertex::typeCheck(const DfgGraph& dfg) const {
|
|||
return;
|
||||
}
|
||||
|
||||
case VDfgType::CountOnes: {
|
||||
const DfgCountOnes& v = *as<DfgCountOnes>();
|
||||
CHECK(v.isPacked(), "Should be Packed type");
|
||||
CHECK(v.srcp()->isPacked(), "Source should be Packed type");
|
||||
CHECK(v.size() == 32U, "Should yield a 32-bit result");
|
||||
return;
|
||||
}
|
||||
|
||||
case VDfgType::Pow:
|
||||
case VDfgType::PowSS:
|
||||
case VDfgType::PowSU:
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ class V3DfgCse final {
|
|||
case VDfgType::BufIf1:
|
||||
case VDfgType::Concat:
|
||||
case VDfgType::Cond:
|
||||
case VDfgType::CountOnes:
|
||||
case VDfgType::Div:
|
||||
case VDfgType::DivS:
|
||||
case VDfgType::Eq:
|
||||
|
|
@ -200,6 +201,7 @@ class V3DfgCse final {
|
|||
case VDfgType::BufIf1:
|
||||
case VDfgType::Concat:
|
||||
case VDfgType::Cond:
|
||||
case VDfgType::CountOnes:
|
||||
case VDfgType::Div:
|
||||
case VDfgType::DivS:
|
||||
case VDfgType::Eq:
|
||||
|
|
|
|||
|
|
@ -52,6 +52,14 @@ T_Node* makeNode(const T_Vertex* vtxp, Ops... ops) {
|
|||
//======================================================================
|
||||
// Vertices needing special conversion
|
||||
|
||||
// Specialization of makeNode for DfgCountOnes vertices.
// Builds an AstCountOnes over 'op1' at the vertex's file line, then sets the
// node's data type via dtypeSetLogicSized() using the vertex's width and
// VSigning::UNSIGNED, and returns the new node.
template <>
|
||||
AstCountOnes* makeNode<AstCountOnes, DfgCountOnes, AstNodeExpr*>( //
|
||||
const DfgCountOnes* vtxp, AstNodeExpr* op1) {
|
||||
AstCountOnes* const nodep = new AstCountOnes{vtxp->fileline(), op1};
|
||||
// The data type is assigned explicitly after construction, from the vertex
|
||||
nodep->dtypeSetLogicSized(vtxp->width(), VSigning::UNSIGNED);
|
||||
return nodep;
|
||||
}
|
||||
|
||||
template <>
|
||||
AstExtend* makeNode<AstExtend, DfgExtend, AstNodeExpr*>( //
|
||||
const DfgExtend* vtxp, AstNodeExpr* op1) {
|
||||
|
|
|
|||
|
|
@ -964,6 +964,61 @@ class V3DfgPeephole final : public DfgVisitor {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Given an operand of an Add, return the term that could be used for converting to CountOnes
|
||||
// Result is a tuple of (Vertex, Lsb, Width)
|
||||
// Pattern-matches 'vtxp' against the operand shapes handled below and returns
// (source vertex, LSB, width) describing the bit range of the source vertex
// that the operand counts. Returns {nullptr, 0, 0} when no shape matches.
// Each top-level branch returns on its own, so an operand is only ever tested
// against the shapes belonging to its own vertex type.
std::tuple<DfgVertex*, uint32_t, uint32_t> addToCountOnesTerm(DfgVertex* vtxp) {
|
||||
// Concat operand: only considered when the lhs part is zero
if (DfgConcat* const oCatp = vtxp->cast<DfgConcat>()) {
|
||||
if (isZero(oCatp->lhsp())) {
|
||||
if (DfgCountOnes* const countOnesp = oCatp->rhsp()->cast<DfgCountOnes>()) {
|
||||
// Zero extended count ones
|
||||
// Only usable if the counted source is itself a select
if (DfgSel* const selp = countOnesp->srcp()->cast<DfgSel>()) {
|
||||
return {selp->fromp(), selp->lsb(), selp->width()};
|
||||
}
|
||||
} else if (DfgSel* const selp = oCatp->rhsp()->cast<DfgSel>()) {
|
||||
// Zero extended single bit select
|
||||
if (selp->dtype() == m_bitDType) { //
|
||||
return {selp->fromp(), selp->lsb(), selp->width()};
|
||||
}
|
||||
}
|
||||
}
|
||||
// Any other Concat is not a usable term
return {nullptr, 0, 0};
|
||||
}
|
||||
if (DfgCountOnes* const countOnesp = vtxp->cast<DfgCountOnes>()) {
|
||||
// Simple count ones
|
||||
// Only usable if the counted source is itself a select
if (DfgSel* const selp = countOnesp->srcp()->cast<DfgSel>()) {
|
||||
return {selp->fromp(), selp->lsb(), selp->width()};
|
||||
}
|
||||
return {nullptr, 0, 0};
|
||||
}
|
||||
if (DfgSel* const oSelp = vtxp->cast<DfgSel>()) {
|
||||
if (oSelp->lsb() == 0) {
|
||||
// Truncated count ones
|
||||
if (DfgCountOnes* const countOnesp = oSelp->fromp()->cast<DfgCountOnes>()) {
|
||||
// The counted source must itself be a select; report its range
|
||||
if (DfgSel* const selp = countOnesp->srcp()->cast<DfgSel>()) {
|
||||
return {selp->fromp(), selp->lsb(), selp->width()};
|
||||
}
|
||||
}
|
||||
}
|
||||
// Single bit select
|
||||
// Also reached when the truncated count ones check above did not match
if (oSelp->dtype() == m_bitDType) { //
|
||||
return {oSelp->fromp(), oSelp->lsb(), 1};
|
||||
}
|
||||
return {nullptr, 0, 0};
|
||||
}
|
||||
// Altered form of extended MSB
|
||||
if (DfgShiftR* const shiftrp = vtxp->cast<DfgShiftR>()) {
|
||||
// Only a constant shift amount can be matched
if (DfgConst* const rConstp = shiftrp->rhsp()->cast<DfgConst>()) {
|
||||
// Shifting right by width - 1 is reported as the single top bit of the lhs
if (rConstp->toU32() == shiftrp->width() - 1) {
|
||||
return {shiftrp->lhsp(), shiftrp->width() - 1, 1};
|
||||
}
|
||||
}
|
||||
return {nullptr, 0, 0};
|
||||
}
|
||||
// Not applicable
|
||||
return {nullptr, 0, 0};
|
||||
}
|
||||
|
||||
// VISIT methods
|
||||
|
||||
void visit(DfgVertex*) override {}
|
||||
|
|
@ -1461,6 +1516,65 @@ class V3DfgPeephole final : public DfgVisitor {
|
|||
if (associativeBinary(vtxp)) return;
|
||||
|
||||
if (commutativeBinary(vtxp)) return;
|
||||
|
||||
DfgVertex* const lhsp = vtxp->lhsp();
|
||||
DfgVertex* const rhsp = vtxp->rhsp();
|
||||
FileLine* const flp = vtxp->fileline();
|
||||
|
||||
if (isZero(lhsp)) {
|
||||
APPLYING(REMOVE_ADD_ZERO) {
|
||||
replace(rhsp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const std::tuple<DfgVertex*, uint32_t, uint32_t> lTerm = addToCountOnesTerm(lhsp);
|
||||
if (DfgVertex* const lVtxp = std::get<0>(lTerm)) {
|
||||
std::tuple<DfgVertex*, uint32_t, uint32_t> rTerm = addToCountOnesTerm(rhsp);
|
||||
DfgVertex* extrap = nullptr;
|
||||
if (!std::get<0>(rTerm)) {
|
||||
if (DfgAdd* const rAddp = rhsp->cast<DfgAdd>()) {
|
||||
rTerm = addToCountOnesTerm(rAddp->lhsp());
|
||||
extrap = rAddp->rhsp();
|
||||
}
|
||||
}
|
||||
|
||||
if (DfgVertex* const rVtxp = std::get<0>(rTerm)) {
|
||||
if (isSame(lVtxp, rVtxp)) {
|
||||
const uint32_t lLsb = std::get<1>(lTerm);
|
||||
const uint32_t rLsb = std::get<1>(rTerm);
|
||||
const uint32_t lWidth = std::get<2>(lTerm);
|
||||
const uint32_t rWidth = std::get<2>(rTerm);
|
||||
bool adjoined = true;
|
||||
uint32_t lsb = 0;
|
||||
if (lLsb + lWidth == rLsb) {
|
||||
lsb = lLsb;
|
||||
} else if (lLsb == rLsb + rWidth) {
|
||||
lsb = rLsb;
|
||||
} else {
|
||||
adjoined = false;
|
||||
}
|
||||
if (adjoined) {
|
||||
APPLYING(REPLACE_ADD_WITH_COUNT_ONES) {
|
||||
DfgSel* const selp
|
||||
= make<DfgSel>(vtxp->fileline(),
|
||||
DfgDataType::packed(lWidth + rWidth), lVtxp, lsb);
|
||||
DfgVertex* resp
|
||||
= make<DfgCountOnes>(flp, DfgDataType::packed(32), selp);
|
||||
if (vtxp->width() > 32U) {
|
||||
resp = make<DfgConcat>(vtxp, makeZero(flp, vtxp->width() - 32U),
|
||||
resp);
|
||||
} else if (vtxp->width() < 32U) {
|
||||
resp = make<DfgSel>(vtxp, resp, 0U);
|
||||
}
|
||||
if (extrap) resp = make<DfgAdd>(vtxp, resp, extrap);
|
||||
replace(resp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void visit(DfgArraySel* const vtxp) override {
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@
|
|||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PUSH_SEL_THROUGH_SPLICE) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PUSH_SHIFTL_THROUGH_COND) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, PUSH_SHIFTR_THROUGH_COND) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_ADD_ZERO) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_AND_WITH_ONES) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_AND_WITH_SELF) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_CONCAT_OF_ADJOINING_SELS) \
|
||||
|
|
@ -76,6 +77,7 @@
|
|||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_VAR) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_WIDTH_ONE_REDUCTION) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REMOVE_XOR_WITH_ZERO) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_ADD_WITH_COUNT_ONES) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NEQ) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_OF_NOT_AND_NOT) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_AND_WITH_ZERO) \
|
||||
|
|
|
|||
|
|
@ -1505,7 +1505,6 @@ DfgIgnored = (
|
|||
"CAwait",
|
||||
"CCast",
|
||||
"CLog2",
|
||||
"CountOnes",
|
||||
"IsUnknown",
|
||||
"NullCheck",
|
||||
"OneHot",
|
||||
|
|
|
|||
|
|
@ -290,6 +290,11 @@ module t (
|
|||
`signal(REPLACE_SHIFTL_SHIFTL, rand_a << 2 << 3);
|
||||
`signal(REPLACE_SHIFTR_SHIFTR, rand_a >> 2 >> 3);
|
||||
`signal(PUSH_COMMUTATIVE_BINARY_THROUGH_COND, 58'h1 + (rand_a[0] ? rand_b[1 +: 58] : ~rand_b[1 +: 58]));
|
||||
`signal(REMOVE_ADD_ZERO, rand_a + '0);
|
||||
`signal(REPLACE_ADD_WITH_COUNT_ONES_A, 4'(rand_a[63]) + 4'(rand_a[62]) + 4'(rand_a[61]));
|
||||
`signal(REPLACE_ADD_WITH_COUNT_ONES_B, 32'(rand_a[63]) + 32'(rand_a[62]) + 32'(rand_a[61]));
|
||||
`signal(REPLACE_ADD_WITH_COUNT_ONES_C, 200'(rand_a[63]) + 200'(rand_a[62]) + 200'(rand_a[61]));
|
||||
`signal(REPLACE_ADD_WITH_COUNT_ONES_D, 1'(rand_a[63]) + 1'(rand_a[62]) + 1'(rand_a[61]));
|
||||
|
||||
// Operators that should work with mismatched widths
|
||||
`signal(MISMATCHED_ShiftL,const_a << 4'd2);
|
||||
|
|
|
|||
Loading…
Reference in New Issue