Optimize commutative vertex operands in Dfg for better combining

This commit is contained in:
Geza Lore 2026-03-24 06:53:19 +00:00
parent d00f23a2b2
commit d33a81d32a
5 changed files with 124 additions and 47 deletions

View File

@ -133,6 +133,7 @@ class V3DfgPeephole final : public DfgVisitor {
struct VertexInfo final {
size_t m_workListIndex = 0; // Position of this vertx m_wlist (0 means not in list)
size_t m_generation = 0; // Generation number of this vertex
size_t m_id = 0; // Unique vertex ID (0 means unassigned)
};
// STATE
@ -144,6 +145,7 @@ class V3DfgPeephole final : public DfgVisitor {
V3DfgCache m_cache{m_dfg}; // Vertex cache to avoid creating redundant vertices
DfgVertex* m_vtxp = nullptr; // Currently considered vertex
size_t m_currentGeneration = 0; // Current generation number
size_t m_lastId = 0; // Last unique vertex ID assigned
// STATIC STATE
static V3DebugBisect s_debugBisect; // Debug aid
@ -303,6 +305,9 @@ class V3DfgPeephole final : public DfgVisitor {
// Sanity check
UASSERT_OBJ(vtxp->dtype() == dtype, vtxp, "Vertex dtype mismatch");
if (VL_UNLIKELY(v3Global.opt.debugCheck())) vtxp->typeCheck(m_dfg);
// Assign vertex ID
VertexInfo& vInfo = m_vInfo[vtxp];
if (!vInfo.m_id) vInfo.m_id = ++m_lastId;
// Add to work list
addToWorkList(vtxp);
// Return new node
@ -457,17 +462,40 @@ class V3DfgPeephole final : public DfgVisitor {
}
}
// if (a OP (b OP c)), check if (a OP b) exists and if so replace with (a OP b) OP c
if (Vertex* rVtxp = vtxp->rhsp()->template cast<Vertex>()) {
const DfgDataType& dtype
= std::is_same<Vertex, DfgConcat>::value
? DfgDataType::packed(lhsp->width() + rVtxp->lhsp()->width())
: vtxp->dtype();
if (Vertex* const cVtxp = m_cache.get<Vertex>(dtype, lhsp, rVtxp->lhsp())) {
if (cVtxp->hasSinks() && cVtxp != rhsp) {
APPLYING(REUSE_ASSOC_BINARY) {
replace(make<Vertex>(vtxp, cVtxp, rVtxp->rhsp()));
return true;
// Attempt to reuse associative binary expressions if hey already exist, e.g.:
// '(a OP (b OP c))' -> '(a OP b) OP c', iff '(a OP b)' already exists, or
// '(a OP c) OP b' iff '(a OP c)' already exists and the vertex is commutative.
// Only do this is 'b OP c' has a single use and can subsequently be removed,
// otherwise there is no improvement.
if (!rhsp->hasMultipleSinks()) {
if (Vertex* rVtxp = rhsp->template cast<Vertex>()) {
DfgVertex* const rlVtxp = rVtxp->lhsp();
DfgVertex* const rrVtxp = rVtxp->rhsp();
const DfgDataType& dtype
= std::is_same<Vertex, DfgConcat>::value
? DfgDataType::packed(lhsp->width() + rlVtxp->width())
: vtxp->dtype();
if (Vertex* const existingp = m_cache.get<Vertex>(dtype, lhsp, rlVtxp)) {
UASSERT_OBJ(existingp->hasSinks(), vtxp, "Existing vertex should be used");
if (existingp != rhsp) {
APPLYING(REUSE_ASSOC_BINARY_LHS_WITH_LHS_OF_RHS) {
replace(make<Vertex>(vtxp, existingp, rrVtxp));
return true;
}
}
}
// Concat is not commutative
if VL_CONSTEXPR_CXX17 (!std::is_same<Vertex, DfgConcat>::value) {
if (Vertex* const existingp = m_cache.get<Vertex>(dtype, lhsp, rrVtxp)) {
UASSERT_OBJ(existingp->hasSinks(), vtxp, "Existing vertex should be used");
if (existingp != rhsp) {
APPLYING(REUSE_ASSOC_BINARY_LHS_WITH_RHS_OF_RHS) {
replace(make<Vertex>(vtxp, existingp, rlVtxp));
return true;
}
}
}
}
}
@ -484,29 +512,59 @@ class V3DfgPeephole final : public DfgVisitor {
DfgVertex* const lhsp = vtxp->lhsp();
DfgVertex* const rhsp = vtxp->rhsp();
// Ensure Const is on left-hand side to simplify other patterns
if (lhsp->is<DfgConst>()) return false;
if (rhsp->is<DfgConst>()) {
APPLYING(SWAP_CONST_IN_COMMUTATIVE_BINARY) {
replace(make<Vertex>(vtxp, rhsp, lhsp));
return true;
{
const bool lIsConst = lhsp->is<DfgConst>();
const bool rIsConst = rhsp->is<DfgConst>();
if (lIsConst != rIsConst) {
if (rIsConst) {
APPLYING(SWAP_CONST_IN_COMMUTATIVE_BINARY) {
replace(make<Vertex>(vtxp, rhsp, lhsp));
return true;
}
}
return false;
}
}
// Ensure Not is on the left-hand side to simplify other patterns
if (lhsp->is<DfgNot>()) return false;
if (rhsp->is<DfgNot>()) {
APPLYING(SWAP_NOT_IN_COMMUTATIVE_BINARY) {
replace(make<Vertex>(vtxp, rhsp, lhsp));
return true;
{
const bool lIsNot = lhsp->is<DfgNot>();
const bool rIsNot = rhsp->is<DfgNot>();
if (lIsNot != rIsNot) {
if (rIsNot) {
APPLYING(SWAP_NOT_IN_COMMUTATIVE_BINARY) {
replace(make<Vertex>(vtxp, rhsp, lhsp));
return true;
}
}
return false;
}
}
// If both sides are variable references, order the side in some defined way. This
// allows CSE to later merge 'a op b' with 'b op a'.
if (lhsp->is<DfgVertexVar>() && rhsp->is<DfgVertexVar>()) {
const AstNode* const lVarp = lhsp->as<DfgVertexVar>()->nodep();
const AstNode* const rVarp = rhsp->as<DfgVertexVar>()->nodep();
if (lVarp->name() > rVarp->name()) {
APPLYING(SWAP_VAR_IN_COMMUTATIVE_BINARY) {
// Ensure same vertex is on the right-hand side to simplify other patterns
{
const bool lIsSame = lhsp->is<Vertex>();
const bool rIsSame = rhsp->is<Vertex>();
if (lIsSame != rIsSame) {
if (lIsSame) {
APPLYING(SWAP_SAME_IN_COMMUTATIVE_BINARY) {
replace(make<Vertex>(vtxp, rhsp, lhsp));
return true;
}
}
return false;
}
}
// Otherwise put sides in order based on unique iD, this makes
// 'a op b' and 'b op a' end up the same for better combining.
{
const VertexInfo& lInfo = m_vInfo[lhsp];
const VertexInfo& rInfo = m_vInfo[rhsp];
if (lInfo.m_id > rInfo.m_id) {
APPLYING(SWAP_SIDE_IN_COMMUTATIVE_BINARY) {
replace(make<Vertex>(vtxp, rhsp, lhsp));
return true;
}
@ -1443,6 +1501,7 @@ class V3DfgPeephole final : public DfgVisitor {
DfgVertex::ScopeCache scopeCache;
DfgSplicePacked* const sp = new DfgSplicePacked{m_dfg, flp, vtxp->dtype()};
sp->addDriver(catp, lsb, flp);
m_vInfo[sp].m_id = ++m_lastId;
replace(sp);
return;
}
@ -1860,6 +1919,9 @@ class V3DfgPeephole final : public DfgVisitor {
: m_dfg{dfg}
, m_ctx{ctx} {
// Assign vertex IDs
m_dfg.forEachVertex([&](DfgVertex& vtx) { m_vInfo[vtx].m_id = ++m_lastId; });
// Initialize the work list
m_wlist.reserve(m_dfg.size() * 4);

View File

@ -107,13 +107,15 @@
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_WITH_EQUIVALENT) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_ONES) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_SELF) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REUSE_ASSOC_BINARY) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REUSE_ASSOC_BINARY_LHS_WITH_LHS_OF_RHS) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REUSE_ASSOC_BINARY_LHS_WITH_RHS_OF_RHS) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, RIGHT_LEANING_ASSOC) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NEQ_CONDITION) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NOT_CONDITION) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_CONST_IN_COMMUTATIVE_BINARY) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_NOT_IN_COMMUTATIVE_BINARY) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_VAR_IN_COMMUTATIVE_BINARY)
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_NOT_IN_COMMUTATIVE_BINARY) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_SAME_IN_COMMUTATIVE_BINARY) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_SIDE_IN_COMMUTATIVE_BINARY)
// clang-format on
class VDfgPeepholePattern final {

View File

@ -218,20 +218,33 @@ module t (
`signal(REPLACE_LOGOR_WITH_OR, rand_a[0] || rand_a[1]);
`signal(RIGHT_LEANING_ASSOC, (((rand_a + rand_b) + rand_a) + rand_b));
`signal(RIGHT_LEANING_CONCET, {{{rand_a, rand_b}, rand_a}, rand_b});
`signal(REUSE_ASSOC_ADD_COMMON, rand_a[23:4] + ~rand_b[23:4]);
`signal(REUSE_ASSOC_ADD, rand_a[23:4] + (~rand_b[23:4] + rand_a[39:20]));
`signal(REUSE_ASSOC_MUL_COMMON, rand_a[23:4] * ~rand_b[23:4]);
`signal(REUSE_ASSOC_MUL, rand_a[23:4] * (~rand_b[23:4] * rand_a[39:20]));
`signal(REUSE_ASSOC_MULS_COMMON, srand_a[23:4] * ~srand_b[23:4]);
`signal(REUSE_ASSOC_MULS, srand_a[23:4] * (~srand_b[23:4] * srand_a[39:20]));
`signal(REUSE_ASSOC_AND_COMMON, rand_a[23:4] & ~rand_b[23:4]);
`signal(REUSE_ASSOC_AND, rand_a[23:4] & (~rand_b[23:4] & rand_a[39:20]));
`signal(REUSE_ASSOC_OR_COMMON, rand_a[23:4] | ~rand_b[23:4]);
`signal(REUSE_ASSOC_OR, rand_a[23:4] | (~rand_b[23:4] | rand_a[39:20]));
`signal(REUSE_ASSOC_XOR_COMMON, rand_a[23:4] ^ ~rand_b[23:4]);
`signal(REUSE_ASSOC_XOR, rand_a[23:4] ^ (~rand_b[23:4] ^ rand_a[39:20]));
`signal(REUSE_ASSOC_CAT_COMMON, {rand_a[23:4], ~rand_b[23:4]});
`signal(REUSE_ASSOC_CAT, {rand_a[23:4], {~rand_b[23:4], rand_a[39:20]}});
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_ADD_COMMON, rand_a[23:4] + ~rand_b[23:4]);
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_ADD, rand_a[23:4] + (~rand_b[23:4] + rand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_MUL_COMMON, rand_a[23:4] * ~rand_b[23:4]);
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_MUL, rand_a[23:4] * (~rand_b[23:4] * rand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_MULS_COMMON, srand_a[23:4] * ~srand_b[23:4]);
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_MULS, srand_a[23:4] * (~srand_b[23:4] * srand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_AND_COMMON, rand_a[23:4] & ~rand_b[23:4]);
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_AND, rand_a[23:4] & (~rand_b[23:4] & rand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_OR_COMMON, rand_a[23:4] | ~rand_b[23:4]);
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_OR, rand_a[23:4] | (~rand_b[23:4] | rand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_XOR_COMMON, rand_a[23:4] ^ ~rand_b[23:4]);
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_XOR, rand_a[23:4] ^ (~rand_b[23:4] ^ rand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_CAT_COMMON, {rand_a[23:4], ~rand_b[23:4]});
`signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_CAT, {rand_a[23:4], {~rand_b[23:4], rand_a[39:20]}});
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_ADD_COMMON, rand_a[23:4] + rand_a[39:20]);
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_ADD, rand_a[23:4] + (~rand_b[24:5] + rand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_MUL_COMMON, rand_a[23:4] * rand_a[39:20]);
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_MUL, rand_a[23:4] * (~rand_b[24:5] * rand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_MULS_COMMON, srand_a[23:4] * rand_a[39:20]);
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_MULS, srand_a[23:4] * (~srand_b[24:5] * srand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_AND_COMMON, rand_a[23:4] & rand_a[39:20]);
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_AND, rand_a[23:4] & (~rand_b[24:5] & rand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_OR_COMMON, rand_a[23:4] | rand_a[39:20]);
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_OR, rand_a[23:4] | (~rand_b[24:5] | rand_a[39:20]));
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_XOR_COMMON, rand_a[23:4] ^ rand_a[39:20]);
`signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_XOR, rand_a[23:4] ^ (~rand_b[24:5] ^ rand_a[39:20]));
// Operators that should work wiht mismatched widths
`signal(MISMATCHED_ShiftL,const_a << 4'd2);

View File

@ -18,7 +18,7 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "--stats", test.pli_filename
test.execute()
if test.vlt:
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 43)
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 42)
test.file_grep(test.stats, r'SplitVar, packed variables split automatically\s+(\d+)', 1)
test.passes()

View File

@ -16,6 +16,6 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "--stats"])
test.execute()
if test.vlt:
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 160)
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 148)
test.passes()