From d33a81d32acf1aa1af5e849c262e415fceaf86fa Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 24 Mar 2026 06:53:19 +0000 Subject: [PATCH] Optimize commutative vertex operands in Dfg for better combining --- src/V3DfgPeephole.cpp | 118 +++++++++++++++++++++++------- src/V3DfgPeepholePatterns.h | 8 +- test_regress/t/t_dfg_peephole.v | 41 +++++++---- test_regress/t/t_opt_const_dfg.py | 2 +- test_regress/t/t_opt_const_red.py | 2 +- 5 files changed, 124 insertions(+), 47 deletions(-) diff --git a/src/V3DfgPeephole.cpp b/src/V3DfgPeephole.cpp index dde1a5368..1f4cc8ce7 100644 --- a/src/V3DfgPeephole.cpp +++ b/src/V3DfgPeephole.cpp @@ -133,6 +133,7 @@ class V3DfgPeephole final : public DfgVisitor { struct VertexInfo final { size_t m_workListIndex = 0; // Position of this vertx m_wlist (0 means not in list) size_t m_generation = 0; // Generation number of this vertex + size_t m_id = 0; // Unique vertex ID (0 means unassigned) }; // STATE @@ -144,6 +145,7 @@ class V3DfgPeephole final : public DfgVisitor { V3DfgCache m_cache{m_dfg}; // Vertex cache to avoid creating redundant vertices DfgVertex* m_vtxp = nullptr; // Currently considered vertex size_t m_currentGeneration = 0; // Current generation number + size_t m_lastId = 0; // Last unique vertex ID assigned // STATIC STATE static V3DebugBisect s_debugBisect; // Debug aid @@ -303,6 +305,9 @@ class V3DfgPeephole final : public DfgVisitor { // Sanity check UASSERT_OBJ(vtxp->dtype() == dtype, vtxp, "Vertex dtype mismatch"); if (VL_UNLIKELY(v3Global.opt.debugCheck())) vtxp->typeCheck(m_dfg); + // Assign vertex ID + VertexInfo& vInfo = m_vInfo[vtxp]; + if (!vInfo.m_id) vInfo.m_id = ++m_lastId; // Add to work list addToWorkList(vtxp); // Return new node @@ -457,17 +462,40 @@ class V3DfgPeephole final : public DfgVisitor { } } - // if (a OP (b OP c)), check if (a OP b) exists and if so replace with (a OP b) OP c - if (Vertex* rVtxp = vtxp->rhsp()->template cast()) { - const DfgDataType& dtype - = std::is_same::value - ? DfgDataType::packed(lhsp->width() + rVtxp->lhsp()->width()) - : vtxp->dtype(); - if (Vertex* const cVtxp = m_cache.get(dtype, lhsp, rVtxp->lhsp())) { - if (cVtxp->hasSinks() && cVtxp != rhsp) { - APPLYING(REUSE_ASSOC_BINARY) { - replace(make(vtxp, cVtxp, rVtxp->rhsp())); - return true; + // Attempt to reuse associative binary expressions if hey already exist, e.g.: + // '(a OP (b OP c))' -> '(a OP b) OP c', iff '(a OP b)' already exists, or + // '(a OP c) OP b' iff '(a OP c)' already exists and the vertex is commutative. + // Only do this is 'b OP c' has a single use and can subsequently be removed, + // otherwise there is no improvement. + if (!rhsp->hasMultipleSinks()) { + if (Vertex* rVtxp = rhsp->template cast()) { + DfgVertex* const rlVtxp = rVtxp->lhsp(); + DfgVertex* const rrVtxp = rVtxp->rhsp(); + + const DfgDataType& dtype + = std::is_same::value + ? DfgDataType::packed(lhsp->width() + rlVtxp->width()) + : vtxp->dtype(); + + if (Vertex* const existingp = m_cache.get(dtype, lhsp, rlVtxp)) { + UASSERT_OBJ(existingp->hasSinks(), vtxp, "Existing vertex should be used"); + if (existingp != rhsp) { + APPLYING(REUSE_ASSOC_BINARY_LHS_WITH_LHS_OF_RHS) { + replace(make(vtxp, existingp, rrVtxp)); + return true; + } + } + } + // Concat is not commutative + if VL_CONSTEXPR_CXX17 (!std::is_same::value) { + if (Vertex* const existingp = m_cache.get(dtype, lhsp, rrVtxp)) { + UASSERT_OBJ(existingp->hasSinks(), vtxp, "Existing vertex should be used"); + if (existingp != rhsp) { + APPLYING(REUSE_ASSOC_BINARY_LHS_WITH_RHS_OF_RHS) { + replace(make(vtxp, existingp, rlVtxp)); + return true; + } + } } } } @@ -484,29 +512,59 @@ class V3DfgPeephole final : public DfgVisitor { DfgVertex* const lhsp = vtxp->lhsp(); DfgVertex* const rhsp = vtxp->rhsp(); + // Ensure Const is on left-hand side to simplify other patterns - if (lhsp->is()) return false; - if (rhsp->is()) { - APPLYING(SWAP_CONST_IN_COMMUTATIVE_BINARY) { - replace(make(vtxp, rhsp, lhsp)); - return true; + { + const bool lIsConst = lhsp->is(); + const bool rIsConst = rhsp->is(); + if (lIsConst != rIsConst) { + if (rIsConst) { + APPLYING(SWAP_CONST_IN_COMMUTATIVE_BINARY) { + replace(make(vtxp, rhsp, lhsp)); + return true; + } + } + return false; } } + // Ensure Not is on the left-hand side to simplify other patterns - if (lhsp->is()) return false; - if (rhsp->is()) { - APPLYING(SWAP_NOT_IN_COMMUTATIVE_BINARY) { - replace(make(vtxp, rhsp, lhsp)); - return true; + { + const bool lIsNot = lhsp->is(); + const bool rIsNot = rhsp->is(); + if (lIsNot != rIsNot) { + if (rIsNot) { + APPLYING(SWAP_NOT_IN_COMMUTATIVE_BINARY) { + replace(make(vtxp, rhsp, lhsp)); + return true; + } + } + return false; } } - // If both sides are variable references, order the side in some defined way. This - // allows CSE to later merge 'a op b' with 'b op a'. - if (lhsp->is() && rhsp->is()) { - const AstNode* const lVarp = lhsp->as()->nodep(); - const AstNode* const rVarp = rhsp->as()->nodep(); - if (lVarp->name() > rVarp->name()) { - APPLYING(SWAP_VAR_IN_COMMUTATIVE_BINARY) { + + // Ensure same vertex is on the right-hand side to simplify other patterns + { + const bool lIsSame = lhsp->is(); + const bool rIsSame = rhsp->is(); + if (lIsSame != rIsSame) { + if (lIsSame) { + APPLYING(SWAP_SAME_IN_COMMUTATIVE_BINARY) { + replace(make(vtxp, rhsp, lhsp)); + return true; + } + } + return false; + } + } + + // Otherwise put sides in order based on unique iD, this makes + // 'a op b' and 'b op a' end up the same for better combining. + { + const VertexInfo& lInfo = m_vInfo[lhsp]; + const VertexInfo& rInfo = m_vInfo[rhsp]; + if (lInfo.m_id > rInfo.m_id) { + APPLYING(SWAP_SIDE_IN_COMMUTATIVE_BINARY) { replace(make(vtxp, rhsp, lhsp)); return true; } @@ -1443,6 +1501,7 @@ class V3DfgPeephole final : public DfgVisitor { DfgVertex::ScopeCache scopeCache; DfgSplicePacked* const sp = new DfgSplicePacked{m_dfg, flp, vtxp->dtype()}; sp->addDriver(catp, lsb, flp); + m_vInfo[sp].m_id = ++m_lastId; replace(sp); return; } @@ -1860,6 +1919,9 @@ class V3DfgPeephole final : public DfgVisitor { : m_dfg{dfg} , m_ctx{ctx} { + // Assign vertex IDs + m_dfg.forEachVertex([&](DfgVertex& vtx) { m_vInfo[vtx].m_id = ++m_lastId; }); + // Initialize the work list m_wlist.reserve(m_dfg.size() * 4); diff --git a/src/V3DfgPeepholePatterns.h b/src/V3DfgPeepholePatterns.h index 1ca3ec7f4..6c5db5b9c 100644 --- a/src/V3DfgPeepholePatterns.h +++ b/src/V3DfgPeepholePatterns.h @@ -107,13 +107,15 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_WITH_EQUIVALENT) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_ONES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_SELF) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REUSE_ASSOC_BINARY) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REUSE_ASSOC_BINARY_LHS_WITH_LHS_OF_RHS) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REUSE_ASSOC_BINARY_LHS_WITH_RHS_OF_RHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, RIGHT_LEANING_ASSOC) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NEQ_CONDITION) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NOT_CONDITION) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_CONST_IN_COMMUTATIVE_BINARY) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_NOT_IN_COMMUTATIVE_BINARY) \ - _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_VAR_IN_COMMUTATIVE_BINARY) + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_NOT_IN_COMMUTATIVE_BINARY) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_SAME_IN_COMMUTATIVE_BINARY) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_SIDE_IN_COMMUTATIVE_BINARY) // clang-format on class VDfgPeepholePattern final { diff --git a/test_regress/t/t_dfg_peephole.v b/test_regress/t/t_dfg_peephole.v index 34ca8b755..b1033e51d 100644 --- a/test_regress/t/t_dfg_peephole.v +++ b/test_regress/t/t_dfg_peephole.v @@ -218,20 +218,33 @@ module t ( `signal(REPLACE_LOGOR_WITH_OR, rand_a[0] || rand_a[1]); `signal(RIGHT_LEANING_ASSOC, (((rand_a + rand_b) + rand_a) + rand_b)); `signal(RIGHT_LEANING_CONCET, {{{rand_a, rand_b}, rand_a}, rand_b}); - `signal(REUSE_ASSOC_ADD_COMMON, rand_a[23:4] + ~rand_b[23:4]); - `signal(REUSE_ASSOC_ADD, rand_a[23:4] + (~rand_b[23:4] + rand_a[39:20])); - `signal(REUSE_ASSOC_MUL_COMMON, rand_a[23:4] * ~rand_b[23:4]); - `signal(REUSE_ASSOC_MUL, rand_a[23:4] * (~rand_b[23:4] * rand_a[39:20])); - `signal(REUSE_ASSOC_MULS_COMMON, srand_a[23:4] * ~srand_b[23:4]); - `signal(REUSE_ASSOC_MULS, srand_a[23:4] * (~srand_b[23:4] * srand_a[39:20])); - `signal(REUSE_ASSOC_AND_COMMON, rand_a[23:4] & ~rand_b[23:4]); - `signal(REUSE_ASSOC_AND, rand_a[23:4] & (~rand_b[23:4] & rand_a[39:20])); - `signal(REUSE_ASSOC_OR_COMMON, rand_a[23:4] | ~rand_b[23:4]); - `signal(REUSE_ASSOC_OR, rand_a[23:4] | (~rand_b[23:4] | rand_a[39:20])); - `signal(REUSE_ASSOC_XOR_COMMON, rand_a[23:4] ^ ~rand_b[23:4]); - `signal(REUSE_ASSOC_XOR, rand_a[23:4] ^ (~rand_b[23:4] ^ rand_a[39:20])); - `signal(REUSE_ASSOC_CAT_COMMON, {rand_a[23:4], ~rand_b[23:4]}); - `signal(REUSE_ASSOC_CAT, {rand_a[23:4], {~rand_b[23:4], rand_a[39:20]}}); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_ADD_COMMON, rand_a[23:4] + ~rand_b[23:4]); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_ADD, rand_a[23:4] + (~rand_b[23:4] + rand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_MUL_COMMON, rand_a[23:4] * ~rand_b[23:4]); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_MUL, rand_a[23:4] * (~rand_b[23:4] * rand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_MULS_COMMON, srand_a[23:4] * ~srand_b[23:4]); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_MULS, srand_a[23:4] * (~srand_b[23:4] * srand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_AND_COMMON, rand_a[23:4] & ~rand_b[23:4]); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_AND, rand_a[23:4] & (~rand_b[23:4] & rand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_OR_COMMON, rand_a[23:4] | ~rand_b[23:4]); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_OR, rand_a[23:4] | (~rand_b[23:4] | rand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_XOR_COMMON, rand_a[23:4] ^ ~rand_b[23:4]); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_XOR, rand_a[23:4] ^ (~rand_b[23:4] ^ rand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_CAT_COMMON, {rand_a[23:4], ~rand_b[23:4]}); + `signal(REUSE_ASSOC_LHS_WITH_LHS_OF_RHS_CAT, {rand_a[23:4], {~rand_b[23:4], rand_a[39:20]}}); + + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_ADD_COMMON, rand_a[23:4] + rand_a[39:20]); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_ADD, rand_a[23:4] + (~rand_b[24:5] + rand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_MUL_COMMON, rand_a[23:4] * rand_a[39:20]); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_MUL, rand_a[23:4] * (~rand_b[24:5] * rand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_MULS_COMMON, srand_a[23:4] * rand_a[39:20]); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_MULS, srand_a[23:4] * (~srand_b[24:5] * srand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_AND_COMMON, rand_a[23:4] & rand_a[39:20]); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_AND, rand_a[23:4] & (~rand_b[24:5] & rand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_OR_COMMON, rand_a[23:4] | rand_a[39:20]); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_OR, rand_a[23:4] | (~rand_b[24:5] | rand_a[39:20])); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_XOR_COMMON, rand_a[23:4] ^ rand_a[39:20]); + `signal(REUSE_ASSOC_LHS_WITH_RHS_OF_RHS_XOR, rand_a[23:4] ^ (~rand_b[24:5] ^ rand_a[39:20])); // Operators that should work wiht mismatched widths `signal(MISMATCHED_ShiftL,const_a << 4'd2); diff --git a/test_regress/t/t_opt_const_dfg.py b/test_regress/t/t_opt_const_dfg.py index eb5626311..88bf99d4f 100755 --- a/test_regress/t/t_opt_const_dfg.py +++ b/test_regress/t/t_opt_const_dfg.py @@ -18,7 +18,7 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "--stats", test.pli_filename test.execute() if test.vlt: - test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 43) + test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 42) test.file_grep(test.stats, r'SplitVar, packed variables split automatically\s+(\d+)', 1) test.passes() diff --git a/test_regress/t/t_opt_const_red.py b/test_regress/t/t_opt_const_red.py index 11c4ebbff..a47814311 100755 --- a/test_regress/t/t_opt_const_red.py +++ b/test_regress/t/t_opt_const_red.py @@ -16,6 +16,6 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "--stats"]) test.execute() if test.vlt: - test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 160) + test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 148) test.passes()