From 4dae9ed4e95d6b49c6cf65803561bc0fff0cc3d5 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Tue, 17 Mar 2026 08:20:00 +0000 Subject: [PATCH] Optimize reuse of existing associative terms in DfgPeephole Enable V3DfgCache to look up vertices without creating one. Reuse existing terms in associative expression trees if they already exist somewhere in the graph. --- src/V3DfgCache.h | 59 +++++++++++++++++++++++++------ src/V3DfgPeephole.cpp | 20 ++++++++++- src/V3DfgPeepholePatterns.h | 1 + test_regress/t/t_dfg_peephole.v | 19 ++++++++-- test_regress/t/t_opt_const_red.py | 2 +- 5 files changed, 86 insertions(+), 15 deletions(-) diff --git a/src/V3DfgCache.h b/src/V3DfgCache.h index 010605151..14822391c 100644 --- a/src/V3DfgCache.h +++ b/src/V3DfgCache.h @@ -188,24 +188,26 @@ inline DfgVertexTernary*& getEntry(CacheTernary& cache, const DfgDataType&, DfgV return cache[key]; } -// These return a reference to the mapped entry, inserting a nullptr if not yet exists -inline CacheSel::iterator find(CacheSel& cache, DfgVertex* src0p, uint32_t lsb, uint32_t size) { - const KeySel key{src0p, lsb, size}; +// These return an iterator which might be cache.end() if not contained +inline CacheSel::iterator find(CacheSel& cache, const DfgDataType& dtype, DfgVertex* src0p, + uint32_t lsb) { + const KeySel key{src0p, lsb, dtype.size()}; return cache.find(key); } -inline CacheUnary::iterator find(CacheUnary& cache, DfgVertex* src0p) { +inline CacheUnary::iterator find(CacheUnary& cache, const DfgDataType&, DfgVertex* src0p) { const KeyUnary key{src0p}; return cache.find(key); } -inline CacheBinary::iterator find(CacheBinary& cache, DfgVertex* src0p, DfgVertex* src1p) { +inline CacheBinary::iterator find(CacheBinary& cache, const DfgDataType&, DfgVertex* src0p, + DfgVertex* src1p) { const KeyBinary key{src0p, src1p}; return cache.find(key); } -inline CacheTernary::iterator find(CacheTernary& cache, DfgVertex* src0p, DfgVertex* src1p, - DfgVertex* src2p) { +inline CacheTernary::iterator find(CacheTernary& cache, const DfgDataType&, DfgVertex* src0p, + DfgVertex* src1p, DfgVertex* src2p) { const KeyTernary key{src0p, src1p, src2p}; return cache.find(key); } @@ -245,6 +247,13 @@ inline Vertex* getOrCreate(DfgGraph& dfg, FileLine* flp, T_Cache& cache, const D return reinterpret_cast(entrypr); } +// Get vertex with given operands, return nullptr if not in cache +template +inline Vertex* get(DfgGraph& dfg, T_Cache& cache, const DfgDataType& dtype, Operands... operands) { + const auto it = find(cache, dtype, operands...); + return it != cache.end() ? reinterpret_cast(it->second) : nullptr; +} + // These add an existing vertex to the table, if an equivalent does not yet exist inline void cache(CacheSel& cache, DfgSel* vtxp) { DfgSel*& entrypr = getEntry(cache, vtxp->dtype(), vtxp->fromp(), vtxp->lsb()); @@ -269,22 +278,22 @@ inline void cache(CacheTernary& cache, DfgVertexTernary* vtxp) { // These remove an existing vertex from the cache, if it is the cached vertex inline void invalidateByValue(CacheSel& cache, const DfgSel* vtxp) { - const auto it = find(cache, vtxp->fromp(), vtxp->lsb(), vtxp->size()); + const auto it = find(cache, vtxp->dtype(), vtxp->fromp(), vtxp->lsb()); if (it != cache.end() && it->second == vtxp) cache.erase(it); } inline void invalidateByValue(CacheUnary& cache, const DfgVertexUnary* vtxp) { - const auto it = find(cache, vtxp->inputp(0)); + const auto it = find(cache, vtxp->dtype(), vtxp->inputp(0)); if (it != cache.end() && it->second == vtxp) cache.erase(it); } inline void invalidateByValue(CacheBinary& cache, const DfgVertexBinary* vtxp) { - const auto it = find(cache, vtxp->inputp(0), vtxp->inputp(1)); + const auto it = find(cache, vtxp->dtype(), vtxp->inputp(0), vtxp->inputp(1)); if (it != cache.end() && it->second == vtxp) cache.erase(it); } inline void invalidateByValue(CacheTernary& cache, const DfgVertexTernary* vtxp) { - const auto it = find(cache, vtxp->inputp(0), vtxp->inputp(1), vtxp->inputp(2)); + const auto it = find(cache, vtxp->dtype(), vtxp->inputp(0), vtxp->inputp(1), vtxp->inputp(2)); if (it != cache.end() && it->second == vtxp) cache.erase(it); } @@ -344,6 +353,10 @@ public: template inline Vertex* getOrCreate(FileLine* flp, const DfgDataType& dtype, Operands... operands); + // Find a vertex of type 'Vertex', with the given operands, return nullptr if not in cache. + template + inline Vertex* get(const DfgDataType& dtype, Operands... operands); + // Add an existing vertex of the table. If an equivalent already exists, then nothing happens. void cache(DfgVertex* vtxp); @@ -383,6 +396,30 @@ Vertex* V3DfgCache::getOrCreate(FileLine* flp, const DfgDataType& dtype, Operand m_dfg, flp, cacheForType(), dtype, operands...); } +// Find a vertex of type 'Vertex', with the given operands, return nullptr if not in cache. +template +Vertex* V3DfgCache::get(const DfgDataType& dtype, Operands... operands) { + static_assert(std::is_final::value, "Must invoke on final vertex type"); + constexpr bool isSel = std::is_same::value; + constexpr bool isUnary = !isSel && std::is_base_of::value; + constexpr bool isBinary = std::is_base_of::value; + constexpr bool isTernary = std::is_base_of::value; + static_assert(isSel || isUnary || isBinary || isTernary, + "'get' called with unknown vertex type"); + + static_assert(!isSel || sizeof...(Operands) == 2, // + "Wrong number of operands to DfgSel"); + static_assert(!isUnary || sizeof...(Operands) == 1, + "Wrong number of operands to DfgVertexUnary"); + static_assert(!isBinary || sizeof...(Operands) == 2, + "Wrong number of operands to DfgVertexBinary"); + static_assert(!isTernary || sizeof...(Operands) == 3, + "Wrong number of operands to DfgVertexTernary"); + + return V3DfgCacheInternal::get, Operands...>( + m_dfg, cacheForType(), dtype, operands...); +} + } // namespace V3DfgCacheInternal // Export only the public interface class diff --git a/src/V3DfgPeephole.cpp b/src/V3DfgPeephole.cpp index 993050db0..9ed5f586f 100644 --- a/src/V3DfgPeephole.cpp +++ b/src/V3DfgPeephole.cpp @@ -399,8 +399,26 @@ class V3DfgPeephole final : public DfgVisitor { // If we didn't apply the change (pattern was disabled), break the loop break; } + if (changed) return true; - return changed; + // if (a OP (b OP c)), check if (a OP b) exists and if so replace with (a OP b) OP c + if (Vertex* rVtxp = vtxp->rhsp()->template cast()) { + const DfgDataType& dtype + = std::is_same::value + ? DfgDataType::packed(lhsp->width() + rVtxp->lhsp()->width()) + : vtxp->dtype(); + if (Vertex* const cVtxp = m_cache.get(dtype, lhsp, rVtxp->lhsp())) { + if (cVtxp->hasSinks() && cVtxp != rhsp) { + APPLYING(REUSE_ASSOC_BINARY) { + Vertex* const resp = make(vtxp, cVtxp, rVtxp->rhsp()); + replace(vtxp, resp); + return true; + } + } + } + } + + return false; } // Transformations that apply to all commutative binary vertices diff --git a/src/V3DfgPeepholePatterns.h b/src/V3DfgPeepholePatterns.h index fd0711e0c..33d1bb644 100644 --- a/src/V3DfgPeepholePatterns.h +++ b/src/V3DfgPeepholePatterns.h @@ -106,6 +106,7 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_TAUTOLOGICAL_OR_3) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_ONES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_SELF) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REUSE_ASSOC_BINARY) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, RIGHT_LEANING_ASSOC) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NEQ_CONDITION) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NOT_CONDITION) \ diff --git a/test_regress/t/t_dfg_peephole.v b/test_regress/t/t_dfg_peephole.v index 026112bf4..34ca8b755 100644 --- a/test_regress/t/t_dfg_peephole.v +++ b/test_regress/t/t_dfg_peephole.v @@ -178,8 +178,9 @@ module t ( `signal(REMOVE_CONCAT_OF_ADJOINING_SELS, {rand_a[10:3], rand_a[2:1]}); `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS_CAT, {rand_a[2:1], rand_b}); `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS_CAT, {rand_b, rand_a[10:3]}); - `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:3], {rand_a[2:1], rand_b}}); - `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {{rand_b, rand_a[10:3]}, rand_a[2:1]}); + `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:4], {rand_a[3:1], rand_b}}); + `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {{rand_a, rand_b[11:6]}, rand_b[5:1]}); + `signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS_COMMON, {rand_a, rand_b[11:6]}); `signal(REMOVE_COND_WITH_FALSE_CONDITION, 1'd0 ? rand_a : rand_b); `signal(REMOVE_COND_WITH_TRUE_CONDITION, 1'd1 ? rand_a : rand_b); `signal(REMOVE_COND_WITH_BRANCHES_SAME, rand_a[0] ? ~rand_b : ~rand_b); @@ -217,6 +218,20 @@ module t ( `signal(REPLACE_LOGOR_WITH_OR, rand_a[0] || rand_a[1]); `signal(RIGHT_LEANING_ASSOC, (((rand_a + rand_b) + rand_a) + rand_b)); `signal(RIGHT_LEANING_CONCET, {{{rand_a, rand_b}, rand_a}, rand_b}); + `signal(REUSE_ASSOC_ADD_COMMON, rand_a[23:4] + ~rand_b[23:4]); + `signal(REUSE_ASSOC_ADD, rand_a[23:4] + (~rand_b[23:4] + rand_a[39:20])); + `signal(REUSE_ASSOC_MUL_COMMON, rand_a[23:4] * ~rand_b[23:4]); + `signal(REUSE_ASSOC_MUL, rand_a[23:4] * (~rand_b[23:4] * rand_a[39:20])); + `signal(REUSE_ASSOC_MULS_COMMON, srand_a[23:4] * ~srand_b[23:4]); + `signal(REUSE_ASSOC_MULS, srand_a[23:4] * (~srand_b[23:4] * srand_a[39:20])); + `signal(REUSE_ASSOC_AND_COMMON, rand_a[23:4] & ~rand_b[23:4]); + `signal(REUSE_ASSOC_AND, rand_a[23:4] & (~rand_b[23:4] & rand_a[39:20])); + `signal(REUSE_ASSOC_OR_COMMON, rand_a[23:4] | ~rand_b[23:4]); + `signal(REUSE_ASSOC_OR, rand_a[23:4] | (~rand_b[23:4] | rand_a[39:20])); + `signal(REUSE_ASSOC_XOR_COMMON, rand_a[23:4] ^ ~rand_b[23:4]); + `signal(REUSE_ASSOC_XOR, rand_a[23:4] ^ (~rand_b[23:4] ^ rand_a[39:20])); + `signal(REUSE_ASSOC_CAT_COMMON, {rand_a[23:4], ~rand_b[23:4]}); + `signal(REUSE_ASSOC_CAT, {rand_a[23:4], {~rand_b[23:4], rand_a[39:20]}}); // Operators that should work wiht mismatched widths `signal(MISMATCHED_ShiftL,const_a << 4'd2); diff --git a/test_regress/t/t_opt_const_red.py b/test_regress/t/t_opt_const_red.py index 11c4ebbff..a47814311 100755 --- a/test_regress/t/t_opt_const_red.py +++ b/test_regress/t/t_opt_const_red.py @@ -16,6 +16,6 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "--stats"]) test.execute() if test.vlt: - test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 160) + test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 148) test.passes()