Optimize reuse of existing associative terms in DfgPeephole

Enable V3DfgCache to look up a vertex without creating one. Reuse
existing terms in associative expression trees if they already exist
somewhere in the graph.
This commit is contained in:
Geza Lore 2026-03-17 08:20:00 +00:00
parent 92172e32c4
commit 4dae9ed4e9
5 changed files with 86 additions and 15 deletions

View File

@ -188,24 +188,26 @@ inline DfgVertexTernary*& getEntry(CacheTernary& cache, const DfgDataType&, DfgV
return cache[key];
}
// These return a reference to the mapped entry, inserting a nullptr if not yet exists
inline CacheSel::iterator find(CacheSel& cache, DfgVertex* src0p, uint32_t lsb, uint32_t size) {
const KeySel key{src0p, lsb, size};
// These return an iterator which might be cache.end() if not contained
// DfgSel lookup: the key is the source vertex, the lsb, and the size of 'dtype'.
// Returns cache.end() when no such entry is present; never inserts.
inline CacheSel::iterator find(CacheSel& cache, const DfgDataType& dtype, DfgVertex* src0p,
                               uint32_t lsb) {
    return cache.find(KeySel{src0p, lsb, dtype.size()});
}
inline CacheUnary::iterator find(CacheUnary& cache, DfgVertex* src0p) {
// Unary lookup: keyed on the single source operand only. The data type argument is
// not used; it is accepted so all 'find' overloads can be called the same way.
inline CacheUnary::iterator find(CacheUnary& cache, const DfgDataType&, DfgVertex* src0p) {
    return cache.find(KeyUnary{src0p});
}
inline CacheBinary::iterator find(CacheBinary& cache, DfgVertex* src0p, DfgVertex* src1p) {
// Binary lookup: keyed on the two source operands, in order. The data type argument
// is not used; it is accepted so all 'find' overloads can be called the same way.
inline CacheBinary::iterator find(CacheBinary& cache, const DfgDataType&, DfgVertex* src0p,
                                  DfgVertex* src1p) {
    return cache.find(KeyBinary{src0p, src1p});
}
inline CacheTernary::iterator find(CacheTernary& cache, DfgVertex* src0p, DfgVertex* src1p,
DfgVertex* src2p) {
// Ternary lookup: keyed on the three source operands, in order. The data type argument
// is not used; it is accepted so all 'find' overloads can be called the same way.
inline CacheTernary::iterator find(CacheTernary& cache, const DfgDataType&, DfgVertex* src0p,
                                   DfgVertex* src1p, DfgVertex* src2p) {
    return cache.find(KeyTernary{src0p, src1p, src2p});
}
@ -245,6 +247,13 @@ inline Vertex* getOrCreate(DfgGraph& dfg, FileLine* flp, T_Cache& cache, const D
return reinterpret_cast<Vertex*>(entrypr);
}
// Look up the vertex with the given data type and operands in 'cache'.
// Never creates or inserts anything: yields nullptr when there is no matching entry.
// Note 'dfg' is not referenced by the lookup itself.
template <typename Vertex, typename T_Cache, typename... Operands>
inline Vertex* get(DfgGraph& dfg, T_Cache& cache, const DfgDataType& dtype, Operands... operands) {
    const auto entryIt = find(cache, dtype, operands...);
    // Miss: nothing cached under this key
    if (entryIt == cache.end()) return nullptr;
    // Hit: the mapped value is stored via a base-class pointer, convert to the requested type
    return reinterpret_cast<Vertex*>(entryIt->second);
}
// These add an existing vertex to the table, if an equivalent does not yet exist
inline void cache(CacheSel& cache, DfgSel* vtxp) {
DfgSel*& entrypr = getEntry(cache, vtxp->dtype(), vtxp->fromp(), vtxp->lsb());
@ -269,22 +278,22 @@ inline void cache(CacheTernary& cache, DfgVertexTernary* vtxp) {
// These remove an existing vertex from the cache, if it is the cached vertex
inline void invalidateByValue(CacheSel& cache, const DfgSel* vtxp) {
const auto it = find(cache, vtxp->fromp(), vtxp->lsb(), vtxp->size());
const auto it = find(cache, vtxp->dtype(), vtxp->fromp(), vtxp->lsb());
if (it != cache.end() && it->second == vtxp) cache.erase(it);
}
inline void invalidateByValue(CacheUnary& cache, const DfgVertexUnary* vtxp) {
const auto it = find(cache, vtxp->inputp(0));
const auto it = find(cache, vtxp->dtype(), vtxp->inputp(0));
if (it != cache.end() && it->second == vtxp) cache.erase(it);
}
inline void invalidateByValue(CacheBinary& cache, const DfgVertexBinary* vtxp) {
const auto it = find(cache, vtxp->inputp(0), vtxp->inputp(1));
const auto it = find(cache, vtxp->dtype(), vtxp->inputp(0), vtxp->inputp(1));
if (it != cache.end() && it->second == vtxp) cache.erase(it);
}
inline void invalidateByValue(CacheTernary& cache, const DfgVertexTernary* vtxp) {
const auto it = find(cache, vtxp->inputp(0), vtxp->inputp(1), vtxp->inputp(2));
const auto it = find(cache, vtxp->dtype(), vtxp->inputp(0), vtxp->inputp(1), vtxp->inputp(2));
if (it != cache.end() && it->second == vtxp) cache.erase(it);
}
@ -344,6 +353,10 @@ public:
template <typename Vertex, typename... Operands>
inline Vertex* getOrCreate(FileLine* flp, const DfgDataType& dtype, Operands... operands);
// Find a vertex of type 'Vertex', with the given operands, return nullptr if not in cache.
template <typename Vertex, typename... Operands>
inline Vertex* get(const DfgDataType& dtype, Operands... operands);
// Add an existing vertex to the table. If an equivalent already exists, then nothing happens.
void cache(DfgVertex* vtxp);
@ -383,6 +396,30 @@ Vertex* V3DfgCache::getOrCreate(FileLine* flp, const DfgDataType& dtype, Operand
m_dfg, flp, cacheForType<Vertex>(), dtype, operands...);
}
// Look up a vertex of the final type 'Vertex' having the given data type and operands.
// Returns nullptr if the cache holds no such vertex. The vertex kind and the operand
// count are validated at compile time.
template <typename Vertex, typename... Operands>
Vertex* V3DfgCache::get(const DfgDataType& dtype, Operands... operands) {
    static_assert(std::is_final<Vertex>::value, "Must invoke on final vertex type");

    // Classify the requested vertex type. DfgSel is checked first and is explicitly
    // excluded from the unary category.
    constexpr bool selVertex = std::is_same<DfgSel, Vertex>::value;
    constexpr bool unaryVertex = std::is_base_of<DfgVertexUnary, Vertex>::value && !selVertex;
    constexpr bool binaryVertex = std::is_base_of<DfgVertexBinary, Vertex>::value;
    constexpr bool ternaryVertex = std::is_base_of<DfgVertexTernary, Vertex>::value;
    static_assert(selVertex || unaryVertex || binaryVertex || ternaryVertex,
                  "'get' called with unknown vertex type");

    // Each category takes a fixed number of operands
    constexpr size_t operandCount = sizeof...(Operands);
    static_assert(!selVertex || operandCount == 2,  //
                  "Wrong number of operands to DfgSel");
    static_assert(!unaryVertex || operandCount == 1,
                  "Wrong number of operands to DfgVertexUnary");
    static_assert(!binaryVertex || operandCount == 2,
                  "Wrong number of operands to DfgVertexBinary");
    static_assert(!ternaryVertex || operandCount == 3,
                  "Wrong number of operands to DfgVertexTernary");

    // Delegate to the non-inserting lookup on the table for this vertex type
    return V3DfgCacheInternal::get<Vertex, CacheType<Vertex>, Operands...>(
        m_dfg, cacheForType<Vertex>(), dtype, operands...);
}
} // namespace V3DfgCacheInternal
// Export only the public interface class

View File

@ -399,8 +399,26 @@ class V3DfgPeephole final : public DfgVisitor {
// If we didn't apply the change (pattern was disabled), break the loop
break;
}
if (changed) return true;
return changed;
// if (a OP (b OP c)), check if (a OP b) exists and if so replace with (a OP b) OP c
if (Vertex* rVtxp = vtxp->rhsp()->template cast<Vertex>()) {
const DfgDataType& dtype
= std::is_same<Vertex, DfgConcat>::value
? DfgDataType::packed(lhsp->width() + rVtxp->lhsp()->width())
: vtxp->dtype();
if (Vertex* const cVtxp = m_cache.get<Vertex>(dtype, lhsp, rVtxp->lhsp())) {
if (cVtxp->hasSinks() && cVtxp != rhsp) {
APPLYING(REUSE_ASSOC_BINARY) {
Vertex* const resp = make<Vertex>(vtxp, cVtxp, rVtxp->rhsp());
replace(vtxp, resp);
return true;
}
}
}
}
return false;
}
// Transformations that apply to all commutative binary vertices

View File

@ -106,6 +106,7 @@
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_TAUTOLOGICAL_OR_3) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_ONES) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_SELF) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REUSE_ASSOC_BINARY) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, RIGHT_LEANING_ASSOC) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NEQ_CONDITION) \
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NOT_CONDITION) \

View File

@ -178,8 +178,9 @@ module t (
`signal(REMOVE_CONCAT_OF_ADJOINING_SELS, {rand_a[10:3], rand_a[2:1]});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS_CAT, {rand_a[2:1], rand_b});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS_CAT, {rand_b, rand_a[10:3]});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:3], {rand_a[2:1], rand_b}});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {{rand_b, rand_a[10:3]}, rand_a[2:1]});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:4], {rand_a[3:1], rand_b}});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {{rand_a, rand_b[11:6]}, rand_b[5:1]});
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS_COMMON, {rand_a, rand_b[11:6]});
`signal(REMOVE_COND_WITH_FALSE_CONDITION, 1'd0 ? rand_a : rand_b);
`signal(REMOVE_COND_WITH_TRUE_CONDITION, 1'd1 ? rand_a : rand_b);
`signal(REMOVE_COND_WITH_BRANCHES_SAME, rand_a[0] ? ~rand_b : ~rand_b);
@ -217,6 +218,20 @@ module t (
`signal(REPLACE_LOGOR_WITH_OR, rand_a[0] || rand_a[1]);
`signal(RIGHT_LEANING_ASSOC, (((rand_a + rand_b) + rand_a) + rand_b));
`signal(RIGHT_LEANING_CONCET, {{{rand_a, rand_b}, rand_a}, rand_b});
`signal(REUSE_ASSOC_ADD_COMMON, rand_a[23:4] + ~rand_b[23:4]);
`signal(REUSE_ASSOC_ADD, rand_a[23:4] + (~rand_b[23:4] + rand_a[39:20]));
`signal(REUSE_ASSOC_MUL_COMMON, rand_a[23:4] * ~rand_b[23:4]);
`signal(REUSE_ASSOC_MUL, rand_a[23:4] * (~rand_b[23:4] * rand_a[39:20]));
`signal(REUSE_ASSOC_MULS_COMMON, srand_a[23:4] * ~srand_b[23:4]);
`signal(REUSE_ASSOC_MULS, srand_a[23:4] * (~srand_b[23:4] * srand_a[39:20]));
`signal(REUSE_ASSOC_AND_COMMON, rand_a[23:4] & ~rand_b[23:4]);
`signal(REUSE_ASSOC_AND, rand_a[23:4] & (~rand_b[23:4] & rand_a[39:20]));
`signal(REUSE_ASSOC_OR_COMMON, rand_a[23:4] | ~rand_b[23:4]);
`signal(REUSE_ASSOC_OR, rand_a[23:4] | (~rand_b[23:4] | rand_a[39:20]));
`signal(REUSE_ASSOC_XOR_COMMON, rand_a[23:4] ^ ~rand_b[23:4]);
`signal(REUSE_ASSOC_XOR, rand_a[23:4] ^ (~rand_b[23:4] ^ rand_a[39:20]));
`signal(REUSE_ASSOC_CAT_COMMON, {rand_a[23:4], ~rand_b[23:4]});
`signal(REUSE_ASSOC_CAT, {rand_a[23:4], {~rand_b[23:4], rand_a[39:20]}});
// Operators that should work with mismatched widths
`signal(MISMATCHED_ShiftL,const_a << 4'd2);

View File

@ -16,6 +16,6 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "--stats"])
test.execute()
if test.vlt:
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 160)
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 148)
test.passes()