Optimize reuse of existing associative terms in DfgPeephole
Enable V3DfgCache to look up vertices without creating one. Reuse existing terms in associative expression trees if they already exist somewhere in the graph.
This commit is contained in:
parent
92172e32c4
commit
4dae9ed4e9
|
|
@ -188,24 +188,26 @@ inline DfgVertexTernary*& getEntry(CacheTernary& cache, const DfgDataType&, DfgV
|
|||
return cache[key];
|
||||
}
|
||||
|
||||
// These return a reference to the mapped entry, inserting a nullptr if it does not yet exist
|
||||
inline CacheSel::iterator find(CacheSel& cache, DfgVertex* src0p, uint32_t lsb, uint32_t size) {
|
||||
const KeySel key{src0p, lsb, size};
|
||||
// These return an iterator, which is cache.end() if the key is not in the cache
|
||||
inline CacheSel::iterator find(CacheSel& cache, const DfgDataType& dtype, DfgVertex* src0p,
|
||||
uint32_t lsb) {
|
||||
const KeySel key{src0p, lsb, dtype.size()};
|
||||
return cache.find(key);
|
||||
}
|
||||
|
||||
inline CacheUnary::iterator find(CacheUnary& cache, DfgVertex* src0p) {
|
||||
inline CacheUnary::iterator find(CacheUnary& cache, const DfgDataType&, DfgVertex* src0p) {
|
||||
const KeyUnary key{src0p};
|
||||
return cache.find(key);
|
||||
}
|
||||
|
||||
inline CacheBinary::iterator find(CacheBinary& cache, DfgVertex* src0p, DfgVertex* src1p) {
|
||||
inline CacheBinary::iterator find(CacheBinary& cache, const DfgDataType&, DfgVertex* src0p,
|
||||
DfgVertex* src1p) {
|
||||
const KeyBinary key{src0p, src1p};
|
||||
return cache.find(key);
|
||||
}
|
||||
|
||||
inline CacheTernary::iterator find(CacheTernary& cache, DfgVertex* src0p, DfgVertex* src1p,
|
||||
DfgVertex* src2p) {
|
||||
inline CacheTernary::iterator find(CacheTernary& cache, const DfgDataType&, DfgVertex* src0p,
|
||||
DfgVertex* src1p, DfgVertex* src2p) {
|
||||
const KeyTernary key{src0p, src1p, src2p};
|
||||
return cache.find(key);
|
||||
}
|
||||
|
|
@ -245,6 +247,13 @@ inline Vertex* getOrCreate(DfgGraph& dfg, FileLine* flp, T_Cache& cache, const D
|
|||
return reinterpret_cast<Vertex*>(entrypr);
|
||||
}
|
||||
|
||||
// Get vertex with given operands, return nullptr if not in cache
|
||||
template <typename Vertex, typename T_Cache, typename... Operands>
|
||||
inline Vertex* get(DfgGraph& dfg, T_Cache& cache, const DfgDataType& dtype, Operands... operands) {
|
||||
const auto it = find(cache, dtype, operands...);
|
||||
return it != cache.end() ? reinterpret_cast<Vertex*>(it->second) : nullptr;
|
||||
}
|
||||
|
||||
// These add an existing vertex to the table, if an equivalent does not yet exist
|
||||
inline void cache(CacheSel& cache, DfgSel* vtxp) {
|
||||
DfgSel*& entrypr = getEntry(cache, vtxp->dtype(), vtxp->fromp(), vtxp->lsb());
|
||||
|
|
@ -269,22 +278,22 @@ inline void cache(CacheTernary& cache, DfgVertexTernary* vtxp) {
|
|||
|
||||
// These remove an existing vertex from the cache, if it is the cached vertex
|
||||
inline void invalidateByValue(CacheSel& cache, const DfgSel* vtxp) {
|
||||
const auto it = find(cache, vtxp->fromp(), vtxp->lsb(), vtxp->size());
|
||||
const auto it = find(cache, vtxp->dtype(), vtxp->fromp(), vtxp->lsb());
|
||||
if (it != cache.end() && it->second == vtxp) cache.erase(it);
|
||||
}
|
||||
|
||||
inline void invalidateByValue(CacheUnary& cache, const DfgVertexUnary* vtxp) {
|
||||
const auto it = find(cache, vtxp->inputp(0));
|
||||
const auto it = find(cache, vtxp->dtype(), vtxp->inputp(0));
|
||||
if (it != cache.end() && it->second == vtxp) cache.erase(it);
|
||||
}
|
||||
|
||||
inline void invalidateByValue(CacheBinary& cache, const DfgVertexBinary* vtxp) {
|
||||
const auto it = find(cache, vtxp->inputp(0), vtxp->inputp(1));
|
||||
const auto it = find(cache, vtxp->dtype(), vtxp->inputp(0), vtxp->inputp(1));
|
||||
if (it != cache.end() && it->second == vtxp) cache.erase(it);
|
||||
}
|
||||
|
||||
inline void invalidateByValue(CacheTernary& cache, const DfgVertexTernary* vtxp) {
|
||||
const auto it = find(cache, vtxp->inputp(0), vtxp->inputp(1), vtxp->inputp(2));
|
||||
const auto it = find(cache, vtxp->dtype(), vtxp->inputp(0), vtxp->inputp(1), vtxp->inputp(2));
|
||||
if (it != cache.end() && it->second == vtxp) cache.erase(it);
|
||||
}
|
||||
|
||||
|
|
@ -344,6 +353,10 @@ public:
|
|||
template <typename Vertex, typename... Operands>
|
||||
inline Vertex* getOrCreate(FileLine* flp, const DfgDataType& dtype, Operands... operands);
|
||||
|
||||
// Find a vertex of type 'Vertex', with the given operands, return nullptr if not in cache.
|
||||
template <typename Vertex, typename... Operands>
|
||||
inline Vertex* get(const DfgDataType& dtype, Operands... operands);
|
||||
|
||||
// Add an existing vertex to the table. If an equivalent already exists, then nothing happens.
|
||||
void cache(DfgVertex* vtxp);
|
||||
|
||||
|
|
@ -383,6 +396,30 @@ Vertex* V3DfgCache::getOrCreate(FileLine* flp, const DfgDataType& dtype, Operand
|
|||
m_dfg, flp, cacheForType<Vertex>(), dtype, operands...);
|
||||
}
|
||||
|
||||
// Find a vertex of type 'Vertex', with the given operands, return nullptr if not in cache.
|
||||
template <typename Vertex, typename... Operands>
|
||||
Vertex* V3DfgCache::get(const DfgDataType& dtype, Operands... operands) {
|
||||
static_assert(std::is_final<Vertex>::value, "Must invoke on final vertex type");
|
||||
constexpr bool isSel = std::is_same<DfgSel, Vertex>::value;
|
||||
constexpr bool isUnary = !isSel && std::is_base_of<DfgVertexUnary, Vertex>::value;
|
||||
constexpr bool isBinary = std::is_base_of<DfgVertexBinary, Vertex>::value;
|
||||
constexpr bool isTernary = std::is_base_of<DfgVertexTernary, Vertex>::value;
|
||||
static_assert(isSel || isUnary || isBinary || isTernary,
|
||||
"'get' called with unknown vertex type");
|
||||
|
||||
static_assert(!isSel || sizeof...(Operands) == 2, //
|
||||
"Wrong number of operands to DfgSel");
|
||||
static_assert(!isUnary || sizeof...(Operands) == 1,
|
||||
"Wrong number of operands to DfgVertexUnary");
|
||||
static_assert(!isBinary || sizeof...(Operands) == 2,
|
||||
"Wrong number of operands to DfgVertexBinary");
|
||||
static_assert(!isTernary || sizeof...(Operands) == 3,
|
||||
"Wrong number of operands to DfgVertexTernary");
|
||||
|
||||
return V3DfgCacheInternal::get<Vertex, CacheType<Vertex>, Operands...>(
|
||||
m_dfg, cacheForType<Vertex>(), dtype, operands...);
|
||||
}
|
||||
|
||||
} // namespace V3DfgCacheInternal
|
||||
|
||||
// Export only the public interface class
|
||||
|
|
|
|||
|
|
@ -399,8 +399,26 @@ class V3DfgPeephole final : public DfgVisitor {
|
|||
// If we didn't apply the change (pattern was disabled), break the loop
|
||||
break;
|
||||
}
|
||||
if (changed) return true;
|
||||
|
||||
return changed;
|
||||
// If this is 'a OP (b OP c)', check if 'a OP b' already exists, and if so replace with '(a OP b) OP c'
|
||||
if (Vertex* rVtxp = vtxp->rhsp()->template cast<Vertex>()) {
|
||||
const DfgDataType& dtype
|
||||
= std::is_same<Vertex, DfgConcat>::value
|
||||
? DfgDataType::packed(lhsp->width() + rVtxp->lhsp()->width())
|
||||
: vtxp->dtype();
|
||||
if (Vertex* const cVtxp = m_cache.get<Vertex>(dtype, lhsp, rVtxp->lhsp())) {
|
||||
if (cVtxp->hasSinks() && cVtxp != rhsp) {
|
||||
APPLYING(REUSE_ASSOC_BINARY) {
|
||||
Vertex* const resp = make<Vertex>(vtxp, cVtxp, rVtxp->rhsp());
|
||||
replace(vtxp, resp);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Transformations that apply to all commutative binary vertices
|
||||
|
|
|
|||
|
|
@ -106,6 +106,7 @@
|
|||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_TAUTOLOGICAL_OR_3) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_ONES) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REPLACE_XOR_WITH_SELF) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, REUSE_ASSOC_BINARY) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, RIGHT_LEANING_ASSOC) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NEQ_CONDITION) \
|
||||
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, SWAP_COND_WITH_NOT_CONDITION) \
|
||||
|
|
|
|||
|
|
@ -178,8 +178,9 @@ module t (
|
|||
`signal(REMOVE_CONCAT_OF_ADJOINING_SELS, {rand_a[10:3], rand_a[2:1]});
|
||||
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS_CAT, {rand_a[2:1], rand_b});
|
||||
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS_CAT, {rand_b, rand_a[10:3]});
|
||||
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:3], {rand_a[2:1], rand_b}});
|
||||
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {{rand_b, rand_a[10:3]}, rand_a[2:1]});
|
||||
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_LHS, {rand_a[10:4], {rand_a[3:1], rand_b}});
|
||||
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS, {{rand_a, rand_b[11:6]}, rand_b[5:1]});
|
||||
`signal(REPLACE_NESTED_CONCAT_OF_ADJOINING_SELS_ON_RHS_COMMON, {rand_a, rand_b[11:6]});
|
||||
`signal(REMOVE_COND_WITH_FALSE_CONDITION, 1'd0 ? rand_a : rand_b);
|
||||
`signal(REMOVE_COND_WITH_TRUE_CONDITION, 1'd1 ? rand_a : rand_b);
|
||||
`signal(REMOVE_COND_WITH_BRANCHES_SAME, rand_a[0] ? ~rand_b : ~rand_b);
|
||||
|
|
@ -217,6 +218,20 @@ module t (
|
|||
`signal(REPLACE_LOGOR_WITH_OR, rand_a[0] || rand_a[1]);
|
||||
`signal(RIGHT_LEANING_ASSOC, (((rand_a + rand_b) + rand_a) + rand_b));
|
||||
`signal(RIGHT_LEANING_CONCET, {{{rand_a, rand_b}, rand_a}, rand_b});
|
||||
`signal(REUSE_ASSOC_ADD_COMMON, rand_a[23:4] + ~rand_b[23:4]);
|
||||
`signal(REUSE_ASSOC_ADD, rand_a[23:4] + (~rand_b[23:4] + rand_a[39:20]));
|
||||
`signal(REUSE_ASSOC_MUL_COMMON, rand_a[23:4] * ~rand_b[23:4]);
|
||||
`signal(REUSE_ASSOC_MUL, rand_a[23:4] * (~rand_b[23:4] * rand_a[39:20]));
|
||||
`signal(REUSE_ASSOC_MULS_COMMON, srand_a[23:4] * ~srand_b[23:4]);
|
||||
`signal(REUSE_ASSOC_MULS, srand_a[23:4] * (~srand_b[23:4] * srand_a[39:20]));
|
||||
`signal(REUSE_ASSOC_AND_COMMON, rand_a[23:4] & ~rand_b[23:4]);
|
||||
`signal(REUSE_ASSOC_AND, rand_a[23:4] & (~rand_b[23:4] & rand_a[39:20]));
|
||||
`signal(REUSE_ASSOC_OR_COMMON, rand_a[23:4] | ~rand_b[23:4]);
|
||||
`signal(REUSE_ASSOC_OR, rand_a[23:4] | (~rand_b[23:4] | rand_a[39:20]));
|
||||
`signal(REUSE_ASSOC_XOR_COMMON, rand_a[23:4] ^ ~rand_b[23:4]);
|
||||
`signal(REUSE_ASSOC_XOR, rand_a[23:4] ^ (~rand_b[23:4] ^ rand_a[39:20]));
|
||||
`signal(REUSE_ASSOC_CAT_COMMON, {rand_a[23:4], ~rand_b[23:4]});
|
||||
`signal(REUSE_ASSOC_CAT, {rand_a[23:4], {~rand_b[23:4], rand_a[39:20]}});
|
||||
|
||||
// Operators that should work with mismatched widths
|
||||
`signal(MISMATCHED_ShiftL,const_a << 4'd2);
|
||||
|
|
|
|||
|
|
@ -16,6 +16,6 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "--stats"])
|
|||
test.execute()
|
||||
|
||||
if test.vlt:
|
||||
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 160)
|
||||
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 148)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
Loading…
Reference in New Issue