Move Concat balancing from DFG to FuncOpt (#5602)
This means it applies more widely, e.g. inside sequential logic.
This commit is contained in:
parent
4257fcf9d0
commit
03bd1bfc63
|
|
@ -591,6 +591,8 @@ Summary:
|
||||||
|
|
||||||
.. option:: -fno-func-opt
|
.. option:: -fno-func-opt
|
||||||
|
|
||||||
|
.. option:: -fno-func-opt-balance-cat
|
||||||
|
|
||||||
.. option:: -fno-func-opt-split-cat
|
.. option:: -fno-func-opt-split-cat
|
||||||
|
|
||||||
.. option:: -fno-gate
|
.. option:: -fno-gate
|
||||||
|
|
|
||||||
|
|
@ -225,7 +225,6 @@ set(COMMON_SOURCES
|
||||||
V3Descope.cpp
|
V3Descope.cpp
|
||||||
V3Dfg.cpp
|
V3Dfg.cpp
|
||||||
V3DfgAstToDfg.cpp
|
V3DfgAstToDfg.cpp
|
||||||
V3DfgBalanceTrees.cpp
|
|
||||||
V3DfgCache.cpp
|
V3DfgCache.cpp
|
||||||
V3DfgDecomposition.cpp
|
V3DfgDecomposition.cpp
|
||||||
V3DfgDfgToAst.cpp
|
V3DfgDfgToAst.cpp
|
||||||
|
|
|
||||||
|
|
@ -238,7 +238,6 @@ RAW_OBJS_PCH_ASTNOMT = \
|
||||||
V3Descope.o \
|
V3Descope.o \
|
||||||
V3Dfg.o \
|
V3Dfg.o \
|
||||||
V3DfgAstToDfg.o \
|
V3DfgAstToDfg.o \
|
||||||
V3DfgBalanceTrees.o \
|
|
||||||
V3DfgCache.o \
|
V3DfgCache.o \
|
||||||
V3DfgDecomposition.o \
|
V3DfgDecomposition.o \
|
||||||
V3DfgDfgToAst.o \
|
V3DfgDfgToAst.o \
|
||||||
|
|
|
||||||
|
|
@ -274,9 +274,6 @@ public:
|
||||||
// Predicate: has 1 or more sinks
|
// Predicate: has 1 or more sinks
|
||||||
bool hasSinks() const { return m_sinksp != nullptr; }
|
bool hasSinks() const { return m_sinksp != nullptr; }
|
||||||
|
|
||||||
// Predicate: has precisely 1 sink
|
|
||||||
bool hasSingleSink() const { return m_sinksp && !m_sinksp->m_nextp; }
|
|
||||||
|
|
||||||
// Predicate: has 2 or more sinks
|
// Predicate: has 2 or more sinks
|
||||||
bool hasMultipleSinks() const { return m_sinksp && m_sinksp->m_nextp; }
|
bool hasMultipleSinks() const { return m_sinksp && m_sinksp->m_nextp; }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,197 +0,0 @@
|
||||||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
|
||||||
//*************************************************************************
|
|
||||||
// DESCRIPTION: Verilator: Balance associative op trees in DfgGraphs
|
|
||||||
//
|
|
||||||
// Code available from: https://verilator.org
|
|
||||||
//
|
|
||||||
//*************************************************************************
|
|
||||||
//
|
|
||||||
// Copyright 2003-2024 by Wilson Snyder. This program is free software; you
|
|
||||||
// can redistribute it and/or modify it under the terms of either the GNU
|
|
||||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
|
||||||
// Version 2.0.
|
|
||||||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
|
||||||
//
|
|
||||||
//*************************************************************************
|
|
||||||
//
|
|
||||||
// - Convert concatenation trees into balanced form
|
|
||||||
//
|
|
||||||
//*************************************************************************
|
|
||||||
|
|
||||||
#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT
|
|
||||||
|
|
||||||
#include "V3Dfg.h"
|
|
||||||
#include "V3DfgPasses.h"
|
|
||||||
|
|
||||||
VL_DEFINE_DEBUG_FUNCTIONS;
|
|
||||||
|
|
||||||
class DfgBalanceTrees final {
|
|
||||||
// We keep the expressions, together with their offsets within a concatenation tree
|
|
||||||
struct ConcatTerm final {
|
|
||||||
DfgVertex* vtxp = nullptr;
|
|
||||||
size_t offset = 0;
|
|
||||||
|
|
||||||
ConcatTerm() = default;
|
|
||||||
ConcatTerm(DfgVertex* vtxp, size_t offset)
|
|
||||||
: vtxp{vtxp}
|
|
||||||
, offset{offset} {}
|
|
||||||
};
|
|
||||||
|
|
||||||
DfgGraph& m_dfg; // The graph being processed
|
|
||||||
V3DfgBalanceTreesContext& m_ctx; // The optimization context for stats
|
|
||||||
|
|
||||||
// Is the given vertex the root of a tree (of potentially size 1), of the given type?
|
|
||||||
template <typename Vertex>
|
|
||||||
static bool isRoot(const DfgVertex& vtx) {
|
|
||||||
static_assert(std::is_base_of<DfgVertexBinary, Vertex>::value,
|
|
||||||
"'Vertex' must be a 'DfgVertexBinary'");
|
|
||||||
if (!vtx.is<Vertex>()) return false;
|
|
||||||
// Has a single sink, and that sink is not another vertex of the same type
|
|
||||||
return vtx.hasSingleSink() && !vtx.findSink<Vertex>();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recursive implementation of 'gatherTerms' below.
|
|
||||||
template <typename Vertex>
|
|
||||||
static void gatherTermsImpl(DfgVertex* vtxp, std::vector<DfgVertex*>& terms) {
|
|
||||||
// Base case: different type, or multiple sinks -> it's a term
|
|
||||||
if (!vtxp->is<Vertex>() || vtxp->hasMultipleSinks()) {
|
|
||||||
terms.emplace_back(vtxp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Recursive case: gather sub terms, right to left
|
|
||||||
DfgVertexBinary* const binp = vtxp->as<Vertex>();
|
|
||||||
gatherTermsImpl<Vertex>(binp->rhsp(), terms);
|
|
||||||
gatherTermsImpl<Vertex>(binp->lhsp(), terms);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Gather terms in the tree of given type, rooted at the given vertex.
|
|
||||||
// Results are right to left, that is, index 0 in the returned vector
|
|
||||||
// is the rightmost term, index size()-1 is the leftmost term.
|
|
||||||
template <typename Vertex>
|
|
||||||
static std::vector<DfgVertex*> gatherTerms(Vertex& root) {
|
|
||||||
static_assert(std::is_base_of<DfgVertexBinary, Vertex>::value,
|
|
||||||
"'Vertex' must be a 'DfgVertexBinary'");
|
|
||||||
std::vector<DfgVertex*> terms;
|
|
||||||
gatherTermsImpl<Vertex>(root.rhsp(), terms);
|
|
||||||
gatherTermsImpl<Vertex>(root.lhsp(), terms);
|
|
||||||
return terms;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Construct a balanced concatenation from the given terms,
|
|
||||||
// between indices begin (inclusive), and end (exclusive).
|
|
||||||
// Note term[end].offset must be valid. term[end].vtxp is
|
|
||||||
// never referenced.
|
|
||||||
DfgVertex* constructConcat(const std::vector<ConcatTerm>& terms, const size_t begin,
|
|
||||||
const size_t end) {
|
|
||||||
UASSERT(end < terms.size(), "Invalid end");
|
|
||||||
UASSERT(begin < end, "Invalid range");
|
|
||||||
// Base case: just return the term
|
|
||||||
if (end == begin + 1) return terms[begin].vtxp;
|
|
||||||
|
|
||||||
// Recursive case:
|
|
||||||
// Compute the mid-point, trying to create roughly equal width intermediates
|
|
||||||
const size_t width = terms[end].offset - terms[begin].offset;
|
|
||||||
const size_t midOffset = width / 2 + terms[begin].offset;
|
|
||||||
const auto beginIt = terms.begin() + begin;
|
|
||||||
const auto endIt = terms.begin() + end;
|
|
||||||
const auto midIt = std::lower_bound(beginIt + 1, endIt - 1, midOffset, //
|
|
||||||
[&](const ConcatTerm& term, size_t value) { //
|
|
||||||
return term.offset < value;
|
|
||||||
});
|
|
||||||
const size_t mid = begin + std::distance(beginIt, midIt);
|
|
||||||
UASSERT(begin < mid && mid < end, "Must make some progress");
|
|
||||||
// Construct the subtrees
|
|
||||||
DfgVertex* const rhsp = constructConcat(terms, begin, mid);
|
|
||||||
DfgVertex* const lhsp = constructConcat(terms, mid, end);
|
|
||||||
// Construct new node
|
|
||||||
AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(lhsp->width() + rhsp->width());
|
|
||||||
DfgConcat* const newp = new DfgConcat{m_dfg, lhsp->fileline(), dtypep};
|
|
||||||
newp->rhsp(rhsp);
|
|
||||||
newp->lhsp(lhsp);
|
|
||||||
return newp;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete unused tree rooted at the given vertex
|
|
||||||
void deleteTree(DfgVertexBinary* const vtxp) {
|
|
||||||
UASSERT_OBJ(!vtxp->hasSinks(), vtxp, "Trying to remove used vertex");
|
|
||||||
DfgVertexBinary* const lhsp = vtxp->lhsp()->cast<DfgVertexBinary>();
|
|
||||||
DfgVertexBinary* const rhsp = vtxp->rhsp()->cast<DfgVertexBinary>();
|
|
||||||
VL_DO_DANGLING(vtxp->unlinkDelete(m_dfg), vtxp);
|
|
||||||
if (lhsp && !lhsp->hasSinks()) deleteTree(lhsp);
|
|
||||||
if (rhsp && !rhsp->hasSinks()) deleteTree(rhsp);
|
|
||||||
}
|
|
||||||
|
|
||||||
void balanceConcat(DfgConcat* const rootp) {
|
|
||||||
// Gather all input vertices of the tree
|
|
||||||
const std::vector<DfgVertex*> vtxps = gatherTerms<DfgConcat>(*rootp);
|
|
||||||
// Don't bother with trivial trees
|
|
||||||
if (vtxps.size() <= 3) return;
|
|
||||||
|
|
||||||
// Construct the terms Vector that we are going to do processing on
|
|
||||||
std::vector<ConcatTerm> terms(vtxps.size() + 1);
|
|
||||||
// These are redundant (constructor does the same), but here they are for clarity
|
|
||||||
terms[0].offset = 0;
|
|
||||||
terms[vtxps.size()].vtxp = nullptr;
|
|
||||||
for (size_t i = 0; i < vtxps.size(); ++i) {
|
|
||||||
terms[i].vtxp = vtxps[i];
|
|
||||||
terms[i + 1].offset = terms[i].offset + vtxps[i]->width();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Round 1: try to create terms ending on VL_EDATASIZE boundaries.
|
|
||||||
// This ensures we pack bits within a VL_EDATASIZE first if possible,
|
|
||||||
// and then hopefully we can just assemble VL_EDATASIZE words afterward.
|
|
||||||
std::vector<ConcatTerm> terms2;
|
|
||||||
{
|
|
||||||
terms2.reserve(terms.size());
|
|
||||||
|
|
||||||
size_t begin = 0; // Start of current range considered
|
|
||||||
size_t end = 0; // End of current range considered
|
|
||||||
size_t offset = 0; // Offset of current range considered
|
|
||||||
|
|
||||||
// Create a term from the current range
|
|
||||||
const auto makeTerm = [&]() {
|
|
||||||
DfgVertex* const vtxp = constructConcat(terms, begin, end);
|
|
||||||
terms2.emplace_back(vtxp, offset);
|
|
||||||
offset += vtxp->width();
|
|
||||||
begin = end;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Create all terms ending on a boundary.
|
|
||||||
while (++end < terms.size() - 1) {
|
|
||||||
if (terms[end].offset % VL_EDATASIZE == 0) makeTerm();
|
|
||||||
}
|
|
||||||
// Final term. Loop condition above ensures this always exists,
|
|
||||||
// and might or might not be on a boundary.
|
|
||||||
makeTerm();
|
|
||||||
// Sentinel term
|
|
||||||
terms2.emplace_back(nullptr, offset);
|
|
||||||
// should have ended up with the same number of bits at least...
|
|
||||||
UASSERT(terms2.back().offset == terms.back().offset, "Inconsitent terms");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Round 2: Combine the partial terms
|
|
||||||
rootp->replaceWith(constructConcat(terms2, 0, terms2.size() - 1));
|
|
||||||
VL_DO_DANGLING(deleteTree(rootp), rootp);
|
|
||||||
|
|
||||||
++m_ctx.m_balancedConcats;
|
|
||||||
}
|
|
||||||
|
|
||||||
DfgBalanceTrees(DfgGraph& dfg, V3DfgBalanceTreesContext& ctx)
|
|
||||||
: m_dfg{dfg}
|
|
||||||
, m_ctx{ctx} {
|
|
||||||
// Find all roots
|
|
||||||
std::vector<DfgConcat*> rootps;
|
|
||||||
for (DfgVertex& vtx : dfg.opVertices()) {
|
|
||||||
if (isRoot<DfgConcat>(vtx)) rootps.emplace_back(vtx.as<DfgConcat>());
|
|
||||||
}
|
|
||||||
// Balance them
|
|
||||||
for (DfgConcat* const rootp : rootps) balanceConcat(rootp);
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
static void apply(DfgGraph& dfg, V3DfgBalanceTreesContext& ctx) { DfgBalanceTrees{dfg, ctx}; }
|
|
||||||
};
|
|
||||||
|
|
||||||
void V3DfgPasses::balanceTrees(DfgGraph& dfg, V3DfgBalanceTreesContext& ctx) {
|
|
||||||
DfgBalanceTrees::apply(dfg, ctx);
|
|
||||||
}
|
|
||||||
|
|
@ -236,7 +236,7 @@ void V3DfgOptimizer::extract(AstNetlist* netlistp) {
|
||||||
V3Global::dumpCheckGlobalTree("dfg-extract", 0, dumpTreeEitherLevel() >= 3);
|
V3Global::dumpCheckGlobalTree("dfg-extract", 0, dumpTreeEitherLevel() >= 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void V3DfgOptimizer::optimize(AstNetlist* netlistp, const string& label, bool lastInvocation) {
|
void V3DfgOptimizer::optimize(AstNetlist* netlistp, const string& label) {
|
||||||
UINFO(2, __FUNCTION__ << ": " << endl);
|
UINFO(2, __FUNCTION__ << ": " << endl);
|
||||||
|
|
||||||
// NODE STATE
|
// NODE STATE
|
||||||
|
|
@ -282,7 +282,7 @@ void V3DfgOptimizer::optimize(AstNetlist* netlistp, const string& label, bool la
|
||||||
for (auto& component : acyclicComponents) {
|
for (auto& component : acyclicComponents) {
|
||||||
if (dumpDfgLevel() >= 7) component->dumpDotFilePrefixed(ctx.prefix() + "source");
|
if (dumpDfgLevel() >= 7) component->dumpDotFilePrefixed(ctx.prefix() + "source");
|
||||||
// Optimize the component
|
// Optimize the component
|
||||||
V3DfgPasses::optimize(*component, ctx, lastInvocation);
|
V3DfgPasses::optimize(*component, ctx);
|
||||||
// Add back under the main DFG (we will convert everything back in one go)
|
// Add back under the main DFG (we will convert everything back in one go)
|
||||||
dfg->addGraph(*component);
|
dfg->addGraph(*component);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ namespace V3DfgOptimizer {
|
||||||
void extract(AstNetlist*) VL_MT_DISABLED;
|
void extract(AstNetlist*) VL_MT_DISABLED;
|
||||||
|
|
||||||
// Optimize the design
|
// Optimize the design
|
||||||
void optimize(AstNetlist*, const string& label, bool lastInvocation) VL_MT_DISABLED;
|
void optimize(AstNetlist*, const string& label) VL_MT_DISABLED;
|
||||||
} // namespace V3DfgOptimizer
|
} // namespace V3DfgOptimizer
|
||||||
|
|
||||||
#endif // Guard
|
#endif // Guard
|
||||||
|
|
|
||||||
|
|
@ -42,11 +42,6 @@ V3DfgEliminateVarsContext::~V3DfgEliminateVarsContext() {
|
||||||
m_varsRemoved);
|
m_varsRemoved);
|
||||||
}
|
}
|
||||||
|
|
||||||
V3DfgBalanceTreesContext::~V3DfgBalanceTreesContext() {
|
|
||||||
V3Stats::addStat("Optimizations, DFG " + m_label + " BalanceTrees, concat trees balanced",
|
|
||||||
m_balancedConcats);
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string getPrefix(const std::string& label) {
|
static std::string getPrefix(const std::string& label) {
|
||||||
if (label.empty()) return "";
|
if (label.empty()) return "";
|
||||||
std::string str = VString::removeWhitespace(label);
|
std::string str = VString::removeWhitespace(label);
|
||||||
|
|
@ -337,7 +332,7 @@ void V3DfgPasses::eliminateVars(DfgGraph& dfg, V3DfgEliminateVarsContext& ctx) {
|
||||||
for (AstVar* const varp : replacedVariables) varp->unlinkFrBack()->deleteTree();
|
for (AstVar* const varp : replacedVariables) varp->unlinkFrBack()->deleteTree();
|
||||||
}
|
}
|
||||||
|
|
||||||
void V3DfgPasses::optimize(DfgGraph& dfg, V3DfgOptimizationContext& ctx, bool lastInvocation) {
|
void V3DfgPasses::optimize(DfgGraph& dfg, V3DfgOptimizationContext& ctx) {
|
||||||
// There is absolutely nothing useful we can do with a graph of size 2 or less
|
// There is absolutely nothing useful we can do with a graph of size 2 or less
|
||||||
if (dfg.size() <= 2) return;
|
if (dfg.size() <= 2) return;
|
||||||
|
|
||||||
|
|
@ -365,10 +360,6 @@ void V3DfgPasses::optimize(DfgGraph& dfg, V3DfgOptimizationContext& ctx, bool la
|
||||||
}
|
}
|
||||||
// Accumulate patterns for reporting
|
// Accumulate patterns for reporting
|
||||||
if (v3Global.opt.stats()) ctx.m_patternStats.accumulate(dfg);
|
if (v3Global.opt.stats()) ctx.m_patternStats.accumulate(dfg);
|
||||||
// The peephole pass converts all trees to right leaning, so only do this on the last DFG run.
|
|
||||||
if (lastInvocation) {
|
|
||||||
apply(4, "balanceTrees", [&]() { balanceTrees(dfg, ctx.m_balanceTreesContext); });
|
|
||||||
}
|
|
||||||
apply(4, "regularize", [&]() { regularize(dfg, ctx.m_regularizeContext); });
|
apply(4, "regularize", [&]() { regularize(dfg, ctx.m_regularizeContext); });
|
||||||
if (dumpDfgLevel() >= 8) dfg.dumpDotAllVarConesPrefixed(ctx.prefix() + "optimized");
|
if (dumpDfgLevel() >= 8) dfg.dumpDotAllVarConesPrefixed(ctx.prefix() + "optimized");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -68,17 +68,6 @@ public:
|
||||||
~V3DfgEliminateVarsContext() VL_MT_DISABLED;
|
~V3DfgEliminateVarsContext() VL_MT_DISABLED;
|
||||||
};
|
};
|
||||||
|
|
||||||
class V3DfgBalanceTreesContext final {
|
|
||||||
const std::string m_label; // Label to apply to stats
|
|
||||||
|
|
||||||
public:
|
|
||||||
VDouble0 m_balancedConcats; // Number of concat trees balanced
|
|
||||||
|
|
||||||
explicit V3DfgBalanceTreesContext(const std::string& label)
|
|
||||||
: m_label{label} {}
|
|
||||||
~V3DfgBalanceTreesContext() VL_MT_DISABLED;
|
|
||||||
};
|
|
||||||
|
|
||||||
class V3DfgOptimizationContext final {
|
class V3DfgOptimizationContext final {
|
||||||
const std::string m_label; // Label to add to stats, etc.
|
const std::string m_label; // Label to add to stats, etc.
|
||||||
const std::string m_prefix; // Prefix to add to file dumps (derived from label)
|
const std::string m_prefix; // Prefix to add to file dumps (derived from label)
|
||||||
|
|
@ -103,7 +92,6 @@ public:
|
||||||
V3DfgPeepholeContext m_peepholeContext{m_label};
|
V3DfgPeepholeContext m_peepholeContext{m_label};
|
||||||
V3DfgRegularizeContext m_regularizeContext{m_label};
|
V3DfgRegularizeContext m_regularizeContext{m_label};
|
||||||
V3DfgEliminateVarsContext m_eliminateVarsContext{m_label};
|
V3DfgEliminateVarsContext m_eliminateVarsContext{m_label};
|
||||||
V3DfgBalanceTreesContext m_balanceTreesContext{m_label};
|
|
||||||
|
|
||||||
V3DfgPatternStats m_patternStats;
|
V3DfgPatternStats m_patternStats;
|
||||||
|
|
||||||
|
|
@ -124,7 +112,7 @@ namespace V3DfgPasses {
|
||||||
DfgGraph* astToDfg(AstModule&, V3DfgOptimizationContext&) VL_MT_DISABLED;
|
DfgGraph* astToDfg(AstModule&, V3DfgOptimizationContext&) VL_MT_DISABLED;
|
||||||
|
|
||||||
// Optimize the given DfgGraph
|
// Optimize the given DfgGraph
|
||||||
void optimize(DfgGraph&, V3DfgOptimizationContext&, bool lastInvocation) VL_MT_DISABLED;
|
void optimize(DfgGraph&, V3DfgOptimizationContext&) VL_MT_DISABLED;
|
||||||
|
|
||||||
// Convert DfgGraph back into Ast, and insert converted graph back into its parent module.
|
// Convert DfgGraph back into Ast, and insert converted graph back into its parent module.
|
||||||
// Returns the parent module.
|
// Returns the parent module.
|
||||||
|
|
@ -146,8 +134,6 @@ void regularize(DfgGraph&, V3DfgRegularizeContext&) VL_MT_DISABLED;
|
||||||
void removeUnused(DfgGraph&) VL_MT_DISABLED;
|
void removeUnused(DfgGraph&) VL_MT_DISABLED;
|
||||||
// Eliminate (remove or replace) redundant variables. Also removes resulting unused logic.
|
// Eliminate (remove or replace) redundant variables. Also removes resulting unused logic.
|
||||||
void eliminateVars(DfgGraph&, V3DfgEliminateVarsContext&) VL_MT_DISABLED;
|
void eliminateVars(DfgGraph&, V3DfgEliminateVarsContext&) VL_MT_DISABLED;
|
||||||
// Make computation trees balanced
|
|
||||||
void balanceTrees(DfgGraph&, V3DfgBalanceTreesContext&) VL_MT_DISABLED;
|
|
||||||
|
|
||||||
} // namespace V3DfgPasses
|
} // namespace V3DfgPasses
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,12 @@
|
||||||
// foo[_:_] = r;
|
// foo[_:_] = r;
|
||||||
// foo[_:_] = l;
|
// foo[_:_] = l;
|
||||||
//
|
//
|
||||||
|
// - Balance concatenation trees, e.g.:
|
||||||
|
// {a, {b, {c, d}}}
|
||||||
|
// becomes:
|
||||||
|
// {{a, b}, {c, d}}
|
||||||
|
// Reality is more complex here, see the code.
|
||||||
|
//
|
||||||
//*************************************************************************
|
//*************************************************************************
|
||||||
|
|
||||||
#include "V3PchAstMT.h"
|
#include "V3PchAstMT.h"
|
||||||
|
|
@ -33,11 +39,144 @@
|
||||||
|
|
||||||
VL_DEFINE_DEBUG_FUNCTIONS;
|
VL_DEFINE_DEBUG_FUNCTIONS;
|
||||||
|
|
||||||
|
class BalanceConcatTree final {
|
||||||
|
// STATELESS
|
||||||
|
|
||||||
|
// We keep the expressions, together with their offsets within a concatenation tree
|
||||||
|
struct Term final {
|
||||||
|
AstNodeExpr* exprp = nullptr;
|
||||||
|
size_t offset = 0;
|
||||||
|
|
||||||
|
Term() = default;
|
||||||
|
Term(AstNodeExpr* exprp, size_t offset)
|
||||||
|
: exprp{exprp}
|
||||||
|
, offset{offset} {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Recursive implementation of 'gatherTerms' below.
|
||||||
|
static void gatherTermsRecursive(AstNodeExpr* exprp, std::vector<AstNodeExpr*>& terms) {
|
||||||
|
if (AstConcat* const catp = VN_CAST(exprp, Concat)) {
|
||||||
|
// Recursive case: gather sub terms, right to left
|
||||||
|
gatherTermsRecursive(catp->rhsp(), terms);
|
||||||
|
gatherTermsRecursive(catp->lhsp(), terms);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Base case: different operation
|
||||||
|
terms.emplace_back(exprp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gather terms in the tree rooted at the given node.
|
||||||
|
// Results are right to left, that is, index 0 in the returned vector
|
||||||
|
// is the rightmost term, index size()-1 is the leftmost term.
|
||||||
|
static std::vector<AstNodeExpr*> gatherTerms(AstConcat* rootp) {
|
||||||
|
std::vector<AstNodeExpr*> terms;
|
||||||
|
gatherTermsRecursive(rootp->rhsp(), terms);
|
||||||
|
gatherTermsRecursive(rootp->lhsp(), terms);
|
||||||
|
return terms;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Construct a balanced concatenation from the given terms,
|
||||||
|
// between indices begin (inclusive), and end (exclusive).
|
||||||
|
// Note terms[end].offset must be valid. terms[end].exprp is
|
||||||
|
// never referenced.
|
||||||
|
static AstNodeExpr* construct(const std::vector<Term>& terms, const size_t begin,
|
||||||
|
const size_t end) {
|
||||||
|
UASSERT(end < terms.size(), "Invalid end");
|
||||||
|
UASSERT(begin < end, "Invalid range");
|
||||||
|
// Base case: just return the term
|
||||||
|
if (end == begin + 1) return terms[begin].exprp;
|
||||||
|
|
||||||
|
// Recursive case:
|
||||||
|
// Compute the mid-point, trying to create roughly equal width intermediates
|
||||||
|
const size_t width = terms[end].offset - terms[begin].offset;
|
||||||
|
const size_t midOffset = width / 2 + terms[begin].offset;
|
||||||
|
const auto beginIt = terms.begin() + begin;
|
||||||
|
const auto endIt = terms.begin() + end;
|
||||||
|
const auto midIt = std::lower_bound(beginIt + 1, endIt - 1, midOffset, //
|
||||||
|
[&](const Term& term, size_t value) { //
|
||||||
|
return term.offset < value;
|
||||||
|
});
|
||||||
|
const size_t mid = begin + std::distance(beginIt, midIt);
|
||||||
|
UASSERT(begin < mid && mid < end, "Must make some progress");
|
||||||
|
// Construct the subtrees
|
||||||
|
AstNodeExpr* const rhsp = construct(terms, begin, mid);
|
||||||
|
AstNodeExpr* const lhsp = construct(terms, mid, end);
|
||||||
|
// Construct new node
|
||||||
|
AstNodeExpr* newp = new AstConcat{lhsp->fileline(), lhsp, rhsp};
|
||||||
|
newp->user1(true); // Must not attempt to balance again.
|
||||||
|
return newp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns replacement node, or nullptr if no change
|
||||||
|
static AstConcat* balance(AstConcat* const rootp) {
|
||||||
|
UINFO(9, "balanceConcat " << rootp << "\n");
|
||||||
|
// Gather all leaf terms (non-Concat operands) of the tree
|
||||||
|
const std::vector<AstNodeExpr*> exprps = gatherTerms(rootp);
|
||||||
|
// Don't bother with trivial trees
|
||||||
|
if (exprps.size() <= 3) return nullptr;
|
||||||
|
// Don't do it if any of the terms are impure
|
||||||
|
for (AstNodeExpr* const exprp : exprps) {
|
||||||
|
if (!exprp->isPure()) return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Construct the terms Vector that we are going to do processing on
|
||||||
|
std::vector<Term> terms(exprps.size() + 1);
|
||||||
|
// These are redundant (constructor does the same), but here they are for clarity
|
||||||
|
terms[0].offset = 0;
|
||||||
|
terms[exprps.size()].exprp = nullptr;
|
||||||
|
for (size_t i = 0; i < exprps.size(); ++i) {
|
||||||
|
terms[i].exprp = exprps[i]->unlinkFrBack();
|
||||||
|
terms[i + 1].offset = terms[i].offset + exprps[i]->width();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round 1: try to create terms ending on VL_EDATASIZE boundaries.
|
||||||
|
// This ensures we pack bits within a VL_EDATASIZE first if possible,
|
||||||
|
// and then hopefully we can just assemble VL_EDATASIZE words afterward.
|
||||||
|
std::vector<Term> terms2;
|
||||||
|
{
|
||||||
|
terms2.reserve(terms.size());
|
||||||
|
|
||||||
|
size_t begin = 0; // Start of current range considered
|
||||||
|
size_t end = 0; // End of current range considered
|
||||||
|
size_t offset = 0; // Offset of current range considered
|
||||||
|
|
||||||
|
// Create a term from the current range
|
||||||
|
const auto makeTerm = [&]() {
|
||||||
|
AstNodeExpr* const exprp = construct(terms, begin, end);
|
||||||
|
terms2.emplace_back(exprp, offset);
|
||||||
|
offset += exprp->width();
|
||||||
|
begin = end;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create all terms ending on a boundary.
|
||||||
|
while (++end < terms.size() - 1) {
|
||||||
|
if (terms[end].offset % VL_EDATASIZE == 0) makeTerm();
|
||||||
|
}
|
||||||
|
// Final term. Loop condition above ensures this always exists,
|
||||||
|
// and might or might not be on a boundary.
|
||||||
|
makeTerm();
|
||||||
|
// Sentinel term
|
||||||
|
terms2.emplace_back(nullptr, offset);
|
||||||
|
// should have ended up with the same number of bits at least...
|
||||||
|
UASSERT(terms2.back().offset == terms.back().offset, "Inconsitent terms");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round 2: Combine the partial terms
|
||||||
|
return VN_AS(construct(terms2, 0, terms2.size() - 1), Concat);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
static AstConcat* apply(AstConcat* rootp) { return balance(rootp); }
|
||||||
|
};
|
||||||
|
|
||||||
class FuncOptVisitor final : public VNVisitor {
|
class FuncOptVisitor final : public VNVisitor {
|
||||||
// NODE STATE
|
// NODE STATE
|
||||||
// AstNodeAssign::user() -> bool. Already checked, safe to split. Omit expensive check.
|
// AstNodeAssign::user() -> bool. Already checked, safe to split. Omit expensive check.
|
||||||
|
// AstConcat::user() -> bool. Already balanced.
|
||||||
|
|
||||||
// STATE - Statistic tracking
|
// STATE - Statistic tracking
|
||||||
|
VDouble0 m_balancedConcats; // Number of concatenations balanced
|
||||||
VDouble0 m_concatSplits; // Number of splits in assignments with Concat on RHS
|
VDouble0 m_concatSplits; // Number of splits in assignments with Concat on RHS
|
||||||
|
|
||||||
// True for e.g.: foo = foo >> 1; or foo[foo[0]] = ...;
|
// True for e.g.: foo = foo >> 1; or foo[foo[0]] = ...;
|
||||||
|
|
@ -142,18 +281,34 @@ class FuncOptVisitor final : public VNVisitor {
|
||||||
void visit(AstNodeAssign* nodep) override {
|
void visit(AstNodeAssign* nodep) override {
|
||||||
// TODO: Only thing remaining inside functions should be AstAssign (that is, an actual
|
// TODO: Only thing remaining inside functions should be AstAssign (that is, an actual
|
||||||
// assignment statement), but we still use AstAssignW, AstAssignDly, and all, fix.
|
// assignment statement), but we still use AstAssignW, AstAssignDly, and all, fix.
|
||||||
|
iterateChildren(nodep);
|
||||||
|
|
||||||
if (v3Global.opt.fFuncSplitCat()) {
|
if (v3Global.opt.fFuncSplitCat()) {
|
||||||
if (splitConcat(nodep)) return; // Must return here, in case more code is added below
|
if (splitConcat(nodep)) return; // Must return here, in case more code is added below
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void visit(AstNodeExpr*) override {} // No need to descend further (Ignore AstExprStmt...)
|
void visit(AstConcat* nodep) override {
|
||||||
|
if (v3Global.opt.fFuncBalanceCat() && !nodep->user1() && !VN_IS(nodep->backp(), Concat)) {
|
||||||
|
if (AstConcat* const newp = BalanceConcatTree::apply(nodep)) {
|
||||||
|
UINFO(5, "balanceConcat optimizing " << nodep << "\n");
|
||||||
|
++m_balancedConcats;
|
||||||
|
nodep->replaceWith(newp);
|
||||||
|
VL_DO_DANGLING(pushDeletep(nodep), nodep);
|
||||||
|
newp->user1(true); // Must not attempt again.
|
||||||
|
// Return here. The new node will be iterated next.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
iterateChildren(nodep);
|
||||||
|
}
|
||||||
|
|
||||||
void visit(AstNode* nodep) override { iterateChildren(nodep); }
|
void visit(AstNode* nodep) override { iterateChildren(nodep); }
|
||||||
|
|
||||||
// CONSTRUCTORS
|
// CONSTRUCTORS
|
||||||
explicit FuncOptVisitor(AstCFunc* funcp) { iterateChildren(funcp); }
|
explicit FuncOptVisitor(AstCFunc* funcp) { iterateChildren(funcp); }
|
||||||
~FuncOptVisitor() override {
|
~FuncOptVisitor() override {
|
||||||
|
V3Stats::addStatSum("Optimizations, FuncOpt concat trees balanced", m_balancedConcats);
|
||||||
V3Stats::addStatSum("Optimizations, FuncOpt concat splits", m_concatSplits);
|
V3Stats::addStatSum("Optimizations, FuncOpt concat splits", m_concatSplits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1305,7 +1305,9 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
|
||||||
DECL_OPTION("-fexpand", FOnOff, &m_fExpand);
|
DECL_OPTION("-fexpand", FOnOff, &m_fExpand);
|
||||||
DECL_OPTION("-ffunc-opt", CbFOnOff, [this](bool flag) { //
|
DECL_OPTION("-ffunc-opt", CbFOnOff, [this](bool flag) { //
|
||||||
m_fFuncSplitCat = flag;
|
m_fFuncSplitCat = flag;
|
||||||
|
m_fFuncBalanceCat = flag;
|
||||||
});
|
});
|
||||||
|
DECL_OPTION("-ffunc-opt-balance-cat", FOnOff, &m_fFuncBalanceCat);
|
||||||
DECL_OPTION("-ffunc-opt-split-cat", FOnOff, &m_fFuncSplitCat);
|
DECL_OPTION("-ffunc-opt-split-cat", FOnOff, &m_fFuncSplitCat);
|
||||||
DECL_OPTION("-fgate", FOnOff, &m_fGate);
|
DECL_OPTION("-fgate", FOnOff, &m_fGate);
|
||||||
DECL_OPTION("-finline", FOnOff, &m_fInline);
|
DECL_OPTION("-finline", FOnOff, &m_fInline);
|
||||||
|
|
|
||||||
|
|
@ -384,6 +384,7 @@ private:
|
||||||
bool m_fDeadAssigns; // main switch: -fno-dead-assigns: remove dead assigns
|
bool m_fDeadAssigns; // main switch: -fno-dead-assigns: remove dead assigns
|
||||||
bool m_fDeadCells; // main switch: -fno-dead-cells: remove dead cells
|
bool m_fDeadCells; // main switch: -fno-dead-cells: remove dead cells
|
||||||
bool m_fExpand; // main switch: -fno-expand: expansion of C macros
|
bool m_fExpand; // main switch: -fno-expand: expansion of C macros
|
||||||
|
bool m_fFuncBalanceCat = true; // main switch: -fno-func-opt-balance-cat: balance concatenation trees
|
||||||
bool m_fFuncSplitCat = true; // main switch: -fno-func-opt-split-cat: split assignments with Concat on RHS
|
bool m_fFuncSplitCat = true; // main switch: -fno-func-opt-split-cat: split assignments with Concat on RHS
|
||||||
bool m_fGate; // main switch: -fno-gate: gate wire elimination
|
bool m_fGate; // main switch: -fno-gate: gate wire elimination
|
||||||
bool m_fInline; // main switch: -fno-inline: module inlining
|
bool m_fInline; // main switch: -fno-inline: module inlining
|
||||||
|
|
@ -675,8 +676,9 @@ public:
|
||||||
bool fDeadAssigns() const { return m_fDeadAssigns; }
|
bool fDeadAssigns() const { return m_fDeadAssigns; }
|
||||||
bool fDeadCells() const { return m_fDeadCells; }
|
bool fDeadCells() const { return m_fDeadCells; }
|
||||||
bool fExpand() const { return m_fExpand; }
|
bool fExpand() const { return m_fExpand; }
|
||||||
|
bool fFuncBalanceCat() const { return m_fFuncBalanceCat; }
|
||||||
bool fFuncSplitCat() const { return m_fFuncSplitCat; }
|
bool fFuncSplitCat() const { return m_fFuncSplitCat; }
|
||||||
bool fFunc() const { return fFuncSplitCat(); }
|
bool fFunc() const { return fFuncSplitCat() || fFuncBalanceCat(); }
|
||||||
bool fGate() const { return m_fGate; }
|
bool fGate() const { return m_fGate; }
|
||||||
bool fInline() const { return m_fInline; }
|
bool fInline() const { return m_fInline; }
|
||||||
bool fLife() const { return m_fLife; }
|
bool fLife() const { return m_fLife; }
|
||||||
|
|
|
||||||
|
|
@ -287,7 +287,7 @@ static void process() {
|
||||||
|
|
||||||
if (v3Global.opt.fDfgPreInline()) {
|
if (v3Global.opt.fDfgPreInline()) {
|
||||||
// Pre inline DFG optimization
|
// Pre inline DFG optimization
|
||||||
V3DfgOptimizer::optimize(v3Global.rootp(), "pre inline", /* lastInvocation: */ false);
|
V3DfgOptimizer::optimize(v3Global.rootp(), "pre inline");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(v3Global.opt.serializeOnly() && !v3Global.opt.flatten())) {
|
if (!(v3Global.opt.serializeOnly() && !v3Global.opt.flatten())) {
|
||||||
|
|
@ -304,7 +304,7 @@ static void process() {
|
||||||
|
|
||||||
if (v3Global.opt.fDfgPostInline()) {
|
if (v3Global.opt.fDfgPostInline()) {
|
||||||
// Post inline DFG optimization
|
// Post inline DFG optimization
|
||||||
V3DfgOptimizer::optimize(v3Global.rootp(), "post inline", /* lastInvocation: */ true);
|
V3DfgOptimizer::optimize(v3Global.rootp(), "post inline");
|
||||||
}
|
}
|
||||||
|
|
||||||
// --PRE-FLAT OPTIMIZATIONS------------------
|
// --PRE-FLAT OPTIMIZATIONS------------------
|
||||||
|
|
|
||||||
|
|
@ -13,12 +13,7 @@ test.scenarios('vlt')
|
||||||
|
|
||||||
test.compile(verilator_flags2=["--stats"])
|
test.compile(verilator_flags2=["--stats"])
|
||||||
|
|
||||||
test.file_grep(test.stats,
|
test.file_grep(test.stats, r'Optimizations, FuncOpt concat trees balanced\s+(\d+)', 1)
|
||||||
r' Optimizations, DFG pre inline BalanceTrees, concat trees balanced\s+(\d+)', 0)
|
|
||||||
test.file_grep(test.stats,
|
|
||||||
r' Optimizations, DFG post inline BalanceTrees, concat trees balanced\s+(\d+)', 1)
|
|
||||||
test.file_grep(test.stats, r'Optimizations, DFG pre inline Dfg2Ast, result equations\s+(\d+)', 1)
|
|
||||||
test.file_grep(test.stats, r'Optimizations, DFG post inline Dfg2Ast, result equations\s+(\d+)', 1)
|
|
||||||
test.file_grep(test.stats, r'Optimizations, FuncOpt concat splits\s+(\d+)', 62)
|
test.file_grep(test.stats, r'Optimizations, FuncOpt concat splits\s+(\d+)', 62)
|
||||||
|
|
||||||
test.passes()
|
test.passes()
|
||||||
|
|
@ -11,16 +11,11 @@ import vltest_bootstrap
|
||||||
|
|
||||||
test.scenarios('vlt')
|
test.scenarios('vlt')
|
||||||
|
|
||||||
test.top_filename = "t/t_dfg_balance_cats.v"
|
test.top_filename = "t/t_balance_cats.v"
|
||||||
|
|
||||||
test.compile(verilator_flags2=["--stats", "-fno-func-opt"])
|
test.compile(verilator_flags2=["--stats", "-fno-func-opt"])
|
||||||
|
|
||||||
test.file_grep(test.stats,
|
# Compiled with -fno-func-opt: the concat balancing statistic should be absent.
# The pattern must spell the statistic exactly as emitted ("... balanced"),
# otherwise this negative check can never fail.
test.file_grep_not(test.stats, r'Optimizations, FuncOpt concat trees balanced')
|
||||||
r' Optimizations, DFG pre inline BalanceTrees, concat trees balanced\s+(\d+)', 0)
|
|
||||||
test.file_grep(test.stats,
|
|
||||||
r' Optimizations, DFG post inline BalanceTrees, concat trees balanced\s+(\d+)', 1)
|
|
||||||
test.file_grep(test.stats, r'Optimizations, DFG pre inline Dfg2Ast, result equations\s+(\d+)', 1)
|
|
||||||
test.file_grep(test.stats, r'Optimizations, DFG post inline Dfg2Ast, result equations\s+(\d+)', 1)
|
|
||||||
test.file_grep_not(test.stats, r'Optimizations, FuncOpt concat splits')
|
test.file_grep_not(test.stats, r'Optimizations, FuncOpt concat splits')
|
||||||
|
|
||||||
test.passes()
|
test.passes()
|
||||||
|
|
@ -17,6 +17,6 @@ test.compile(verilator_flags2=["-Wno-UNOPTTHREADS", "--stats", test.t_dir + "/t_
|
||||||
test.execute()
|
test.execute()
|
||||||
|
|
||||||
if test.vlt:
|
if test.vlt:
|
||||||
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 39)
|
test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 40)
|
||||||
|
|
||||||
test.passes()
|
test.passes()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue