Optimize generated function inlining (#7811)
Previously V3InlineCFuncs inlined call sites but never deleted the now-dead callees. It also missed a lot of opportunities due to evaluation order. Rewrite using a graph-based algorithm, using only a single traversal of the netlist. This is clearer, more accurate, and faster at compile time. Also add a clean -fno-inline-cfuncs option to disable the pass. Setting the limits to 0 still disables inlining, except for empty functions, which can be inlined with 0 limits (they are no-ops). It will also prune unused functions unless -fno-inline-cfuncs is given. The pass now also respects `--output-split`.
This commit is contained in:
parent
5fc03ae913
commit
bcaa110f60
|
|
@ -771,10 +771,24 @@ Summary:
|
|||
|
||||
.. option:: -fno-inline
|
||||
|
||||
Rarely needed. Disable module inlining.
|
||||
|
||||
.. option:: -fno-inline-cfuncs
|
||||
|
||||
Rarely needed. Disable inlining of small generated C++ functions into their
|
||||
callers.
|
||||
|
||||
This optimization is automatically disabled when :vlopt:`--prof-cfuncs` is
|
||||
used.
|
||||
|
||||
.. option:: -fno-inline-funcs
|
||||
|
||||
Rarely needed. Disable inlining of SystemVerilog functions and tasks.
|
||||
|
||||
.. option:: -fno-inline-funcs-eager
|
||||
|
||||
Rarely needed. Disable eager inlining of SystemVerilog functions and tasks.
|
||||
|
||||
.. option:: -fno-life
|
||||
|
||||
.. option:: -fno-life-post
|
||||
|
|
@ -976,27 +990,21 @@ Summary:
|
|||
|
||||
.. option:: --inline-cfuncs <value>
|
||||
|
||||
Inline small C++ function (internal AstCFunc) calls directly into their
|
||||
callers when the function has at most <value> nodes. This reduces
|
||||
function call overhead when :vlopt:`--output-split-cfuncs` places
|
||||
functions in separate compilation units that the C++ compiler cannot
|
||||
inline.
|
||||
Tune the inlining of small generated C++ functions. Functions no bigger than
|
||||
<value> nodes will be inlined if possible. The default is 20.
|
||||
|
||||
Set to 0 to disable this optimization. The default is 20.
|
||||
|
||||
This optimization is automatically disabled when :vlopt:`--prof-cfuncs`
|
||||
or :vlopt:`--trace` is used.
|
||||
See also :vlopt:`--inline-cfuncs-product` and :vlopt:`-fno-inline-cfuncs`.
|
||||
|
||||
.. option:: --inline-cfuncs-product <value>
|
||||
|
||||
Tune the inlining of C++ function (internal AstCFunc) calls for larger
|
||||
functions. When a function is too large to always inline (exceeds
|
||||
:vlopt:`--inline-cfuncs` threshold), it may still be inlined if the
|
||||
function size multiplied by the number of call sites is at most <value>.
|
||||
Tune the inlining of small generated C++ functions. If a function's node
|
||||
count multiplied by the number of calls is not bigger than <value>, the
|
||||
function will be inlined if possible.
|
||||
|
||||
This allows functions that are called only once or twice to be inlined
|
||||
even if they exceed the small function threshold. Set to 0 to only inline
|
||||
functions below the :vlopt:`--inline-cfuncs` threshold. The default is 200.
|
||||
This allows functions that are called only once or twice to be inlined even
|
||||
if they exceed the small function threshold. The default is 200.
|
||||
|
||||
See also :vlopt:`--inline-cfuncs` and :vlopt:`-fno-inline-cfuncs`.
|
||||
|
||||
.. option:: --inline-mult <value>
|
||||
|
||||
|
|
|
|||
|
|
@ -15,14 +15,13 @@
|
|||
//*************************************************************************
|
||||
// V3InlineCFuncs's Transformations:
|
||||
//
|
||||
// For each CCall to a small CFunc:
|
||||
// - Check if function is eligible for inlining (small enough, same scope)
|
||||
// - Clone local variables with unique names to avoid collisions
|
||||
// - Replace CCall with cloned function body statements
|
||||
// Build a bipartite call graph containing function and call site vertices,
|
||||
// then iterate functions leaf to root, inlining if size heuristics are met.
|
||||
// Finally, remove unused functions.
|
||||
//
|
||||
// Two tunables control inlining:
|
||||
// --inline-cfuncs <n> : Always inline if size <= n (default 20)
|
||||
// --inline-cfuncs-product <n> : Also inline if size * call_count <= n (default 200)
|
||||
// --inline-cfuncs <n> : Inline if size <= n
|
||||
// --inline-cfuncs-product <n> : Also inline if size * call_count <= n
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
|
|
@ -31,231 +30,465 @@
|
|||
#include "V3InlineCFuncs.h"
|
||||
|
||||
#include "V3AstUserAllocator.h"
|
||||
#include "V3ExecGraph.h"
|
||||
#include "V3Graph.h"
|
||||
#include "V3Stats.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
VL_DEFINE_DEBUG_FUNCTIONS;
|
||||
|
||||
//######################################################################
|
||||
// Helper visitor to check if a CFunc contains C statements
|
||||
// Uses clearOptimizable pattern for debugging
|
||||
// Bipartite call graph containing function and call site vertices
|
||||
|
||||
class CFuncInlineCheckVisitor final : public VNVisitorConst {
|
||||
// STATE
|
||||
bool m_optimizable = true; // True if function can be inlined
|
||||
string m_whyNot; // Reason why not optimizable
|
||||
AstNode* m_whyNotNodep = nullptr; // Node that caused non-optimizable
|
||||
class InlineCFuncsFunctionVertex;
|
||||
class InlineCFuncsCallSiteVertex;
|
||||
|
||||
// METHODS
|
||||
void clearOptimizable(AstNode* nodep, const string& why) {
|
||||
if (m_optimizable) {
|
||||
m_optimizable = false;
|
||||
m_whyNot = why;
|
||||
m_whyNotNodep = nodep;
|
||||
UINFO(9, "CFunc not inlineable: " << why);
|
||||
if (nodep) UINFO(9, ": " << nodep);
|
||||
UINFO(9, "");
|
||||
}
|
||||
}
|
||||
class InlineCFuncsCallGraph final : public V3Graph {
|
||||
public:
|
||||
InlineCFuncsCallGraph()
|
||||
: V3Graph{} {}
|
||||
~InlineCFuncsCallGraph() override = default;
|
||||
|
||||
// VISITORS
|
||||
void visit(AstCStmt* nodep) override { clearOptimizable(nodep, "contains AstCStmt"); }
|
||||
void visit(AstCExpr* nodep) override { clearOptimizable(nodep, "contains AstCExpr"); }
|
||||
void visit(AstCStmtUser* nodep) override { clearOptimizable(nodep, "contains AstCStmtUser"); }
|
||||
void visit(AstCExprUser* nodep) override { clearOptimizable(nodep, "contains AstCExprUser"); }
|
||||
void visit(AstNode* nodep) override { iterateChildrenConst(nodep); }
|
||||
void addEdge(InlineCFuncsFunctionVertex& from, InlineCFuncsCallSiteVertex& top);
|
||||
void addEdge(InlineCFuncsCallSiteVertex& from, InlineCFuncsFunctionVertex& top);
|
||||
};
|
||||
|
||||
// Common base class of the function and call site vertices of the call graph.
// The constructor is protected, so only derived vertex classes can be created.
class EitherVertex VL_NOT_FINAL : public V3GraphVertex {
    VL_RTTI_IMPL(EitherVertex, V3GraphVertex)

protected:
    // Hands the owning call graph to the V3GraphVertex base
    explicit EitherVertex(InlineCFuncsCallGraph& graph)
        : V3GraphVertex{&graph} {}
};
|
||||
|
||||
class InlineCFuncsFunctionVertex final : public EitherVertex {
|
||||
VL_RTTI_IMPL(InlineCFuncsFunctionVertex, EitherVertex)
|
||||
AstCFunc* const m_cfuncp; // The function
|
||||
const char* m_noInlineWyp = nullptr; // First reason the function should not be inlined
|
||||
const char* m_keepWyp = nullptr; // Why the function should not be removed
|
||||
size_t m_size = 0; // The size of the function
|
||||
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
explicit CFuncInlineCheckVisitor(AstCFunc* cfuncp) { iterateConst(cfuncp); }
|
||||
InlineCFuncsFunctionVertex(InlineCFuncsCallGraph& graph, AstCFunc* cfuncp)
|
||||
: EitherVertex{graph}
|
||||
, m_cfuncp{cfuncp} {}
|
||||
~InlineCFuncsFunctionVertex() override = default;
|
||||
|
||||
// ACCESSORS
|
||||
bool optimizable() const { return m_optimizable; }
|
||||
string whyNot() const { return m_whyNot; }
|
||||
AstNode* whyNotNodep() const { return m_whyNotNodep; }
|
||||
AstCFunc* cfuncp() const { return m_cfuncp; }
|
||||
size_t size() const { return m_size; }
|
||||
void sizeInc(size_t value = 1) { m_size += value; }
|
||||
bool noInline() const { return m_noInlineWyp; }
|
||||
void setNoInline(const char* whyp) {
|
||||
if (!m_noInlineWyp) m_noInlineWyp = whyp;
|
||||
}
|
||||
bool keep() const { return m_keepWyp; }
|
||||
void setKeep(const char* whyp) {
|
||||
if (!m_keepWyp) m_keepWyp = whyp;
|
||||
}
|
||||
std::string dotColor() const override {
|
||||
return m_noInlineWyp ? "red" : m_keepWyp ? "orange" : "black";
|
||||
}
|
||||
|
||||
// debug
|
||||
FileLine* fileline() const override { return m_cfuncp->fileline(); }
|
||||
std::string dotShape() const override { return "box"; }
|
||||
std::string name() const override VL_MT_STABLE {
|
||||
std::string str = cvtToHex(m_cfuncp);
|
||||
str += "\n" + m_cfuncp->name();
|
||||
str += "\nsize: " + std::to_string(m_size);
|
||||
if (m_noInlineWyp) str += "\nNoInline: "s + m_noInlineWyp;
|
||||
if (m_keepWyp) str += "\nKeep: "s + m_keepWyp;
|
||||
return str;
|
||||
}
|
||||
};
|
||||
|
||||
class InlineCFuncsCallSiteVertex final : public EitherVertex {
|
||||
VL_RTTI_IMPL(InlineCFuncsCallSiteVertex, EitherVertex)
|
||||
AstCCall* const m_callp; // The call site
|
||||
const char* m_noInlineWyp = nullptr; // First reason the function should not be inlined
|
||||
|
||||
public:
|
||||
InlineCFuncsCallSiteVertex(InlineCFuncsCallGraph& graph, AstCCall* callp)
|
||||
: EitherVertex{graph}
|
||||
, m_callp{callp} {}
|
||||
~InlineCFuncsCallSiteVertex() override = default;
|
||||
|
||||
// ACCESSORS
|
||||
AstCCall* callp() const { return m_callp; }
|
||||
bool noInline() const { return m_noInlineWyp; }
|
||||
void setNoInline(const char* whyp) {
|
||||
if (!m_noInlineWyp) m_noInlineWyp = whyp;
|
||||
}
|
||||
|
||||
// debug
|
||||
FileLine* fileline() const override { return m_callp->fileline(); }
|
||||
std::string dotColor() const override { return m_noInlineWyp ? "red" : "black"; }
|
||||
std::string dotShape() const override { return "ellipse"; }
|
||||
std::string name() const override VL_MT_STABLE {
|
||||
std::string str = cvtToHex(m_callp);
|
||||
if (m_noInlineWyp) str += "\nNoInline: "s + m_noInlineWyp;
|
||||
return str;
|
||||
}
|
||||
};
|
||||
|
||||
// Add the edge from a calling function to a call site contained in it.
// A call site may have at most one caller, which is asserted here.
void InlineCFuncsCallGraph::addEdge(InlineCFuncsFunctionVertex& caller,
                                    InlineCFuncsCallSiteVertex& callsite) {
    UASSERT_OBJ(callsite.inEmpty(), &callsite, "Call site should have at most one incoming edge");
    constexpr int weight = 1;
    constexpr bool canCut = true;  // Can cut caller -> callsite
    new V3GraphEdge{this, &caller, &callsite, weight, canCut};
}
|
||||
// Add the edge from a call site to the function it calls.
// A call site may have at most one callee, which is asserted here.
void InlineCFuncsCallGraph::addEdge(InlineCFuncsCallSiteVertex& callsite,
                                    InlineCFuncsFunctionVertex& callee) {
    UASSERT_OBJ(callsite.outEmpty(), &callsite, "Call site should have at most one outgoing edge");
    constexpr int weight = 1;
    constexpr bool canCut = false;  // Unlike caller -> callsite, this edge is not cuttable
    new V3GraphEdge{this, &callsite, &callee, weight, canCut};
}
|
||||
|
||||
//######################################################################
|
||||
|
||||
class InlineCFuncsVisitor final : public VNVisitor {
|
||||
// NODE STATE
|
||||
// AstCFunc::user1() -> vector of AstCCall* pointing to this function
|
||||
// AstCFunc::user2() -> bool: true if checked for C statements
|
||||
// AstCFunc::user3() -> bool: true if contains C statements (not inlineable)
|
||||
// AstCFunc::user1p() -> InlineCFuncsFunctionVertex*, the function vertex
|
||||
// AstCCall::user1p() -> InlineCFuncsCallSiteVertex*, the call site vertex
|
||||
// AstVar::user2p() -> AstVar*, the cloned inlined local variable
|
||||
const VNUser1InUse m_user1InUse;
|
||||
const VNUser2InUse m_user2InUse;
|
||||
const VNUser3InUse m_user3InUse;
|
||||
AstUser1Allocator<AstCFunc, std::vector<AstCCall*>> m_callSites;
|
||||
|
||||
// STATE
|
||||
VDouble0 m_statInlined; // Statistic tracking
|
||||
const int m_threshold1; // Size threshold: always inline if size <= this
|
||||
const int m_threshold2; // Product threshold: inline if size * calls <= this
|
||||
AstCFunc* m_callerFuncp = nullptr; // Current caller function
|
||||
// Tuples of (StmtExpr to replace, CFunc to inline from, caller func for vars)
|
||||
std::vector<std::tuple<AstStmtExpr*, AstCFunc*, AstCFunc*>> m_toInline;
|
||||
InlineCFuncsCallGraph m_graph; // The call graph
|
||||
VDouble0 m_statCallsInlined; // Number of calls inlined
|
||||
VDouble0 m_statFuncsInlined; // Number of functions inlined at least once
|
||||
VDouble0 m_statFuncsRemoved; // Number of fully-inlined functions removed
|
||||
// Size threshold: always inline if size <= this
|
||||
const size_t m_sizeThreshold = v3Global.opt.inlineCFuncs();
|
||||
// Product threshold: inline if size * calls <= this
|
||||
const size_t m_prodThreshold = v3Global.opt.inlineCFuncsProduct();
|
||||
// Maximum size of caller to consider inlining into
|
||||
const size_t m_maxSizeCFunc = []() -> size_t {
|
||||
int maxCFunc = v3Global.opt.outputSplitCFuncs();
|
||||
int maxFile = v3Global.opt.outputSplit();
|
||||
if (maxCFunc <= 0) maxCFunc = std::numeric_limits<int>::max();
|
||||
if (maxFile <= 0) maxFile = std::numeric_limits<int>::max();
|
||||
return std::min(maxCFunc, maxFile);
|
||||
}();
|
||||
const size_t m_maxSizeTrace = []() -> size_t {
|
||||
int maxTrace = v3Global.opt.outputSplitCTrace();
|
||||
int maxFile = v3Global.opt.outputSplit();
|
||||
if (maxTrace <= 0) maxTrace = std::numeric_limits<int>::max();
|
||||
if (maxFile <= 0) maxFile = std::numeric_limits<int>::max();
|
||||
return std::min(maxTrace, maxFile);
|
||||
}();
|
||||
InlineCFuncsFunctionVertex* m_cfuncVtxp = nullptr; // Vertex of currently iterated function
|
||||
bool m_inExecGraph = false; // True while inside an AstExecGraph subtree
|
||||
|
||||
// METHODS
|
||||
|
||||
// Check if a function contains any $c() calls (user or internal)
|
||||
// Results are cached in user2/user3 for efficiency
|
||||
bool containsCStatements(AstCFunc* cfuncp) {
|
||||
if (!cfuncp->user2()) {
|
||||
// Not yet checked - run the check visitor
|
||||
cfuncp->user2(true); // Mark as checked
|
||||
const CFuncInlineCheckVisitor checker{cfuncp};
|
||||
cfuncp->user3(!checker.optimizable()); // Store result (true = contains C stmts)
|
||||
}
|
||||
return cfuncp->user3();
|
||||
InlineCFuncsFunctionVertex* getInlineCFuncsFunctionVertexp(AstCFunc* cfuncp) {
|
||||
if (!cfuncp->user1p()) cfuncp->user1p(new InlineCFuncsFunctionVertex{m_graph, cfuncp});
|
||||
return cfuncp->user1u().to<InlineCFuncsFunctionVertex*>();
|
||||
}
|
||||
|
||||
// Check if a function is eligible for inlining into caller
|
||||
bool isInlineable(const AstCFunc* callerp, AstCFunc* cfuncp) {
|
||||
// Must be in the same scope (same class) to access the same members
|
||||
if (callerp->scopep() != cfuncp->scopep()) return false;
|
||||
InlineCFuncsCallSiteVertex* getInlineCFuncsCallSiteVertexp(AstCCall* callp) {
|
||||
if (!callp->user1p()) callp->user1p(new InlineCFuncsCallSiteVertex{m_graph, callp});
|
||||
return callp->user1u().to<InlineCFuncsCallSiteVertex*>();
|
||||
}
|
||||
|
||||
// Check for $c() calls that might use 'this'
|
||||
if (containsCStatements(cfuncp)) return false;
|
||||
AstCLocalScope* inlineCall(AstCFunc* const callerp, //
|
||||
AstCCall* const callp, //
|
||||
AstCFunc* const calleep, //
|
||||
const size_t seqNum) {
|
||||
UINFO(6, "Inlining CFunc " << calleep->name() << " into " << callerp->name()
|
||||
<< " at call site " << callp);
|
||||
|
||||
// Check it's a void function (not a coroutine)
|
||||
if (cfuncp->rtnTypeVoid() != "void") return false;
|
||||
AstNodeStmt* const callSitep = VN_AS(callp->backp(), StmtExpr);
|
||||
++m_statCallsInlined;
|
||||
|
||||
// Don't inline functions marked dontCombine (e.g. trace, entryPoint)
|
||||
if (cfuncp->dontCombine()) return false;
|
||||
|
||||
// Don't inline entry point functions
|
||||
if (cfuncp->entryPoint()) return false;
|
||||
|
||||
// Must have statements to inline
|
||||
if (!cfuncp->stmtsp()) return false;
|
||||
|
||||
// Check size thresholds
|
||||
const size_t funcSize = cfuncp->nodeCount();
|
||||
|
||||
// Always inline if small enough
|
||||
if (funcSize <= static_cast<size_t>(m_threshold1)) return true;
|
||||
|
||||
// Also inline if size * call_count is reasonable
|
||||
const size_t callCount = m_callSites(cfuncp).size();
|
||||
if (callCount > 0 && funcSize * callCount <= static_cast<size_t>(m_threshold2)) {
|
||||
return true;
|
||||
// Callee might be empty, just delete the call
|
||||
if (!calleep->stmtsp()) {
|
||||
VL_DO_DANGLING(pushDeletep(callSitep->unlinkFrBack()), callSitep);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return false;
|
||||
// Replace call site with a local scope
|
||||
FileLine* const flp = callSitep->fileline();
|
||||
AstCLocalScope* const lscopep = new AstCLocalScope{flp, nullptr};
|
||||
callSitep->replaceWith(lscopep);
|
||||
VL_DO_DANGLING(pushDeletep(callSitep), callSitep);
|
||||
lscopep->addStmtsp(new AstComment{flp, "Inlined CFunc: " + calleep->name()});
|
||||
|
||||
// Although it's in a local scope, we still make names of cloned locals unique
|
||||
const std::string varPrefix
|
||||
= "__Vinline_" + std::to_string(seqNum) + "_" + calleep->name() + "_";
|
||||
|
||||
// AstVar::user2p() -> AstVar*, the cloned inlined local variable
|
||||
const VNUser2InUse user2InUse;
|
||||
|
||||
// Clone local variables, add them to the local scope
|
||||
for (AstVar* varp = calleep->varsp(); varp; varp = VN_AS(varp->nextp(), Var)) {
|
||||
AstVar* const newVarp = varp->cloneTree(false);
|
||||
newVarp->name(varPrefix + varp->name());
|
||||
lscopep->addStmtsp(newVarp);
|
||||
varp->user2p(newVarp);
|
||||
}
|
||||
|
||||
// Clone the function body
|
||||
AstNode* const bodyp = calleep->stmtsp()->cloneTree(true);
|
||||
lscopep->addStmtsp(bodyp);
|
||||
|
||||
// Retarget local variable references to the cloned locals
|
||||
// Rename locals defined in the body, TODO: there should be none after #6280
|
||||
// Reset vertex pointers on calls
|
||||
bodyp->foreachAndNext([&](AstNode* nodep) {
|
||||
if (AstVarRef* const refp = VN_CAST(nodep, VarRef)) {
|
||||
if (AstVar* const varp = VN_AS(refp->varp()->user2p(), Var)) refp->varp(varp);
|
||||
} else if (AstVar* const varp = VN_CAST(nodep, Var)) {
|
||||
varp->name(varPrefix + varp->name());
|
||||
} else if (AstCCall* const callp = VN_CAST(nodep, CCall)) {
|
||||
callp->user1p(nullptr);
|
||||
}
|
||||
});
|
||||
|
||||
// Return the local scope
|
||||
return lscopep;
|
||||
}
|
||||
|
||||
void doInlining() {
|
||||
// Need to gather vertices as we are changing the graph
|
||||
std::vector<InlineCFuncsFunctionVertex*> m_fVtxps;
|
||||
for (V3GraphVertex& vtx : m_graph.vertices()) {
|
||||
if (InlineCFuncsFunctionVertex* const fVtxp = vtx.cast<InlineCFuncsFunctionVertex>()) {
|
||||
m_fVtxps.emplace_back(fVtxp);
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate functions leaf to root
|
||||
for (InlineCFuncsFunctionVertex* const calleeVtxp : vlstd::reverse_view(m_fVtxps)) {
|
||||
// Should we inline this function?
|
||||
if (calleeVtxp->noInline()) continue; // Told not to
|
||||
|
||||
// Check size heuristics
|
||||
const bool doIt = [&]() {
|
||||
// Inline if small enough
|
||||
if (calleeVtxp->size() <= m_sizeThreshold) return true;
|
||||
// Inline if not too much bloat
|
||||
const size_t nCalls = calleeVtxp->inEdges().size();
|
||||
if (nCalls * calleeVtxp->size() <= m_prodThreshold) return true;
|
||||
// Otherwise don't inline
|
||||
return false;
|
||||
}();
|
||||
if (!doIt) continue;
|
||||
|
||||
// Ok, attempt to inline call sites
|
||||
size_t nInlined = 0;
|
||||
for (const V3GraphEdge* const edgep : calleeVtxp->inEdges().unlinkable()) {
|
||||
InlineCFuncsCallSiteVertex* const callVtxp
|
||||
= edgep->fromp()->as<InlineCFuncsCallSiteVertex>();
|
||||
|
||||
AstCFunc* const calleep = calleeVtxp->cfuncp();
|
||||
AstCCall* const callp = callVtxp->callp();
|
||||
UINFO(6, "Consider inlining " << calleep->name() << " at call site " << callp);
|
||||
// Should we inline this call site?
|
||||
if (callVtxp->noInline()) continue; // Told not to
|
||||
if (callVtxp->inEmpty()) continue; // Don't know where it's called from
|
||||
|
||||
// Pick up the caller
|
||||
UASSERT_OBJ(callVtxp->inSize1(), callVtxp->callp(),
|
||||
"Expected exactly one input edge for call site");
|
||||
InlineCFuncsFunctionVertex* const callerVtxp
|
||||
= callVtxp->inEdges().frontp()->fromp()->as<InlineCFuncsFunctionVertex>();
|
||||
AstCFunc* const callerp = callerVtxp->cfuncp();
|
||||
|
||||
// Don't make a function bigger than the limit
|
||||
const size_t limit = callerp->isTrace() ? m_maxSizeTrace : m_maxSizeCFunc;
|
||||
if (callerVtxp->size() + calleeVtxp->size() > limit) continue;
|
||||
|
||||
// Can't do it if it's in a different scope, self pointers differ
|
||||
if (callerp->scopep() != calleep->scopep()) continue;
|
||||
|
||||
// Inline it
|
||||
if (!nInlined) ++m_statFuncsInlined;
|
||||
AstNode* const inlinedp = inlineCall(callerp, callp, calleep, nInlined++);
|
||||
|
||||
// Need to adjust the graph:
|
||||
// 1. Delete inlined call site
|
||||
VL_DO_DANGLING(callVtxp->unlinkDelete(&m_graph), callVtxp);
|
||||
// 2. Add new inlined call sites - also increments size of caller
|
||||
VL_RESTORER(m_cfuncVtxp);
|
||||
m_cfuncVtxp = callerVtxp;
|
||||
if (inlinedp) iterateChildrenConst(inlinedp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void removeUnusedFuncs() {
|
||||
// Iterate root to leaves
|
||||
for (V3GraphVertex* const vtxp : m_graph.vertices().unlinkable()) {
|
||||
InlineCFuncsFunctionVertex* const fVtxp = vtxp->cast<InlineCFuncsFunctionVertex>();
|
||||
if (!fVtxp) continue;
|
||||
// Keep if still called
|
||||
if (!fVtxp->inEmpty()) continue;
|
||||
// Keep for other reasons
|
||||
if (fVtxp->keep()) continue;
|
||||
|
||||
AstCFunc* const funcp = fVtxp->cfuncp();
|
||||
UINFO(6, "Removing unused CFunc " << funcp);
|
||||
++m_statFuncsRemoved;
|
||||
|
||||
// Unlink all call sites
|
||||
for (const V3GraphEdge* const edgep : vtxp->outEdges().unlinkable()) {
|
||||
edgep->top()->unlinkEdges(&m_graph);
|
||||
}
|
||||
// Delete function vertex
|
||||
vtxp->unlinkDelete(&m_graph);
|
||||
// Delete the function
|
||||
VL_DO_DANGLING(pushDeletep(funcp->unlinkFrBack()), funcp);
|
||||
}
|
||||
|
||||
// Delete inlined/deleted call site vertices (for debugging only)
|
||||
for (V3GraphVertex* const vtxp : m_graph.vertices().unlinkable()) {
|
||||
InlineCFuncsCallSiteVertex* const cVtxp = vtxp->cast<InlineCFuncsCallSiteVertex>();
|
||||
if (!cVtxp) continue;
|
||||
if (!cVtxp->inEmpty()) continue;
|
||||
if (!cVtxp->outEmpty()) continue;
|
||||
vtxp->unlinkDelete(&m_graph);
|
||||
}
|
||||
}
|
||||
|
||||
// VISITORS
|
||||
void visit(AstCCall* nodep) override {
|
||||
iterateChildren(nodep);
|
||||
|
||||
AstCFunc* const cfuncp = nodep->funcp();
|
||||
if (!cfuncp) return;
|
||||
|
||||
// Track call site for call counting
|
||||
m_callSites(cfuncp).emplace_back(nodep);
|
||||
}
|
||||
|
||||
void visit(AstCFunc* nodep) override {
|
||||
VL_RESTORER(m_callerFuncp);
|
||||
m_callerFuncp = nodep;
|
||||
iterateChildren(nodep);
|
||||
// Create the function vertex
|
||||
InlineCFuncsFunctionVertex* const vtxp = getInlineCFuncsFunctionVertexp(nodep);
|
||||
|
||||
// Check if the function itself is not inlineable
|
||||
if (nodep->rtnTypeVoid() != "void") vtxp->setNoInline("Not void");
|
||||
if (nodep->dpiImportPrototype()) vtxp->setNoInline("DPI import prototype");
|
||||
if (nodep->recursive()) vtxp->setNoInline("Recursive");
|
||||
if (nodep->argsp()) vtxp->setNoInline("Has arguments");
|
||||
if (nodep->isVirtual()) vtxp->setNoInline("Virtual method");
|
||||
|
||||
// Check if the function should not be removed
|
||||
if (nodep->entryPoint()) vtxp->setKeep("Entry point");
|
||||
if (nodep->dpiImportPrototype()) vtxp->setKeep("DPI import prototype");
|
||||
if (nodep->dpiExportDispatcher()) vtxp->setKeep("DPI export implementation");
|
||||
if (nodep->isVirtual()) vtxp->setKeep("Virtual method");
|
||||
|
||||
// Iterate children
|
||||
VL_RESTORER(m_cfuncVtxp);
|
||||
m_cfuncVtxp = vtxp;
|
||||
iterateChildrenConst(nodep);
|
||||
}
|
||||
|
||||
void visit(AstNodeModule* nodep) override {
|
||||
// Process per module for better cache behavior
|
||||
m_toInline.clear();
|
||||
// Inlineable calls
|
||||
void visit(AstCCall* nodep) override {
|
||||
if (m_cfuncVtxp) m_cfuncVtxp->sizeInc();
|
||||
AstCFunc* const calleep = nodep->funcp();
|
||||
|
||||
// Phase 1: Collect call sites within this module
|
||||
iterateChildren(nodep);
|
||||
// Create the call site vertex
|
||||
InlineCFuncsCallSiteVertex* const vtxp = getInlineCFuncsCallSiteVertexp(nodep);
|
||||
|
||||
// Phase 2: Determine which calls to inline
|
||||
collectInlineCandidates(nodep);
|
||||
// Check if the call site is not inlineable
|
||||
if (!VN_IS(nodep->backp(), StmtExpr)) vtxp->setNoInline("Not in statement position");
|
||||
if (m_inExecGraph) vtxp->setNoInline("In ExecGraph");
|
||||
if (calleep->isVirtual()) vtxp->setNoInline("Virtual method");
|
||||
|
||||
// Phase 3: Perform inlining for this module
|
||||
doInlining();
|
||||
// Add caller/callee edges
|
||||
if (m_cfuncVtxp) m_graph.addEdge(*m_cfuncVtxp, *vtxp);
|
||||
m_graph.addEdge(*vtxp, *getInlineCFuncsFunctionVertexp(calleep));
|
||||
|
||||
// Iterate children
|
||||
iterateChildrenConst(nodep);
|
||||
}
|
||||
|
||||
void visit(AstNode* nodep) override { iterateChildren(nodep); }
|
||||
// Nodes that reference functions/calls
|
||||
// Root of the traversal: check the netlist is in the expected state, then visit everything.
void visit(AstNetlist* nodep) override {
    // Both pointers are required to still be null when this pass runs.
    // NOTE(review): presumably because they would reference functions without being
    // tracked in the call graph - confirm.
    UASSERT_OBJ(!nodep->evalp(), nodep, "evalp should be null at this stage");
    UASSERT_OBJ(!nodep->evalNbap(), nodep, "evalNbap should be null at this stage");
    iterateChildrenConst(nodep);
}
|
||||
|
||||
// Collect calls that should be inlined within this module
|
||||
void collectInlineCandidates(AstNodeModule* modp) {
|
||||
for (AstNode* stmtp = modp->stmtsp(); stmtp; stmtp = stmtp->nextp()) {
|
||||
AstCFunc* const callerp = VN_CAST(stmtp, CFunc);
|
||||
if (!callerp) continue;
|
||||
// Calls handled through the AstNodeCCall base: not inlining candidates, so no call site
// vertex is created, but the callee must survive the unused function removal.
void visit(AstNodeCCall* nodep) override {
    // The call node counts towards the size of the containing function
    if (InlineCFuncsFunctionVertex* const fVtxp = m_cfuncVtxp) fVtxp->sizeInc();
    InlineCFuncsFunctionVertex* const calleeVtxp = getInlineCFuncsFunctionVertexp(nodep->funcp());
    calleeVtxp->setKeep("Called elsewhere");
    iterateChildrenConst(nodep);
}
|
||||
|
||||
callerp->foreach([&](AstCCall* callp) {
|
||||
AstCFunc* const cfuncp = callp->funcp();
|
||||
if (!cfuncp) return;
|
||||
if (!isInlineable(callerp, cfuncp)) return;
|
||||
// A function whose address is taken must not be removed, as it is referenced by this node.
void visit(AstAddrOfCFunc* nodep) override {
    // The node counts towards the size of the containing function
    if (InlineCFuncsFunctionVertex* const fVtxp = m_cfuncVtxp) fVtxp->sizeInc();
    InlineCFuncsFunctionVertex* const targetVtxp = getInlineCFuncsFunctionVertexp(nodep->funcp());
    targetVtxp->setKeep("Referenced by AddressOfCFunc");
    iterateChildrenConst(nodep);
}
|
||||
|
||||
// Walk up to find the containing StmtExpr
|
||||
AstNode* stmtNodep = callp;
|
||||
while (stmtNodep && !VN_IS(stmtNodep, StmtExpr) && !VN_IS(stmtNodep, CFunc)) {
|
||||
stmtNodep = stmtNodep->backp();
|
||||
}
|
||||
// Nodes preventing inlining
|
||||
void visit(AstTraceDecl* nodep) override {
|
||||
// Referenced by TraceInc
|
||||
if (m_cfuncVtxp) m_cfuncVtxp->setNoInline("Contains TraceDecl");
|
||||
|
||||
AstStmtExpr* const stmtExprp = VN_CAST(stmtNodep, StmtExpr);
|
||||
if (!stmtExprp) return;
|
||||
|
||||
m_toInline.emplace_back(stmtExprp, cfuncp, callerp);
|
||||
});
|
||||
if (AstCCall* const callp = nodep->dtypeCallp()) {
|
||||
getInlineCFuncsCallSiteVertexp(callp)->setNoInline("Referenced by TraceDecl");
|
||||
}
|
||||
iterateChildrenConst(nodep);
|
||||
}
|
||||
// ExecGraph: blocks inlining of the containing function, keeps all MTask functions alive,
// and flags everything below it as being inside an ExecGraph.
void visit(AstExecGraph* nodep) override {
    // AstExecGraph is not cloneable, so can't inline the containing function
    if (InlineCFuncsFunctionVertex* const fVtxp = m_cfuncVtxp) {
        fVtxp->setNoInline("Contains ExecGraph");
    }
    // Functions referenced from the dependency graph must not be removed
    for (const V3GraphVertex& vtx : nodep->depGraphp()->vertices()) {
        InlineCFuncsFunctionVertex* const mtaskVtxp
            = getInlineCFuncsFunctionVertexp(vtx.as<ExecMTask>()->funcp());
        mtaskVtxp->setKeep("MTask function");
    }
    // Call sites visited below are marked as not inlineable via m_inExecGraph
    VL_RESTORER(m_inExecGraph);
    m_inExecGraph = true;
    iterateChildrenConst(nodep);
}
|
||||
// Raw C statement: its text can reference anything, so the containing function
// is marked as not inlineable.
void visit(AstCStmt* nodep) override {
    if (InlineCFuncsFunctionVertex* const fVtxp = m_cfuncVtxp) {
        fVtxp->setNoInline("Contains CStmt");
    }
    iterateChildrenConst(nodep);
}
|
||||
// Raw C expression: its text can reference anything, so the containing function
// is marked as not inlineable.
void visit(AstCExpr* nodep) override {
    if (InlineCFuncsFunctionVertex* const fVtxp = m_cfuncVtxp) {
        fVtxp->setNoInline("Contains CExpr");
    }
    iterateChildrenConst(nodep);
}
|
||||
// User C statement: its text can reference anything, so the containing function
// is marked as not inlineable.
void visit(AstCStmtUser* nodep) override {
    if (InlineCFuncsFunctionVertex* const fVtxp = m_cfuncVtxp) {
        fVtxp->setNoInline("Contains CStmtUser");
    }
    iterateChildrenConst(nodep);
}
|
||||
// User C expression: its text can reference anything, so the containing function
// is marked as not inlineable.
void visit(AstCExprUser* nodep) override {
    if (InlineCFuncsFunctionVertex* const fVtxp = m_cfuncVtxp) {
        fVtxp->setNoInline("Contains CExprUser");
    }
    iterateChildrenConst(nodep);
}
|
||||
// A function containing a return statement is marked as not inlineable.
void visit(AstCReturn* nodep) override {
    if (InlineCFuncsFunctionVertex* const fVtxp = m_cfuncVtxp) {
        fVtxp->setNoInline("Contains CReturn");
    }
    iterateChildrenConst(nodep);
}
|
||||
|
||||
// Perform the actual inlining after iteration is complete
|
||||
void doInlining() {
|
||||
for (const auto& tuple : m_toInline) {
|
||||
AstStmtExpr* const stmtExprp = std::get<0>(tuple);
|
||||
AstCFunc* const cfuncp = std::get<1>(tuple);
|
||||
AstCFunc* const callerp = std::get<2>(tuple);
|
||||
|
||||
UINFO(6, "Inlining CFunc " << cfuncp->name() << " into " << callerp->name());
|
||||
++m_statInlined;
|
||||
|
||||
// Clone local variables with unique names to avoid collisions
|
||||
std::map<AstVar*, AstVar*> varMap;
|
||||
for (AstVar* varp = cfuncp->varsp(); varp; varp = VN_AS(varp->nextp(), Var)) {
|
||||
const string newName = "__Vinline_" + cfuncp->name() + "_" + varp->name();
|
||||
AstVar* const newVarp = varp->cloneTree(false);
|
||||
newVarp->name(newName);
|
||||
callerp->addVarsp(newVarp);
|
||||
varMap[varp] = newVarp;
|
||||
}
|
||||
|
||||
// Clone the function body
|
||||
AstNode* const bodyp = cfuncp->stmtsp()->cloneTree(true);
|
||||
|
||||
// Retarget variable references to the cloned variables
|
||||
// Must iterate all sibling statements, not just the first
|
||||
if (!varMap.empty()) {
|
||||
for (AstNode* stmtp = bodyp; stmtp; stmtp = stmtp->nextp()) {
|
||||
stmtp->foreach([&](AstVarRef* refp) {
|
||||
auto it = varMap.find(refp->varp());
|
||||
if (it != varMap.end()) refp->varp(it->second);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Replace the statement with the inlined body
|
||||
stmtExprp->addNextHere(bodyp);
|
||||
VL_DO_DANGLING(stmtExprp->unlinkFrBack()->deleteTree(), stmtExprp);
|
||||
}
|
||||
// Base node
|
||||
// Any other node: counts as one unit towards the size of the function being visited
// (if there is one), then its children are visited.
void visit(AstNode* nodep) override {
    if (InlineCFuncsFunctionVertex* const fVtxp = m_cfuncVtxp) fVtxp->sizeInc();
    iterateChildrenConst(nodep);
}
|
||||
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
explicit InlineCFuncsVisitor(const AstNetlist* nodep)
|
||||
: m_threshold1{v3Global.opt.inlineCFuncs()}
|
||||
, m_threshold2{v3Global.opt.inlineCFuncsProduct()} {
|
||||
// Don't inline when profiling or tracing
|
||||
if (v3Global.opt.profCFuncs() || v3Global.opt.trace()) return;
|
||||
// Process modules one at a time for better cache behavior
|
||||
iterateAndNextNull(nodep->modulesp());
|
||||
explicit InlineCFuncsVisitor(AstNetlist* nodep) {
|
||||
// Phase 1: Build call graph
|
||||
iterateConst(nodep);
|
||||
// Make acyclic in case there is recursion
|
||||
m_graph.acyclic(V3GraphEdge::followAlwaysTrue);
|
||||
// Order vertices (any topological order is fine)
|
||||
m_graph.order();
|
||||
if (dumpGraphLevel() >= 6) m_graph.dumpDotFilePrefixed("inlinecfuncs-graph");
|
||||
// Phase 2: Inline calls
|
||||
doInlining();
|
||||
if (dumpGraphLevel() >= 6) m_graph.dumpDotFilePrefixed("inlinecfuncs-inlined");
|
||||
// Phase 3: Remove unused functions
|
||||
removeUnusedFuncs();
|
||||
if (dumpGraphLevel() >= 6) m_graph.dumpDotFilePrefixed("inlinecfuncs-kept");
|
||||
}
|
||||
~InlineCFuncsVisitor() override {
|
||||
V3Stats::addStat("Optimizations, Inlined CFuncs", m_statInlined);
|
||||
V3Stats::addStat("Optimizations, Inline CFuncs, calls inlined", m_statCallsInlined);
|
||||
V3Stats::addStat("Optimizations, Inline CFuncs, functions inlined", m_statFuncsInlined);
|
||||
V3Stats::addStat("Optimizations, Inline CFuncs, functions removed", m_statFuncsRemoved);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -264,6 +497,8 @@ public:
|
|||
|
||||
// Pass entry point.
// Logs the function name at UINFO level 2, returns without touching the
// netlist when v3Global.opt.profCFuncs() is set, and otherwise constructs a
// temporary InlineCFuncsVisitor over `nodep`. The visitor lives in its own
// scope so that it is destroyed before dumpCheckGlobalTree() is called.
void V3InlineCFuncs::inlineAll(AstNetlist* nodep) {
|
||||
UINFO(2, __FUNCTION__ << ":");
|
||||
// Don't inline when profiling per-function (it would lose granularity)
|
||||
if (v3Global.opt.profCFuncs()) return;
|
||||
{ InlineCFuncsVisitor{nodep}; } // Destruct before checking
|
||||
V3Global::dumpCheckGlobalTree("inlinecfuncs", 0, dumpTreeEitherLevel() >= 6);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1497,6 +1497,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
|
|||
m_fIcoChangeDetect.setTrueOrFalse(flag);
|
||||
});
|
||||
DECL_OPTION("-finline", FOnOff, &m_fInline);
|
||||
DECL_OPTION("-finline-cfuncs", FOnOff, &m_fInlineCFuncs);
|
||||
DECL_OPTION("-finline-funcs", FOnOff, &m_fInlineFuncs);
|
||||
DECL_OPTION("-finline-funcs-eager", FOnOff, &m_fInlineFuncsEager);
|
||||
DECL_OPTION("-flife", FOnOff, &m_fLife);
|
||||
|
|
@ -2371,6 +2372,7 @@ void V3Options::optimize(int level) {
|
|||
m_fExpand = flag;
|
||||
m_fGate = flag;
|
||||
m_fInline = flag;
|
||||
m_fInlineCFuncs = flag;
|
||||
m_fLife = flag;
|
||||
m_fLifePost = flag;
|
||||
m_fLocalize = flag;
|
||||
|
|
|
|||
|
|
@ -415,6 +415,7 @@ private:
|
|||
// main switch: -fno-ico-change-detect: input change detection optimization
|
||||
VOptionBool m_fIcoChangeDetect{VOptionBool::OPT_DEFAULT_TRUE};
|
||||
bool m_fInline; // main switch: -fno-inline: module inlining
|
||||
bool m_fInlineCFuncs; // main switch: -fno-inline-cfuncs: inline small C functions
|
||||
bool m_fInlineFuncs = true; // main switch: -fno-inline-funcs: function inlining
|
||||
bool m_fInlineFuncsEager = true; // main switch: -fno-inline-funcs-eager: don't inline eagerly
|
||||
bool m_fLife; // main switch: -fno-life: variable lifetime
|
||||
|
|
@ -753,6 +754,7 @@ public:
|
|||
bool fGate() const { return m_fGate; }
|
||||
VOptionBool fIcoChangeDetect() const { return m_fIcoChangeDetect; }
|
||||
bool fInline() const { return m_fInline; }
|
||||
bool fInlineCFuncs() const { return m_fInlineCFuncs; }
|
||||
bool fInlineFuncs() const { return m_fInlineFuncs; }
|
||||
bool fInlineFuncsEager() const { return m_fInlineFuncsEager; }
|
||||
bool fLife() const { return m_fLife; }
|
||||
|
|
|
|||
|
|
@ -1094,6 +1094,7 @@ class TraceVisitor final : public VNVisitor {
|
|||
// Create the trace registration function
|
||||
m_regFuncp = new AstCFunc{m_topScopep->fileline(), "trace_register", m_topScopep};
|
||||
m_regFuncp->argTypes(v3Global.opt.traceClassBase() + "* tracep");
|
||||
m_regFuncp->entryPoint(true);
|
||||
m_regFuncp->isTrace(true);
|
||||
m_regFuncp->slow(true);
|
||||
m_regFuncp->isStatic(false);
|
||||
|
|
|
|||
|
|
@ -980,6 +980,7 @@ public:
|
|||
AstCFunc* rootFuncp = nullptr;
|
||||
if (!v3Global.opt.libCreate().empty()) {
|
||||
rootFuncp = newCFunc(flp, "trace_init_root");
|
||||
rootFuncp->entryPoint(true);
|
||||
for (size_t i = 0; i < m_topScopeRootFuncCount; ++i) {
|
||||
AstCCall* const callp = new AstCCall{flp, topScopeFuncps.at(i)};
|
||||
callp->dtypeSetVoid();
|
||||
|
|
@ -1017,6 +1018,7 @@ public:
|
|||
// Set name of top level function
|
||||
AstCFunc* const topFuncp = m_topFuncps.front();
|
||||
topFuncp->name("trace_init_top");
|
||||
topFuncp->entryPoint(true);
|
||||
|
||||
if (rootFuncp && v3Global.opt.debugCheck()) checkCallsRecurse(rootFuncp);
|
||||
checkCalls(topFuncp);
|
||||
|
|
|
|||
|
|
@ -582,8 +582,14 @@ static void process() {
|
|||
// Must be after all Sel/array index based optimizations
|
||||
V3Reloop::reloopAll(v3Global.rootp());
|
||||
}
|
||||
}
|
||||
|
||||
if (v3Global.opt.inlineCFuncs()) {
|
||||
// These are no longer needed, remove references before CFunc inlining
|
||||
v3Global.rootp()->evalp(nullptr);
|
||||
v3Global.rootp()->evalNbap(nullptr);
|
||||
|
||||
if (!v3Global.opt.lintOnly() && !v3Global.opt.serializeOnly()) {
|
||||
if (v3Global.opt.fInlineCFuncs()) {
|
||||
// Inline small CFuncs to reduce function call overhead
|
||||
V3InlineCFuncs::inlineAll(v3Global.rootp());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('simulator_st')
|
||||
|
||||
test.compile(verilator_flags2=["--stats", "--inline-cfuncs", "0"])
|
||||
test.compile(verilator_flags2=["--stats", "-fno-inline-cfuncs"])
|
||||
|
||||
test.execute(expect_filename=test.golden_filename)
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ def check_evals():
|
|||
test.error("Too few _eval functions found: " + str(got))
|
||||
|
||||
|
||||
test.compile(v_flags2=["--output-split 1 --output-split-cfuncs 20"],
|
||||
test.compile(v_flags2=["--output-split 1 --output-split-cfuncs 20 -fno-inline-cfuncs"],
|
||||
verilator_make_gmake=False) # Slow to compile, so skip it)
|
||||
|
||||
check_evals()
|
||||
|
|
|
|||
|
|
@ -31,9 +31,9 @@ test.compile(v_flags2=[
|
|||
|
||||
if test.vltmt:
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule count\s+(\d+)', 3)
|
||||
r'Optimizations, Thread schedule count\s+(\d+)', 4)
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 5)
|
||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 6)
|
||||
|
||||
test.execute()
|
||||
|
||||
|
|
|
|||
|
|
@ -35,9 +35,9 @@ test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "_
|
|||
|
||||
if test.vltmt:
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule count\s+(\d+)', 1)
|
||||
r'Optimizations, Thread schedule count\s+(\d+)', 2)
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 2)
|
||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 3)
|
||||
|
||||
test.execute(all_run_flags=[
|
||||
"+verilator+prof+exec+start+2",
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ test.top_filename = "t/t_inst_tree.v"
|
|||
|
||||
default_vltmt_threads = test.get_default_vltmt_threads
|
||||
test.compile(
|
||||
# Disable --inline-cfuncs so functions exist to be combined
|
||||
verilator_flags2=['--stats', '--inline-cfuncs', '0', test.t_dir + "/" + test.name + ".vlt"],
|
||||
# Disable CFunc inlining so functions exist to be combined
|
||||
verilator_flags2=['--stats', '-fno-inline-cfuncs', test.t_dir + "/" + test.name + ".vlt"],
|
||||
# Force 3 threads even if we have fewer cores
|
||||
threads=(default_vltmt_threads if test.vltmt else 1))
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ test.top_filename = "t/t_enum_type_methods.v"
|
|||
out_filename = test.obj_dir + "/V" + test.name + ".tree.json"
|
||||
|
||||
test.compile(verilator_flags2=[
|
||||
'--no-std', '--debug-check', '--no-json-edit-nums', '--flatten', '--inline-cfuncs', '0'
|
||||
'--no-std', '--debug-check', '--no-json-edit-nums', '--flatten', '-fno-inline-cfuncs'
|
||||
],
|
||||
verilator_make_gmake=False,
|
||||
make_top_shell=False,
|
||||
|
|
|
|||
|
|
@ -9,17 +9,17 @@
|
|||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
test.scenarios('vlt_all')
|
||||
|
||||
# Use --output-split-cfuncs to create small functions that can be inlined
|
||||
# Also test --inline-cfuncs-product option
|
||||
test.compile(verilator_flags2=[
|
||||
"--stats", "--binary", "--output-split-cfuncs", "1", "--inline-cfuncs-product", "200"
|
||||
"--stats", "--binary", "--inline-cfuncs-product", "200", "--dumpi-V3InlineCFuncs", "9"
|
||||
])
|
||||
|
||||
# Verify inlining happened with exact count
|
||||
test.file_grep(test.stats, r'Optimizations, Inlined CFuncs\s+(\d+)', 39)
|
||||
|
||||
test.execute()
|
||||
|
||||
if test.vlt:
|
||||
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+(\d+)', 7)
|
||||
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions inlined\s+(\d+)', 7)
|
||||
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions removed\s+(\d+)', 7)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,20 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
# Test driver: compile with --stats and --binary, run the simulation, then
# require that the stats file reports a "calls inlined" count whose first digit
# is 1-9 (i.e. a non-zero count).
test.scenarios('vlt_all')
|
||||
|
||||
test.compile(verilator_flags2=["--stats", "--binary"])
|
||||
|
||||
test.execute()
|
||||
|
||||
# Only checks that the count is non-zero; no exact value is pinned here
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+[1-9]')
|
||||
|
||||
test.passes()
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain.
|
||||
// SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
// Self-checking test module.
// On every rising clock edge it clears `acc`, calls the task add_pair() twice
// with `acc` bound to the inout argument, and compares the result against a
// closed-form expected value. The simulation finishes when cyc == 20.
module t (
|
||||
input wire clk
|
||||
);
|
||||
|
||||
integer cyc = 0;
|
||||
reg [31:0] acc;
|
||||
|
||||
// Adds both inputs onto the inout accumulator `sum`.
// The metacomment on the first body line is a tool directive, not documentation.
task automatic add_pair(input [31:0] a, input [31:0] b, inout [31:0] sum);
|
||||
// verilator no_inline_task
|
||||
sum = sum + a + b;
|
||||
endtask
|
||||
|
||||
always @(posedge clk) begin
|
||||
// Nonblocking update: the reads of cyc below still see the pre-edge value
cyc <= cyc + 1;
|
||||
acc = 0;
|
||||
add_pair(cyc[31:0], 32'd1, acc); // + cyc + 1
|
||||
add_pair(32'd1000, cyc[31:0], acc); // + 1000 + cyc
|
||||
// acc = (cyc + 1) + (1000 + cyc) = 2*cyc + 1001
|
||||
// The comparison is only performed once cyc > 1
if (cyc > 1) begin
|
||||
if (acc !== (2 * cyc[31:0] + 32'd1001)) begin
|
||||
$write("%%Error: cyc=%0d acc=%0d expected %0d\n", cyc, acc, 2 * cyc + 1001);
|
||||
$stop;
|
||||
end
|
||||
end
|
||||
if (cyc == 20) begin
|
||||
$write("*-* All Finished *-*\n");
|
||||
$finish;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
# Test driver: compile with --stats and --binary, run the simulation, then
# require that the stats file reports a "calls inlined" count whose first digit
# is 1-9 (i.e. a non-zero count).
test.scenarios('vlt_all')
|
||||
|
||||
test.compile(verilator_flags2=["--stats", "--binary"])
|
||||
|
||||
test.execute()
|
||||
|
||||
# Only checks that the count is non-zero; no exact value is pinned here
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+[1-9]')
|
||||
|
||||
test.passes()
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain.
|
||||
// SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
// Test module: on every rising clock edge the task tick() is called four times
// in a row. The simulation finishes when cyc == 20.
module t (
|
||||
input wire clk
|
||||
);
|
||||
|
||||
integer cyc = 0;
|
||||
|
||||
// Captures $time into an automatic local and prints it.
// The metacomment on the first body line is a tool directive, not documentation.
task automatic tick();
|
||||
// verilator no_inline_task
|
||||
automatic time t = $time;
|
||||
$display("TICK: %0t", t);
|
||||
endtask
|
||||
|
||||
always @(posedge clk) begin
|
||||
// Nonblocking update: the cyc == 20 test below still sees the pre-edge value
cyc <= cyc + 1;
|
||||
tick();
|
||||
tick();
|
||||
tick();
|
||||
tick();
|
||||
if (cyc == 20) begin
|
||||
$write("*-* All Finished *-*\n");
|
||||
$finish;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -9,15 +9,13 @@
|
|||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
test.scenarios('vlt_all')
|
||||
test.top_filename = "t/t_opt_inline_cfuncs.v"
|
||||
|
||||
# Disable inlining with --inline-cfuncs 0
|
||||
test.compile(verilator_flags2=["--stats", "--binary", "--inline-cfuncs", "0"])
|
||||
|
||||
# Verify inlining did NOT happen (stat doesn't exist when pass is skipped)
|
||||
test.file_grep_not(test.stats, r'Optimizations, Inlined CFuncs\s+[1-9]')
|
||||
test.compile(verilator_flags2=["--stats", "--binary", "-fno-inline-cfuncs"])
|
||||
|
||||
test.execute()
|
||||
|
||||
test.file_grep_not(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+[1-9]')
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -9,17 +9,16 @@
|
|||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
test.scenarios('vlt_all')
|
||||
|
||||
# Use thresholds that guarantee rejection to test the "return false" path in isInlineable()
|
||||
# --inline-cfuncs 1: pass still runs (not skipped)
|
||||
# --inline-cfuncs-product 0: guarantees all functions rejected (node_count * call_count > 0 always)
|
||||
test.compile(verilator_flags2=[
|
||||
"--stats", "--binary", "--inline-cfuncs", "1", "--inline-cfuncs-product", "0"
|
||||
"--stats", "--binary", "--inline-cfuncs", "0", "--inline-cfuncs-product", "0"
|
||||
])
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Inlined CFuncs\s+(\d+)', 0)
|
||||
|
||||
test.execute()
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions inlined\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions removed\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
# Test driver: rebuilds the t_opt_inline_cfuncs.v source with --trace and
# --inline-cfuncs-product 200, checks the inlining statistics, then runs the
# simulation.
test.scenarios('vlt_all')
|
||||
test.top_filename = "t/t_opt_inline_cfuncs.v"
|
||||
|
||||
test.compile(verilator_flags2=["--stats", "--binary", "--trace", "--inline-cfuncs-product", "200"])
|
||||
|
||||
# Exact counts are only asserted when test.vlt is set
if test.vlt:
|
||||
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+(\d+)', 8)
|
||||
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions inlined\s+(\d+)', 7)
|
||||
test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions removed\s+(\d+)', 9)
|
||||
|
||||
test.execute()
|
||||
|
||||
test.passes()
|
||||
|
|
@ -16,13 +16,11 @@
|
|||
<map from="PSScAO" to="__VactTriggered"/>
|
||||
<map from="PSx9Nt" to="__Vconfigure"/>
|
||||
<map from="PSrjMj" to="__Vdly__secret_cyc"/>
|
||||
<map from="PSAW38" to="__Vdly__t__DOT__secret_inst2__DOT__secret_cyc"/>
|
||||
<map from="PS4o5S" to="__Vdpiexp_dpix_a_func_TOP__t__DOT__secret_inst"/>
|
||||
<map from="PStVA8" to="__Vdpiexp_dpix_a_task_TOP__t__DOT__secret_inst"/>
|
||||
<map from="PSxbIE" to="__Vdpiimwrap_dpii_a_func_TOP__t__DOT__secret_inst"/>
|
||||
<map from="PSIv2l" to="__Vdpiimwrap_dpii_a_task_TOP__t__DOT__secret_inst"/>
|
||||
<map from="PS76My" to="__Vfunc_dpii_a_func__0__Vfuncout"/>
|
||||
<map from="PSZAsk" to="__Vinline__nba_sequent__TOP__0___Vdly__t__DOT__secret_inst2__DOT__secret_cyc"/>
|
||||
<map from="PSywKw" to="__Vinline_0__eval_nba___Vinline_0__nba_sequent__TOP__0___Vdly__t__DOT__secret_inst2__DOT__secret_cyc"/>
|
||||
<map from="PSo9XV" to="__VnbaExecute"/>
|
||||
<map from="PSEtOH" to="__VnbaIterCount"/>
|
||||
<map from="PSeNXP" to="__VnbaPhaseResult"/>
|
||||
|
|
@ -36,13 +34,10 @@
|
|||
<map from="PSKZ7c" to="_eval_debug_assertions"/>
|
||||
<map from="PSEZzj" to="_eval_final"/>
|
||||
<map from="PSABAY" to="_eval_initial"/>
|
||||
<map from="PSjoVa" to="_eval_nba"/>
|
||||
<map from="PS0BBP" to="_eval_phase__act"/>
|
||||
<map from="PSfNDT" to="_eval_phase__nba"/>
|
||||
<map from="PSBUJ6" to="_eval_settle"/>
|
||||
<map from="PS0mmd" to="_eval_static"/>
|
||||
<map from="PSf5GY" to="_eval_triggers_vec__act"/>
|
||||
<map from="PSgp53" to="_nba_sequent__TOP__0"/>
|
||||
<map from="PSoFVg" to="_nba_sequent__TOP__t__DOT__secret_inst__0"/>
|
||||
<map from="PSMRXn" to="_trigger_anySet__act"/>
|
||||
<map from="PSBKaZ" to="_trigger_clear__act"/>
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ test.scenarios('vlt_all')
|
|||
test.top_filename = "t/t_timing_sched.v"
|
||||
|
||||
test.compile(
|
||||
verilator_flags2=["--binary", "--timing", "--inline-cfuncs", "0", "-CFLAGS", "-DVL_DEBUG"])
|
||||
verilator_flags2=["--binary", "--timing", "-fno-inline-cfuncs", "-CFLAGS", "-DVL_DEBUG"])
|
||||
|
||||
test.execute(all_run_flags=["+verilator+debug"])
|
||||
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ import vltest_bootstrap
|
|||
test.scenarios('vlt_all')
|
||||
test.top_filename = "t/t_timing_class.v"
|
||||
|
||||
# Disable --inline-cfuncs so debug traces show all function entries
|
||||
test.compile(verilator_flags2=["--exe --main --timing --inline-cfuncs 0"])
|
||||
# Disable CFunc inlining so debug traces show all function entries
|
||||
test.compile(verilator_flags2=["--exe --main --timing -fno-inline-cfuncs"])
|
||||
|
||||
test.execute(all_run_flags=["+verilator+debug"])
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
-V{t#,#}+ Vt_timing_eval_act___024root___eval_debug_assertions
|
||||
-V{t#,#}+ Initial
|
||||
-V{t#,#}+ Vt_timing_eval_act___024root___eval_static
|
||||
-V{t#,#}+ Vt_timing_eval_act___024root___eval_static__TOP
|
||||
-V{t#,#}+ Vt_timing_eval_act___024root___timing_ready
|
||||
-V{t#,#}+ Vt_timing_eval_act___024root___eval_initial
|
||||
-V{t#,#}+ Vt_timing_eval_act___024root___eval_initial__TOP__Vtiming__0
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('vlt')
|
||||
|
||||
test.compile(verilator_flags2=["--binary", "--runtime-debug"])
|
||||
test.compile(verilator_flags2=["--binary", "--runtime-debug", "-fno-inline-cfuncs"])
|
||||
|
||||
test.file_grep(
|
||||
test.obj_dir + "/" + test.vm_prefix + "___024root__0.cpp", r'void\s+' + test.vm_prefix +
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ internalsDump:
|
|||
-V{t#,#}+ Vt_verilated_debug___024root___eval_phase__nba
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___trigger_anySet__act
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___eval_nba
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___nba_sequent__TOP__0
|
||||
*-* All Finished *-*
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___trigger_clear__act
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___eval_phase__act
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import vltest_bootstrap
|
|||
test.scenarios('vlt_all')
|
||||
test.verilated_debug = True
|
||||
|
||||
test.compile(verilator_flags2=[])
|
||||
test.compile(verilator_flags2=['-fno-inline-cfuncs'])
|
||||
|
||||
test.execute()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue