Optimize inlining small C functions and add `-inline-cfuncs` (#6815)
This commit is contained in:
parent
e6877e83fd
commit
c0a0f0dab9
|
|
@ -408,6 +408,8 @@ detailed descriptions of these arguments.
|
||||||
-I<dir> Directory to search for includes
|
-I<dir> Directory to search for includes
|
||||||
--if-depth <value> Tune IFDEPTH warning
|
--if-depth <value> Tune IFDEPTH warning
|
||||||
+incdir+<dir> Directory to search for includes
|
+incdir+<dir> Directory to search for includes
|
||||||
|
--inline-cfuncs <value> Inline CFuncs with <=value nodes (0=off)
|
||||||
|
--inline-cfuncs-product <value> Inline CFuncs if size*calls <= value
|
||||||
--inline-mult <value> Tune module inlining
|
--inline-mult <value> Tune module inlining
|
||||||
--instr-count-dpi <value> Assumed dynamic instruction count of DPI imports
|
--instr-count-dpi <value> Assumed dynamic instruction count of DPI imports
|
||||||
-j <jobs> Parallelism for --build-jobs/--verilate-jobs
|
-j <jobs> Parallelism for --build-jobs/--verilate-jobs
|
||||||
|
|
|
||||||
|
|
@ -125,6 +125,7 @@ John Wehle
|
||||||
Jonathan Drolet
|
Jonathan Drolet
|
||||||
Jonathan Schröter
|
Jonathan Schröter
|
||||||
Jordan McConnon
|
Jordan McConnon
|
||||||
|
Jose Drowne
|
||||||
Jose Loyola
|
Jose Loyola
|
||||||
Josep Sans
|
Josep Sans
|
||||||
Joseph Nwabueze
|
Joseph Nwabueze
|
||||||
|
|
|
||||||
|
|
@ -867,6 +867,29 @@ Summary:
|
||||||
compatibility and is not recommended usage as this is not supported by
|
compatibility and is not recommended usage as this is not supported by
|
||||||
some third-party tools.
|
some third-party tools.
|
||||||
|
|
||||||
|
.. option:: --inline-cfuncs <value>
|
||||||
|
|
||||||
|
Inline small CFunc calls directly into their callers when the function
|
||||||
|
has at most <value> nodes. This reduces function call overhead when
|
||||||
|
:vlopt:`--output-split-cfuncs` places functions in separate compilation
|
||||||
|
units that the C++ compiler cannot inline.
|
||||||
|
|
||||||
|
Set to 0 to disable this optimization. The default is 20.
|
||||||
|
|
||||||
|
This optimization is automatically disabled when :vlopt:`--prof-cfuncs`
|
||||||
|
or :vlopt:`--trace` is used.
|
||||||
|
|
||||||
|
.. option:: --inline-cfuncs-product <value>
|
||||||
|
|
||||||
|
Tune the inlining of CFunc calls for larger functions. When a function
|
||||||
|
is too large to always inline (exceeds the :vlopt:`--inline-cfuncs` threshold),
|
||||||
|
it may still be inlined if the function size multiplied by the number of
|
||||||
|
call sites is at most <value>.
|
||||||
|
|
||||||
|
This allows functions that are called only once or twice to be inlined
|
||||||
|
even if they exceed the small function threshold. Set to 0 to only inline
|
||||||
|
functions at or below the :vlopt:`--inline-cfuncs` threshold. The default is 200.
|
||||||
|
|
||||||
.. option:: --inline-mult <value>
|
.. option:: --inline-mult <value>
|
||||||
|
|
||||||
Tune the inlining of modules. The default value of 2000 specifies that
|
Tune the inlining of modules. The default value of 2000 specifies that
|
||||||
|
|
|
||||||
|
|
@ -114,6 +114,7 @@ set(HEADERS
|
||||||
V3Hasher.h
|
V3Hasher.h
|
||||||
V3HierBlock.h
|
V3HierBlock.h
|
||||||
V3Inline.h
|
V3Inline.h
|
||||||
|
V3InlineCFuncs.h
|
||||||
V3Inst.h
|
V3Inst.h
|
||||||
V3InstrCount.h
|
V3InstrCount.h
|
||||||
V3Interface.h
|
V3Interface.h
|
||||||
|
|
@ -287,6 +288,7 @@ set(COMMON_SOURCES
|
||||||
V3Hasher.cpp
|
V3Hasher.cpp
|
||||||
V3HierBlock.cpp
|
V3HierBlock.cpp
|
||||||
V3Inline.cpp
|
V3Inline.cpp
|
||||||
|
V3InlineCFuncs.cpp
|
||||||
V3Inst.cpp
|
V3Inst.cpp
|
||||||
V3InstrCount.cpp
|
V3InstrCount.cpp
|
||||||
V3Interface.cpp
|
V3Interface.cpp
|
||||||
|
|
|
||||||
|
|
@ -284,6 +284,7 @@ RAW_OBJS_PCH_ASTNOMT = \
|
||||||
V3Gate.o \
|
V3Gate.o \
|
||||||
V3HierBlock.o \
|
V3HierBlock.o \
|
||||||
V3Inline.o \
|
V3Inline.o \
|
||||||
|
V3InlineCFuncs.o \
|
||||||
V3Inst.o \
|
V3Inst.o \
|
||||||
V3InstrCount.o \
|
V3InstrCount.o \
|
||||||
V3Interface.o \
|
V3Interface.o \
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,269 @@
|
||||||
|
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||||
|
//*************************************************************************
|
||||||
|
// DESCRIPTION: Verilator: Inline small CFuncs into their callers
|
||||||
|
//
|
||||||
|
// Code available from: https://verilator.org
|
||||||
|
//
|
||||||
|
//*************************************************************************
|
||||||
|
//
|
||||||
|
// Copyright 2003-2025 by Wilson Snyder. This program is free software; you
|
||||||
|
// can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
// Version 2.0.
|
||||||
|
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
//
|
||||||
|
//*************************************************************************
|
||||||
|
// V3InlineCFuncs's Transformations:
|
||||||
|
//
|
||||||
|
// For each CCall to a small CFunc:
|
||||||
|
// - Check if function is eligible for inlining (small enough, same scope)
|
||||||
|
// - Clone local variables with unique names to avoid collisions
|
||||||
|
// - Replace CCall with cloned function body statements
|
||||||
|
//
|
||||||
|
// Two tunables control inlining:
|
||||||
|
// --inline-cfuncs <n> : Always inline if size <= n (default 20)
|
||||||
|
// --inline-cfuncs-product <n> : Also inline if size * call_count <= n (default 200)
|
||||||
|
//
|
||||||
|
//*************************************************************************
|
||||||
|
|
||||||
|
#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT
|
||||||
|
|
||||||
|
#include "V3InlineCFuncs.h"
|
||||||
|
|
||||||
|
#include "V3AstUserAllocator.h"
|
||||||
|
#include "V3Stats.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
VL_DEFINE_DEBUG_FUNCTIONS;
|
||||||
|
|
||||||
|
//######################################################################
|
||||||
|
// Helper visitor to check if a CFunc contains C statements
|
||||||
|
// Uses clearOptimizable pattern for debugging
|
||||||
|
|
||||||
|
class CFuncInlineCheckVisitor final : public VNVisitorConst {
|
||||||
|
// STATE
|
||||||
|
bool m_optimizable = true; // True if function can be inlined
|
||||||
|
string m_whyNot; // Reason why not optimizable
|
||||||
|
AstNode* m_whyNotNodep = nullptr; // Node that caused non-optimizable
|
||||||
|
|
||||||
|
// METHODS
|
||||||
|
void clearOptimizable(AstNode* nodep, const string& why) {
|
||||||
|
if (m_optimizable) {
|
||||||
|
m_optimizable = false;
|
||||||
|
m_whyNot = why;
|
||||||
|
m_whyNotNodep = nodep;
|
||||||
|
UINFO(9, "CFunc not inlineable: " << why);
|
||||||
|
if (nodep) UINFO(9, ": " << nodep);
|
||||||
|
UINFO(9, endl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// VISITORS
|
||||||
|
void visit(AstCStmt* nodep) override { clearOptimizable(nodep, "contains AstCStmt"); }
|
||||||
|
void visit(AstCExpr* nodep) override { clearOptimizable(nodep, "contains AstCExpr"); }
|
||||||
|
void visit(AstCStmtUser* nodep) override { clearOptimizable(nodep, "contains AstCStmtUser"); }
|
||||||
|
void visit(AstCExprUser* nodep) override { clearOptimizable(nodep, "contains AstCExprUser"); }
|
||||||
|
void visit(AstNode* nodep) override { iterateChildrenConst(nodep); }
|
||||||
|
|
||||||
|
public:
|
||||||
|
// CONSTRUCTORS
|
||||||
|
explicit CFuncInlineCheckVisitor(AstCFunc* cfuncp) { iterateConst(cfuncp); }
|
||||||
|
|
||||||
|
// ACCESSORS
|
||||||
|
bool optimizable() const { return m_optimizable; }
|
||||||
|
string whyNot() const { return m_whyNot; }
|
||||||
|
AstNode* whyNotNodep() const { return m_whyNotNodep; }
|
||||||
|
};
|
||||||
|
|
||||||
|
//######################################################################
|
||||||
|
|
||||||
|
class InlineCFuncsVisitor final : public VNVisitor {
|
||||||
|
// NODE STATE
|
||||||
|
// AstCFunc::user1() -> vector of AstCCall* pointing to this function
|
||||||
|
// AstCFunc::user2() -> bool: true if checked for C statements
|
||||||
|
// AstCFunc::user3() -> bool: true if contains C statements (not inlineable)
|
||||||
|
const VNUser1InUse m_user1InUse;
|
||||||
|
const VNUser2InUse m_user2InUse;
|
||||||
|
const VNUser3InUse m_user3InUse;
|
||||||
|
AstUser1Allocator<AstCFunc, std::vector<AstCCall*>> m_callSites;
|
||||||
|
|
||||||
|
// STATE
|
||||||
|
VDouble0 m_statInlined; // Statistic tracking
|
||||||
|
const int m_threshold1; // Size threshold: always inline if size <= this
|
||||||
|
const int m_threshold2; // Product threshold: inline if size * calls <= this
|
||||||
|
AstCFunc* m_callerFuncp = nullptr; // Current caller function
|
||||||
|
// Tuples of (StmtExpr to replace, CFunc to inline from, caller func for vars)
|
||||||
|
std::vector<std::tuple<AstStmtExpr*, AstCFunc*, AstCFunc*>> m_toInline;
|
||||||
|
|
||||||
|
// METHODS
|
||||||
|
|
||||||
|
// Check if a function contains any $c() calls (user or internal)
|
||||||
|
// Results are cached in user2/user3 for efficiency
|
||||||
|
bool containsCStatements(AstCFunc* cfuncp) {
|
||||||
|
if (!cfuncp->user2()) {
|
||||||
|
// Not yet checked - run the check visitor
|
||||||
|
cfuncp->user2(true); // Mark as checked
|
||||||
|
const CFuncInlineCheckVisitor checker{cfuncp};
|
||||||
|
cfuncp->user3(!checker.optimizable()); // Store result (true = contains C stmts)
|
||||||
|
}
|
||||||
|
return cfuncp->user3();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if a function is eligible for inlining into caller
|
||||||
|
bool isInlineable(AstCFunc* callerp, AstCFunc* cfuncp) {
|
||||||
|
// Must be in the same scope (same class) to access the same members
|
||||||
|
if (callerp->scopep() != cfuncp->scopep()) return false;
|
||||||
|
|
||||||
|
// Check for $c() calls that might use 'this'
|
||||||
|
if (containsCStatements(cfuncp)) return false;
|
||||||
|
|
||||||
|
// Check it's a void function (not a coroutine)
|
||||||
|
if (cfuncp->rtnTypeVoid() != "void") return false;
|
||||||
|
|
||||||
|
// Don't inline functions marked dontCombine (e.g. trace, entryPoint)
|
||||||
|
if (cfuncp->dontCombine()) return false;
|
||||||
|
|
||||||
|
// Don't inline entry point functions
|
||||||
|
if (cfuncp->entryPoint()) return false;
|
||||||
|
|
||||||
|
// Must have statements to inline
|
||||||
|
if (!cfuncp->stmtsp()) return false;
|
||||||
|
|
||||||
|
// Check size thresholds
|
||||||
|
const size_t funcSize = cfuncp->nodeCount();
|
||||||
|
|
||||||
|
// Always inline if small enough
|
||||||
|
if (funcSize <= static_cast<size_t>(m_threshold1)) return true;
|
||||||
|
|
||||||
|
// Also inline if size * call_count is reasonable
|
||||||
|
const size_t callCount = m_callSites(cfuncp).size();
|
||||||
|
if (callCount > 0 && funcSize * callCount <= static_cast<size_t>(m_threshold2)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// VISITORS
|
||||||
|
void visit(AstCCall* nodep) override {
|
||||||
|
iterateChildren(nodep);
|
||||||
|
|
||||||
|
AstCFunc* const cfuncp = nodep->funcp();
|
||||||
|
if (!cfuncp) return;
|
||||||
|
|
||||||
|
// Track call site for call counting
|
||||||
|
m_callSites(cfuncp).emplace_back(nodep);
|
||||||
|
}
|
||||||
|
|
||||||
|
void visit(AstCFunc* nodep) override {
|
||||||
|
VL_RESTORER(m_callerFuncp);
|
||||||
|
m_callerFuncp = nodep;
|
||||||
|
iterateChildren(nodep);
|
||||||
|
}
|
||||||
|
|
||||||
|
void visit(AstNodeModule* nodep) override {
|
||||||
|
// Process per module for better cache behavior
|
||||||
|
m_toInline.clear();
|
||||||
|
|
||||||
|
// Phase 1: Collect call sites within this module
|
||||||
|
iterateChildren(nodep);
|
||||||
|
|
||||||
|
// Phase 2: Determine which calls to inline
|
||||||
|
collectInlineCandidates(nodep);
|
||||||
|
|
||||||
|
// Phase 3: Perform inlining for this module
|
||||||
|
doInlining();
|
||||||
|
}
|
||||||
|
|
||||||
|
void visit(AstNode* nodep) override { iterateChildren(nodep); }
|
||||||
|
|
||||||
|
// Collect calls that should be inlined within this module
|
||||||
|
void collectInlineCandidates(AstNodeModule* modp) {
|
||||||
|
for (AstNode* stmtp = modp->stmtsp(); stmtp; stmtp = stmtp->nextp()) {
|
||||||
|
AstCFunc* const callerp = VN_CAST(stmtp, CFunc);
|
||||||
|
if (!callerp) continue;
|
||||||
|
|
||||||
|
callerp->foreach([&](AstCCall* callp) {
|
||||||
|
AstCFunc* const cfuncp = callp->funcp();
|
||||||
|
if (!cfuncp) return;
|
||||||
|
if (!isInlineable(callerp, cfuncp)) return;
|
||||||
|
|
||||||
|
// Walk up to find the containing StmtExpr
|
||||||
|
AstNode* stmtNodep = callp;
|
||||||
|
while (stmtNodep && !VN_IS(stmtNodep, StmtExpr) && !VN_IS(stmtNodep, CFunc)) {
|
||||||
|
stmtNodep = stmtNodep->backp();
|
||||||
|
}
|
||||||
|
|
||||||
|
AstStmtExpr* const stmtExprp = VN_CAST(stmtNodep, StmtExpr);
|
||||||
|
if (!stmtExprp) return;
|
||||||
|
|
||||||
|
m_toInline.emplace_back(stmtExprp, cfuncp, callerp);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform the actual inlining after iteration is complete
|
||||||
|
void doInlining() {
|
||||||
|
for (const auto& tuple : m_toInline) {
|
||||||
|
AstStmtExpr* const stmtExprp = std::get<0>(tuple);
|
||||||
|
AstCFunc* const cfuncp = std::get<1>(tuple);
|
||||||
|
AstCFunc* const callerp = std::get<2>(tuple);
|
||||||
|
|
||||||
|
UINFO(6, "Inlining CFunc " << cfuncp->name() << " into " << callerp->name() << endl);
|
||||||
|
++m_statInlined;
|
||||||
|
|
||||||
|
// Clone local variables with unique names to avoid collisions
|
||||||
|
std::map<AstVar*, AstVar*> varMap;
|
||||||
|
for (AstVar* varp = cfuncp->varsp(); varp; varp = VN_AS(varp->nextp(), Var)) {
|
||||||
|
const string newName = "__Vinline_" + cfuncp->name() + "_" + varp->name();
|
||||||
|
AstVar* const newVarp = varp->cloneTree(false);
|
||||||
|
newVarp->name(newName);
|
||||||
|
callerp->addVarsp(newVarp);
|
||||||
|
varMap[varp] = newVarp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clone the function body
|
||||||
|
AstNode* const bodyp = cfuncp->stmtsp()->cloneTree(true);
|
||||||
|
|
||||||
|
// Retarget variable references to the cloned variables
|
||||||
|
// Must iterate all sibling statements, not just the first
|
||||||
|
if (!varMap.empty()) {
|
||||||
|
for (AstNode* stmtp = bodyp; stmtp; stmtp = stmtp->nextp()) {
|
||||||
|
stmtp->foreach([&](AstVarRef* refp) {
|
||||||
|
auto it = varMap.find(refp->varp());
|
||||||
|
if (it != varMap.end()) refp->varp(it->second);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace the statement with the inlined body
|
||||||
|
stmtExprp->addNextHere(bodyp);
|
||||||
|
VL_DO_DANGLING(stmtExprp->unlinkFrBack()->deleteTree(), stmtExprp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
// CONSTRUCTORS
|
||||||
|
explicit InlineCFuncsVisitor(AstNetlist* nodep)
|
||||||
|
: m_threshold1{v3Global.opt.inlineCFuncs()}
|
||||||
|
, m_threshold2{v3Global.opt.inlineCFuncsProduct()} {
|
||||||
|
// Don't inline when profiling or tracing
|
||||||
|
if (v3Global.opt.profCFuncs() || v3Global.opt.trace()) return;
|
||||||
|
// Process modules one at a time for better cache behavior
|
||||||
|
iterateAndNextNull(nodep->modulesp());
|
||||||
|
}
|
||||||
|
~InlineCFuncsVisitor() override {
|
||||||
|
V3Stats::addStat("Optimizations, Inlined CFuncs", m_statInlined);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//######################################################################
|
||||||
|
// InlineCFuncs class functions
|
||||||
|
|
||||||
|
void V3InlineCFuncs::inlineAll(AstNetlist* nodep) {
    // Pass entry point: run the inlining visitor over the netlist, then dump/check the tree
    UINFO(2, __FUNCTION__ << ":");
    {
        // Scoped so the visitor is destroyed (recording its statistic) before the tree check
        const InlineCFuncsVisitor visitor{nodep};
    }
    const bool doDump = dumpTreeEitherLevel() >= 6;
    V3Global::dumpCheckGlobalTree("inlinecfuncs", 0, doDump);
}
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||||
|
//*************************************************************************
|
||||||
|
// DESCRIPTION: Verilator: Inline small CFuncs into their callers
|
||||||
|
//
|
||||||
|
// Code available from: https://verilator.org
|
||||||
|
//
|
||||||
|
//*************************************************************************
|
||||||
|
//
|
||||||
|
// Copyright 2003-2025 by Wilson Snyder. This program is free software; you
|
||||||
|
// can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
// Version 2.0.
|
||||||
|
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
//
|
||||||
|
//*************************************************************************
|
||||||
|
|
||||||
|
#ifndef VERILATOR_V3INLINECFUNCS_H_
|
||||||
|
#define VERILATOR_V3INLINECFUNCS_H_
|
||||||
|
|
||||||
|
#include "config_build.h"
|
||||||
|
#include "verilatedos.h"
|
||||||
|
|
||||||
|
class AstNetlist;
|
||||||
|
|
||||||
|
class V3InlineCFuncs final {
|
||||||
|
public:
|
||||||
|
static void inlineAll(AstNetlist* nodep) VL_MT_DISABLED;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // Guard
|
||||||
|
|
@ -1528,6 +1528,8 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
|
||||||
}).notForRerun();
|
}).notForRerun();
|
||||||
DECL_OPTION("-if-depth", Set, &m_ifDepth);
|
DECL_OPTION("-if-depth", Set, &m_ifDepth);
|
||||||
DECL_OPTION("-ignc", OnOff, &m_ignc).undocumented();
|
DECL_OPTION("-ignc", OnOff, &m_ignc).undocumented();
|
||||||
|
DECL_OPTION("-inline-cfuncs", Set, &m_inlineCFuncs);
|
||||||
|
DECL_OPTION("-inline-cfuncs-product", Set, &m_inlineCFuncsProduct);
|
||||||
DECL_OPTION("-inline-mult", Set, &m_inlineMult);
|
DECL_OPTION("-inline-mult", Set, &m_inlineMult);
|
||||||
DECL_OPTION("-instr-count-dpi", CbVal, [this, fl](int val) {
|
DECL_OPTION("-instr-count-dpi", CbVal, [this, fl](int val) {
|
||||||
m_instrCountDpi = val;
|
m_instrCountDpi = val;
|
||||||
|
|
|
||||||
|
|
@ -319,6 +319,8 @@ private:
|
||||||
int m_hierChild = 0; // main switch: --hierarchical-child
|
int m_hierChild = 0; // main switch: --hierarchical-child
|
||||||
int m_hierThreads = 0; // main switch: --hierarchical-threads
|
int m_hierThreads = 0; // main switch: --hierarchical-threads
|
||||||
int m_ifDepth = 0; // main switch: --if-depth
|
int m_ifDepth = 0; // main switch: --if-depth
|
||||||
|
int m_inlineCFuncs = 20; // main switch: --inline-cfuncs
|
||||||
|
int m_inlineCFuncsProduct = 200; // main switch: --inline-cfuncs-product
|
||||||
int m_inlineMult = 2000; // main switch: --inline-mult
|
int m_inlineMult = 2000; // main switch: --inline-mult
|
||||||
int m_instrCountDpi = 200; // main switch: --instr-count-dpi
|
int m_instrCountDpi = 200; // main switch: --instr-count-dpi
|
||||||
bool m_jsonEditNums = true; // main switch: --no-json-edit-nums
|
bool m_jsonEditNums = true; // main switch: --no-json-edit-nums
|
||||||
|
|
@ -595,6 +597,8 @@ public:
|
||||||
int expandLimit() const { return m_expandLimit; }
|
int expandLimit() const { return m_expandLimit; }
|
||||||
int gateStmts() const { return m_gateStmts; }
|
int gateStmts() const { return m_gateStmts; }
|
||||||
int ifDepth() const { return m_ifDepth; }
|
int ifDepth() const { return m_ifDepth; }
|
||||||
|
int inlineCFuncs() const { return m_inlineCFuncs; }
|
||||||
|
int inlineCFuncsProduct() const { return m_inlineCFuncsProduct; }
|
||||||
int inlineMult() const { return m_inlineMult; }
|
int inlineMult() const { return m_inlineMult; }
|
||||||
int instrCountDpi() const { return m_instrCountDpi; }
|
int instrCountDpi() const { return m_instrCountDpi; }
|
||||||
int localizeMaxSize() const { return m_localizeMaxSize; }
|
int localizeMaxSize() const { return m_localizeMaxSize; }
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,7 @@
|
||||||
#include "V3Graph.h"
|
#include "V3Graph.h"
|
||||||
#include "V3HierBlock.h"
|
#include "V3HierBlock.h"
|
||||||
#include "V3Inline.h"
|
#include "V3Inline.h"
|
||||||
|
#include "V3InlineCFuncs.h"
|
||||||
#include "V3Inst.h"
|
#include "V3Inst.h"
|
||||||
#include "V3Interface.h"
|
#include "V3Interface.h"
|
||||||
#include "V3LibMap.h"
|
#include "V3LibMap.h"
|
||||||
|
|
@ -565,6 +566,11 @@ static void process() {
|
||||||
V3Reloop::reloopAll(v3Global.rootp());
|
V3Reloop::reloopAll(v3Global.rootp());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (v3Global.opt.inlineCFuncs()) {
|
||||||
|
// Inline small CFuncs to reduce function call overhead
|
||||||
|
V3InlineCFuncs::inlineAll(v3Global.rootp());
|
||||||
|
}
|
||||||
|
|
||||||
// Fix very deep expressions
|
// Fix very deep expressions
|
||||||
// Mark evaluation functions as member functions, if needed.
|
// Mark evaluation functions as member functions, if needed.
|
||||||
V3Depth::depthAll(v3Global.rootp());
|
V3Depth::depthAll(v3Global.rootp());
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
||||||
|
|
||||||
test.scenarios('simulator_st')
|
test.scenarios('simulator_st')
|
||||||
|
|
||||||
test.compile(verilator_flags2=["--stats"])
|
test.compile(verilator_flags2=["--stats", "--inline-cfuncs", "0"])
|
||||||
|
|
||||||
test.execute(expect_filename=test.golden_filename)
|
test.execute(expect_filename=test.golden_filename)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,8 @@ test.top_filename = "t/t_inst_tree.v"
|
||||||
|
|
||||||
default_vltmt_threads = test.get_default_vltmt_threads
|
default_vltmt_threads = test.get_default_vltmt_threads
|
||||||
test.compile(
|
test.compile(
|
||||||
verilator_flags2=['--stats', test.t_dir + "/" + test.name + ".vlt"],
|
# Disable --inline-cfuncs so functions exist to be combined
|
||||||
|
verilator_flags2=['--stats', '--inline-cfuncs', '0', test.t_dir + "/" + test.name + ".vlt"],
|
||||||
# Force 3 threads even if we have fewer cores
|
# Force 3 threads even if we have fewer cores
|
||||||
threads=(default_vltmt_threads if test.vltmt else 1))
|
threads=(default_vltmt_threads if test.vltmt else 1))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,9 @@ test.top_filename = "t/t_enum_type_methods.v"
|
||||||
|
|
||||||
out_filename = test.obj_dir + "/V" + test.name + ".tree.json"
|
out_filename = test.obj_dir + "/V" + test.name + ".tree.json"
|
||||||
|
|
||||||
test.compile(verilator_flags2=['--no-std', '--debug-check', '--no-json-edit-nums', '--flatten'],
|
test.compile(verilator_flags2=[
|
||||||
|
'--no-std', '--debug-check', '--no-json-edit-nums', '--flatten', '--inline-cfuncs', '0'
|
||||||
|
],
|
||||||
verilator_make_gmake=False,
|
verilator_make_gmake=False,
|
||||||
make_top_shell=False,
|
make_top_shell=False,
|
||||||
make_main=False)
|
make_main=False)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2024 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
import vltest_bootstrap
|
||||||
|
|
||||||
|
test.scenarios('vlt')

# --output-split-cfuncs creates small functions that can be inlined;
# --inline-cfuncs-product is passed as well so that option is exercised too
compile_flags = [
    "--stats",
    "--binary",
    "--output-split-cfuncs",
    "1",
    "--inline-cfuncs-product",
    "200",
]
test.compile(verilator_flags2=compile_flags)

# The stats file must report exactly this many inlined calls
inlined_stat_re = r'Optimizations, Inlined CFuncs\s+(\d+)'
test.file_grep(test.stats, inlined_stat_re, 39)

test.execute()

test.passes()
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
// DESCRIPTION: Verilator: Verilog Test module
|
||||||
|
//
|
||||||
|
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||||
|
// any use, without warranty, 2024 by Wilson Snyder.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
// Test module designed to generate multiple small CFuncs that can be inlined
|
||||||
|
// Uses generate to create multiple sub-module instances
|
||||||
|
module t (/*AUTOARG*/
|
||||||
|
// Inputs
|
||||||
|
clk
|
||||||
|
);
|
||||||
|
input clk;
|
||||||
|
|
||||||
|
integer cyc = 0;
|
||||||
|
|
||||||
|
parameter CNT = 8;
|
||||||
|
|
||||||
|
wire [31:0] w [CNT:0];
|
||||||
|
reg [31:0] w0;
|
||||||
|
assign w[0] = w0;
|
||||||
|
|
||||||
|
// Generate multiple sub-modules - each creates CFuncs that can be inlined
|
||||||
|
generate
|
||||||
|
for (genvar g=0; g<CNT; g++) begin : gen_sub
|
||||||
|
sub sub_inst (.clk(clk), .i(w[g]), .z(w[g+1]));
|
||||||
|
end
|
||||||
|
endgenerate
|
||||||
|
|
||||||
|
// Test loop
|
||||||
|
always @ (posedge clk) begin
|
||||||
|
cyc <= cyc + 1;
|
||||||
|
if (cyc==0) begin
|
||||||
|
w0 <= 32'h10;
|
||||||
|
end
|
||||||
|
else if (cyc==10) begin
|
||||||
|
// Each sub adds 1, so final value is 0x10 + 8 = 0x18
|
||||||
|
if (w[CNT] !== 32'h18) begin
|
||||||
|
$write("%%Error: w[CNT]=%0x, expected 0x18\n", w[CNT]);
|
||||||
|
$stop;
|
||||||
|
end
|
||||||
|
$write("*-* All Finished *-*\n");
|
||||||
|
$finish;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
endmodule
|
||||||
|
|
||||||
|
// Small sub-module that generates inlineable CFuncs
|
||||||
|
module sub (input clk, input [31:0] i, output reg [31:0] z);
|
||||||
|
reg [7:0] local_a;
|
||||||
|
reg [7:0] local_b;
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
local_a <= i[7:0];
|
||||||
|
local_b <= 8'd1;
|
||||||
|
z <= i + {24'b0, local_b};
|
||||||
|
end
|
||||||
|
endmodule
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2024 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
import vltest_bootstrap
|
||||||
|
|
||||||
|
test.scenarios('vlt')
test.top_filename = "t/t_opt_inline_cfuncs.v"

# --inline-cfuncs 0 turns the optimization off
disabled_flags = ["--stats", "--binary", "--inline-cfuncs", "0"]
test.compile(verilator_flags2=disabled_flags)

# No non-zero inline count may appear (the stat is absent when the pass is skipped)
nonzero_inlined_re = r'Optimizations, Inlined CFuncs\s+[1-9]'
test.file_grep_not(test.stats, nonzero_inlined_re)

test.execute()

test.passes()
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||||
|
#
|
||||||
|
# Copyright 2024 by Wilson Snyder. This program is free software; you
|
||||||
|
# can redistribute it and/or modify it under the terms of either the GNU
|
||||||
|
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||||
|
# Version 2.0.
|
||||||
|
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||||
|
|
||||||
|
import vltest_bootstrap
|
||||||
|
|
||||||
|
test.scenarios('vlt')

# Thresholds chosen so every candidate is rejected, covering the final
# "return false" path of isInlineable():
#   --inline-cfuncs 1          keeps the pass enabled (0 would skip it)
#   --inline-cfuncs-product 0  rejects everything (node_count * call_count > 0 always)
reject_all_flags = [
    "--stats",
    "--binary",
    "--inline-cfuncs",
    "1",
    "--inline-cfuncs-product",
    "0",
]
test.compile(verilator_flags2=reject_all_flags)

# The pass ran, so the stat exists, but nothing was inlined
inlined_stat_re = r'Optimizations, Inlined CFuncs\s+(\d+)'
test.file_grep(test.stats, inlined_stat_re, 0)

test.execute()

test.passes()
|
||||||
|
|
@ -0,0 +1,27 @@
|
||||||
|
// DESCRIPTION: Verilator: Verilog Test module
|
||||||
|
//
|
||||||
|
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||||
|
// any use, without warranty, 2024 by Wilson Snyder.
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
// Test module to exercise threshold checking in CFunc inlining
|
||||||
|
// With low thresholds, these functions should NOT be inlined
|
||||||
|
module t;
|
||||||
|
reg [31:0] a, b, c, d, e, f, g, h;
|
||||||
|
|
||||||
|
initial begin
|
||||||
|
// Multiple operations to create larger CFuncs
|
||||||
|
a = 32'd1;
|
||||||
|
b = 32'd2;
|
||||||
|
c = a + b;
|
||||||
|
d = c * 2;
|
||||||
|
e = d - 1;
|
||||||
|
f = e + a;
|
||||||
|
g = f * b;
|
||||||
|
h = g + c + d + e + f;
|
||||||
|
|
||||||
|
if (h != 32'd32) $stop;
|
||||||
|
$write("*-* All Finished *-*\n");
|
||||||
|
$finish;
|
||||||
|
end
|
||||||
|
endmodule
|
||||||
|
|
@ -12,7 +12,8 @@ import vltest_bootstrap
|
||||||
test.scenarios('vlt_all')
|
test.scenarios('vlt_all')
|
||||||
test.top_filename = "t/t_timing_sched.v"
|
test.top_filename = "t/t_timing_sched.v"
|
||||||
|
|
||||||
test.compile(verilator_flags2=["--exe --main --timing"])
|
test.compile(
|
||||||
|
verilator_flags2=["--binary", "--timing", "--inline-cfuncs", "0", "-CFLAGS", "-DVL_DEBUG"])
|
||||||
|
|
||||||
test.execute(all_run_flags=["+verilator+debug"])
|
test.execute(all_run_flags=["+verilator+debug"])
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,8 @@ import vltest_bootstrap
|
||||||
test.scenarios('vlt_all')
|
test.scenarios('vlt_all')
|
||||||
test.top_filename = "t/t_timing_class.v"
|
test.top_filename = "t/t_timing_class.v"
|
||||||
|
|
||||||
test.compile(verilator_flags2=["--exe --main --timing"])
|
# Disable --inline-cfuncs so debug traces show all function entries
|
||||||
|
test.compile(verilator_flags2=["--exe --main --timing --inline-cfuncs 0"])
|
||||||
|
|
||||||
test.execute(all_run_flags=["+verilator+debug"])
|
test.execute(all_run_flags=["+verilator+debug"])
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,6 @@ internalsDump:
|
||||||
-V{t#,#}+ Vt_verilated_debug___024root___eval_phase__nba
|
-V{t#,#}+ Vt_verilated_debug___024root___eval_phase__nba
|
||||||
-V{t#,#}+ Vt_verilated_debug___024root___trigger_anySet__act
|
-V{t#,#}+ Vt_verilated_debug___024root___trigger_anySet__act
|
||||||
-V{t#,#}+ Vt_verilated_debug___024root___eval_nba
|
-V{t#,#}+ Vt_verilated_debug___024root___eval_nba
|
||||||
-V{t#,#}+ Vt_verilated_debug___024root___nba_sequent__TOP__0
|
|
||||||
*-* All Finished *-*
|
*-* All Finished *-*
|
||||||
-V{t#,#}+ Vt_verilated_debug___024root___trigger_clear__act
|
-V{t#,#}+ Vt_verilated_debug___024root___trigger_clear__act
|
||||||
-V{t#,#}+ Vt_verilated_debug___024root___eval_phase__act
|
-V{t#,#}+ Vt_verilated_debug___024root___eval_phase__act
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ test.top_filename = "t/t_enum_type_methods.v"
|
||||||
|
|
||||||
out_filename = test.obj_dir + "/V" + test.name + ".xml"
|
out_filename = test.obj_dir + "/V" + test.name + ".xml"
|
||||||
|
|
||||||
test.compile(verilator_flags2=['--no-std', '--debug-check', '--flatten'],
|
test.compile(verilator_flags2=['--no-std', '--debug-check', '--flatten', '--inline-cfuncs', '0'],
|
||||||
verilator_make_gmake=False,
|
verilator_make_gmake=False,
|
||||||
make_top_shell=False,
|
make_top_shell=False,
|
||||||
make_main=False)
|
make_main=False)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue