Optimize inlining small C functions and add `-inline-cfuncs` (#6815)
This commit is contained in:
parent
e6877e83fd
commit
c0a0f0dab9
|
|
@ -408,6 +408,8 @@ detailed descriptions of these arguments.
|
|||
-I<dir> Directory to search for includes
|
||||
--if-depth <value> Tune IFDEPTH warning
|
||||
+incdir+<dir> Directory to search for includes
|
||||
--inline-cfuncs <value> Inline CFuncs with <=value nodes (0=off)
|
||||
--inline-cfuncs-product <value> Inline CFuncs if size*calls <= value
|
||||
--inline-mult <value> Tune module inlining
|
||||
--instr-count-dpi <value> Assumed dynamic instruction count of DPI imports
|
||||
-j <jobs> Parallelism for --build-jobs/--verilate-jobs
|
||||
|
|
|
|||
|
|
@ -125,6 +125,7 @@ John Wehle
|
|||
Jonathan Drolet
|
||||
Jonathan Schröter
|
||||
Jordan McConnon
|
||||
Jose Drowne
|
||||
Jose Loyola
|
||||
Josep Sans
|
||||
Joseph Nwabueze
|
||||
|
|
|
|||
|
|
@ -867,6 +867,29 @@ Summary:
|
|||
compatibility and is not recommended usage as this is not supported by
|
||||
some third-party tools.
|
||||
|
||||
.. option:: --inline-cfuncs <value>
|
||||
|
||||
Inline small CFunc calls directly into their callers when the function
|
||||
has at most <value> nodes. This reduces function call overhead when
|
||||
:vlopt:`--output-split-cfuncs` places functions in separate compilation
|
||||
units that the C++ compiler cannot inline.
|
||||
|
||||
Set to 0 to disable this optimization. The default is 20.
|
||||
|
||||
This optimization is automatically disabled when :vlopt:`--prof-cfuncs`
|
||||
or :vlopt:`--trace` is used.
|
||||
|
||||
.. option:: --inline-cfuncs-product <value>
|
||||
|
||||
Tune the inlining of CFunc calls for larger functions. When a function
|
||||
is too large to always inline (exceeds :vlopt:`--inline-cfuncs` threshold),
|
||||
it may still be inlined if the function size multiplied by the number of
|
||||
call sites is at most <value>.
|
||||
|
||||
This allows functions that are called only once or twice to be inlined
|
||||
even if they exceed the small function threshold. Set to 0 to only inline
|
||||
functions at or below the :vlopt:`--inline-cfuncs` threshold. The default is 200.
|
||||
|
||||
.. option:: --inline-mult <value>
|
||||
|
||||
Tune the inlining of modules. The default value of 2000 specifies that
|
||||
|
|
|
|||
|
|
@ -114,6 +114,7 @@ set(HEADERS
|
|||
V3Hasher.h
|
||||
V3HierBlock.h
|
||||
V3Inline.h
|
||||
V3InlineCFuncs.h
|
||||
V3Inst.h
|
||||
V3InstrCount.h
|
||||
V3Interface.h
|
||||
|
|
@ -287,6 +288,7 @@ set(COMMON_SOURCES
|
|||
V3Hasher.cpp
|
||||
V3HierBlock.cpp
|
||||
V3Inline.cpp
|
||||
V3InlineCFuncs.cpp
|
||||
V3Inst.cpp
|
||||
V3InstrCount.cpp
|
||||
V3Interface.cpp
|
||||
|
|
|
|||
|
|
@ -284,6 +284,7 @@ RAW_OBJS_PCH_ASTNOMT = \
|
|||
V3Gate.o \
|
||||
V3HierBlock.o \
|
||||
V3Inline.o \
|
||||
V3InlineCFuncs.o \
|
||||
V3Inst.o \
|
||||
V3InstrCount.o \
|
||||
V3Interface.o \
|
||||
|
|
|
|||
|
|
@ -0,0 +1,269 @@
|
|||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
// DESCRIPTION: Verilator: Inline small CFuncs into their callers
|
||||
//
|
||||
// Code available from: https://verilator.org
|
||||
//
|
||||
//*************************************************************************
|
||||
//
|
||||
// Copyright 2003-2025 by Wilson Snyder. This program is free software; you
|
||||
// can redistribute it and/or modify it under the terms of either the GNU
|
||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||
// Version 2.0.
|
||||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
//
|
||||
//*************************************************************************
|
||||
// V3InlineCFuncs's Transformations:
|
||||
//
|
||||
// For each CCall to a small CFunc:
|
||||
// - Check if function is eligible for inlining (small enough, same scope)
|
||||
// - Clone local variables with unique names to avoid collisions
|
||||
// - Replace CCall with cloned function body statements
|
||||
//
|
||||
// Two tunables control inlining:
|
||||
// --inline-cfuncs <n> : Always inline if size <= n (default 20)
|
||||
// --inline-cfuncs-product <n> : Also inline if size * call_count <= n (default 200)
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT
|
||||
|
||||
#include "V3InlineCFuncs.h"
|
||||
|
||||
#include "V3AstUserAllocator.h"
|
||||
#include "V3Stats.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
VL_DEFINE_DEBUG_FUNCTIONS;
|
||||
|
||||
//######################################################################
|
||||
// Helper visitor to check if a CFunc contains C statements
|
||||
// Uses clearOptimizable pattern for debugging
|
||||
|
||||
class CFuncInlineCheckVisitor final : public VNVisitorConst {
|
||||
// STATE
|
||||
bool m_optimizable = true; // True if function can be inlined
|
||||
string m_whyNot; // Reason why not optimizable
|
||||
AstNode* m_whyNotNodep = nullptr; // Node that caused non-optimizable
|
||||
|
||||
// METHODS
|
||||
void clearOptimizable(AstNode* nodep, const string& why) {
|
||||
if (m_optimizable) {
|
||||
m_optimizable = false;
|
||||
m_whyNot = why;
|
||||
m_whyNotNodep = nodep;
|
||||
UINFO(9, "CFunc not inlineable: " << why);
|
||||
if (nodep) UINFO(9, ": " << nodep);
|
||||
UINFO(9, endl);
|
||||
}
|
||||
}
|
||||
|
||||
// VISITORS
|
||||
void visit(AstCStmt* nodep) override { clearOptimizable(nodep, "contains AstCStmt"); }
|
||||
void visit(AstCExpr* nodep) override { clearOptimizable(nodep, "contains AstCExpr"); }
|
||||
void visit(AstCStmtUser* nodep) override { clearOptimizable(nodep, "contains AstCStmtUser"); }
|
||||
void visit(AstCExprUser* nodep) override { clearOptimizable(nodep, "contains AstCExprUser"); }
|
||||
void visit(AstNode* nodep) override { iterateChildrenConst(nodep); }
|
||||
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
explicit CFuncInlineCheckVisitor(AstCFunc* cfuncp) { iterateConst(cfuncp); }
|
||||
|
||||
// ACCESSORS
|
||||
bool optimizable() const { return m_optimizable; }
|
||||
string whyNot() const { return m_whyNot; }
|
||||
AstNode* whyNotNodep() const { return m_whyNotNodep; }
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
|
||||
class InlineCFuncsVisitor final : public VNVisitor {
|
||||
// NODE STATE
|
||||
// AstCFunc::user1() -> vector of AstCCall* pointing to this function
|
||||
// AstCFunc::user2() -> bool: true if checked for C statements
|
||||
// AstCFunc::user3() -> bool: true if contains C statements (not inlineable)
|
||||
const VNUser1InUse m_user1InUse;
|
||||
const VNUser2InUse m_user2InUse;
|
||||
const VNUser3InUse m_user3InUse;
|
||||
AstUser1Allocator<AstCFunc, std::vector<AstCCall*>> m_callSites;
|
||||
|
||||
// STATE
|
||||
VDouble0 m_statInlined; // Statistic tracking
|
||||
const int m_threshold1; // Size threshold: always inline if size <= this
|
||||
const int m_threshold2; // Product threshold: inline if size * calls <= this
|
||||
AstCFunc* m_callerFuncp = nullptr; // Current caller function
|
||||
// Tuples of (StmtExpr to replace, CFunc to inline from, caller func for vars)
|
||||
std::vector<std::tuple<AstStmtExpr*, AstCFunc*, AstCFunc*>> m_toInline;
|
||||
|
||||
// METHODS
|
||||
|
||||
// Check if a function contains any $c() calls (user or internal)
|
||||
// Results are cached in user2/user3 for efficiency
|
||||
bool containsCStatements(AstCFunc* cfuncp) {
|
||||
if (!cfuncp->user2()) {
|
||||
// Not yet checked - run the check visitor
|
||||
cfuncp->user2(true); // Mark as checked
|
||||
const CFuncInlineCheckVisitor checker{cfuncp};
|
||||
cfuncp->user3(!checker.optimizable()); // Store result (true = contains C stmts)
|
||||
}
|
||||
return cfuncp->user3();
|
||||
}
|
||||
|
||||
// Check if a function is eligible for inlining into caller
|
||||
bool isInlineable(AstCFunc* callerp, AstCFunc* cfuncp) {
|
||||
// Must be in the same scope (same class) to access the same members
|
||||
if (callerp->scopep() != cfuncp->scopep()) return false;
|
||||
|
||||
// Check for $c() calls that might use 'this'
|
||||
if (containsCStatements(cfuncp)) return false;
|
||||
|
||||
// Check it's a void function (not a coroutine)
|
||||
if (cfuncp->rtnTypeVoid() != "void") return false;
|
||||
|
||||
// Don't inline functions marked dontCombine (e.g. trace, entryPoint)
|
||||
if (cfuncp->dontCombine()) return false;
|
||||
|
||||
// Don't inline entry point functions
|
||||
if (cfuncp->entryPoint()) return false;
|
||||
|
||||
// Must have statements to inline
|
||||
if (!cfuncp->stmtsp()) return false;
|
||||
|
||||
// Check size thresholds
|
||||
const size_t funcSize = cfuncp->nodeCount();
|
||||
|
||||
// Always inline if small enough
|
||||
if (funcSize <= static_cast<size_t>(m_threshold1)) return true;
|
||||
|
||||
// Also inline if size * call_count is reasonable
|
||||
const size_t callCount = m_callSites(cfuncp).size();
|
||||
if (callCount > 0 && funcSize * callCount <= static_cast<size_t>(m_threshold2)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// VISITORS
|
||||
void visit(AstCCall* nodep) override {
|
||||
iterateChildren(nodep);
|
||||
|
||||
AstCFunc* const cfuncp = nodep->funcp();
|
||||
if (!cfuncp) return;
|
||||
|
||||
// Track call site for call counting
|
||||
m_callSites(cfuncp).emplace_back(nodep);
|
||||
}
|
||||
|
||||
void visit(AstCFunc* nodep) override {
|
||||
VL_RESTORER(m_callerFuncp);
|
||||
m_callerFuncp = nodep;
|
||||
iterateChildren(nodep);
|
||||
}
|
||||
|
||||
void visit(AstNodeModule* nodep) override {
|
||||
// Process per module for better cache behavior
|
||||
m_toInline.clear();
|
||||
|
||||
// Phase 1: Collect call sites within this module
|
||||
iterateChildren(nodep);
|
||||
|
||||
// Phase 2: Determine which calls to inline
|
||||
collectInlineCandidates(nodep);
|
||||
|
||||
// Phase 3: Perform inlining for this module
|
||||
doInlining();
|
||||
}
|
||||
|
||||
void visit(AstNode* nodep) override { iterateChildren(nodep); }
|
||||
|
||||
// Collect calls that should be inlined within this module
|
||||
void collectInlineCandidates(AstNodeModule* modp) {
|
||||
for (AstNode* stmtp = modp->stmtsp(); stmtp; stmtp = stmtp->nextp()) {
|
||||
AstCFunc* const callerp = VN_CAST(stmtp, CFunc);
|
||||
if (!callerp) continue;
|
||||
|
||||
callerp->foreach([&](AstCCall* callp) {
|
||||
AstCFunc* const cfuncp = callp->funcp();
|
||||
if (!cfuncp) return;
|
||||
if (!isInlineable(callerp, cfuncp)) return;
|
||||
|
||||
// Walk up to find the containing StmtExpr
|
||||
AstNode* stmtNodep = callp;
|
||||
while (stmtNodep && !VN_IS(stmtNodep, StmtExpr) && !VN_IS(stmtNodep, CFunc)) {
|
||||
stmtNodep = stmtNodep->backp();
|
||||
}
|
||||
|
||||
AstStmtExpr* const stmtExprp = VN_CAST(stmtNodep, StmtExpr);
|
||||
if (!stmtExprp) return;
|
||||
|
||||
m_toInline.emplace_back(stmtExprp, cfuncp, callerp);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Perform the actual inlining after iteration is complete
|
||||
void doInlining() {
|
||||
for (const auto& tuple : m_toInline) {
|
||||
AstStmtExpr* const stmtExprp = std::get<0>(tuple);
|
||||
AstCFunc* const cfuncp = std::get<1>(tuple);
|
||||
AstCFunc* const callerp = std::get<2>(tuple);
|
||||
|
||||
UINFO(6, "Inlining CFunc " << cfuncp->name() << " into " << callerp->name() << endl);
|
||||
++m_statInlined;
|
||||
|
||||
// Clone local variables with unique names to avoid collisions
|
||||
std::map<AstVar*, AstVar*> varMap;
|
||||
for (AstVar* varp = cfuncp->varsp(); varp; varp = VN_AS(varp->nextp(), Var)) {
|
||||
const string newName = "__Vinline_" + cfuncp->name() + "_" + varp->name();
|
||||
AstVar* const newVarp = varp->cloneTree(false);
|
||||
newVarp->name(newName);
|
||||
callerp->addVarsp(newVarp);
|
||||
varMap[varp] = newVarp;
|
||||
}
|
||||
|
||||
// Clone the function body
|
||||
AstNode* const bodyp = cfuncp->stmtsp()->cloneTree(true);
|
||||
|
||||
// Retarget variable references to the cloned variables
|
||||
// Must iterate all sibling statements, not just the first
|
||||
if (!varMap.empty()) {
|
||||
for (AstNode* stmtp = bodyp; stmtp; stmtp = stmtp->nextp()) {
|
||||
stmtp->foreach([&](AstVarRef* refp) {
|
||||
auto it = varMap.find(refp->varp());
|
||||
if (it != varMap.end()) refp->varp(it->second);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Replace the statement with the inlined body
|
||||
stmtExprp->addNextHere(bodyp);
|
||||
VL_DO_DANGLING(stmtExprp->unlinkFrBack()->deleteTree(), stmtExprp);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
explicit InlineCFuncsVisitor(AstNetlist* nodep)
|
||||
: m_threshold1{v3Global.opt.inlineCFuncs()}
|
||||
, m_threshold2{v3Global.opt.inlineCFuncsProduct()} {
|
||||
// Don't inline when profiling or tracing
|
||||
if (v3Global.opt.profCFuncs() || v3Global.opt.trace()) return;
|
||||
// Process modules one at a time for better cache behavior
|
||||
iterateAndNextNull(nodep->modulesp());
|
||||
}
|
||||
~InlineCFuncsVisitor() override {
|
||||
V3Stats::addStat("Optimizations, Inlined CFuncs", m_statInlined);
|
||||
}
|
||||
};
|
||||
|
||||
//######################################################################
|
||||
// InlineCFuncs class functions
|
||||
|
||||
// Run the CFunc inlining pass over the whole netlist, then dump/check the tree.
void V3InlineCFuncs::inlineAll(AstNetlist* nodep) {
    UINFO(2, __FUNCTION__ << ":");
    {
        // The visitor does all its work in the constructor; the inner scope
        // makes sure it is destructed before the tree is dumped and checked.
        const InlineCFuncsVisitor visitor{nodep};
    }
    const bool doDump = dumpTreeEitherLevel() >= 6;
    V3Global::dumpCheckGlobalTree("inlinecfuncs", 0, doDump);
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
// DESCRIPTION: Verilator: Inline small CFuncs into their callers
|
||||
//
|
||||
// Code available from: https://verilator.org
|
||||
//
|
||||
//*************************************************************************
|
||||
//
|
||||
// Copyright 2003-2025 by Wilson Snyder. This program is free software; you
|
||||
// can redistribute it and/or modify it under the terms of either the GNU
|
||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||
// Version 2.0.
|
||||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
#ifndef VERILATOR_V3INLINECFUNCS_H_
|
||||
#define VERILATOR_V3INLINECFUNCS_H_
|
||||
|
||||
#include "config_build.h"
|
||||
#include "verilatedos.h"
|
||||
|
||||
class AstNetlist;
|
||||
|
||||
// Public interface of the CFunc inlining pass; exposes a single static entry point.
class V3InlineCFuncs final {
|
||||
public:
|
||||
// Run the pass on the netlist rooted at nodep.
static void inlineAll(AstNetlist* nodep) VL_MT_DISABLED;
|
||||
};
|
||||
|
||||
#endif // Guard
|
||||
|
|
@ -1528,6 +1528,8 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
|
|||
}).notForRerun();
|
||||
DECL_OPTION("-if-depth", Set, &m_ifDepth);
|
||||
DECL_OPTION("-ignc", OnOff, &m_ignc).undocumented();
|
||||
DECL_OPTION("-inline-cfuncs", Set, &m_inlineCFuncs);
|
||||
DECL_OPTION("-inline-cfuncs-product", Set, &m_inlineCFuncsProduct);
|
||||
DECL_OPTION("-inline-mult", Set, &m_inlineMult);
|
||||
DECL_OPTION("-instr-count-dpi", CbVal, [this, fl](int val) {
|
||||
m_instrCountDpi = val;
|
||||
|
|
|
|||
|
|
@ -319,6 +319,8 @@ private:
|
|||
int m_hierChild = 0; // main switch: --hierarchical-child
|
||||
int m_hierThreads = 0; // main switch: --hierarchical-threads
|
||||
int m_ifDepth = 0; // main switch: --if-depth
|
||||
int m_inlineCFuncs = 20; // main switch: --inline-cfuncs
|
||||
int m_inlineCFuncsProduct = 200; // main switch: --inline-cfuncs-product
|
||||
int m_inlineMult = 2000; // main switch: --inline-mult
|
||||
int m_instrCountDpi = 200; // main switch: --instr-count-dpi
|
||||
bool m_jsonEditNums = true; // main switch: --no-json-edit-nums
|
||||
|
|
@ -595,6 +597,8 @@ public:
|
|||
int expandLimit() const { return m_expandLimit; }
|
||||
int gateStmts() const { return m_gateStmts; }
|
||||
int ifDepth() const { return m_ifDepth; }
|
||||
int inlineCFuncs() const { return m_inlineCFuncs; }
|
||||
int inlineCFuncsProduct() const { return m_inlineCFuncsProduct; }
|
||||
int inlineMult() const { return m_inlineMult; }
|
||||
int instrCountDpi() const { return m_instrCountDpi; }
|
||||
int localizeMaxSize() const { return m_localizeMaxSize; }
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@
|
|||
#include "V3Graph.h"
|
||||
#include "V3HierBlock.h"
|
||||
#include "V3Inline.h"
|
||||
#include "V3InlineCFuncs.h"
|
||||
#include "V3Inst.h"
|
||||
#include "V3Interface.h"
|
||||
#include "V3LibMap.h"
|
||||
|
|
@ -565,6 +566,11 @@ static void process() {
|
|||
V3Reloop::reloopAll(v3Global.rootp());
|
||||
}
|
||||
|
||||
if (v3Global.opt.inlineCFuncs()) {
|
||||
// Inline small CFuncs to reduce function call overhead
|
||||
V3InlineCFuncs::inlineAll(v3Global.rootp());
|
||||
}
|
||||
|
||||
// Fix very deep expressions
|
||||
// Mark evaluation functions as member functions, if needed.
|
||||
V3Depth::depthAll(v3Global.rootp());
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('simulator_st')
|
||||
|
||||
test.compile(verilator_flags2=["--stats"])
|
||||
test.compile(verilator_flags2=["--stats", "--inline-cfuncs", "0"])
|
||||
|
||||
test.execute(expect_filename=test.golden_filename)
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,8 @@ test.top_filename = "t/t_inst_tree.v"
|
|||
|
||||
default_vltmt_threads = test.get_default_vltmt_threads
|
||||
test.compile(
|
||||
verilator_flags2=['--stats', test.t_dir + "/" + test.name + ".vlt"],
|
||||
# Disable --inline-cfuncs so functions exist to be combined
|
||||
verilator_flags2=['--stats', '--inline-cfuncs', '0', test.t_dir + "/" + test.name + ".vlt"],
|
||||
# Force 3 threads even if we have fewer cores
|
||||
threads=(default_vltmt_threads if test.vltmt else 1))
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,9 @@ test.top_filename = "t/t_enum_type_methods.v"
|
|||
|
||||
out_filename = test.obj_dir + "/V" + test.name + ".tree.json"
|
||||
|
||||
test.compile(verilator_flags2=['--no-std', '--debug-check', '--no-json-edit-nums', '--flatten'],
|
||||
test.compile(verilator_flags2=[
|
||||
'--no-std', '--debug-check', '--no-json-edit-nums', '--flatten', '--inline-cfuncs', '0'
|
||||
],
|
||||
verilator_make_gmake=False,
|
||||
make_top_shell=False,
|
||||
make_main=False)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2024 by Wilson Snyder. This program is free software; you
|
||||
# can redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')

# --output-split-cfuncs 1 produces many small functions that are candidates
# for inlining; --inline-cfuncs-product is passed as well to exercise that option.
inline_flags = [
    "--stats", "--binary", "--output-split-cfuncs", "1", "--inline-cfuncs-product", "200"
]
test.compile(verilator_flags2=inline_flags)

# The statistics file must report exactly this many inlined CFuncs
test.file_grep(test.stats, r'Optimizations, Inlined CFuncs\s+(\d+)', 39)

test.execute()

test.passes()
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||
// any use, without warranty, 2024 by Wilson Snyder.
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
// Test module designed to generate multiple small CFuncs that can be inlined
|
||||
// Uses generate to create multiple sub-module instances
|
||||
// Top-level test: chains CNT instances of `sub` through w[0..CNT], drives
// w[0] from w0, and checks w[CNT] when cyc reaches 10.
module t (/*AUTOARG*/
|
||||
// Inputs
|
||||
clk
|
||||
);
|
||||
input clk;
|
||||
|
||||
// Cycle counter, incremented on every posedge clk
integer cyc = 0;
|
||||
|
||||
// Number of chained sub instances
parameter CNT = 8;
|
||||
|
||||
wire [31:0] w [CNT:0];
|
||||
reg [31:0] w0;
|
||||
assign w[0] = w0;
|
||||
|
||||
// Generate multiple sub-modules - each creates CFuncs that can be inlined
|
||||
generate
|
||||
for (genvar g=0; g<CNT; g++) begin : gen_sub
|
||||
sub sub_inst (.clk(clk), .i(w[g]), .z(w[g+1]));
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// Test loop
|
||||
always @ (posedge clk) begin
|
||||
cyc <= cyc + 1;
|
||||
if (cyc==0) begin
|
||||
// Seed the head of the chain
w0 <= 32'h10;
|
||||
end
|
||||
else if (cyc==10) begin
|
||||
// Each sub adds 1, so final value is 0x10 + 8 = 0x18
|
||||
if (w[CNT] !== 32'h18) begin
|
||||
$write("%%Error: w[CNT]=%0x, expected 0x18\n", w[CNT]);
|
||||
$stop;
|
||||
end
|
||||
$write("*-* All Finished *-*\n");
|
||||
$finish;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
||||
// Small sub-module that generates inlineable CFuncs
|
||||
// One pipeline stage: on each posedge clk registers i[7:0] into local_a,
// the constant 1 into local_b, and i plus the zero-extended local_b into z.
module sub (input clk, input [31:0] i, output reg [31:0] z);
|
||||
reg [7:0] local_a;
|
||||
reg [7:0] local_b;
|
||||
|
||||
always @(posedge clk) begin
|
||||
local_a <= i[7:0];
|
||||
local_b <= 8'd1;
|
||||
// Non-blocking: reads the value local_b held before this clock edge
z <= i + {24'b0, local_b};
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2024 by Wilson Snyder. This program is free software; you
|
||||
# can redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
test.top_filename = "t/t_opt_inline_cfuncs.v"

# Build with CFunc inlining switched off (--inline-cfuncs 0)
no_inline_flags = ["--stats", "--binary", "--inline-cfuncs", "0"]
test.compile(verilator_flags2=no_inline_flags)

# The stats file must not report a non-zero inlined count
# (the statistic is absent altogether when the pass is skipped)
test.file_grep_not(test.stats, r'Optimizations, Inlined CFuncs\s+[1-9]')

test.execute()

test.passes()
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2024 by Wilson Snyder. This program is free software; you
|
||||
# can redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')

# Thresholds chosen so that every candidate is rejected, covering the
# "return false" path in isInlineable():
#   --inline-cfuncs 1          keeps the pass enabled (it is only skipped at 0)
#   --inline-cfuncs-product 0  rejects everything, as node_count * call_count > 0
reject_all_flags = [
    "--stats", "--binary", "--inline-cfuncs", "1", "--inline-cfuncs-product", "0"
]
test.compile(verilator_flags2=reject_all_flags)

# The pass ran, so the statistic exists, but nothing was inlined
test.file_grep(test.stats, r'Optimizations, Inlined CFuncs\s+(\d+)', 0)

test.execute()

test.passes()
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||
// any use, without warranty, 2024 by Wilson Snyder.
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
// Test module to exercise threshold checking in CFunc inlining
|
||||
// With low thresholds, these functions should NOT be inlined
|
||||
// Self-checking test with a single initial block of dependent assignments;
// stops if the final value h is not 32.
module t;
|
||||
reg [31:0] a, b, c, d, e, f, g, h;
|
||||
|
||||
initial begin
|
||||
// Multiple operations to create larger CFuncs
|
||||
a = 32'd1;
|
||||
b = 32'd2;
|
||||
c = a + b;
|
||||
d = c * 2;
|
||||
e = d - 1;
|
||||
f = e + a;
|
||||
g = f * b;
|
||||
// With a=1 b=2: c=3 d=6 e=5 f=6 g=12, so h = 12+3+6+5+6 = 32
h = g + c + d + e + f;
|
||||
|
||||
if (h != 32'd32) $stop;
|
||||
$write("*-* All Finished *-*\n");
|
||||
$finish;
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -12,7 +12,8 @@ import vltest_bootstrap
|
|||
test.scenarios('vlt_all')
|
||||
test.top_filename = "t/t_timing_sched.v"
|
||||
|
||||
test.compile(verilator_flags2=["--exe --main --timing"])
|
||||
test.compile(
|
||||
verilator_flags2=["--binary", "--timing", "--inline-cfuncs", "0", "-CFLAGS", "-DVL_DEBUG"])
|
||||
|
||||
test.execute(all_run_flags=["+verilator+debug"])
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,8 @@ import vltest_bootstrap
|
|||
test.scenarios('vlt_all')
|
||||
test.top_filename = "t/t_timing_class.v"
|
||||
|
||||
test.compile(verilator_flags2=["--exe --main --timing"])
|
||||
# Disable --inline-cfuncs so debug traces show all function entries
|
||||
test.compile(verilator_flags2=["--exe --main --timing --inline-cfuncs 0"])
|
||||
|
||||
test.execute(all_run_flags=["+verilator+debug"])
|
||||
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ internalsDump:
|
|||
-V{t#,#}+ Vt_verilated_debug___024root___eval_phase__nba
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___trigger_anySet__act
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___eval_nba
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___nba_sequent__TOP__0
|
||||
*-* All Finished *-*
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___trigger_clear__act
|
||||
-V{t#,#}+ Vt_verilated_debug___024root___eval_phase__act
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ test.top_filename = "t/t_enum_type_methods.v"
|
|||
|
||||
out_filename = test.obj_dir + "/V" + test.name + ".xml"
|
||||
|
||||
test.compile(verilator_flags2=['--no-std', '--debug-check', '--flatten'],
|
||||
test.compile(verilator_flags2=['--no-std', '--debug-check', '--flatten', '--inline-cfuncs', '0'],
|
||||
verilator_make_gmake=False,
|
||||
make_top_shell=False,
|
||||
make_main=False)
|
||||
|
|
|
|||
Loading…
Reference in New Issue