diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index 4e302f06d..c35cd3e57 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -771,10 +771,24 @@ Summary: .. option:: -fno-inline + Rarely needed. Disable module inlining. + +.. option:: -fno-inline-cfuncs + + Rarely needed. Disable inlining of small generated C++ functions into their + callers. + + This optimization is automatically disabled when :vlopt:`--prof-cfuncs` is + used. + .. option:: -fno-inline-funcs + Rarely needed. Disable inlining of SystemVerilog functions and tasks. + .. option:: -fno-inline-funcs-eager + Rarely needed. Disable eager inlining of SystemVerilog functions and tasks. + .. option:: -fno-life .. option:: -fno-life-post @@ -976,27 +990,21 @@ Summary: .. option:: --inline-cfuncs - Inline small C++ function (internal AstCFunc) calls directly into their - callers when the function has at most nodes. This reduces - function call overhead when :vlopt:`--output-split-cfuncs` places - functions in separate compilation units that the C++ compiler cannot - inline. + Tune the inlining of small generated C++ functions. Functions no bigger than + the given number of nodes will be inlined if possible. The default is 20. - Set to 0 to disable this optimization. The default is 20. - - This optimization is automatically disabled when :vlopt:`--prof-cfuncs` - or :vlopt:`--trace` is used. + See also :vlopt:`--inline-cfuncs-product` and :vlopt:`-fno-inline-cfuncs`. .. option:: --inline-cfuncs-product - Tune the inlining of C++ function (internal AstCFunc) calls for larger - functions. When a function is too large to always inline (exceeds - :vlopt:`--inline-cfuncs` threshold), it may still be inlined if the - function size multiplied by the number of call sites is at most . + Tune the inlining of small generated C++ functions. If a function's node + count multiplied by the number of calls is not bigger than the given value, the + function will be inlined if possible. 
- This allows functions that are called only once or twice to be inlined - even if they exceed the small function threshold. Set to 0 to only inline - functions below the :vlopt:`--inline-cfuncs` threshold. The default is 200. + This allows functions that are called only once or twice to be inlined even + if they exceed the small function threshold. The default is 200. + + See also :vlopt:`--inline-cfuncs` and :vlopt:`-fno-inline-cfuncs`. .. option:: --inline-mult diff --git a/src/V3InlineCFuncs.cpp b/src/V3InlineCFuncs.cpp index 99efbc201..cdfc24062 100644 --- a/src/V3InlineCFuncs.cpp +++ b/src/V3InlineCFuncs.cpp @@ -15,14 +15,13 @@ //************************************************************************* // V3InlineCFuncs's Transformations: // -// For each CCall to a small CFunc: -// - Check if function is eligible for inlining (small enough, same scope) -// - Clone local variables with unique names to avoid collisions -// - Replace CCall with cloned function body statements +// Build a bipartite call graph containing function and call site vertices, +// then iterate functions leaf to root, inlining if size heuristics are met. +// Finally, remove unused functions. 
// // Two tunables control inlining: -// --inline-cfuncs : Always inline if size <= n (default 20) -// --inline-cfuncs-product : Also inline if size * call_count <= n (default 200) +// --inline-cfuncs : Inline if size <= n +// --inline-cfuncs-product : Also inline if size * call_count <= n // //************************************************************************* @@ -31,231 +30,465 @@ #include "V3InlineCFuncs.h" #include "V3AstUserAllocator.h" +#include "V3ExecGraph.h" +#include "V3Graph.h" #include "V3Stats.h" -#include #include VL_DEFINE_DEBUG_FUNCTIONS; //###################################################################### -// Helper visitor to check if a CFunc contains C statements -// Uses clearOptimizable pattern for debugging +// Bipartite call graph containing function and call site vertices -class CFuncInlineCheckVisitor final : public VNVisitorConst { - // STATE - bool m_optimizable = true; // True if function can be inlined - string m_whyNot; // Reason why not optimizable - AstNode* m_whyNotNodep = nullptr; // Node that caused non-optimizable +class InlineCFuncsFunctionVertex; +class InlineCFuncsCallSiteVertex; - // METHODS - void clearOptimizable(AstNode* nodep, const string& why) { - if (m_optimizable) { - m_optimizable = false; - m_whyNot = why; - m_whyNotNodep = nodep; - UINFO(9, "CFunc not inlineable: " << why); - if (nodep) UINFO(9, ": " << nodep); - UINFO(9, ""); - } - } +class InlineCFuncsCallGraph final : public V3Graph { +public: + InlineCFuncsCallGraph() + : V3Graph{} {} + ~InlineCFuncsCallGraph() override = default; - // VISITORS - void visit(AstCStmt* nodep) override { clearOptimizable(nodep, "contains AstCStmt"); } - void visit(AstCExpr* nodep) override { clearOptimizable(nodep, "contains AstCExpr"); } - void visit(AstCStmtUser* nodep) override { clearOptimizable(nodep, "contains AstCStmtUser"); } - void visit(AstCExprUser* nodep) override { clearOptimizable(nodep, "contains AstCExprUser"); } - void visit(AstNode* nodep) override { 
iterateChildrenConst(nodep); } + void addEdge(InlineCFuncsFunctionVertex& from, InlineCFuncsCallSiteVertex& top); + void addEdge(InlineCFuncsCallSiteVertex& from, InlineCFuncsFunctionVertex& top); +}; + +class EitherVertex VL_NOT_FINAL : public V3GraphVertex { + VL_RTTI_IMPL(EitherVertex, V3GraphVertex) +protected: + explicit EitherVertex(InlineCFuncsCallGraph& graph) + : V3GraphVertex{&graph} {} +}; + +class InlineCFuncsFunctionVertex final : public EitherVertex { + VL_RTTI_IMPL(InlineCFuncsFunctionVertex, EitherVertex) + AstCFunc* const m_cfuncp; // The function + const char* m_noInlineWyp = nullptr; // First reason the function should not be inlined + const char* m_keepWyp = nullptr; // Why the function should not be removed + size_t m_size = 0; // The size of the function public: - // CONSTRUCTORS - explicit CFuncInlineCheckVisitor(AstCFunc* cfuncp) { iterateConst(cfuncp); } + InlineCFuncsFunctionVertex(InlineCFuncsCallGraph& graph, AstCFunc* cfuncp) + : EitherVertex{graph} + , m_cfuncp{cfuncp} {} + ~InlineCFuncsFunctionVertex() override = default; // ACCESSORS - bool optimizable() const { return m_optimizable; } - string whyNot() const { return m_whyNot; } - AstNode* whyNotNodep() const { return m_whyNotNodep; } + AstCFunc* cfuncp() const { return m_cfuncp; } + size_t size() const { return m_size; } + void sizeInc(size_t value = 1) { m_size += value; } + bool noInline() const { return m_noInlineWyp; } + void setNoInline(const char* whyp) { + if (!m_noInlineWyp) m_noInlineWyp = whyp; + } + bool keep() const { return m_keepWyp; } + void setKeep(const char* whyp) { + if (!m_keepWyp) m_keepWyp = whyp; + } + std::string dotColor() const override { + return m_noInlineWyp ? "red" : m_keepWyp ? 
"orange" : "black"; + } + + // debug + FileLine* fileline() const override { return m_cfuncp->fileline(); } + std::string dotShape() const override { return "box"; } + std::string name() const override VL_MT_STABLE { + std::string str = cvtToHex(m_cfuncp); + str += "\n" + m_cfuncp->name(); + str += "\nsize: " + std::to_string(m_size); + if (m_noInlineWyp) str += "\nNoInline: "s + m_noInlineWyp; + if (m_keepWyp) str += "\nKeep: "s + m_keepWyp; + return str; + } }; +class InlineCFuncsCallSiteVertex final : public EitherVertex { + VL_RTTI_IMPL(InlineCFuncsCallSiteVertex, EitherVertex) + AstCCall* const m_callp; // The call site + const char* m_noInlineWyp = nullptr; // First reason the call site should not be inlined + +public: + InlineCFuncsCallSiteVertex(InlineCFuncsCallGraph& graph, AstCCall* callp) + : EitherVertex{graph} + , m_callp{callp} {} + ~InlineCFuncsCallSiteVertex() override = default; + + // ACCESSORS + AstCCall* callp() const { return m_callp; } + bool noInline() const { return m_noInlineWyp; } + void setNoInline(const char* whyp) { + if (!m_noInlineWyp) m_noInlineWyp = whyp; + } + + // debug + FileLine* fileline() const override { return m_callp->fileline(); } + std::string dotColor() const override { return m_noInlineWyp ? 
"red" : "black"; } + std::string dotShape() const override { return "ellipse"; } + std::string name() const override VL_MT_STABLE { + std::string str = cvtToHex(m_callp); + if (m_noInlineWyp) str += "\nNoInline: "s + m_noInlineWyp; + return str; + } +}; + +void InlineCFuncsCallGraph::addEdge(InlineCFuncsFunctionVertex& caller, + InlineCFuncsCallSiteVertex& callsite) { + UASSERT_OBJ(callsite.inEmpty(), &callsite, "Call site should have at most one incoming edge"); + new V3GraphEdge{this, &caller, &callsite, 1, true}; // Can cut caller -> callsite +} +void InlineCFuncsCallGraph::addEdge(InlineCFuncsCallSiteVertex& callsite, + InlineCFuncsFunctionVertex& callee) { + UASSERT_OBJ(callsite.outEmpty(), &callsite, "Call site should have at most one outgoing edge"); + new V3GraphEdge{this, &callsite, &callee, 1, false}; +} + //###################################################################### class InlineCFuncsVisitor final : public VNVisitor { // NODE STATE - // AstCFunc::user1() -> vector of AstCCall* pointing to this function - // AstCFunc::user2() -> bool: true if checked for C statements - // AstCFunc::user3() -> bool: true if contains C statements (not inlineable) + // AstCFunc::user1p() -> InlineCFuncsFunctionVertex*, the function vertex + // AstCCall::user1p() -> InlineCFuncsCallSiteVertex*, the call site vertex + // AstVar::user2p() -> AstVar*, the cloned inlined local variable const VNUser1InUse m_user1InUse; - const VNUser2InUse m_user2InUse; - const VNUser3InUse m_user3InUse; - AstUser1Allocator> m_callSites; // STATE - VDouble0 m_statInlined; // Statistic tracking - const int m_threshold1; // Size threshold: always inline if size <= this - const int m_threshold2; // Product threshold: inline if size * calls <= this - AstCFunc* m_callerFuncp = nullptr; // Current caller function - // Tuples of (StmtExpr to replace, CFunc to inline from, caller func for vars) - std::vector> m_toInline; + InlineCFuncsCallGraph m_graph; // The call graph + VDouble0 
m_statCallsInlined; // Number of calls inlined + VDouble0 m_statFuncsInlined; // Number of functions inlined at least once + VDouble0 m_statFuncsRemoved; // Number of fully-inlined functions removed + // Size threshold: always inline if size <= this + const size_t m_sizeThreshold = v3Global.opt.inlineCFuncs(); + // Product threshold: inline if size * calls <= this + const size_t m_prodThreshold = v3Global.opt.inlineCFuncsProduct(); + // Maximum size of caller to consider inlining into + const size_t m_maxSizeCFunc = []() -> size_t { + int maxCFunc = v3Global.opt.outputSplitCFuncs(); + int maxFile = v3Global.opt.outputSplit(); + if (maxCFunc <= 0) maxCFunc = std::numeric_limits::max(); + if (maxFile <= 0) maxFile = std::numeric_limits::max(); + return std::min(maxCFunc, maxFile); + }(); + const size_t m_maxSizeTrace = []() -> size_t { + int maxTrace = v3Global.opt.outputSplitCTrace(); + int maxFile = v3Global.opt.outputSplit(); + if (maxTrace <= 0) maxTrace = std::numeric_limits::max(); + if (maxFile <= 0) maxFile = std::numeric_limits::max(); + return std::min(maxTrace, maxFile); + }(); + InlineCFuncsFunctionVertex* m_cfuncVtxp = nullptr; // Vertex of currently iterated function + bool m_inExecGraph = false; // True while inside an AstExecGraph subtree // METHODS - - // Check if a function contains any $c() calls (user or internal) - // Results are cached in user2/user3 for efficiency - bool containsCStatements(AstCFunc* cfuncp) { - if (!cfuncp->user2()) { - // Not yet checked - run the check visitor - cfuncp->user2(true); // Mark as checked - const CFuncInlineCheckVisitor checker{cfuncp}; - cfuncp->user3(!checker.optimizable()); // Store result (true = contains C stmts) - } - return cfuncp->user3(); + InlineCFuncsFunctionVertex* getInlineCFuncsFunctionVertexp(AstCFunc* cfuncp) { + if (!cfuncp->user1p()) cfuncp->user1p(new InlineCFuncsFunctionVertex{m_graph, cfuncp}); + return cfuncp->user1u().to(); } - // Check if a function is eligible for inlining into caller - 
bool isInlineable(const AstCFunc* callerp, AstCFunc* cfuncp) { - // Must be in the same scope (same class) to access the same members - if (callerp->scopep() != cfuncp->scopep()) return false; + InlineCFuncsCallSiteVertex* getInlineCFuncsCallSiteVertexp(AstCCall* callp) { + if (!callp->user1p()) callp->user1p(new InlineCFuncsCallSiteVertex{m_graph, callp}); + return callp->user1u().to(); + } - // Check for $c() calls that might use 'this' - if (containsCStatements(cfuncp)) return false; + AstCLocalScope* inlineCall(AstCFunc* const callerp, // + AstCCall* const callp, // + AstCFunc* const calleep, // + const size_t seqNum) { + UINFO(6, "Inlining CFunc " << calleep->name() << " into " << callerp->name() + << " at call site " << callp); - // Check it's a void function (not a coroutine) - if (cfuncp->rtnTypeVoid() != "void") return false; + AstNodeStmt* const callSitep = VN_AS(callp->backp(), StmtExpr); + ++m_statCallsInlined; - // Don't inline functions marked dontCombine (e.g. trace, entryPoint) - if (cfuncp->dontCombine()) return false; - - // Don't inline entry point functions - if (cfuncp->entryPoint()) return false; - - // Must have statements to inline - if (!cfuncp->stmtsp()) return false; - - // Check size thresholds - const size_t funcSize = cfuncp->nodeCount(); - - // Always inline if small enough - if (funcSize <= static_cast(m_threshold1)) return true; - - // Also inline if size * call_count is reasonable - const size_t callCount = m_callSites(cfuncp).size(); - if (callCount > 0 && funcSize * callCount <= static_cast(m_threshold2)) { - return true; + // Callee might be empty, just delete the call + if (!calleep->stmtsp()) { + VL_DO_DANGLING(pushDeletep(callSitep->unlinkFrBack()), callSitep); + return nullptr; } - return false; + // Replace call site with a local scope + FileLine* const flp = callSitep->fileline(); + AstCLocalScope* const lscopep = new AstCLocalScope{flp, nullptr}; + callSitep->replaceWith(lscopep); + VL_DO_DANGLING(pushDeletep(callSitep), 
callSitep); + lscopep->addStmtsp(new AstComment{flp, "Inlined CFunc: " + calleep->name()}); + + // Although it's in a local scope, we still make names of cloned locals unique + const std::string varPrefix + = "__Vinline_" + std::to_string(seqNum) + "_" + calleep->name() + "_"; + + // AstVar::user2p() -> AstVar*, the cloned inlined local variable + const VNUser2InUse user2InUse; + + // Clone local variables, add them to the local scope + for (AstVar* varp = calleep->varsp(); varp; varp = VN_AS(varp->nextp(), Var)) { + AstVar* const newVarp = varp->cloneTree(false); + newVarp->name(varPrefix + varp->name()); + lscopep->addStmtsp(newVarp); + varp->user2p(newVarp); + } + + // Clone the function body + AstNode* const bodyp = calleep->stmtsp()->cloneTree(true); + lscopep->addStmtsp(bodyp); + + // Retarget local variable references to the cloned locals + // Rename locals defined in the body, TODO: there should be none after #6280 + // Reset vertex pointers on calls + bodyp->foreachAndNext([&](AstNode* nodep) { + if (AstVarRef* const refp = VN_CAST(nodep, VarRef)) { + if (AstVar* const varp = VN_AS(refp->varp()->user2p(), Var)) refp->varp(varp); + } else if (AstVar* const varp = VN_CAST(nodep, Var)) { + varp->name(varPrefix + varp->name()); + } else if (AstCCall* const callp = VN_CAST(nodep, CCall)) { + callp->user1p(nullptr); + } + }); + + // Return the local scope + return lscopep; + } + + void doInlining() { + // Need to gather vertices as we are changing the graph + std::vector m_fVtxps; + for (V3GraphVertex& vtx : m_graph.vertices()) { + if (InlineCFuncsFunctionVertex* const fVtxp = vtx.cast()) { + m_fVtxps.emplace_back(fVtxp); + } + } + + // Iterate functions leaf to root + for (InlineCFuncsFunctionVertex* const calleeVtxp : vlstd::reverse_view(m_fVtxps)) { + // Should we inline this function? 
+ if (calleeVtxp->noInline()) continue; // Told not to + + // Check size heuristics + const bool doIt = [&]() { + // Inline if small enough + if (calleeVtxp->size() <= m_sizeThreshold) return true; + // Inline if not too much bloat + const size_t nCalls = calleeVtxp->inEdges().size(); + if (nCalls * calleeVtxp->size() <= m_prodThreshold) return true; + // Otherwise don't inline + return false; + }(); + if (!doIt) continue; + + // Ok, attempt to inline call sites + size_t nInlined = 0; + for (const V3GraphEdge* const edgep : calleeVtxp->inEdges().unlinkable()) { + InlineCFuncsCallSiteVertex* const callVtxp + = edgep->fromp()->as(); + + AstCFunc* const calleep = calleeVtxp->cfuncp(); + AstCCall* const callp = callVtxp->callp(); + UINFO(6, "Consider inlining " << calleep->name() << " at call site " << callp); + // Should we inline this call site? + if (callVtxp->noInline()) continue; // Told not to + if (callVtxp->inEmpty()) continue; // Don't know where it's called from + + // Pick up the caller + UASSERT_OBJ(callVtxp->inSize1(), callVtxp->callp(), + "Expected exactly one input edge for call site"); + InlineCFuncsFunctionVertex* const callerVtxp + = callVtxp->inEdges().frontp()->fromp()->as(); + AstCFunc* const callerp = callerVtxp->cfuncp(); + + // Don't make a function bigger than the limit + const size_t limit = callerp->isTrace() ? m_maxSizeTrace : m_maxSizeCFunc; + if (callerVtxp->size() + calleeVtxp->size() > limit) continue; + + // Can't do it if it's in a different scope, self pointers differ + if (callerp->scopep() != calleep->scopep()) continue; + + // Inline it + if (!nInlined) ++m_statFuncsInlined; + AstNode* const inlinedp = inlineCall(callerp, callp, calleep, nInlined++); + + // Need to adjust the graph: + // 1. Delete inlined call site + VL_DO_DANGLING(callVtxp->unlinkDelete(&m_graph), callVtxp); + // 2. 
Add new inlined call sites - also increments size of caller + VL_RESTORER(m_cfuncVtxp); + m_cfuncVtxp = callerVtxp; + if (inlinedp) iterateChildrenConst(inlinedp); + } + } + } + + void removeUnusedFuncs() { + // Iterate root to leaves + for (V3GraphVertex* const vtxp : m_graph.vertices().unlinkable()) { + InlineCFuncsFunctionVertex* const fVtxp = vtxp->cast(); + if (!fVtxp) continue; + // Keep if still called + if (!fVtxp->inEmpty()) continue; + // Keep for other reasons + if (fVtxp->keep()) continue; + + AstCFunc* const funcp = fVtxp->cfuncp(); + UINFO(6, "Removing unused CFunc " << funcp); + ++m_statFuncsRemoved; + + // Unlink all call sites + for (const V3GraphEdge* const edgep : vtxp->outEdges().unlinkable()) { + edgep->top()->unlinkEdges(&m_graph); + } + // Delete function vertex + vtxp->unlinkDelete(&m_graph); + // Delete the function + VL_DO_DANGLING(pushDeletep(funcp->unlinkFrBack()), funcp); + } + + // Delete inlined/deleted call site vertices (for debugging only) + for (V3GraphVertex* const vtxp : m_graph.vertices().unlinkable()) { + InlineCFuncsCallSiteVertex* const cVtxp = vtxp->cast(); + if (!cVtxp) continue; + if (!cVtxp->inEmpty()) continue; + if (!cVtxp->outEmpty()) continue; + vtxp->unlinkDelete(&m_graph); + } } // VISITORS - void visit(AstCCall* nodep) override { - iterateChildren(nodep); - - AstCFunc* const cfuncp = nodep->funcp(); - if (!cfuncp) return; - - // Track call site for call counting - m_callSites(cfuncp).emplace_back(nodep); - } - void visit(AstCFunc* nodep) override { - VL_RESTORER(m_callerFuncp); - m_callerFuncp = nodep; - iterateChildren(nodep); + // Create the function vertex + InlineCFuncsFunctionVertex* const vtxp = getInlineCFuncsFunctionVertexp(nodep); + + // Check if the function itself is not inlineable + if (nodep->rtnTypeVoid() != "void") vtxp->setNoInline("Not void"); + if (nodep->dpiImportPrototype()) vtxp->setNoInline("DPI import prototype"); + if (nodep->recursive()) vtxp->setNoInline("Recursive"); + if 
(nodep->argsp()) vtxp->setNoInline("Has arguments"); + if (nodep->isVirtual()) vtxp->setNoInline("Virtual method"); + + // Check if the function should not be removed + if (nodep->entryPoint()) vtxp->setKeep("Entry point"); + if (nodep->dpiImportPrototype()) vtxp->setKeep("DPI import prototype"); + if (nodep->dpiExportDispatcher()) vtxp->setKeep("DPI export implementation"); + if (nodep->isVirtual()) vtxp->setKeep("Virtual method"); + + // Iterate children + VL_RESTORER(m_cfuncVtxp); + m_cfuncVtxp = vtxp; + iterateChildrenConst(nodep); } - void visit(AstNodeModule* nodep) override { - // Process per module for better cache behavior - m_toInline.clear(); + // Inlineable calls + void visit(AstCCall* nodep) override { + if (m_cfuncVtxp) m_cfuncVtxp->sizeInc(); + AstCFunc* const calleep = nodep->funcp(); - // Phase 1: Collect call sites within this module - iterateChildren(nodep); + // Create the call site vertex + InlineCFuncsCallSiteVertex* const vtxp = getInlineCFuncsCallSiteVertexp(nodep); - // Phase 2: Determine which calls to inline - collectInlineCandidates(nodep); + // Check if the call site is not inlineable + if (!VN_IS(nodep->backp(), StmtExpr)) vtxp->setNoInline("Not in statement position"); + if (m_inExecGraph) vtxp->setNoInline("In ExecGraph"); + if (calleep->isVirtual()) vtxp->setNoInline("Virtual method"); - // Phase 3: Perform inlining for this module - doInlining(); + // Add caller/callee edges + if (m_cfuncVtxp) m_graph.addEdge(*m_cfuncVtxp, *vtxp); + m_graph.addEdge(*vtxp, *getInlineCFuncsFunctionVertexp(calleep)); + + // Iterate children + iterateChildrenConst(nodep); } - void visit(AstNode* nodep) override { iterateChildren(nodep); } + // Nodes that reference functions/calls + void visit(AstNetlist* nodep) override { + UASSERT_OBJ(!nodep->evalp(), nodep, "evalp should be null at this stage"); + UASSERT_OBJ(!nodep->evalNbap(), nodep, "evalNbap should be null at this stage"); + iterateChildrenConst(nodep); + } - // Collect calls that should be 
inlined within this module - void collectInlineCandidates(AstNodeModule* modp) { - for (AstNode* stmtp = modp->stmtsp(); stmtp; stmtp = stmtp->nextp()) { - AstCFunc* const callerp = VN_CAST(stmtp, CFunc); - if (!callerp) continue; + void visit(AstNodeCCall* nodep) override { + if (m_cfuncVtxp) m_cfuncVtxp->sizeInc(); + getInlineCFuncsFunctionVertexp(nodep->funcp())->setKeep("Called elsewhere"); + iterateChildrenConst(nodep); + } - callerp->foreach([&](AstCCall* callp) { - AstCFunc* const cfuncp = callp->funcp(); - if (!cfuncp) return; - if (!isInlineable(callerp, cfuncp)) return; + void visit(AstAddrOfCFunc* nodep) override { + if (m_cfuncVtxp) m_cfuncVtxp->sizeInc(); + getInlineCFuncsFunctionVertexp(nodep->funcp())->setKeep("Referenced by AddressOfCFunc"); + iterateChildrenConst(nodep); + } - // Walk up to find the containing StmtExpr - AstNode* stmtNodep = callp; - while (stmtNodep && !VN_IS(stmtNodep, StmtExpr) && !VN_IS(stmtNodep, CFunc)) { - stmtNodep = stmtNodep->backp(); - } + // Nodes preventing inlining + void visit(AstTraceDecl* nodep) override { + // Referenced by TraceInc + if (m_cfuncVtxp) m_cfuncVtxp->setNoInline("Contains TraceDecl"); - AstStmtExpr* const stmtExprp = VN_CAST(stmtNodep, StmtExpr); - if (!stmtExprp) return; - - m_toInline.emplace_back(stmtExprp, cfuncp, callerp); - }); + if (AstCCall* const callp = nodep->dtypeCallp()) { + getInlineCFuncsCallSiteVertexp(callp)->setNoInline("Referenced by TraceDecl"); } + iterateChildrenConst(nodep); + } + void visit(AstExecGraph* nodep) override { + // AstExecGraph is not cloneable, so can't inline the containing function + if (m_cfuncVtxp) m_cfuncVtxp->setNoInline("Contains ExecGraph"); + // Also mark functions referenced in the dependency graph + for (const V3GraphVertex& vtx : nodep->depGraphp()->vertices()) { + getInlineCFuncsFunctionVertexp(vtx.as()->funcp()) + ->setKeep("MTask function"); + } + VL_RESTORER(m_inExecGraph); + m_inExecGraph = true; + iterateChildrenConst(nodep); + } + void 
visit(AstCStmt* nodep) override { + // Can reference anything in text + if (m_cfuncVtxp) m_cfuncVtxp->setNoInline("Contains CStmt"); + iterateChildrenConst(nodep); + } + void visit(AstCExpr* nodep) override { + // Can reference anything in text + if (m_cfuncVtxp) m_cfuncVtxp->setNoInline("Contains CExpr"); + iterateChildrenConst(nodep); + } + void visit(AstCStmtUser* nodep) override { + // Can reference anything in text + if (m_cfuncVtxp) m_cfuncVtxp->setNoInline("Contains CStmtUser"); + iterateChildrenConst(nodep); + } + void visit(AstCExprUser* nodep) override { + // Can reference anything in text + if (m_cfuncVtxp) m_cfuncVtxp->setNoInline("Contains CExprUser"); + iterateChildrenConst(nodep); + } + void visit(AstCReturn* nodep) override { + if (m_cfuncVtxp) m_cfuncVtxp->setNoInline("Contains CReturn"); + iterateChildrenConst(nodep); } - // Perform the actual inlining after iteration is complete - void doInlining() { - for (const auto& tuple : m_toInline) { - AstStmtExpr* const stmtExprp = std::get<0>(tuple); - AstCFunc* const cfuncp = std::get<1>(tuple); - AstCFunc* const callerp = std::get<2>(tuple); - - UINFO(6, "Inlining CFunc " << cfuncp->name() << " into " << callerp->name()); - ++m_statInlined; - - // Clone local variables with unique names to avoid collisions - std::map varMap; - for (AstVar* varp = cfuncp->varsp(); varp; varp = VN_AS(varp->nextp(), Var)) { - const string newName = "__Vinline_" + cfuncp->name() + "_" + varp->name(); - AstVar* const newVarp = varp->cloneTree(false); - newVarp->name(newName); - callerp->addVarsp(newVarp); - varMap[varp] = newVarp; - } - - // Clone the function body - AstNode* const bodyp = cfuncp->stmtsp()->cloneTree(true); - - // Retarget variable references to the cloned variables - // Must iterate all sibling statements, not just the first - if (!varMap.empty()) { - for (AstNode* stmtp = bodyp; stmtp; stmtp = stmtp->nextp()) { - stmtp->foreach([&](AstVarRef* refp) { - auto it = varMap.find(refp->varp()); - if (it != 
varMap.end()) refp->varp(it->second); - }); - } - } - - // Replace the statement with the inlined body - stmtExprp->addNextHere(bodyp); - VL_DO_DANGLING(stmtExprp->unlinkFrBack()->deleteTree(), stmtExprp); - } + // Base node + void visit(AstNode* nodep) override { + if (m_cfuncVtxp) m_cfuncVtxp->sizeInc(); + iterateChildrenConst(nodep); } public: // CONSTRUCTORS - explicit InlineCFuncsVisitor(const AstNetlist* nodep) - : m_threshold1{v3Global.opt.inlineCFuncs()} - , m_threshold2{v3Global.opt.inlineCFuncsProduct()} { - // Don't inline when profiling or tracing - if (v3Global.opt.profCFuncs() || v3Global.opt.trace()) return; - // Process modules one at a time for better cache behavior - iterateAndNextNull(nodep->modulesp()); + explicit InlineCFuncsVisitor(AstNetlist* nodep) { + // Phase 1: Build call graph + iterateConst(nodep); + // Make acyclic in case there is recursion + m_graph.acyclic(V3GraphEdge::followAlwaysTrue); + // Order vertices (any topological order is fine) + m_graph.order(); + if (dumpGraphLevel() >= 6) m_graph.dumpDotFilePrefixed("inlinecfuncs-graph"); + // Phase 2: Inline calls + doInlining(); + if (dumpGraphLevel() >= 6) m_graph.dumpDotFilePrefixed("inlinecfuncs-inlined"); + // Phase 3: Remove unused functions + removeUnusedFuncs(); + if (dumpGraphLevel() >= 6) m_graph.dumpDotFilePrefixed("inlinecfuncs-kept"); } ~InlineCFuncsVisitor() override { - V3Stats::addStat("Optimizations, Inlined CFuncs", m_statInlined); + V3Stats::addStat("Optimizations, Inline CFuncs, calls inlined", m_statCallsInlined); + V3Stats::addStat("Optimizations, Inline CFuncs, functions inlined", m_statFuncsInlined); + V3Stats::addStat("Optimizations, Inline CFuncs, functions removed", m_statFuncsRemoved); } }; @@ -264,6 +497,8 @@ public: void V3InlineCFuncs::inlineAll(AstNetlist* nodep) { UINFO(2, __FUNCTION__ << ":"); + // Don't inline when profiling per-function (it would lose granularity) + if (v3Global.opt.profCFuncs()) return; { InlineCFuncsVisitor{nodep}; } // Destruct 
before checking V3Global::dumpCheckGlobalTree("inlinecfuncs", 0, dumpTreeEitherLevel() >= 6); } diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 22f96ca7e..a23a408b2 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1497,6 +1497,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, m_fIcoChangeDetect.setTrueOrFalse(flag); }); DECL_OPTION("-finline", FOnOff, &m_fInline); + DECL_OPTION("-finline-cfuncs", FOnOff, &m_fInlineCFuncs); DECL_OPTION("-finline-funcs", FOnOff, &m_fInlineFuncs); DECL_OPTION("-finline-funcs-eager", FOnOff, &m_fInlineFuncsEager); DECL_OPTION("-flife", FOnOff, &m_fLife); @@ -2371,6 +2372,7 @@ void V3Options::optimize(int level) { m_fExpand = flag; m_fGate = flag; m_fInline = flag; + m_fInlineCFuncs = flag; m_fLife = flag; m_fLifePost = flag; m_fLocalize = flag; diff --git a/src/V3Options.h b/src/V3Options.h index 71a32c31b..6894f8312 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -415,6 +415,7 @@ private: // main switch: -fno-ico-change-detect: input change detection optimization VOptionBool m_fIcoChangeDetect{VOptionBool::OPT_DEFAULT_TRUE}; bool m_fInline; // main switch: -fno-inline: module inlining + bool m_fInlineCFuncs; // main switch: -fno-inline-cfuncs: inline small C functions bool m_fInlineFuncs = true; // main switch: -fno-inline-funcs: function inlining bool m_fInlineFuncsEager = true; // main switch: -fno-inline-funcs-eager: don't inline eagerly bool m_fLife; // main switch: -fno-life: variable lifetime @@ -753,6 +754,7 @@ public: bool fGate() const { return m_fGate; } VOptionBool fIcoChangeDetect() const { return m_fIcoChangeDetect; } bool fInline() const { return m_fInline; } + bool fInlineCFuncs() const { return m_fInlineCFuncs; } bool fInlineFuncs() const { return m_fInlineFuncs; } bool fInlineFuncsEager() const { return m_fInlineFuncsEager; } bool fLife() const { return m_fLife; } diff --git a/src/V3Trace.cpp b/src/V3Trace.cpp index 2a7bb37e3..8cee034fa 100644 --- 
a/src/V3Trace.cpp +++ b/src/V3Trace.cpp @@ -1094,6 +1094,7 @@ class TraceVisitor final : public VNVisitor { // Create the trace registration function m_regFuncp = new AstCFunc{m_topScopep->fileline(), "trace_register", m_topScopep}; m_regFuncp->argTypes(v3Global.opt.traceClassBase() + "* tracep"); + m_regFuncp->entryPoint(true); m_regFuncp->isTrace(true); m_regFuncp->slow(true); m_regFuncp->isStatic(false); diff --git a/src/V3TraceDecl.cpp b/src/V3TraceDecl.cpp index 820a314c7..1a1ad6b89 100644 --- a/src/V3TraceDecl.cpp +++ b/src/V3TraceDecl.cpp @@ -980,6 +980,7 @@ public: AstCFunc* rootFuncp = nullptr; if (!v3Global.opt.libCreate().empty()) { rootFuncp = newCFunc(flp, "trace_init_root"); + rootFuncp->entryPoint(true); for (size_t i = 0; i < m_topScopeRootFuncCount; ++i) { AstCCall* const callp = new AstCCall{flp, topScopeFuncps.at(i)}; callp->dtypeSetVoid(); @@ -1017,6 +1018,7 @@ public: // Set name of top level function AstCFunc* const topFuncp = m_topFuncps.front(); topFuncp->name("trace_init_top"); + topFuncp->entryPoint(true); if (rootFuncp && v3Global.opt.debugCheck()) checkCallsRecurse(rootFuncp); checkCalls(topFuncp); diff --git a/src/Verilator.cpp b/src/Verilator.cpp index b37336a52..3d7da1ca3 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -582,8 +582,14 @@ static void process() { // Must be after all Sel/array index based optimizations V3Reloop::reloopAll(v3Global.rootp()); } + } - if (v3Global.opt.inlineCFuncs()) { + // These are no longer needed, remove references before CFunc inlining + v3Global.rootp()->evalp(nullptr); + v3Global.rootp()->evalNbap(nullptr); + + if (!v3Global.opt.lintOnly() && !v3Global.opt.serializeOnly()) { + if (v3Global.opt.fInlineCFuncs()) { // Inline small CFuncs to reduce function call overhead V3InlineCFuncs::inlineAll(v3Global.rootp()); } diff --git a/test_regress/t/t_display_merge.py b/test_regress/t/t_display_merge.py index 999f03507..205550676 100755 --- a/test_regress/t/t_display_merge.py +++ 
b/test_regress/t/t_display_merge.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator_st') -test.compile(verilator_flags2=["--stats", "--inline-cfuncs", "0"]) +test.compile(verilator_flags2=["--stats", "-fno-inline-cfuncs"]) test.execute(expect_filename=test.golden_filename) diff --git a/test_regress/t/t_flag_csplit_eval.py b/test_regress/t/t_flag_csplit_eval.py index 1b88de3bb..8311f5d9b 100755 --- a/test_regress/t/t_flag_csplit_eval.py +++ b/test_regress/t/t_flag_csplit_eval.py @@ -23,7 +23,7 @@ def check_evals(): test.error("Too few _eval functions found: " + str(got)) -test.compile(v_flags2=["--output-split 1 --output-split-cfuncs 20"], +test.compile(v_flags2=["--output-split 1 --output-split-cfuncs 20 -fno-inline-cfuncs"], verilator_make_gmake=False) # Slow to compile, so skip it) check_evals() diff --git a/test_regress/t/t_hier_block_chained.py b/test_regress/t/t_hier_block_chained.py index fb17db8e6..41e89faab 100755 --- a/test_regress/t/t_hier_block_chained.py +++ b/test_regress/t/t_hier_block_chained.py @@ -31,9 +31,9 @@ test.compile(v_flags2=[ if test.vltmt: test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt", - r'Optimizations, Thread schedule count\s+(\d+)', 3) + r'Optimizations, Thread schedule count\s+(\d+)', 4) test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt", - r'Optimizations, Thread schedule total tasks\s+(\d+)', 5) + r'Optimizations, Thread schedule total tasks\s+(\d+)', 6) test.execute() diff --git a/test_regress/t/t_hier_block_perf.py b/test_regress/t/t_hier_block_perf.py index d91e3475e..ea6d40c8b 100755 --- a/test_regress/t/t_hier_block_perf.py +++ b/test_regress/t/t_hier_block_perf.py @@ -35,9 +35,9 @@ test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "_ if test.vltmt: test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt", - r'Optimizations, Thread schedule count\s+(\d+)', 1) + 
r'Optimizations, Thread schedule count\s+(\d+)', 2) test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt", - r'Optimizations, Thread schedule total tasks\s+(\d+)', 2) + r'Optimizations, Thread schedule total tasks\s+(\d+)', 3) test.execute(all_run_flags=[ "+verilator+prof+exec+start+2", diff --git a/test_regress/t/t_inst_tree_inl0_pub1.py b/test_regress/t/t_inst_tree_inl0_pub1.py index 31d27a095..be831d2c5 100755 --- a/test_regress/t/t_inst_tree_inl0_pub1.py +++ b/test_regress/t/t_inst_tree_inl0_pub1.py @@ -14,8 +14,8 @@ test.top_filename = "t/t_inst_tree.v" default_vltmt_threads = test.get_default_vltmt_threads test.compile( - # Disable --inline-cfuncs so functions exist to be combined - verilator_flags2=['--stats', '--inline-cfuncs', '0', test.t_dir + "/" + test.name + ".vlt"], + # Disable CFunc inlining so functions exist to be combined + verilator_flags2=['--stats', '-fno-inline-cfuncs', test.t_dir + "/" + test.name + ".vlt"], # Force 3 threads even if we have fewer cores threads=(default_vltmt_threads if test.vltmt else 1)) diff --git a/test_regress/t/t_json_only_debugcheck.py b/test_regress/t/t_json_only_debugcheck.py index 3137b8632..aeef0a17b 100755 --- a/test_regress/t/t_json_only_debugcheck.py +++ b/test_regress/t/t_json_only_debugcheck.py @@ -16,7 +16,7 @@ test.top_filename = "t/t_enum_type_methods.v" out_filename = test.obj_dir + "/V" + test.name + ".tree.json" test.compile(verilator_flags2=[ - '--no-std', '--debug-check', '--no-json-edit-nums', '--flatten', '--inline-cfuncs', '0' + '--no-std', '--debug-check', '--no-json-edit-nums', '--flatten', '-fno-inline-cfuncs' ], verilator_make_gmake=False, make_top_shell=False, diff --git a/test_regress/t/t_opt_inline_cfuncs.py b/test_regress/t/t_opt_inline_cfuncs.py index 7981f48c3..1821f5ec5 100755 --- a/test_regress/t/t_opt_inline_cfuncs.py +++ b/test_regress/t/t_opt_inline_cfuncs.py @@ -9,17 +9,17 @@ import vltest_bootstrap -test.scenarios('vlt') 
+test.scenarios('vlt_all') -# Use --output-split-cfuncs to create small functions that can be inlined -# Also test --inline-cfuncs-product option test.compile(verilator_flags2=[ - "--stats", "--binary", "--output-split-cfuncs", "1", "--inline-cfuncs-product", "200" + "--stats", "--binary", "--inline-cfuncs-product", "200", "--dumpi-V3InlineCFuncs", "9" ]) -# Verify inlining happened with exact count -test.file_grep(test.stats, r'Optimizations, Inlined CFuncs\s+(\d+)', 39) - test.execute() +if test.vlt: + test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+(\d+)', 7) + test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions inlined\s+(\d+)', 7) + test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions removed\s+(\d+)', 7) + test.passes() diff --git a/test_regress/t/t_opt_inline_cfuncs_args.py b/test_regress/t/t_opt_inline_cfuncs_args.py new file mode 100755 index 000000000..05fb055dd --- /dev/null +++ b/test_regress/t/t_opt_inline_cfuncs_args.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. 
+# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt_all') + +test.compile(verilator_flags2=["--stats", "--binary"]) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+[1-9]') + +test.passes() diff --git a/test_regress/t/t_opt_inline_cfuncs_args.v b/test_regress/t/t_opt_inline_cfuncs_args.v new file mode 100644 index 000000000..d94ac40c4 --- /dev/null +++ b/test_regress/t/t_opt_inline_cfuncs_args.v @@ -0,0 +1,36 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain. +// SPDX-FileCopyrightText: 2026 Wilson Snyder +// SPDX-License-Identifier: CC0-1.0 + +module t ( + input wire clk +); + + integer cyc = 0; + reg [31:0] acc; + + task automatic add_pair(input [31:0] a, input [31:0] b, inout [31:0] sum); + // verilator no_inline_task + sum = sum + a + b; + endtask + + always @(posedge clk) begin + cyc <= cyc + 1; + acc = 0; + add_pair(cyc[31:0], 32'd1, acc); // + cyc + 1 + add_pair(32'd1000, cyc[31:0], acc); // + 1000 + cyc + // acc = (cyc + 1) + (1000 + cyc) = 2*cyc + 1001 + if (cyc > 1) begin + if (acc !== (2 * cyc[31:0] + 32'd1001)) begin + $write("%%Error: cyc=%0d acc=%0d expected %0d\n", cyc, acc, 2 * cyc + 1001); + $stop; + end + end + if (cyc == 20) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end +endmodule diff --git a/test_regress/t/t_opt_inline_cfuncs_dup.py b/test_regress/t/t_opt_inline_cfuncs_dup.py new file mode 100755 index 000000000..05fb055dd --- /dev/null +++ b/test_regress/t/t_opt_inline_cfuncs_dup.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. 
+# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt_all') + +test.compile(verilator_flags2=["--stats", "--binary"]) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+[1-9]') + +test.passes() diff --git a/test_regress/t/t_opt_inline_cfuncs_dup.v b/test_regress/t/t_opt_inline_cfuncs_dup.v new file mode 100644 index 000000000..2a39fa47e --- /dev/null +++ b/test_regress/t/t_opt_inline_cfuncs_dup.v @@ -0,0 +1,30 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain. +// SPDX-FileCopyrightText: 2026 Wilson Snyder +// SPDX-License-Identifier: CC0-1.0 + +module t ( + input wire clk +); + + integer cyc = 0; + + task automatic tick(); + // verilator no_inline_task + automatic time t = $time; + $display("TICK: %0t", t); + endtask + + always @(posedge clk) begin + cyc <= cyc + 1; + tick(); + tick(); + tick(); + tick(); + if (cyc == 20) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end +endmodule diff --git a/test_regress/t/t_opt_inline_cfuncs_off.py b/test_regress/t/t_opt_inline_cfuncs_off.py index 8b045e8ca..7e7361d8c 100755 --- a/test_regress/t/t_opt_inline_cfuncs_off.py +++ b/test_regress/t/t_opt_inline_cfuncs_off.py @@ -9,15 +9,13 @@ import vltest_bootstrap -test.scenarios('vlt') +test.scenarios('vlt_all') test.top_filename = "t/t_opt_inline_cfuncs.v" -# Disable inlining with --inline-cfuncs 0 -test.compile(verilator_flags2=["--stats", "--binary", "--inline-cfuncs", "0"]) - -# Verify inlining did NOT happen (stat doesn't exist when pass is skipped) -test.file_grep_not(test.stats, r'Optimizations, Inlined CFuncs\s+[1-9]') +test.compile(verilator_flags2=["--stats", "--binary", "-fno-inline-cfuncs"]) test.execute() +test.file_grep_not(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+[1-9]') + test.passes() diff --git 
a/test_regress/t/t_opt_inline_cfuncs_threshold.py b/test_regress/t/t_opt_inline_cfuncs_threshold.py index 677dee894..c7c6713b4 100755 --- a/test_regress/t/t_opt_inline_cfuncs_threshold.py +++ b/test_regress/t/t_opt_inline_cfuncs_threshold.py @@ -9,17 +9,16 @@ import vltest_bootstrap -test.scenarios('vlt') +test.scenarios('vlt_all') -# Use thresholds that guarantee rejection to test the "return false" path in isInlineable() -# --inline-cfuncs 1: pass still runs (not skipped) -# --inline-cfuncs-product 0: guarantees all functions rejected (node_count * call_count > 0 always) test.compile(verilator_flags2=[ - "--stats", "--binary", "--inline-cfuncs", "1", "--inline-cfuncs-product", "0" + "--stats", "--binary", "--inline-cfuncs", "0", "--inline-cfuncs-product", "0" ]) -test.file_grep(test.stats, r'Optimizations, Inlined CFuncs\s+(\d+)', 0) - test.execute() +test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions inlined\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions removed\s+(\d+)', 0) + test.passes() diff --git a/test_regress/t/t_opt_inline_cfuncs_trace.py b/test_regress/t/t_opt_inline_cfuncs_trace.py new file mode 100755 index 000000000..515aa4510 --- /dev/null +++ b/test_regress/t/t_opt_inline_cfuncs_trace.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. 
+# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt_all') +test.top_filename = "t/t_opt_inline_cfuncs.v" + +test.compile(verilator_flags2=["--stats", "--binary", "--trace", "--inline-cfuncs-product", "200"]) + +if test.vlt: + test.file_grep(test.stats, r'Optimizations, Inline CFuncs, calls inlined\s+(\d+)', 8) + test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions inlined\s+(\d+)', 7) + test.file_grep(test.stats, r'Optimizations, Inline CFuncs, functions removed\s+(\d+)', 9) + +test.execute() + +test.passes() diff --git a/test_regress/t/t_protect_ids_key.out b/test_regress/t/t_protect_ids_key.out index 110f826c1..a9a6d264e 100644 --- a/test_regress/t/t_protect_ids_key.out +++ b/test_regress/t/t_protect_ids_key.out @@ -16,13 +16,11 @@ - - - + @@ -36,13 +34,10 @@ - - - diff --git a/test_regress/t/t_timing_debug1.py b/test_regress/t/t_timing_debug1.py index 94534429f..a8648a97a 100755 --- a/test_regress/t/t_timing_debug1.py +++ b/test_regress/t/t_timing_debug1.py @@ -13,7 +13,7 @@ test.scenarios('vlt_all') test.top_filename = "t/t_timing_sched.v" test.compile( - verilator_flags2=["--binary", "--timing", "--inline-cfuncs", "0", "-CFLAGS", "-DVL_DEBUG"]) + verilator_flags2=["--binary", "--timing", "-fno-inline-cfuncs", "-CFLAGS", "-DVL_DEBUG"]) test.execute(all_run_flags=["+verilator+debug"]) diff --git a/test_regress/t/t_timing_debug2.py b/test_regress/t/t_timing_debug2.py index 18a53b76f..e5bb01e3c 100755 --- a/test_regress/t/t_timing_debug2.py +++ b/test_regress/t/t_timing_debug2.py @@ -12,8 +12,8 @@ import vltest_bootstrap test.scenarios('vlt_all') test.top_filename = "t/t_timing_class.v" -# Disable --inline-cfuncs so debug traces show all function entries -test.compile(verilator_flags2=["--exe --main --timing --inline-cfuncs 0"]) +# Disable CFunc inlining so debug traces show all function entries +test.compile(verilator_flags2=["--exe --main 
--timing -fno-inline-cfuncs"]) test.execute(all_run_flags=["+verilator+debug"]) diff --git a/test_regress/t/t_timing_eval_act.out b/test_regress/t/t_timing_eval_act.out index 1d71dde4f..46417dac7 100644 --- a/test_regress/t/t_timing_eval_act.out +++ b/test_regress/t/t_timing_eval_act.out @@ -4,6 +4,7 @@ -V{t#,#}+ Vt_timing_eval_act___024root___eval_debug_assertions -V{t#,#}+ Initial -V{t#,#}+ Vt_timing_eval_act___024root___eval_static +-V{t#,#}+ Vt_timing_eval_act___024root___eval_static__TOP -V{t#,#}+ Vt_timing_eval_act___024root___timing_ready -V{t#,#}+ Vt_timing_eval_act___024root___eval_initial -V{t#,#}+ Vt_timing_eval_act___024root___eval_initial__TOP__Vtiming__0 diff --git a/test_regress/t/t_timing_eval_act.py b/test_regress/t/t_timing_eval_act.py index 787745ab3..46dae2791 100755 --- a/test_regress/t/t_timing_eval_act.py +++ b/test_regress/t/t_timing_eval_act.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('vlt') -test.compile(verilator_flags2=["--binary", "--runtime-debug"]) +test.compile(verilator_flags2=["--binary", "--runtime-debug", "-fno-inline-cfuncs"]) test.file_grep( test.obj_dir + "/" + test.vm_prefix + "___024root__0.cpp", r'void\s+' + test.vm_prefix + diff --git a/test_regress/t/t_verilated_debug.out b/test_regress/t/t_verilated_debug.out index 1d7867bab..7d98054ea 100644 --- a/test_regress/t/t_verilated_debug.out +++ b/test_regress/t/t_verilated_debug.out @@ -37,6 +37,7 @@ internalsDump: -V{t#,#}+ Vt_verilated_debug___024root___eval_phase__nba -V{t#,#}+ Vt_verilated_debug___024root___trigger_anySet__act -V{t#,#}+ Vt_verilated_debug___024root___eval_nba +-V{t#,#}+ Vt_verilated_debug___024root___nba_sequent__TOP__0 *-* All Finished *-* -V{t#,#}+ Vt_verilated_debug___024root___trigger_clear__act -V{t#,#}+ Vt_verilated_debug___024root___eval_phase__act diff --git a/test_regress/t/t_verilated_debug.py b/test_regress/t/t_verilated_debug.py index dbc82d1c1..e880f1866 100755 --- a/test_regress/t/t_verilated_debug.py +++ 
b/test_regress/t/t_verilated_debug.py @@ -12,7 +12,7 @@ import vltest_bootstrap test.scenarios('vlt_all') test.verilated_debug = True -test.compile(verilator_flags2=[]) +test.compile(verilator_flags2=['-fno-inline-cfuncs']) test.execute()