diff --git a/src/V3Ast.h b/src/V3Ast.h index ae0d45178..b0df2cd20 100644 --- a/src/V3Ast.h +++ b/src/V3Ast.h @@ -1286,6 +1286,8 @@ public: void iterateAndNextNull(AstNode* nodep); /// Call visit()s on const nodep (maybe nullptr) and nodep's nextp() list void iterateAndNextConstNull(AstNode* nodep); + /// Call visit()s on const nodep (maybe nullptr) and nodep's nextp() list, in reverse order + void iterateAndNextConstNullBackwards(AstNode* nodep); /// Return edited nodep; see comments in V3Ast.cpp AstNode* iterateSubtreeReturnEdits(AstNode* nodep); @@ -3012,6 +3014,9 @@ inline void VNVisitor::iterateChildrenConst(AstNode* nodep) { nodep->iterateChil inline void VNVisitor::iterateAndNextNull(AstNode* nodep) { if (VL_LIKELY(nodep)) nodep->iterateAndNext(*this); } +inline void VNVisitor::iterateAndNextConstNullBackwards(AstNode* nodep) { + if (VL_LIKELY(nodep)) nodep->iterateListBackwards(*this); +} inline void VNVisitor::iterateAndNextConstNull(AstNode* nodep) { if (VL_LIKELY(nodep)) nodep->iterateAndNextConst(*this); } diff --git a/src/V3AstUserAllocator.h b/src/V3AstUserAllocator.h index 5180db3eb..d757ba680 100644 --- a/src/V3AstUserAllocator.h +++ b/src/V3AstUserAllocator.h @@ -89,6 +89,8 @@ protected: for (T_Data* const p : m_allocated) { delete p; } } + VL_UNCOPYABLE(AstUserAllocatorBase); + public: // Get a reference to the user data T_Data& operator()(T_Node* nodep) { diff --git a/src/V3Inline.cpp b/src/V3Inline.cpp index 3fa665571..d2f600a2c 100644 --- a/src/V3Inline.cpp +++ b/src/V3Inline.cpp @@ -34,29 +34,46 @@ #include "V3Ast.h" #include "V3String.h" +#include "V3AstUserAllocator.h" + #include -#include #include +#include // CONFIG static const int INLINE_MODS_SMALLER = 100; // If a mod is < this # nodes, can always inline it //###################################################################### -// Inline state, as a visitor of each AstNode +// Inlining state. Kept as AstNodeModule::user1p via AstUser1Allocator + +namespace { + +struct ModuleState { + bool m_inlined = false; // Whether to inline this module + unsigned m_cellRefs = 0; // Number of AstCells instantiating this module + std::vector m_childCells; // AstCells under this module (to speed up traversal) +}; + +using ModuleStateUser1Allocator = AstUser1Allocator; + +} // namespace + +//###################################################################### +// Visitor that determines which modules will be inlined class InlineMarkVisitor final : public VNVisitor { private: // NODE STATE // Output - // AstNodeModule::user1() // OUTPUT: bool. User request to inline this module + // AstNodeModule::user1() // OUTPUT: ModuleState instance (via m_moduleState) // Internal state (can be cleared after this visit completes) // AstNodeModule::user2() // CIL_*. Allowed to automatically inline module - // AstNodeModule::user3() // int. Number of cells referencing this module // AstNodeModule::user4() // int. Statements in module const VNUser2InUse m_inuser2; - const VNUser3InUse m_inuser3; const VNUser4InUse m_inuser4; + ModuleStateUser1Allocator& m_moduleState; + // For the user2 field: enum : uint8_t { CIL_NOTHARD = 0, // Inline not supported @@ -120,7 +137,8 @@ private: iterateChildren(nodep); } virtual void visit(AstCell* nodep) override { - nodep->modp()->user3Inc(); // Inc refs + m_moduleState(nodep->modp()).m_cellRefs++; + m_moduleState(m_modp).m_childCells.push_back(nodep); m_instances[m_modp][nodep->modp()]++; iterateChildren(nodep); } @@ -166,7 +184,7 @@ private: m_modp->user4(oldcnt); } virtual void visit(AstNetlist* nodep) override { - // Build user2, user3, and user4 for all modules. + // Build ModuleState, user2, and user4 for all modules. // Also build m_allMods and m_instances. iterateChildren(nodep); @@ -180,14 +198,14 @@ private: int statements = modp->user4(); for (const auto& pair : m_instances[modp]) { const AstNodeModule* const childp = pair.first; - if (childp->user1()) { // inlining child + if (m_moduleState(childp).m_inlined) { // inlining child statements += childp->user4() * pair.second; } } modp->user4(statements); const int allowed = modp->user2(); - const int refs = modp->user3(); + const int refs = m_moduleState(modp).m_cellRefs; // Should we automatically inline this module? // If --flatten is specified, then force everything to be inlined that can be. @@ -203,7 +221,7 @@ private: || statements < INLINE_MODS_SMALLER // || v3Global.opt.inlineMult() < 1 // || refs * statements < v3Global.opt.inlineMult()); - modp->user1(doit); + m_moduleState(modp).m_inlined = doit; UINFO(4, " Inline=" << doit << " Possible=" << allowed << " Refs=" << refs << " Stmts=" << statements << " " << modp << endl); } @@ -216,7 +234,10 @@ private: public: // CONSTRUCTORS - explicit InlineMarkVisitor(AstNode* nodep) { iterate(nodep); } + explicit InlineMarkVisitor(AstNode* nodep, ModuleStateUser1Allocator& moduleState) + : m_moduleState{moduleState} { + iterate(nodep); + } virtual ~InlineMarkVisitor() override { V3Stats::addStat("Optimizations, Inline unsupported", m_statUnsup); } @@ -236,7 +257,10 @@ private: VL_DEBUG_FUNC; // Declare debug() // VISITORS - virtual void visit(AstCell* nodep) override { nodep->user4p(nodep->clonep()); } + virtual void visit(AstCell* nodep) override { + // clonep is nullptr when inlining the last instance, if so the use original node + nodep->user4p(nodep->clonep() ? nodep->clonep() : nodep); + } //-------------------- virtual void visit(AstNodeStmt*) override {} // Accelerate virtual void visit(AstNodeMath*) override {} // Accelerate @@ -467,7 +491,7 @@ private: // AstIfaceRefDType::user5p() // Whether the cell pointed to by this // // AstIfaceRefDType has been inlined // Input: - // AstNodeModule::user1p() // bool. True to inline this module (from InlineMarkVisitor) + // AstNodeModule::user1p() // ModuleState instance (via m_moduleState) // Cleared each cell // AstVar::user2p() // AstVarRef*/AstConst* Points to signal this // // is a direct connect to @@ -476,6 +500,8 @@ private: const VNUser4InUse m_inuser4; const VNUser5InUse m_inuser5; + ModuleStateUser1Allocator& m_moduleState; + // STATE AstNodeModule* m_modp = nullptr; // Current module VDouble0 m_statCells; // Statistic tracking @@ -483,10 +509,114 @@ private: // METHODS VL_DEBUG_FUNC; // Declare debug() + void inlineCell(AstCell* nodep) { + UINFO(5, " Inline CELL " << nodep << endl); + + const VNUser2InUse user2InUse; + const VNUser3InUse user3InUse; + + ++m_statCells; + + // Before cloning simplify pin assignments. Better off before, as if the module has + // multiple instantiations we will save work, and we can't call pinReconnectSimple in this + // loop as it clone()s itself. + for (AstPin* pinp = nodep->pinsp(); pinp; pinp = VN_AS(pinp->nextp(), Pin)) { + V3Inst::pinReconnectSimple(pinp, nodep, false); + } + + // Is this the last cell referencing this module? + const bool lastCell = --m_moduleState(nodep->modp()).m_cellRefs == 0; + + // Important: If this is the last cell, then don't clone the instantiated module but + // inline the original directly. While this requires some special casing, doing so + // saves us having to temporarily clone the module for the last cell, which + // significantly reduces Verilator memory usage. This is especially true as often the + // top few levels of the hierarchy are singleton wrapper modules, which we always + // inline. In this case this special casing saves us from having to clone essentially + // the entire netlist, which would in effect double Verilator memory consumption, or + // worse if we put off deleting the inlined modules until the end. Not having to clone + // large trees also improves speed. + AstNodeModule* newmodp = nullptr; + if (!lastCell) { + // Clone original module + newmodp = nodep->modp()->cloneTree(false); + } else { + // For the last cell, reuse the original module + nodep->modp()->unlinkFrBack(); + newmodp = nodep->modp(); + } + // Find cell cross-references + InlineCollectVisitor{nodep->modp()}; + // Create data for dotted variable resolution + AstCellInline* const inlinep + = new AstCellInline(nodep->fileline(), nodep->name(), nodep->modp()->origName(), + nodep->modp()->timeunit()); + m_modp->addInlinesp(inlinep); // Must be parsed before any AstCells + // Create assignments to the pins + for (AstPin* pinp = nodep->pinsp(); pinp; pinp = VN_AS(pinp->nextp(), Pin)) { + if (!pinp->exprp()) continue; + UINFO(6, " Pin change from " << pinp->modVarp() << endl); + + AstNode* const connectRefp = pinp->exprp(); + UASSERT_OBJ(VN_IS(connectRefp, Const) || VN_IS(connectRefp, VarRef), pinp, + "Unknown interconnect type; pinReconnectSimple should have cleared up"); + V3Inst::checkOutputShort(pinp); + + // Make new signal; even though we'll optimize the interconnect, we + // need an alias to trace correctly. If tracing is disabled, we'll + // delete it in later optimizations. + AstVar* const pinOldVarp = pinp->modVarp(); + AstVar* const pinNewVarp = lastCell ? pinOldVarp : pinOldVarp->clonep(); + UASSERT_OBJ(pinNewVarp, pinOldVarp, "Cloning failed"); + // Propagate any attributes across the interconnect + pinNewVarp->propagateAttrFrom(pinOldVarp); + if (const AstVarRef* const vrefp = VN_CAST(connectRefp, VarRef)) { + vrefp->varp()->propagateAttrFrom(pinOldVarp); + } + + // One to one interconnect won't make a temporary variable. + // This prevents creating a lot of extra wires for clock signals. + // It will become a tracing alias. + UINFO(6, "One-to-one " << connectRefp << endl); + UINFO(6, " -to " << pinNewVarp << endl); + pinNewVarp->user2p(connectRefp); + // Public output inside the cell must go via an assign rather + // than alias. Else the public logic will set the alias, losing + // the value to be propagated up (InOnly isn't a problem as the + // AssignAlias will create the assignment for us) + pinNewVarp->user3(pinNewVarp->isSigUserRWPublic() + && pinNewVarp->direction() == VDirection::OUTPUT); + } + // Cleanup var names, etc, to not conflict + { InlineRelinkVisitor{newmodp, m_modp, nodep}; } + // Move statements under the module we are inlining into + if (AstNode* const stmtsp = newmodp->stmtsp()) { + stmtsp->unlinkFrBackWithNext(); + m_modp->addStmtp(stmtsp); + } + // Clear any leftover ports, etc + VL_DO_DANGLING(newmodp->deleteTree(), newmodp); + // Remove the cell we just inlined + nodep->unlinkFrBack(); + VL_DO_DANGLING(pushDeletep(nodep), nodep); + } + // VISITORS virtual void visit(AstNetlist* nodep) override { // Iterate modules backwards, in bottom-up order. Required! - iterateChildrenBackwards(nodep); + iterateAndNextConstNullBackwards(nodep->modulesp()); + // Clean up AstIfaceRefDType references + iterateChildren(nodep->typeTablep()); + } + virtual void visit(AstNodeModule* nodep) override { + UASSERT_OBJ(!m_modp, nodep, "Unsupported: Nested modules"); + m_modp = nodep; + // Iterate the stored cells directly to reduce traversal + for (AstCell* const cellp : m_moduleState(nodep).m_childCells) { + if (m_moduleState(cellp->modp()).m_inlined) inlineCell(cellp); + } + m_moduleState(nodep).m_childCells.clear(); + m_modp = nullptr; } virtual void visit(AstIfaceRefDType* nodep) override { if (nodep->user5()) { @@ -496,99 +626,24 @@ private: nodep->cellp(nullptr); } } - virtual void visit(AstNodeModule* nodep) override { - VL_RESTORER(m_modp); - { - m_modp = nodep; - iterateChildren(nodep); - } - } - virtual void visit(AstCell* nodep) override { - if (nodep->modp()->user1()) { // Marked with inline request - UINFO(5, " Inline CELL " << nodep << endl); - - const VNUser2InUse m_inuser2; - const VNUser3InUse m_inuser3; - - ++m_statCells; - - // Before cloning simplify pin assignments - // Better off before, as if module has multiple instantiations - // we'll save work, and we can't call pinReconnectSimple in - // this loop as it clone()s itself. - for (AstPin* pinp = nodep->pinsp(); pinp; pinp = VN_AS(pinp->nextp(), Pin)) { - V3Inst::pinReconnectSimple(pinp, nodep, false); - } - - // Clone original module - AstNodeModule* const newmodp = nodep->modp()->cloneTree(false); - // Find cell cross references - InlineCollectVisitor{nodep->modp()}; - // Create data for dotted variable resolution - AstCellInline* const inlinep - = new AstCellInline(nodep->fileline(), nodep->name(), nodep->modp()->origName(), - nodep->modp()->timeunit()); - m_modp->addInlinesp(inlinep); // Must be parsed before any AstCells - // Create assignments to the pins - for (AstPin* pinp = nodep->pinsp(); pinp; pinp = VN_AS(pinp->nextp(), Pin)) { - if (!pinp->exprp()) continue; - UINFO(6, " Pin change from " << pinp->modVarp() << endl); - - AstNode* const connectRefp = pinp->exprp(); - UASSERT_OBJ( - VN_IS(connectRefp, Const) || VN_IS(connectRefp, VarRef), pinp, - "Unknown interconnect type; pinReconnectSimple should have cleared up"); - V3Inst::checkOutputShort(pinp); - - // Make new signal; even though we'll optimize the interconnect, we - // need an alias to trace correctly. If tracing is disabled, we'll - // delete it in later optimizations. - AstVar* const pinOldVarp = pinp->modVarp(); - AstVar* const pinNewVarp = pinOldVarp->clonep(); - UASSERT_OBJ(pinNewVarp, pinOldVarp, "Cloning failed"); - - // Propagate any attributes across the interconnect - pinNewVarp->propagateAttrFrom(pinOldVarp); - if (const AstVarRef* const vrefp = VN_CAST(connectRefp, VarRef)) { - vrefp->varp()->propagateAttrFrom(pinOldVarp); - } - - // One to one interconnect won't make a temporary variable. - // This prevents creating a lot of extra wires for clock signals. - // It will become a tracing alias. - UINFO(6, "One-to-one " << connectRefp << endl); - UINFO(6, " -to " << pinNewVarp << endl); - pinNewVarp->user2p(connectRefp); - // Public output inside the cell must go via an assign rather - // than alias. Else the public logic will set the alias, losing - // the value to be propagated up (InOnly isn't a problem as the - // AssignAlias will create the assignment for us) - pinNewVarp->user3(pinNewVarp->isSigUserRWPublic() - && pinNewVarp->direction() == VDirection::OUTPUT); - } - // Cleanup var names, etc, to not conflict - { InlineRelinkVisitor{newmodp, m_modp, nodep}; } - // Move statements under the module we are inlining into - if (AstNode* const stmtsp = newmodp->stmtsp()) { - stmtsp->unlinkFrBackWithNext(); - m_modp->addStmtp(stmtsp); - } - // Clear any leftover ports, etc - VL_DO_DANGLING(newmodp->deleteTree(), newmodp); - // Remove the cell we just inlined - nodep->unlinkFrBack(); - VL_DO_DANGLING(pushDeletep(nodep), nodep); - } - } //-------------------- - virtual void visit(AstNodeMath*) override {} // Accelerate - virtual void visit(AstNodeStmt*) override {} // Accelerate + virtual void visit(AstCell* nodep) override { // LCOV_EXCL_START + nodep->v3fatal("Traversal should have been short circuited"); + } + virtual void visit(AstNodeStmt* nodep) override { + nodep->v3fatal("Traversal should have been short circuited"); + } // LCOV_EXCL_STOP + virtual void visit(AstNodeFile*) override {} // Accelerate + virtual void visit(AstNodeDType*) override {} // Accelerate virtual void visit(AstNode* nodep) override { iterateChildren(nodep); } public: // CONSTRUCTORS - explicit InlineVisitor(AstNode* nodep) { iterate(nodep); } + explicit InlineVisitor(AstNode* nodep, ModuleStateUser1Allocator& moduleState) + : m_moduleState{moduleState} { + iterate(nodep); + } virtual ~InlineVisitor() override { V3Stats::addStat("Optimizations, Inlined instances", m_statCells); } @@ -601,6 +656,7 @@ class InlineIntfRefVisitor final : public VNVisitor { private: // NODE STATE // AstVar::user1p() // AstCell which this Var points to + const VNUser1InUse m_inuser1; const VNUser2InUse m_inuser2; string m_scope; // Scope name @@ -682,20 +738,26 @@ public: void V3Inline::inlineAll(AstNetlist* nodep) { UINFO(2, __FUNCTION__ << ": " << endl); - const VNUser1InUse m_inuser1; // output of InlineMarkVisitor, - // input to InlineVisitor. - // Scoped to clean up temp userN's - { InlineMarkVisitor{nodep}; } - { InlineVisitor{nodep}; } - // Remove all modules that were inlined - // V3Dead will also clean them up, but if we have debug on, it's a good - // idea to avoid dumping the hugely exploded tree. - for (AstNodeModule *modp = v3Global.rootp()->modulesp(), *nextmodp; modp; modp = nextmodp) { - nextmodp = VN_AS(modp->nextp(), NodeModule); - if (modp->user1()) { // Was inlined - VL_DO_DANGLING(modp->unlinkFrBack()->deleteTree(), modp); + + { + const VNUser1InUse m_inuser1; // output of InlineMarkVisitor, input to InlineVisitor. + ModuleStateUser1Allocator moduleState; // AstUser1Allocator + + // Scoped to clean up temp userN's + { InlineMarkVisitor{nodep, moduleState}; } + + { InlineVisitor{nodep, moduleState}; } + + // Check inlined modules have been removed during traversal. Otherwise we might have blown + // up Verilator memory consumption. + for (AstNodeModule* modp = v3Global.rootp()->modulesp(); modp; + modp = VN_AS(modp->nextp(), NodeModule)) { + UASSERT_OBJ(!moduleState(modp).m_inlined, modp, + "Inlined module should have been deleted when the last cell referencing " + "it was inlined"); } } + { InlineIntfRefVisitor{nodep}; } V3Global::dumpCheckGlobalTree("inline", 0, v3Global.opt.dumpTreeLevel(__FILE__) >= 3); }