Improve V3Inline speed and memory consumption

Avoid cloning the module when inlining the last instance that references
that module. This saves a lot of memory because it saves cloning
singleton modules (those with a single instance), which we always
inline. The top few levels of the hierarchy are often simple wrappers,
including the one added by Verilator in V3LinkLevel::wrapTop. Cloning
these and putting off deleting the originals can be very expensive
because they often have a lot of contents inlined into them, so each
layer of wrapper that is inlined would essentially add a whole new clone
of the large top-level. Directly inlining the module for the last cell
without cloning saves us from all this duplicate memory consumption and
also from having to create the clones in the first place.

Also added minor traversal speedups

This reduces the memory consumption of V3Inline by 80% and peak memory
consumption of Verilator by about 66% on a large design, while speeding
up the V3Inline pass by ~3.5x and the whole of Verilator by ~8% while
producing identical output.
This commit is contained in:
Geza Lore 2021-12-22 11:41:29 +00:00
parent 56f9d244de
commit 8c58612a3b
3 changed files with 182 additions and 113 deletions

View File

@ -1286,6 +1286,8 @@ public:
void iterateAndNextNull(AstNode* nodep);
/// Call visit()s on const nodep (maybe nullptr) and nodep's nextp() list
void iterateAndNextConstNull(AstNode* nodep);
/// Call visit()s on const nodep (maybe nullptr) and nodep's nextp() list, in reverse order
void iterateAndNextConstNullBackwards(AstNode* nodep);
/// Return edited nodep; see comments in V3Ast.cpp
AstNode* iterateSubtreeReturnEdits(AstNode* nodep);
@ -3012,6 +3014,9 @@ inline void VNVisitor::iterateChildrenConst(AstNode* nodep) { nodep->iterateChil
inline void VNVisitor::iterateAndNextNull(AstNode* nodep) {
if (VL_LIKELY(nodep)) nodep->iterateAndNext(*this);
}
inline void VNVisitor::iterateAndNextConstNullBackwards(AstNode* nodep) {
if (VL_LIKELY(nodep)) nodep->iterateListBackwards(*this);
}
inline void VNVisitor::iterateAndNextConstNull(AstNode* nodep) {
if (VL_LIKELY(nodep)) nodep->iterateAndNextConst(*this);
}

View File

@ -89,6 +89,8 @@ protected:
for (T_Data* const p : m_allocated) { delete p; }
}
VL_UNCOPYABLE(AstUserAllocatorBase);
public:
// Get a reference to the user data
T_Data& operator()(T_Node* nodep) {

View File

@ -34,29 +34,46 @@
#include "V3Ast.h"
#include "V3String.h"
#include "V3AstUserAllocator.h"
#include <algorithm>
#include <vector>
#include <unordered_set>
#include <vector>
// CONFIG
static const int INLINE_MODS_SMALLER = 100; // If a mod is < this # nodes, can always inline it
//######################################################################
// Inline state, as a visitor of each AstNode
// Inlining state. Kept as AstNodeModule::user1p via AstUser1Allocator
namespace {
struct ModuleState {
bool m_inlined = false; // Whether to inline this module
unsigned m_cellRefs = 0; // Number of AstCells instantiating this module
std::vector<AstCell*> m_childCells; // AstCells under this module (to speed up traversal)
};
using ModuleStateUser1Allocator = AstUser1Allocator<AstNodeModule, ModuleState>;
} // namespace
//######################################################################
// Visitor that determines which modules will be inlined
class InlineMarkVisitor final : public VNVisitor {
private:
// NODE STATE
// Output
// AstNodeModule::user1() // OUTPUT: bool. User request to inline this module
// AstNodeModule::user1() // OUTPUT: ModuleState instance (via m_moduleState)
// Internal state (can be cleared after this visit completes)
// AstNodeModule::user2() // CIL_*. Allowed to automatically inline module
// AstNodeModule::user3() // int. Number of cells referencing this module
// AstNodeModule::user4() // int. Statements in module
const VNUser2InUse m_inuser2;
const VNUser3InUse m_inuser3;
const VNUser4InUse m_inuser4;
ModuleStateUser1Allocator& m_moduleState;
// For the user2 field:
enum : uint8_t {
CIL_NOTHARD = 0, // Inline not supported
@ -120,7 +137,8 @@ private:
iterateChildren(nodep);
}
virtual void visit(AstCell* nodep) override {
nodep->modp()->user3Inc(); // Inc refs
m_moduleState(nodep->modp()).m_cellRefs++;
m_moduleState(m_modp).m_childCells.push_back(nodep);
m_instances[m_modp][nodep->modp()]++;
iterateChildren(nodep);
}
@ -166,7 +184,7 @@ private:
m_modp->user4(oldcnt);
}
virtual void visit(AstNetlist* nodep) override {
// Build user2, user3, and user4 for all modules.
// Build ModuleState, user2, and user4 for all modules.
// Also build m_allMods and m_instances.
iterateChildren(nodep);
@ -180,14 +198,14 @@ private:
int statements = modp->user4();
for (const auto& pair : m_instances[modp]) {
const AstNodeModule* const childp = pair.first;
if (childp->user1()) { // inlining child
if (m_moduleState(childp).m_inlined) { // inlining child
statements += childp->user4() * pair.second;
}
}
modp->user4(statements);
const int allowed = modp->user2();
const int refs = modp->user3();
const int refs = m_moduleState(modp).m_cellRefs;
// Should we automatically inline this module?
// If --flatten is specified, then force everything to be inlined that can be.
@ -203,7 +221,7 @@ private:
|| statements < INLINE_MODS_SMALLER //
|| v3Global.opt.inlineMult() < 1 //
|| refs * statements < v3Global.opt.inlineMult());
modp->user1(doit);
m_moduleState(modp).m_inlined = doit;
UINFO(4, " Inline=" << doit << " Possible=" << allowed << " Refs=" << refs
<< " Stmts=" << statements << " " << modp << endl);
}
@ -216,7 +234,10 @@ private:
public:
// CONSTRUCTORS
explicit InlineMarkVisitor(AstNode* nodep) { iterate(nodep); }
explicit InlineMarkVisitor(AstNode* nodep, ModuleStateUser1Allocator& moduleState)
: m_moduleState{moduleState} {
iterate(nodep);
}
virtual ~InlineMarkVisitor() override {
V3Stats::addStat("Optimizations, Inline unsupported", m_statUnsup);
}
@ -236,7 +257,10 @@ private:
VL_DEBUG_FUNC; // Declare debug()
// VISITORS
virtual void visit(AstCell* nodep) override { nodep->user4p(nodep->clonep()); }
virtual void visit(AstCell* nodep) override {
// clonep is nullptr when inlining the last instance, if so the use original node
nodep->user4p(nodep->clonep() ? nodep->clonep() : nodep);
}
//--------------------
virtual void visit(AstNodeStmt*) override {} // Accelerate
virtual void visit(AstNodeMath*) override {} // Accelerate
@ -467,7 +491,7 @@ private:
// AstIfaceRefDType::user5p() // Whether the cell pointed to by this
// // AstIfaceRefDType has been inlined
// Input:
// AstNodeModule::user1p() // bool. True to inline this module (from InlineMarkVisitor)
// AstNodeModule::user1p() // ModuleState instance (via m_moduleState)
// Cleared each cell
// AstVar::user2p() // AstVarRef*/AstConst* Points to signal this
// // is a direct connect to
@ -476,6 +500,8 @@ private:
const VNUser4InUse m_inuser4;
const VNUser5InUse m_inuser5;
ModuleStateUser1Allocator& m_moduleState;
// STATE
AstNodeModule* m_modp = nullptr; // Current module
VDouble0 m_statCells; // Statistic tracking
@ -483,10 +509,114 @@ private:
// METHODS
VL_DEBUG_FUNC; // Declare debug()
void inlineCell(AstCell* nodep) {
UINFO(5, " Inline CELL " << nodep << endl);
const VNUser2InUse user2InUse;
const VNUser3InUse user3InUse;
++m_statCells;
// Before cloning simplify pin assignments. Better off before, as if the module has
// multiple instantiations we will save work, and we can't call pinReconnectSimple in this
// loop as it clone()s itself.
for (AstPin* pinp = nodep->pinsp(); pinp; pinp = VN_AS(pinp->nextp(), Pin)) {
V3Inst::pinReconnectSimple(pinp, nodep, false);
}
// Is this the last cell referencing this module?
const bool lastCell = --m_moduleState(nodep->modp()).m_cellRefs == 0;
// Important: If this is the last cell, then don't clone the instantiated module but
// inline the original directly. While this requires some special casing, doing so
// saves us having to temporarily clone the module for the last cell, which
// significantly reduces Verilator memory usage. This is especially true as often the
// top few levels of the hierarchy are singleton wrapper modules, which we always
// inline. In this case this special casing saves us from having to clone essentially
// the entire netlist, which would in effect double Verilator memory consumption, or
// worse if we put off deleting the inlined modules until the end. Not having to clone
// large trees also improves speed.
AstNodeModule* newmodp = nullptr;
if (!lastCell) {
// Clone original module
newmodp = nodep->modp()->cloneTree(false);
} else {
// For the last cell, reuse the original module
nodep->modp()->unlinkFrBack();
newmodp = nodep->modp();
}
// Find cell cross-references
InlineCollectVisitor{nodep->modp()};
// Create data for dotted variable resolution
AstCellInline* const inlinep
= new AstCellInline(nodep->fileline(), nodep->name(), nodep->modp()->origName(),
nodep->modp()->timeunit());
m_modp->addInlinesp(inlinep); // Must be parsed before any AstCells
// Create assignments to the pins
for (AstPin* pinp = nodep->pinsp(); pinp; pinp = VN_AS(pinp->nextp(), Pin)) {
if (!pinp->exprp()) continue;
UINFO(6, " Pin change from " << pinp->modVarp() << endl);
AstNode* const connectRefp = pinp->exprp();
UASSERT_OBJ(VN_IS(connectRefp, Const) || VN_IS(connectRefp, VarRef), pinp,
"Unknown interconnect type; pinReconnectSimple should have cleared up");
V3Inst::checkOutputShort(pinp);
// Make new signal; even though we'll optimize the interconnect, we
// need an alias to trace correctly. If tracing is disabled, we'll
// delete it in later optimizations.
AstVar* const pinOldVarp = pinp->modVarp();
AstVar* const pinNewVarp = lastCell ? pinOldVarp : pinOldVarp->clonep();
UASSERT_OBJ(pinNewVarp, pinOldVarp, "Cloning failed");
// Propagate any attributes across the interconnect
pinNewVarp->propagateAttrFrom(pinOldVarp);
if (const AstVarRef* const vrefp = VN_CAST(connectRefp, VarRef)) {
vrefp->varp()->propagateAttrFrom(pinOldVarp);
}
// One to one interconnect won't make a temporary variable.
// This prevents creating a lot of extra wires for clock signals.
// It will become a tracing alias.
UINFO(6, "One-to-one " << connectRefp << endl);
UINFO(6, " -to " << pinNewVarp << endl);
pinNewVarp->user2p(connectRefp);
// Public output inside the cell must go via an assign rather
// than alias. Else the public logic will set the alias, losing
// the value to be propagated up (InOnly isn't a problem as the
// AssignAlias will create the assignment for us)
pinNewVarp->user3(pinNewVarp->isSigUserRWPublic()
&& pinNewVarp->direction() == VDirection::OUTPUT);
}
// Cleanup var names, etc, to not conflict
{ InlineRelinkVisitor{newmodp, m_modp, nodep}; }
// Move statements under the module we are inlining into
if (AstNode* const stmtsp = newmodp->stmtsp()) {
stmtsp->unlinkFrBackWithNext();
m_modp->addStmtp(stmtsp);
}
// Clear any leftover ports, etc
VL_DO_DANGLING(newmodp->deleteTree(), newmodp);
// Remove the cell we just inlined
nodep->unlinkFrBack();
VL_DO_DANGLING(pushDeletep(nodep), nodep);
}
// VISITORS
virtual void visit(AstNetlist* nodep) override {
// Iterate modules backwards, in bottom-up order. Required!
iterateChildrenBackwards(nodep);
iterateAndNextConstNullBackwards(nodep->modulesp());
// Clean up AstIfaceRefDType references
iterateChildren(nodep->typeTablep());
}
virtual void visit(AstNodeModule* nodep) override {
UASSERT_OBJ(!m_modp, nodep, "Unsupported: Nested modules");
m_modp = nodep;
// Iterate the stored cells directly to reduce traversal
for (AstCell* const cellp : m_moduleState(nodep).m_childCells) {
if (m_moduleState(cellp->modp()).m_inlined) inlineCell(cellp);
}
m_moduleState(nodep).m_childCells.clear();
m_modp = nullptr;
}
virtual void visit(AstIfaceRefDType* nodep) override {
if (nodep->user5()) {
@ -496,99 +626,24 @@ private:
nodep->cellp(nullptr);
}
}
virtual void visit(AstNodeModule* nodep) override {
VL_RESTORER(m_modp);
{
m_modp = nodep;
iterateChildren(nodep);
}
}
virtual void visit(AstCell* nodep) override {
if (nodep->modp()->user1()) { // Marked with inline request
UINFO(5, " Inline CELL " << nodep << endl);
const VNUser2InUse m_inuser2;
const VNUser3InUse m_inuser3;
++m_statCells;
// Before cloning simplify pin assignments
// Better off before, as if module has multiple instantiations
// we'll save work, and we can't call pinReconnectSimple in
// this loop as it clone()s itself.
for (AstPin* pinp = nodep->pinsp(); pinp; pinp = VN_AS(pinp->nextp(), Pin)) {
V3Inst::pinReconnectSimple(pinp, nodep, false);
}
// Clone original module
AstNodeModule* const newmodp = nodep->modp()->cloneTree(false);
// Find cell cross references
InlineCollectVisitor{nodep->modp()};
// Create data for dotted variable resolution
AstCellInline* const inlinep
= new AstCellInline(nodep->fileline(), nodep->name(), nodep->modp()->origName(),
nodep->modp()->timeunit());
m_modp->addInlinesp(inlinep); // Must be parsed before any AstCells
// Create assignments to the pins
for (AstPin* pinp = nodep->pinsp(); pinp; pinp = VN_AS(pinp->nextp(), Pin)) {
if (!pinp->exprp()) continue;
UINFO(6, " Pin change from " << pinp->modVarp() << endl);
AstNode* const connectRefp = pinp->exprp();
UASSERT_OBJ(
VN_IS(connectRefp, Const) || VN_IS(connectRefp, VarRef), pinp,
"Unknown interconnect type; pinReconnectSimple should have cleared up");
V3Inst::checkOutputShort(pinp);
// Make new signal; even though we'll optimize the interconnect, we
// need an alias to trace correctly. If tracing is disabled, we'll
// delete it in later optimizations.
AstVar* const pinOldVarp = pinp->modVarp();
AstVar* const pinNewVarp = pinOldVarp->clonep();
UASSERT_OBJ(pinNewVarp, pinOldVarp, "Cloning failed");
// Propagate any attributes across the interconnect
pinNewVarp->propagateAttrFrom(pinOldVarp);
if (const AstVarRef* const vrefp = VN_CAST(connectRefp, VarRef)) {
vrefp->varp()->propagateAttrFrom(pinOldVarp);
}
// One to one interconnect won't make a temporary variable.
// This prevents creating a lot of extra wires for clock signals.
// It will become a tracing alias.
UINFO(6, "One-to-one " << connectRefp << endl);
UINFO(6, " -to " << pinNewVarp << endl);
pinNewVarp->user2p(connectRefp);
// Public output inside the cell must go via an assign rather
// than alias. Else the public logic will set the alias, losing
// the value to be propagated up (InOnly isn't a problem as the
// AssignAlias will create the assignment for us)
pinNewVarp->user3(pinNewVarp->isSigUserRWPublic()
&& pinNewVarp->direction() == VDirection::OUTPUT);
}
// Cleanup var names, etc, to not conflict
{ InlineRelinkVisitor{newmodp, m_modp, nodep}; }
// Move statements under the module we are inlining into
if (AstNode* const stmtsp = newmodp->stmtsp()) {
stmtsp->unlinkFrBackWithNext();
m_modp->addStmtp(stmtsp);
}
// Clear any leftover ports, etc
VL_DO_DANGLING(newmodp->deleteTree(), newmodp);
// Remove the cell we just inlined
nodep->unlinkFrBack();
VL_DO_DANGLING(pushDeletep(nodep), nodep);
}
}
//--------------------
virtual void visit(AstNodeMath*) override {} // Accelerate
virtual void visit(AstNodeStmt*) override {} // Accelerate
virtual void visit(AstCell* nodep) override { // LCOV_EXCL_START
nodep->v3fatal("Traversal should have been short circuited");
}
virtual void visit(AstNodeStmt* nodep) override {
nodep->v3fatal("Traversal should have been short circuited");
} // LCOV_EXCL_STOP
virtual void visit(AstNodeFile*) override {} // Accelerate
virtual void visit(AstNodeDType*) override {} // Accelerate
virtual void visit(AstNode* nodep) override { iterateChildren(nodep); }
public:
// CONSTRUCTORS
explicit InlineVisitor(AstNode* nodep) { iterate(nodep); }
explicit InlineVisitor(AstNode* nodep, ModuleStateUser1Allocator& moduleState)
: m_moduleState{moduleState} {
iterate(nodep);
}
virtual ~InlineVisitor() override {
V3Stats::addStat("Optimizations, Inlined instances", m_statCells);
}
@ -601,6 +656,7 @@ class InlineIntfRefVisitor final : public VNVisitor {
private:
// NODE STATE
// AstVar::user1p() // AstCell which this Var points to
const VNUser1InUse m_inuser1;
const VNUser2InUse m_inuser2;
string m_scope; // Scope name
@ -682,20 +738,26 @@ public:
void V3Inline::inlineAll(AstNetlist* nodep) {
UINFO(2, __FUNCTION__ << ": " << endl);
const VNUser1InUse m_inuser1; // output of InlineMarkVisitor,
// input to InlineVisitor.
// Scoped to clean up temp userN's
{ InlineMarkVisitor{nodep}; }
{ InlineVisitor{nodep}; }
// Remove all modules that were inlined
// V3Dead will also clean them up, but if we have debug on, it's a good
// idea to avoid dumping the hugely exploded tree.
for (AstNodeModule *modp = v3Global.rootp()->modulesp(), *nextmodp; modp; modp = nextmodp) {
nextmodp = VN_AS(modp->nextp(), NodeModule);
if (modp->user1()) { // Was inlined
VL_DO_DANGLING(modp->unlinkFrBack()->deleteTree(), modp);
{
const VNUser1InUse m_inuser1; // output of InlineMarkVisitor, input to InlineVisitor.
ModuleStateUser1Allocator moduleState; // AstUser1Allocator
// Scoped to clean up temp userN's
{ InlineMarkVisitor{nodep, moduleState}; }
{ InlineVisitor{nodep, moduleState}; }
// Check inlined modules have been removed during traversal. Otherwise we might have blown
// up Verilator memory consumption.
for (AstNodeModule* modp = v3Global.rootp()->modulesp(); modp;
modp = VN_AS(modp->nextp(), NodeModule)) {
UASSERT_OBJ(!moduleState(modp).m_inlined, modp,
"Inlined module should have been deleted when the last cell referencing "
"it was inlined");
}
}
{ InlineIntfRefVisitor{nodep}; }
V3Global::dumpCheckGlobalTree("inline", 0, v3Global.opt.dumpTreeLevel(__FILE__) >= 3);
}