Localize variables used in multiple functions

Teach V3Localize how to localize variables that are used in multiple
functions, if in all functions where they are used, they are always
written in whole before being consumed. This allows a lot more variables
to be localized (+20k variables on OpenTitan - when building without
--trace), and can cause significant performance improvement (OpenTitan
simulates 8.5% faster - built single threaded and without --trace).
This commit is contained in:
Geza Lore 2021-06-17 18:49:45 +01:00
parent d6237e55b2
commit e5e5bc0fa3
4 changed files with 121 additions and 117 deletions

View File

@ -18,6 +18,7 @@ Verilator 4.205 devel
**Minor:** **Minor:**
* Optimize a lot more model variables into function locals (#3027). [Geza Lore]
* Remove deprecated --no-relative-cfuncs option (#3024). [Geza Lore] * Remove deprecated --no-relative-cfuncs option (#3024). [Geza Lore]
* Merge const static data globally into a new constant pool (#3013). [Geza Lore] * Merge const static data globally into a new constant pool (#3013). [Geza Lore]
* Fix error on unsupported recursive functions (#2957). [Trefor Southwell] * Fix error on unsupported recursive functions (#2957). [Trefor Southwell]

View File

@ -156,6 +156,7 @@ public:
return (m_e == READWRITE) ? VAccess(m_e) : (m_e == WRITE ? VAccess(READ) : VAccess(WRITE)); return (m_e == READWRITE) ? VAccess(m_e) : (m_e == WRITE ? VAccess(READ) : VAccess(WRITE));
} }
bool isReadOnly() const { return m_e == READ; } // False with READWRITE bool isReadOnly() const { return m_e == READ; } // False with READWRITE
bool isWriteOnly() const { return m_e == WRITE; } // False with READWRITE
bool isReadOrRW() const { return m_e == READ || m_e == READWRITE; } bool isReadOrRW() const { return m_e == READ || m_e == READWRITE; }
bool isWriteOrRW() const { return m_e == WRITE || m_e == READWRITE; } bool isWriteOrRW() const { return m_e == WRITE || m_e == READWRITE; }
bool isRW() const { return m_e == READWRITE; } bool isRW() const { return m_e == READWRITE; }

View File

@ -37,6 +37,34 @@
#include <algorithm> #include <algorithm>
//######################################################################
// Convert every WRITE AstVarRef to a READ ref
class ConvertWriteRefsToRead final : public AstNVisitor {
private:
// MEMBERS
AstNode* m_result = nullptr;
// CONSTRUCTORS
explicit ConvertWriteRefsToRead(AstNode* nodep) {
m_result = iterateSubtreeReturnEdits(nodep);
}
// VISITORS
void visit(AstVarRef* nodep) override {
UASSERT_OBJ(!nodep->access().isRW(), nodep, "Cannot handle a READWRITE reference");
if (nodep->access().isWriteOnly()) {
nodep->replaceWith(
new AstVarRef(nodep->fileline(), nodep->varScopep(), VAccess::READ));
}
}
void visit(AstNode* nodep) override { iterateChildren(nodep); }
public:
static AstNode* main(AstNode* nodep) { return ConvertWriteRefsToRead(nodep).m_result; }
};
//###################################################################### //######################################################################
// Clock state, as a visitor of each AstNode // Clock state, as a visitor of each AstNode
@ -272,14 +300,14 @@ private:
// IF(ORIG ^ CHANGE) { INC; CHANGE = ORIG; } // IF(ORIG ^ CHANGE) { INC; CHANGE = ORIG; }
AstNode* incp = nodep->incp()->unlinkFrBack(); AstNode* incp = nodep->incp()->unlinkFrBack();
AstNode* origp = nodep->origp()->unlinkFrBack(); AstNode* origp = nodep->origp()->unlinkFrBack();
AstNode* changep = nodep->changep()->unlinkFrBack(); AstNode* changeWrp = nodep->changep()->unlinkFrBack();
AstIf* newp = new AstIf(nodep->fileline(), new AstXor(nodep->fileline(), origp, changep), AstNode* changeRdp = ConvertWriteRefsToRead::main(changeWrp->cloneTree(false));
AstIf* newp = new AstIf(nodep->fileline(), new AstXor(nodep->fileline(), origp, changeRdp),
incp, nullptr); incp, nullptr);
// We could add another IF to detect posedges, and only increment if so. // We could add another IF to detect posedges, and only increment if so.
// It's another whole branch though versus a potential memory miss. // It's another whole branch though versus a potential memory miss.
// We'll go with the miss. // We'll go with the miss.
newp->addIfsp( newp->addIfsp(new AstAssign(nodep->fileline(), changeWrp, origp->cloneTree(false)));
new AstAssign(nodep->fileline(), changep->cloneTree(false), origp->cloneTree(false)));
nodep->replaceWith(newp); nodep->replaceWith(newp);
VL_DO_DANGLING(nodep->deleteTree(), nodep); VL_DO_DANGLING(nodep->deleteTree(), nodep);
} }

View File

@ -16,9 +16,9 @@
// LOCALIZE TRANSFORMATIONS: // LOCALIZE TRANSFORMATIONS:
// All modules: // All modules:
// VARSCOPE(BLOCKTEMP... // VARSCOPE(BLOCKTEMP...
// if only referenced in a CFUNC, make it local to that CFUNC // if only referenced in one CFUNC, make it local
// VARSCOPE(others // VARSCOPE
// if non-public, set before used, and in single CFUNC, make it local // if non-public, always written before used, make it local
// //
//************************************************************************* //*************************************************************************
@ -29,6 +29,7 @@
#include "V3Localize.h" #include "V3Localize.h"
#include "V3Stats.h" #include "V3Stats.h"
#include "V3Ast.h" #include "V3Ast.h"
#include "V3AstUserAllocator.h"
#include <vector> #include <vector>
@ -38,98 +39,73 @@
class LocalizeVisitor final : public AstNVisitor { class LocalizeVisitor final : public AstNVisitor {
private: private:
// NODE STATE // NODE STATE
// AstVar::user1p() -> First AstCFunc which references the variable // AstVarScope::user1() -> Bool indicating VarScope is not optimizable.
// AstVar::user2() -> VarFlags. Flag state // AstVarScope::user2() -> Bool indicating VarScope was fully assigned in the current
// AstVar::user4p() -> AstVarRef that writes whole variable, if first write ref. // function.
// AstVarScope::user3p() -> Set of CFuncs referencing this VarScope. (via m_accessors)
// AstCFunc::user4p() -> Multimap of 'VarScope -> VarRefs that reference that VarScope'
// in this function. (via m_references)
AstUser1InUse m_inuser1; AstUser1InUse m_inuser1;
AstUser2InUse m_inuser2; AstUser2InUse m_inuser2;
AstUser3InUse m_inuser3;
AstUser4InUse m_inuser4; AstUser4InUse m_inuser4;
// TYPES AstUser3Allocator<AstVarScope, std::unordered_set<AstCFunc*>> m_accessors;
union VarFlags { AstUser4Allocator<AstCFunc, std::unordered_multimap<const AstVarScope*, AstVarRef*>>
// Per-variable flags m_references;
// Used in user()'s so initializes to all zeros
struct {
int m_notOpt : 1; // NOT optimizable
int m_notStd : 1; // NOT optimizable if a non-blocktemp signal
int m_stdFuncAsn : 1; // Found simple assignment
};
// cppcheck-suppress unusedStructMember
uint32_t m_flags;
explicit VarFlags(AstVarScope* nodep) { m_flags = nodep->user2(); }
void setNodeFlags(AstVarScope* nodep) { nodep->user2(m_flags); }
};
// STATE // STATE
VDouble0 m_statLocVars; // Statistic tracking VDouble0 m_statLocVars; // Statistic tracking
AstCFunc* m_cfuncp = nullptr; // Current active function AstCFunc* m_cfuncp = nullptr; // Current active function
uint32_t m_nodeDepth = 0; // Node depth under m_cfuncp
std::vector<AstVarScope*> m_varScopeps; // List of variables to consider for localization std::vector<AstVarScope*> m_varScopeps; // List of variables to consider for localization
std::unordered_multimap<const AstVarScope*, AstVarRef*>
m_references; // VarRefs referencing the given VarScope
// METHODS // METHODS
VL_DEBUG_FUNC; // Declare debug() VL_DEBUG_FUNC; // Declare debug()
void clearOptimizable(AstVarScope* nodep, const char* reason) { bool isOptimizable(AstVarScope* nodep) {
UINFO(4, " NoOpt " << reason << " " << nodep << endl); return !nodep->user1() || // Not marked as not optimizable, or ...
VarFlags flags(nodep); (nodep->varp()->varType() == AstVarType::BLOCKTEMP
flags.m_notOpt = true; && m_accessors(nodep).size() == 1); // .. a block temp used in a single CFunc
flags.setNodeFlags(nodep);
}
void clearStdOptimizable(AstVarScope* nodep, const char* reason) {
UINFO(4, " NoStd " << reason << " " << nodep << endl);
VarFlags flags(nodep);
flags.m_notStd = true;
flags.setNodeFlags(nodep);
} }
void moveVarScopes() { void moveVarScopes() {
for (AstVarScope* const nodep : m_varScopeps) { for (AstVarScope* const nodep : m_varScopeps) {
if (nodep->varp()->valuep()) clearOptimizable(nodep, "HasInitValue"); if (!isOptimizable(nodep)) continue; // Not optimizable
if (!VarFlags(nodep).m_stdFuncAsn) clearStdOptimizable(nodep, "NoStdAssign");
VarFlags flags(nodep); const std::unordered_set<AstCFunc*>& funcps = m_accessors(nodep);
if (funcps.empty()) continue; // No referencing functions at all
if ((nodep->varp()->varType() == AstVarType::BLOCKTEMP
|| !flags.m_notStd) // Temporary Or used only in block
&& !flags.m_notOpt // Optimizable
&& !nodep->varp()->isClassMember() && // Statically exists in design hierarchy
nodep->user1p()) // Is under a CFunc
{
UINFO(4, "Localizing " << nodep << endl); UINFO(4, "Localizing " << nodep << endl);
++m_statLocVars; ++m_statLocVars;
AstCFunc* const funcp = VN_CAST(nodep->user1p(), CFunc);
// Yank the Var and VarScope from it's parent and schedule them for deletion // Yank the VarScope from it's parent and schedule them for deletion. Leave the Var
AstVar* const varp = nodep->varp(); // for now, as not all VarScopes referencing this Var might be localized.
if (varp->backp()) { // Might have already unlinked this via another AstVarScope
pushDeletep(varp->unlinkFrBack());
}
pushDeletep(nodep->unlinkFrBack()); pushDeletep(nodep->unlinkFrBack());
// In each referencing function, create a replacement local variable
AstVar* const oldVarp = nodep->varp();
for (AstCFunc* const funcp : funcps) {
// Create the new local variable. // Create the new local variable.
const string newName const string newName
= nodep->scopep() == funcp->scopep() = nodep->scopep() == funcp->scopep()
? varp->name() ? oldVarp->name()
: nodep->scopep()->nameDotless() + "__DOT__" + varp->name(); : nodep->scopep()->nameDotless() + "__DOT__" + oldVarp->name();
AstVar* const newVarp AstVar* const newVarp
= new AstVar(varp->fileline(), varp->varType(), newName, varp); = new AstVar(oldVarp->fileline(), oldVarp->varType(), newName, oldVarp);
newVarp->funcLocal(true); newVarp->funcLocal(true);
funcp->addInitsp(newVarp);
// Fix up all the references // Fix up all the references within this function
const auto er = m_references.equal_range(nodep); const auto er = m_references(funcp).equal_range(nodep);
for (auto it = er.first; it != er.second; ++it) { for (auto it = er.first; it != er.second; ++it) {
AstVarRef* const refp = it->second; AstVarRef* const refp = it->second;
refp->varScopep(nullptr); refp->varScopep(nullptr);
refp->varp(newVarp); refp->varp(newVarp);
} }
// Add the var to this function, and mark local
funcp->addInitsp(newVarp);
} else {
clearOptimizable(nodep, "NotDone");
} }
} }
m_varScopeps.clear(); m_varScopeps.clear();
m_references.clear();
} }
// VISITORS // VISITORS
@ -137,39 +113,35 @@ private:
iterateChildrenConst(nodep); iterateChildrenConst(nodep);
moveVarScopes(); moveVarScopes();
} }
virtual void visit(AstCFunc* nodep) override { virtual void visit(AstCFunc* nodep) override {
UINFO(4, " CFUNC " << nodep << endl); UINFO(4, " CFUNC " << nodep << endl);
VL_RESTORER(m_cfuncp); VL_RESTORER(m_cfuncp);
VL_RESTORER(m_nodeDepth);
{ {
m_cfuncp = nodep; m_cfuncp = nodep;
searchFuncStmts(nodep->argsp()); m_nodeDepth = 0;
searchFuncStmts(nodep->initsp()); AstNode::user2ClearTree(); // Check each function independently
searchFuncStmts(nodep->stmtsp());
searchFuncStmts(nodep->finalsp());
iterateChildrenConst(nodep); iterateChildrenConst(nodep);
} }
} }
void searchFuncStmts(AstNode* nodep) {
// Search for basic assignments to allow moving non-blocktemps virtual void visit(AstNodeAssign* nodep) override {
// For now we only find simple assignments not under any other statement. // Analyze RHS first so "a = a + 1" is detected as a read before write
// This could be more complicated; allow always-set under both branches of a IF. iterate(nodep->rhsp());
// If so, check for ArrayRef's and such, as they aren't acceptable. // For now we only consider an assignment that is directly under the function, (in
for (; nodep; nodep = nodep->nextp()) { // particular: not under an AstIf, or other kind of branch). This could be improved with
if (AstNodeAssign* const assignp = VN_CAST(nodep, NodeAssign)) { // proper data flow analysis.
if (AstVarRef* const varrefp = VN_CAST(assignp->lhsp(), VarRef)) { if (m_nodeDepth == 0) {
UASSERT_OBJ(varrefp->access().isWriteOrRW(), varrefp, // Check if simple "VARREF = ..." assignment, i.e.: this assignment sets the whole
"LHS of assignment is not an lvalue"); // variable (and in particular, it is not assigned only in part).
AstVarScope* const varScopep = varrefp->varScopep(); if (AstVarRef* const refp = VN_CAST(nodep->lhsp(), VarRef)) {
if (!varScopep->user4p()) { // Mark this VarScope as assigned in this function
UINFO(4, " FuncAsn " << varrefp << endl); refp->varScopep()->user2(1);
varScopep->user4p(varrefp);
VarFlags flags(varScopep);
flags.m_stdFuncAsn = true;
flags.setNodeFlags(varScopep);
}
}
} }
} }
// Analyze LHS (in case it's not the above simple case)
iterate(nodep->lhsp());
} }
virtual void visit(AstVarScope* nodep) override { virtual void visit(AstVarScope* nodep) override {
@ -177,42 +149,44 @@ private:
&& !nodep->varp()->isSigPublic() // Not something the user wants to interact with && !nodep->varp()->isSigPublic() // Not something the user wants to interact with
&& !nodep->varp()->isFuncLocal() // Not already a function local (e.g.: argument) && !nodep->varp()->isFuncLocal() // Not already a function local (e.g.: argument)
&& !nodep->varp()->isStatic() // Not a static variable && !nodep->varp()->isStatic() // Not a static variable
&& !nodep->varp()->isClassMember() // Statically exists in design hierarchy
&& !nodep->varp()->valuep() // Does not have an initializer
) { ) {
UINFO(4, " BLKVAR " << nodep << endl); UINFO(4, "Consider for localization: " << nodep << endl);
m_varScopeps.push_back(nodep); m_varScopeps.push_back(nodep);
} }
// No iterate; Don't want varrefs under it // No iterate; Don't want varrefs under it (e.g.: in child dtype?)
} }
virtual void visit(AstVarRef* nodep) override { virtual void visit(AstVarRef* nodep) override {
UASSERT_OBJ(m_cfuncp, nodep, "AstVarRef not under function");
AstVarScope* const varScopep = nodep->varScopep(); AstVarScope* const varScopep = nodep->varScopep();
if (!VarFlags(varScopep).m_notOpt) { // Remember this function accesses this VarScope (we always need this as we might optimize
// Remember the reference // this VarScope into a local, even if it's not assigned. See 'isOptimizable')
m_references.emplace(varScopep, nodep); m_accessors(varScopep).emplace(m_cfuncp);
if (!m_cfuncp) { // Not in function, can't optimize // Remember the reference so we can fix it up later (we always need this as well)
// Perhaps impossible, but better safe m_references(m_cfuncp).emplace(varScopep, nodep);
clearOptimizable(varScopep, "BVnofunc"); // LCOV_EXCL_LINE
} else { // Check if already marked as not optimizable
// Allow a variable to appear in only a single function if (!varScopep->user1()) {
AstNode* const oldfunc = varScopep->user1p(); // Note: we only check read variables, as it's ok to localize (and in fact discard)
if (!oldfunc) { // any variables that are only written but never read.
// First encounter with this variable if (nodep->access().isReadOrRW() && !varScopep->user2()) {
UINFO(4, " BVnewref " << nodep << endl); // Variable is read, but is not known to have been assigned in this function. Mark
varScopep->user1p(m_cfuncp); // Remember where it was used // as not optimizable.
} else if (m_cfuncp != oldfunc) { UINFO(4, "Not optimizable (not written): " << nodep << endl);
// Used in multiple functions varScopep->user1(1);
clearOptimizable(varScopep, "BVmultiF");
}
// First varref in function must be assignment found earlier
const AstVarRef* const firstasn = VN_CAST(varScopep->user4p(), VarRef);
if (firstasn && nodep != firstasn) {
clearStdOptimizable(varScopep, "notFirstAsn");
varScopep->user4p(nullptr);
} }
} }
// No iterate; Don't want varrefs under it (e.g.: in child dtype?)
} }
// No iterate; Don't want varrefs under it
virtual void visit(AstNode* nodep) override {
++m_nodeDepth;
iterateChildrenConst(nodep);
--m_nodeDepth;
} }
virtual void visit(AstNode* nodep) override { iterateChildrenConst(nodep); }
public: public:
// CONSTRUCTORS // CONSTRUCTORS