// -*- mode: C++; c-file-style: "cc-mode" -*-
//*************************************************************************
// DESCRIPTION: Verilator: NBA shadow variable assignment elimination
//
// Code available from: https://verilator.org
//
//*************************************************************************
//
// Copyright 2003-2025 by Wilson Snyder. This program is free software; you
// can redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//*************************************************************************
//
// Given a pair of variables 'd' and 'q', where 'd' is the shadow variable
// created by V3Delayed using the ShadowVar scheme, attempt to turn this code:
//
//      ... reads of 'q' ok here
//  X1: d = q;    // First and complete write of 'd' (the pre-scheduled NBA initial assignment)
//      ... reads of 'q' ok here
//      d = foo;  // Second assignment to 'd'
//      ... no reads of 'q' here
//  X2: q = d;    // Only and complete write of 'q', only read of 'd' (the post-scheduled NBA commit)
//      ... reads of 'q' ok here
//
// into:
//
//  X1: q = q;
//      ...
//      q = foo;
//      ...
//  X2: q = q;
//
// by replacing 'd' with 'q'. This then allows deletion of 'd' and the two assignments.
//
// More formally, with the non-sequential mtasks graph, we must prove all of these:
//  1) No reads of 'd' anywhere except for the ASSIGNPOST itself
//  2) No write of 'q' anywhere except for the ASSIGNPOST itself
//  3) The first write of 'd' is complete (writes all bits)
//  4) Every read of 'q' either falls before the second write of 'd', or after the only read of 'd'
//
// Notes:
//
// While these rules could be applied to any variables, not just the NBA
// shadow variables.
**Proving** that no reads of 'q' happen after the second // assignment of 'd' is difficult due to the presence of loops (the whole // eval_nba is inside a loop), virtual methods and other dynamic executions. // For the NBA shadow variables, we can compute this safely as their use // is understood as we schedule their first and last assignments specially. // // Constraint 2 could be relaxed to "no write of 'q' before the only read of // 'd', however we only have one write of 'q', created in V3Delayed, so // trying harder would just be a code coverage hole today. // // Constraint 3 should always hold with V3Delayed, will check assert it. // //************************************************************************* #include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT #include "V3LifePost.h" #include "V3ExecGraph.h" #include "V3GraphPathChecker.h" #include "V3Stats.h" #include #include #include VL_DEFINE_DEBUG_FUNCTIONS; //###################################################################### // LifePost delay elimination class LifePostDlyVisitor final : public VNVisitorConst { // TYPES // Location of AstNode within the program template class Location final { template friend class Location; T_Node* m_nodep; // The AstNode being recorded const AstExecGraph* m_egraphp; // AstExecTraph location is under, if any const ExecMTask* m_mtaskp; // The ExecMTask location is under, if any uint32_t m_seqNum; // Location counter public: Location(T_Node* nodep, const AstExecGraph* egp, const ExecMTask* mtaskp, uint32_t seqNum) : m_nodep{nodep} , m_egraphp{egp} , m_mtaskp{mtaskp} , m_seqNum{seqNum} {} Location() = delete; T_Node* const& nodep() const { return m_nodep; } // "is before" - Note: Equality (concurrency) is possible iff they are independent mtasks! 
template bool operator<(const Location& that) const { // If they are in different mtasks under the same graph, check for a path in the graph if (m_egraphp && m_egraphp == that.m_egraphp && m_mtaskp != that.m_mtaskp) { GraphPathChecker* const checkerp = m_egraphp->user1u().to(); return checkerp->pathExistsFrom(m_mtaskp, that.m_mtaskp); } // Otherwise the sequence numbers work (one/both outside graph, or both in same mtask) return m_seqNum < that.m_seqNum; } }; // NODE STATE // AstVarScope::user1() -> bool: referenced outside _eval__nba // AstVarScope::user4() -> AstVarScope*: Replacement variable // AstExecGraph::user1p() -> GraphPathChecker*: path checker for this AstExecGraph const VNUser1InUse m_inuser1; const VNUser4InUse m_inuser4; // STATE uint32_t m_sequence = 0; // Sequence number of assigns/varrefs, const AstExecGraph* m_execGraphp = nullptr; // Current AstExecGraph being processed (or null) const ExecMTask* m_execMTaskp = nullptr; // Current ExecMTask being processed (or null) VDouble0 m_statAssnDel; // Statistic tracking // Maps from Varscope to all their reads and writes using LocMap = std::unordered_map>>; LocMap m_reads; // VarScope read locations LocMap m_writes; // VarScope write locations std::vector> m_assigns; // Assignments considered for removal std::vector> m_checkers; // Storage for exec graph checkers const AstCFunc* const m_evalNbap; // The _eval__nba function bool m_inEvalNba = false; // Traversing under _eval__nba // METHODS void squashAssignposts() { for (const Location& assign : m_assigns) { AstVarScope* const dVscp = VN_AS(assign.nodep()->rhsp(), VarRef)->varScopep(); AstVarScope* const qVscp = VN_AS(assign.nodep()->lhsp(), VarRef)->varScopep(); // We are considering deleting 'y', don't do it if referenced external to _eval__nba if (dVscp->user1()) continue; const std::vector>& dWrites = m_writes[dVscp]; UASSERT_OBJ(!dWrites.empty(), dVscp, "NBA shadow variable read but never written"); // *** See file header for requirements *** // 
Proof (1) - Only read is on the RHS of this assignment if (m_reads[dVscp].size() > 1) continue; // Proof (2) - Only write is on the LHS of this assignment if (m_writes[qVscp].size() > 1) continue; // Proof (3) - Should always hold UASSERT_OBJ(VN_IS(dWrites.at(0).nodep()->backp(), NodeAssign), dVscp, "Partial first write to NBA shadow variable"); // Proof (4) if (dWrites.size() > 1) { // V3Order always serializes writes so they cannot be concurrent UASSERT_OBJ(dWrites[0] < dWrites[1], dVscp, "Concurrent writes"); const bool qRdOK = [&]() { for (const Location& qRead : m_reads[qVscp]) { if (assign < qRead) continue; // Check from 2nd write of 'd' for (size_t i = 1; i < dWrites.size(); ++i) { if (qRead < dWrites[i]) continue; return false; } } return true; }(); if (!qRdOK) continue; } // Mark variable for replacement dVscp->user4p(qVscp); // Delete assignment UINFO(4, " DELETE " << assign.nodep()); VL_DO_DANGLING(assign.nodep()->unlinkFrBack()->deleteTree(), assign.nodep()); ++m_statAssnDel; } } // Trace code in the given function void trace(AstCFunc* nodep) { VL_RESTORER(m_inEvalNba); if (nodep == m_evalNbap) m_inEvalNba = true; iterateChildrenConst(nodep); } // VISITORS void visit(AstNetlist* nodep) override { // First, build maps of every location (mtask and sequence // within the mtask) where each varscope is read, and written. iterateChildrenConst(nodep); // We need to be able to pick up the first write of each variable. // V3Order serializes all writes, and we trace AstExecGraph in // dependency order, so the first one we encounter during tracing should // always be the one. It's somewhat expensive to assert so only with debugCheck(). if (v3Global.opt.debugCheck()) { for (auto& pair : m_writes) { const std::vector>& writes = pair.second; const Location& first = writes[0]; for (size_t i = 1; i < writes.size(); ++i) { UASSERT_OBJ(first < writes[i], pair.first, "First write is not the first"); } } } // Find all assignposts. 
Determine which ones can be // eliminated. Remove those, and mark their dly vars' user4 field // to indicate we should replace these dly vars with their original // variables. squashAssignposts(); // Apply replacements nodep->foreach([](AstVarRef* nodep) { const AstVarScope* const vscp = nodep->varScopep(); AstVarScope* const replacementp = VN_AS(vscp->user4p(), VarScope); if (!replacementp) return; UINFO(9, " Replace " << nodep << " target " << vscp << " with " << replacementp); nodep->varScopep(replacementp); nodep->varp(replacementp->varp()); }); } void visit(AstVarRef* nodep) override { // We only try to optimize NBA shadow variables if (!nodep->varScopep()->optimizeLifePost()) return; // Mark variables referenced outside _eval__nba if (!m_inEvalNba) { nodep->varScopep()->user1(true); return; } // Consumption/generation of a variable, const AstVarScope* const vscp = nodep->varScopep(); UASSERT_OBJ(vscp, nodep, "Scope not assigned"); ++m_sequence; if (nodep->access().isWriteOrRW()) { m_writes[vscp].emplace_back(nodep, m_execGraphp, m_execMTaskp, m_sequence); } if (nodep->access().isReadOrRW()) { m_reads[vscp].emplace_back(nodep, m_execGraphp, m_execMTaskp, m_sequence); } } void visit(AstNodeAssign* nodep) override { // Record RHS before assignment iterateConst(nodep->rhsp()); // If a straight assignment between NBA variables, consider for removal if (const AstVarRef* const lhsp = VN_CAST(nodep->lhsp(), VarRef)) { if (const AstVarRef* const rhsp = VN_CAST(nodep->rhsp(), VarRef)) { if (lhsp->varScopep()->optimizeLifePost() // && rhsp->varScopep()->optimizeLifePost()) { m_assigns.emplace_back(nodep, m_execGraphp, m_execMTaskp, ++m_sequence); } } } // Record LHS after assignment iterateConst(nodep->lhsp()); } void visit(AstNodeCCall* nodep) override { iterateChildrenConst(nodep); // Entry points are roots of the trace, no need to do it here if (nodep->funcp()->entryPoint()) return; // Trace cellee trace(nodep->funcp()); } void visit(AstExecGraph* nodep) override { 
UASSERT_OBJ(!m_execGraphp, nodep, "Nested AstExecGraph"); VL_RESTORER(m_execGraphp); m_execGraphp = nodep; // Set up the path checker for this graph UASSERT_OBJ(!nodep->user1p(), nodep, "AstExecGraph visited twice"); m_checkers.emplace_back(new GraphPathChecker{nodep->depGraphp()}); nodep->user1p(m_checkers.back().get()); // Trace each mtask body. Note: the vertices are in topological order, // and we do not reset m_sequence, so a lower sequence number does // guarantee a node is not earlier than a higher sequence number, but // might still be concurrent. for (V3GraphVertex& mtaskVtx : nodep->depGraphp()->vertices()) { const ExecMTask* const mtaskp = mtaskVtx.as(); VL_RESTORER(m_execMTaskp); m_execMTaskp = mtaskp; trace(mtaskp->funcp()); } } void visit(AstCFunc* nodep) override { // Start a trace from each entry point if (nodep->entryPoint()) trace(nodep); } //----- void visit(AstNode* nodep) override { iterateChildrenConst(nodep); } public: // CONSTRUCTORS explicit LifePostDlyVisitor(AstNetlist* netlistp) : m_evalNbap{netlistp->evalNbap()} { iterateConst(netlistp); } ~LifePostDlyVisitor() override { V3Stats::addStat("Optimizations, Lifetime postassign deletions", m_statAssnDel); } }; //###################################################################### // LifePost class functions void V3LifePost::lifepostAll(AstNetlist* nodep) { UINFO(2, __FUNCTION__ << ":"); { LifePostDlyVisitor{nodep}; } // Destruct before checking V3Global::dumpCheckGlobalTree("life_post", 0, dumpTreeEitherLevel() >= 3); }