// -*- mode: C++; c-file-style: "cc-mode" -*-
//*************************************************************************
// DESCRIPTION: Verilator: Block code ordering
//
// Code available from: https://verilator.org
//
//*************************************************************************
//
// Copyright 2003-2024 by Wilson Snyder. This program is free software; you
// can redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//*************************************************************************
//  V3Order's Transformations:
//
//  Compute near optimal scheduling of always/wire statements
//  Make a graph of the entire netlist
//
//      For seq logic
//          Add logic_sensitive_vertex for this list of SenItems
//              Add edge for each sensitive_var->logic_sensitive_vertex
//          For AssignPre's
//              Add vertex for this logic
//                  Add edge logic_sensitive_vertex->logic_vertex
//                  Add edge logic_consumed_var_PREVAR->logic_vertex
//                  Add edge logic_vertex->logic_generated_var (same as if comb)
//                  Add edge logic_vertex->generated_var_PREORDER
//                      Cutable dependency to attempt to order delayed
//                      assignments to avoid saving state, thus we prefer
//                              a <= b ...      As the opposite order would
//                              b <= c ...      require the old value of b.
//                  Add edge consumed_var_POST->logic_vertex
//                      This prevents a consumer of the "early" value from being
//                      scheduled after we've changed to the next-cycle value
//          For Logic
//              Add vertex for this logic
//                  Add edge logic_sensitive_vertex->logic_vertex
//                  Add edge logic_generated_var_PREORDER->logic_vertex
//                      This ensures the AssignPre gets scheduled before this logic
//                  Add edge logic_vertex->consumed_var_PREVAR
//                  Add edge logic_vertex->consumed_var_POSTVAR
//                  Add edge logic_vertex->logic_generated_var (same as if comb)
//          For AssignPost's
//              Add vertex for this logic
//                  Add edge logic_sensitive_vertex->logic_vertex
//                  Add edge logic_consumed_var->logic_vertex (same as if comb)
//                  Add edge logic_vertex->logic_generated_var (same as if comb)
//                  Add edge consumed_var_POST->logic_vertex (same as if comb)
//
//      For comb logic
//          Add vertex for this logic
//          Add edge logic_consumed_var->logic_vertex
//          Add edge logic_vertex->logic_generated_var
//              Mark it cutable, as circular logic may require
//              the generated signal to become a primary input again.
//
//
//
//   Rank the graph starting at INPUTS (see V3Graph)
//
//   Visit the graph's logic vertices in ranked order
//      For all logic vertices with all inputs already ordered
//         Make ordered block for this module
//         For all ^^ in same domain
//              Move logic to ordered activation
//      When we have no more choices, we move to the next module
//      and make a new block.  Add that new activation block to the list of calls to make.
//
//*************************************************************************

#include "V3PchAstNoMT.h"  // VL_MT_DISABLED_CODE_UNIT

#include "V3Order.h"

#include "V3Const.h"
#include "V3EmitV.h"
#include "V3File.h"
#include "V3Graph.h"
#include "V3GraphStream.h"
#include "V3List.h"
#include "V3OrderInternal.h"
#include "V3OrderMoveGraph.h"
#include "V3OrderMoveGraphBuilder.h"
#include "V3Partition.h"
#include "V3PartitionGraph.h"
#include "V3Sched.h"
#include "V3SenTree.h"
#include "V3SplitVar.h"
#include "V3Stats.h"

// NOTE(review): the six directives below have no header operand in this copy of the file.
// It looks like every "<...>" payload was stripped (the same damage shows up further down as
// `static_cast(x)`, `std::pair;`, `->cast()`, `->as()`, `->is()` without template arguments).
// Restore the standard-library header names and the template arguments from the pristine
// upstream file before building - TODO confirm against upstream.
#include
#include
#include
#include
#include
#include

VL_DEFINE_DEBUG_FUNCTIONS;

//######################################################################

class OrderProcess;

// One record per unique (domain, scope) pair seen while building the serial move graph.
// Each record carries the list of vertices of that pair which are currently ready, and can
// itself be linked onto OrderProcess's list of ready domain/scope pairs.
// Instances are created by findCreate() and owned by the static s_dsMap; clear() deletes them.
class OrderMoveDomScope final {
    // Information stored for each unique loop, domain & scope trifecta
public:
    V3ListEnt m_readyDomScopeE;  // List of next ready dom scope
    V3List m_readyVertices;  // Ready vertices with same domain & scope
private:
    bool m_onReadyList = false;  // True if DomScope is already on list of ready dom/scopes
    const AstSenTree* const m_domainp;  // Domain all vertices belong to
    const AstScope* const m_scopep;  // Scope all vertices belong to

    // NOTE(review): template arguments missing in this copy; the key is built with
    // std::make_pair(domainp, scopep) in findCreate() below
    using DomScopeKey = std::pair;
    using DomScopeMap = std::map;
    static DomScopeMap s_dsMap;  // Structure registered for each dom/scope pairing

public:
    OrderMoveDomScope(const AstSenTree* domainp, const AstScope* scopep)
        : m_domainp{domainp}
        , m_scopep{scopep} {}
    // Next entry on the ready domain/scope list this object is linked into
    OrderMoveDomScope* readyDomScopeNextp() const { return m_readyDomScopeE.nextp(); }
    const AstSenTree* domainp() const { return m_domainp; }
    const AstScope* scopep() const { return m_scopep; }
    // Check the domScope is on ready list, add if not
    void ready(OrderProcess* opp);
    // Mark one vertex as finished, remove from ready list if done
    void movedVertex(OrderProcess* opp, OrderMoveVertex* vertexp);
    // STATIC MEMBERS (for lookup)
    // Delete every registered OrderMoveDomScope and empty the registry
    static void clear() {
        for (const auto& itr : s_dsMap) delete itr.second;
        s_dsMap.clear();
    }
    V3List& readyVertices() { return m_readyVertices; }
    // Return the record for (domainp, scopep), allocating and registering it on first use
    static OrderMoveDomScope* findCreate(const AstSenTree* domainp, const AstScope* scopep) {
        const DomScopeKey key = std::make_pair(domainp, scopep);
        // Insert a null placeholder; 'second' of the result is true only if the key was new
        const auto pair = s_dsMap.emplace(key, nullptr);
        if (pair.second) pair.first->second = new OrderMoveDomScope{domainp, scopep};
        return pair.first->second;
    }
    // Debug name showing the domain and scope pointers in hex
    string name() const {
        return string{"MDS:"} + " d=" + cvtToHex(domainp()) + " s=" + cvtToHex(scopep());
    }
};

OrderMoveDomScope::DomScopeMap OrderMoveDomScope::s_dsMap;

// Stream an OrderMoveDomScope using its name()
std::ostream& operator<<(std::ostream& lhs, const OrderMoveDomScope& rhs) {
    lhs << rhs.name();
    return lhs;
}

// ######################################################################
// OrderMoveVertexMaker and related

// Vertex factory handed to V3OrderMoveGraphBuilder for the serial (non-threaded) flow.
// Every vertex it makes is bound to its OrderMoveDomScope and appended to the waiting list.
class OrderMoveVertexMaker final : public V3OrderMoveGraphBuilder::MoveVertexMaker {
    // MEMBERS
    V3Graph* m_pomGraphp;  // Graph new vertices are created in
    V3List* m_pomWaitingp;  // Waiting list new vertices are appended to

public:
    // CONSTRUCTORS
    OrderMoveVertexMaker(V3Graph* pomGraphp, V3List* pomWaitingp)
        : m_pomGraphp{pomGraphp}
        , m_pomWaitingp{pomWaitingp} {}
    // METHODS
    // Create an OrderMoveVertex for lvertexp (which may be null, giving a null scope).
    // The second (unnamed) parameter is not used by this maker.
    OrderMoveVertex* makeVertexp(OrderLogicVertex* lvertexp, const OrderEitherVertex*,
                                 const AstSenTree* domainp) override {
        OrderMoveVertex* const resultp = new OrderMoveVertex{m_pomGraphp, lvertexp};
        AstScope* const scopep = lvertexp ? lvertexp->scopep() : nullptr;
        resultp->domScopep(OrderMoveDomScope::findCreate(domainp, scopep));
        resultp->m_pomWaitingE.pushBack(*m_pomWaitingp, resultp);
        return resultp;
    }

private:
    VL_UNCOPYABLE(OrderMoveVertexMaker);
};

// Vertex factory handed to V3OrderMoveGraphBuilder for the multi-threaded (mtask) flow;
// it simply forwards all arguments to the MTaskMoveVertex constructor.
class OrderMTaskMoveVertexMaker final : public V3OrderMoveGraphBuilder::MoveVertexMaker {
    V3Graph* m_pomGraphp;  // Graph new vertices are created in

public:
    explicit OrderMTaskMoveVertexMaker(V3Graph* pomGraphp)
        : m_pomGraphp{pomGraphp} {}
    MTaskMoveVertex* makeVertexp(OrderLogicVertex* lvertexp, const OrderEitherVertex* varVertexp,
                                 const AstSenTree* domainp) override {
        return new MTaskMoveVertex{m_pomGraphp, lvertexp, varVertexp, domainp};
    }

private:
    VL_UNCOPYABLE(OrderMTaskMoveVertexMaker);
};

// Comparator over graph vertices that are treated as MTaskMoveVertex's: orders first by the
// id m_ids assigns to the vertex's domain, then by the id it assigns to the vertex's scope.
class OrderVerticesByDomainThenScope final {
    PartPtrIdMap m_ids;  // Pointer -> id mapping used for the comparison

public:
    bool operator()(const V3GraphVertex* lhsp, const V3GraphVertex* rhsp) const {
        // NOTE(review): static_cast target types are missing in this copy
        const MTaskMoveVertex* const l_vxp = static_cast(lhsp);
        const MTaskMoveVertex* const r_vxp = static_cast(rhsp);
        // Primary key: domain
        uint64_t l_id = m_ids.findId(l_vxp->domainp());
        uint64_t r_id = m_ids.findId(r_vxp->domainp());
        if (l_id < r_id) return true;
        if (l_id > r_id) return false;
        // Same domain; secondary key: scope
        l_id = m_ids.findId(l_vxp->scopep());
        r_id = m_ids.findId(r_vxp->scopep());
        return l_id < r_id;
    }
};

struct MTaskVxIdLessThan final {
    // Sort vertex's, which must be AbstractMTask's, into a deterministic
    // order by comparing their serial IDs.
    bool operator()(const V3GraphVertex* lhsp, const V3GraphVertex* rhsp) const {
        // NOTE(review): static_cast target types are missing in this copy
        const AbstractMTask* const lmtaskp = static_cast(lhsp);
        const AbstractMTask* const rmtaskp = static_cast(rhsp);
        return lmtaskp->id() < rmtaskp->id();
    }
};

//######################################################################
// OrderProcess class

// Drives one ordering run over an OrderGraph: assigns a domain to every vertex, then either
// emits logic serially (processMove*) or partitions it into mtasks (processMTasks).
// The resulting nodes accumulate in m_result; the only public entry point is main().
class OrderProcess final {
    // NODE STATE
    //  AstNode::user4  -> Used by V3Const::constifyExpensiveEdit

    // STATE
    OrderGraph& m_graph;  // The ordering graph

    // Map from Trigger reference AstSenItem to the original AstSenTree
    const std::unordered_map& m_trigToSen;

    // This is a function provided by the invoker of the ordering that can provide additional
    // sensitivity expression that when triggered indicates the passed AstVarScope might have
    // changed external to the code being ordered.
    const V3Order::ExternalDomainsProvider m_externalDomains;

    SenTreeFinder m_finder;  // Global AstSenTree manager
    AstSenTree* const m_deleteDomainp;  // Dummy AstSenTree indicating needs deletion
    const string m_tag;  // Substring to add to generated names
    const bool m_slow;  // Ordering slow code
    std::vector m_result;  // The result nodes (~statements) in their sequential order

    AstCFunc* m_pomNewFuncp = nullptr;  // Current function being created
    int m_pomNewStmts = 0;  // Statements in function being created
    V3Graph m_pomGraph;  // Graph of logic elements to move
    V3List m_pomWaiting;  // List of nodes needing inputs to become ready
    friend class OrderMoveDomScope;
    V3List m_pomReadyDomScope;  // List of ready domain/scope pairs, by loopId
    // NOTE(review): key type is garbled in this copy; cfuncName() indexes it with {modp, name}
    std::map, unsigned> m_funcNums;  // Function ordinals

    VNDeleter m_deleter;  // Used to delay deletion of nodes

    // METHODS

    void process(bool multiThreaded);

    void processDomains();
    void processDomainsIterate(OrderEitherVertex* vertexp);
    void processEdgeReport();

    // processMove* routines schedule serial execution
    void processMove();
    void processMoveClear();
    void processMoveBuildGraph();
    void processMovePrepReady();
    void processMoveReadyOne(OrderMoveVertex* vertexp);
    void processMoveDoneOne(OrderMoveVertex* vertexp);
    void processMoveOne(OrderMoveVertex* vertexp, const OrderMoveDomScope* domScopep, int level);
    AstActive* processMoveOneLogic(const OrderLogicVertex* lvertexp, AstCFunc*& newFuncpr,
                                   int& newStmtsr);

    // processMTask* routines schedule threaded execution
    // Per-mtask bookkeeping collected by processMTasks()
    struct MTaskState final {
        AstMTaskBody* m_mtaskBodyp = nullptr;  // Body node created for this mtask
        std::list m_logics;  // Logic vertices of this mtask, in execution order
        ExecMTask* m_execMTaskp = nullptr;  // Matching vertex in the AstExecGraph's dep graph
        MTaskState() = default;
    };
    void processMTasks();

    // Build a unique function name of the form
    //   "_" <tag> ("_comb" | "_sequent") "__" <scope> "__" <ordinal> ["__PROF__" <location>]
    // The ordinal is counted per (module, name-so-far) pair in m_funcNums.
    string cfuncName(AstNodeModule* modp, AstSenTree* domainp, AstScope* scopep,
                     AstNode* forWhatp) {
        string name = "_" + m_tag;
        name += domainp->isMulti() ? "_comb" : "_sequent";
        name = name + "__" + scopep->nameDotless();
        // Post-increment: first function for this key gets ordinal 0
        const unsigned funcnum = m_funcNums[{modp, name}]++;
        name = name + "__" + cvtToStr(funcnum);
        if (v3Global.opt.profCFuncs()) {
            name += "__PROF__" + forWhatp->fileline()->profileFuncname();
        }
        return name;
    }

    // Make a domain that merges the two domains
    // If 'ap' is the delete domain, 'bp' is returned unchanged; 'bp' must never be the delete
    // domain otherwise.
    AstSenTree* combineDomains(AstSenTree* ap, AstSenTree* bp) {
        if (ap == m_deleteDomainp) return bp;
        UASSERT_OBJ(bp != m_deleteDomainp, bp, "Should not be delete domain");
        AstSenTree* const senTreep = ap->cloneTree(false);
        senTreep->addSensesp(bp->sensesp()->cloneTree(true));
        V3Const::constifyExpensiveEdit(senTreep);  // Remove duplicates
        senTreep->multi(true);  // Comment that it was made from 2 domains
        AstSenTree* const resultp = m_finder.getSenTree(senTreep);
        VL_DO_DANGLING(senTreep->deleteTree(), senTreep);  // getSenTree clones, so delete this
        return resultp;
    }

    // Only for member initialization in constructor
    static AstSenTree* makeDeleteDomainSenTree(FileLine* fl) {
        return new AstSenTree{fl, new AstSenItem{fl, AstSenItem::Illegal{}}};
    }

    // CONSTRUCTOR
    OrderProcess(AstNetlist* netlistp, OrderGraph& graph, const std::unordered_map& trigToSen,
                 const string& tag, bool slow,
                 const V3Order::ExternalDomainsProvider& externalDomains)
        : m_graph{graph}
        , m_trigToSen{trigToSen}
        , m_externalDomains{externalDomains}
        , m_finder{netlistp}
        , m_deleteDomainp{makeDeleteDomainSenTree(netlistp->fileline())}
        , m_tag{tag}
        , m_slow{slow} {
        // The dummy delete domain is handed to the deleter so it is disposed of with it
        m_deleter.pushDeletep(m_deleteDomainp);
    }

    ~OrderProcess() = default;

public:
    // Order the logic
    // Constructs a temporary OrderProcess, runs process(parallel) and returns its m_result.
    static std::vector main(AstNetlist* netlistp, OrderGraph& graph,
                            const std::unordered_map& trigToSen, const string& tag,
                            bool parallel, bool slow,
                            const V3Order::ExternalDomainsProvider& externalDomains) {
        OrderProcess visitor{netlistp, graph, trigToSen, tag, slow, externalDomains};
        visitor.process(parallel);
        // m_result is a member, not a local, so the explicit move is what avoids a copy here
        return std::move(visitor.m_result);
    }
};

//######################################################################
// OrderMoveDomScope methods

// Check the domScope is on ready list, add if not
void OrderMoveDomScope::ready(OrderProcess* opp) {
    if (!m_onReadyList) {
        m_onReadyList = true;
        m_readyDomScopeE.pushBack(opp->m_pomReadyDomScope, this);
    }
}

// Mark one vertex as finished, remove from ready list if done
// The caller has already unlinked the vertex from m_readyVertices (see
// OrderProcess::processMoveDoneOne), so an empty list here means nothing is left to do.
void OrderMoveDomScope::movedVertex(OrderProcess* opp, OrderMoveVertex* vertexp) {
    UASSERT_OBJ(m_onReadyList, vertexp,
                "Moving vertex from ready when nothing was on que as ready.");
    if (m_readyVertices.empty()) {  // Else more work to get to later
        m_onReadyList = false;
        m_readyDomScopeE.unlink(opp->m_pomReadyDomScope, this);
    }
}

//######################################################################

// Assign a domain to every vertex of m_graph, in the graph's vertex list order
void OrderProcess::processDomains() {
    for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp = itp->verticesNextp()) {
        UASSERT(itp, "Vertex should not be null");
        // NOTE(review): template argument of as() is missing in this copy
        OrderEitherVertex* const vertexp = itp->as();
        processDomainsIterate(vertexp);
    }
}

// Compute and store the domain of a single vertex from the domains of its weighted inputs.
// Vertices that already have a domain are left alone; vertices with no triggering input are
// given m_deleteDomainp.
void OrderProcess::processDomainsIterate(OrderEitherVertex* vertexp) {
    // The graph routines have already sorted the vertexes and edges into best->worst order
    //   Assign clock domains to each signal.
    //      Sequential logic is forced into the same sequential domain.
    //      Combo logic may be pushed into a seq domain if all its inputs are the same domain,
    //      else, if all inputs are from flops, it's end-of-sequential code
    //      else, it's full combo code
    if (vertexp->domainp()) return;  // Already processed, or sequential logic

    UINFO(5, " pdi: " << vertexp << endl);
    AstSenTree* domainp = nullptr;
    // Logic vertices start from their hybrid sensitivity, if any
    // NOTE(review): template argument of cast() is missing in this copy
    if (OrderLogicVertex* const lvtxp = vertexp->cast()) {
        domainp = lvtxp->hybridp();
    }

    // Scratch buffer hoisted out of the loop and cleared before each use
    std::vector externalDomainps;
    for (V3GraphEdge* edgep = vertexp->inBeginp(); edgep; edgep = edgep->inNextp()) {
        OrderEitherVertex* const fromVertexp = static_cast(edgep->fromp());
        // Only weighted edges from vertices whose domain matters contribute
        if (edgep->weight() && fromVertexp->domainMatters()) {
            AstSenTree* fromDomainp = fromVertexp->domainp();
            UASSERT(!fromDomainp->hasCombo(), "There should be no need for combinational domains");

            if (OrderVarVertex* const varVtxp = fromVertexp->cast()) {
                AstVarScope* const vscp = varVtxp->vscp();
                // Add in any external domains
                externalDomainps.clear();
                m_externalDomains(vscp, externalDomainps);
                for (AstSenTree* const externalDomainp : externalDomainps) {
                    UASSERT_OBJ(!externalDomainp->hasCombo(), vscp,
                                "There should be no need for combinational domains");
                    fromDomainp = combineDomains(fromDomainp, externalDomainp);
                }
            }

            // Irrelevant input vertex (never triggered)
            if (fromDomainp == m_deleteDomainp) continue;

            // First input to this vertex
            if (!domainp) domainp = fromDomainp;

            // Make a domain that merges the two domains
            if (domainp != fromDomainp) domainp = combineDomains(domainp, fromDomainp);
        }
    }

    // If nothing triggers this vertex, we can delete the corresponding logic
    if (!domainp) domainp = m_deleteDomainp;

    // Set the domain of the vertex
    vertexp->domainp(domainp);

    UINFO(5, " done d=" << cvtToHex(vertexp->domainp())
                        << (domainp == m_deleteDomainp ? " [DEL]"
                            : vertexp->domainp()->hasCombo() ? " [COMB]"
                            : vertexp->domainp()->isMulti() ? " [MULT]"
                                                            : "")
                        << " " << vertexp << endl);
}

//######################################################################
// OrderProcess - Move graph construction

// Debug aid: write "<tag>_order_edges.txt" listing every variable vertex with its domain.
// Lines are sorted before being written.
void OrderProcess::processEdgeReport() {
    // Make report of all signal names and what clock edges they have
    const string filename = v3Global.debugFilename(m_tag + "_order_edges.txt");
    const std::unique_ptr logp{V3File::new_ofstream(filename)};
    if (logp->fail()) v3fatal("Can't write " << filename);

    std::deque report;

    // Rebuild the trigger to original AstSenTree map using equality key comparison, as
    // merging domains have created new AstSenTree instances which are not in the map
    // NOTE(review): key type is garbled in this copy; it is keyed by *pair.first below
    std::unordered_map, const AstSenTree*> trigToSen;
    for (const auto& pair : m_trigToSen) trigToSen.emplace(*pair.first, pair.second);

    for (V3GraphVertex* itp = m_graph.verticesBeginp(); itp; itp = itp->verticesNextp()) {
        if (OrderVarVertex* const vvertexp = itp->cast()) {
            string name(vvertexp->vscp()->prettyName());
            // Tag the name with the kind of variable vertex
            // NOTE(review): the three is() checks lost their template arguments in this copy
            if (itp->is()) {
                name += " {PRE}";
            } else if (itp->is()) {
                name += " {POST}";
            } else if (itp->is()) {
                name += " {PORD}";
            }
            std::ostringstream os;
            os.setf(std::ios::left);
            os << " " << cvtToHex(vvertexp->vscp()) << " " << std::setw(50) << name << " ";
            AstSenTree* const senTreep = vvertexp->domainp();
            if (senTreep == m_deleteDomainp) {
                os << "DELETED";
            } else {
                for (AstSenItem* senItemp = senTreep->sensesp(); senItemp;
                     senItemp = VN_AS(senItemp->nextp(), SenItem)) {
                    if (senItemp != senTreep->sensesp()) os << " or ";
                    // Prefer printing the original sensitivity the trigger stands for
                    const auto it = trigToSen.find(*senItemp);
                    if (it != trigToSen.end()) {
                        V3EmitV::verilogForTree(it->second, os);
                    } else {
                        V3EmitV::verilogForTree(senItemp, os);
                    }
                }
            }
            report.push_back(os.str());
        }
    }

    *logp << "Signals and their clock domains:\n";
    stable_sort(report.begin(), report.end());
    for (const string& i : report) *logp << i << '\n';
}

// Reset all serial-move state: dom/scope registry, waiting list, ready list and move graph
void OrderProcess::processMoveClear() {
    OrderMoveDomScope::clear();
    m_pomWaiting.reset();
    m_pomReadyDomScope.reset();
    m_pomGraph.clear();
}

// Populate m_pomGraph (and m_pomWaiting) from m_graph via V3OrderMoveGraphBuilder
void OrderProcess::processMoveBuildGraph() {
    // Build graph of only vertices
    UINFO(5, " MoveBuildGraph\n");
    processMoveClear();
    // Vertex::user->OrderMoveVertex*, last edge added or nullptr=none
    m_pomGraph.userClearVertices();

    OrderMoveVertexMaker createOrderMoveVertex(&m_pomGraph, &m_pomWaiting);
    V3OrderMoveGraphBuilder serialPMBG(&m_graph, &m_pomGraph, m_trigToSen,
                                       &createOrderMoveVertex);
    serialPMBG.build();
}

//######################################################################
// OrderVisitor - Moving

// Serial scheduling main loop: repeatedly drain the ready domain/scope pairs, emitting the
// logic of each ready vertex, until nothing is ready. Asserts that nothing is left waiting.
void OrderProcess::processMove() {
    // The graph routines have already sorted the vertexes and edges into best->worst order
    //   Make a new waiting graph with only OrderLogicVertex's
    //   (Order is preserved in the recreation so the sorting is preserved)
    //   Move any node with all inputs ready to a "ready" graph mapped by domain and then scope
    //   While waiting graph ! empty  (and also known: something in ready graph)
    //     For all scopes in domain of top ready vertex
    //       For all vertexes in domain&scope of top ready vertex
    //           Make ordered activation block for this module
    //           Add that new activation to the list of calls to make.
    //           Move logic to ordered active
    //           Any children that have all inputs now ready move from waiting->ready graph
    //           (This may add nodes the for loop directly above needs to detect)

    processMovePrepReady();

    // New domain... another loop
    UINFO(5, " MoveIterate\n");
    while (!m_pomReadyDomScope.empty()) {
        // Start with top node on ready list's domain & scope
        OrderMoveDomScope* domScopep = m_pomReadyDomScope.begin();
        OrderMoveVertex* const topVertexp
            = domScopep->readyVertices().begin();  // lintok-begin-on-ref
        UASSERT(topVertexp, "domScope on ready list without any nodes ready under it");
        // Work on all scopes ready inside this domain
        while (domScopep) {
            UINFO(6, " MoveDomain l=" << domScopep->domainp() << endl);
            // Process all nodes ready under same domain & scope
            // Each domain/scope pair starts a fresh function
            m_pomNewFuncp = nullptr;
            while (OrderMoveVertex* vertexp
                   = domScopep->readyVertices().begin()) {  // lintok-begin-on-ref
                processMoveOne(vertexp, domScopep, 1);
            }
            // Done with scope/domain pair, pick new scope under same domain, or nullptr if none
            // left
            OrderMoveDomScope* domScopeNextp = nullptr;
            for (OrderMoveDomScope* huntp = m_pomReadyDomScope.begin(); huntp;
                 huntp = huntp->readyDomScopeNextp()) {
                if (huntp->domainp() == domScopep->domainp()) {
                    domScopeNextp = huntp;
                    break;
                }
            }
            domScopep = domScopeNextp;
        }
    }
    UASSERT(m_pomWaiting.empty(),
            "Didn't converge; nodes waiting, none ready, perhaps some input activations lost.");
    // Cleanup memory
    processMoveClear();
}

// Seed the ready lists with every waiting vertex that has no inputs
void OrderProcess::processMovePrepReady() {
    // Make list of ready nodes
    UINFO(5, " MovePrepReady\n");
    for (OrderMoveVertex* vertexp = m_pomWaiting.begin(); vertexp;) {
        // Fetch the successor first: processMoveReadyOne unlinks vertexp from the waiting list
        OrderMoveVertex* const nextp = vertexp->pomWaitingNextp();
        if (vertexp->isWait() && vertexp->inEmpty()) processMoveReadyOne(vertexp);
        vertexp = nextp;
    }
}

// Move one vertex from the waiting list to "ready". Non-logic vertices are completed
// immediately, which may in turn make their successors ready (mutual recursion with
// processMoveDoneOne).
void OrderProcess::processMoveReadyOne(OrderMoveVertex* vertexp) {
    // Recursive!
    // Move one node from waiting to ready list
    vertexp->setReady();
    // Remove node from waiting list
    vertexp->m_pomWaitingE.unlink(m_pomWaiting, vertexp);
    if (vertexp->logicp()) {
        // Add to ready list (indexed by domain and scope)
        vertexp->m_readyVerticesE.pushBack(vertexp->domScopep()->m_readyVertices, vertexp);
        vertexp->domScopep()->ready(this);
    } else {
        // vertexp represents a non-logic vertex.
        // Recurse to mark its following neighbors ready.
        processMoveDoneOne(vertexp);
    }
}

// Mark one vertex as moved, delete its outgoing edges, and promote any successor whose
// inputs have thereby all been resolved.
void OrderProcess::processMoveDoneOne(OrderMoveVertex* vertexp) {
    // Move one node from ready to completion
    vertexp->setMoved();
    // Unlink from ready lists
    if (vertexp->logicp()) {
        vertexp->m_readyVerticesE.unlink(vertexp->domScopep()->m_readyVertices, vertexp);
        vertexp->domScopep()->movedVertex(this, vertexp);
    }
    // Don't need to add it to another list, as we're done with it
    // Mark our outputs as one closer to ready
    for (V3GraphEdge *edgep = vertexp->outBeginp(), *nextp; edgep; edgep = nextp) {
        // Grab the next edge before this one is deleted below
        nextp = edgep->outNextp();
        OrderMoveVertex* const toVertexp = static_cast(edgep->top());
        UINFO(9, " Clear to " << (toVertexp->inEmpty() ? "[EMP] " : " ") << toVertexp << endl);
        // Delete this edge
        VL_DO_DANGLING(edgep->unlinkDelete(), edgep);
        if (toVertexp->inEmpty()) {
            // If destination node now has all inputs resolved; recurse to move that vertex
            // This is thus depth first (before width) which keeps the
            // resulting executable's d-cache happy.
            processMoveReadyOne(toVertexp);
        }
    }
}

// Emit the logic of one ready vertex into the current function, record any new AstActive
// in m_result, then mark the vertex done. 'level' is only used in the debug message.
void OrderProcess::processMoveOne(OrderMoveVertex* vertexp, const OrderMoveDomScope* domScopep,
                                  int level) {
    UASSERT_OBJ(vertexp->domScopep() == domScopep, vertexp, "Domain mismatch; list misbuilt?");
    const OrderLogicVertex* const lvertexp = vertexp->logicp();
    const AstScope* const scopep = lvertexp->scopep();
    UINFO(5, " POSmove l" << std::setw(3) << level << " d=" << cvtToHex(lvertexp->domainp())
                          << " s=" << cvtToHex(scopep) << " " << lvertexp << endl);
    AstActive* const newActivep
        = processMoveOneLogic(lvertexp, m_pomNewFuncp /*ref*/, m_pomNewStmts /*ref*/);
    if (newActivep) m_result.push_back(newActivep);
    processMoveDoneOne(vertexp);
}

// Move the statements of one logic vertex into AstCFunc(s).
//   lvertexp   Logic vertex whose node is to be moved
//   newFuncpr  In/out: function currently being filled; reset to nullptr to force a new one,
//              updated whenever a new function is created here
//   newStmtsr  In/out: running node count of the current function, used for splitting
// Returns the first AstActive created for new functions (further ones are chained with
// addNext), or nullptr if no new function was created. Logic in the delete domain is not
// emitted; its nodes are queued on m_deleter instead.
AstActive* OrderProcess::processMoveOneLogic(const OrderLogicVertex* lvertexp,
                                             AstCFunc*& newFuncpr, int& newStmtsr) {
    AstActive* activep = nullptr;
    AstScope* const scopep = lvertexp->scopep();
    AstSenTree* const domainp = lvertexp->domainp();
    AstNode* nodep = lvertexp->nodep();
    AstNodeModule* const modp = scopep->modp();
    UASSERT(modp, "nullptr");

    // We are moving the logic into a CFunc, so unlink it from the AstActive
    nodep->unlinkFrBack();

    // Process procedures per statement (unless profCFuncs), so we can split CFuncs within
    // procedures. Everything else is handled in one go
    bool suspendable = false;
    bool needProcess = false;
    bool slow = m_slow;
    if (AstNodeProcedure* const procp = VN_CAST(nodep, NodeProcedure)) {
        suspendable = procp->isSuspendable();
        needProcess = procp->needProcess();
        // A suspendable 'always' is never marked slow
        if (suspendable) slow = slow && !VN_IS(procp, Always);
        // Continue with the procedure's statements; the procedure shell is deleted later
        nodep = procp->stmtsp();
        m_deleter.pushDeletep(procp);
    }

    // Put suspendable processes into individual functions on their own
    if (suspendable) newFuncpr = nullptr;

    // When profCFuncs, create a new function for all logic block
    if (v3Global.opt.profCFuncs()) newFuncpr = nullptr;

    while (nodep) {
        // Split the CFunc if too large (but not when profCFuncs)
        if (!suspendable && !v3Global.opt.profCFuncs()
            && (v3Global.opt.outputSplitCFuncs()
                && v3Global.opt.outputSplitCFuncs() < newStmtsr)) {
            // Put every statement into a unique function to ease profiling or reduce function
            // size
            newFuncpr = nullptr;
        }
        if (!newFuncpr && domainp != m_deleteDomainp) {
            const string name = cfuncName(modp, domainp, scopep, nodep);
            newFuncpr
                = new AstCFunc{nodep->fileline(), name, scopep, suspendable ? "VlCoroutine" : ""};
            if (needProcess) newFuncpr->setNeedProcess();
            newFuncpr->isStatic(false);
            newFuncpr->isLoose(true);
            newFuncpr->slow(slow);
            newStmtsr = 0;
            scopep->addBlocksp(newFuncpr);
            // Create top call to it
            AstCCall* const callp = new AstCCall{nodep->fileline(), newFuncpr};
            callp->dtypeSetVoid();
            // Where will we be adding the call?
            AstActive* const newActivep = new AstActive{nodep->fileline(), name, domainp};
            newActivep->addStmtsp(callp->makeStmt());
            if (!activep) {
                activep = newActivep;
            } else {
                activep->addNext(newActivep);
            }
            UINFO(6, " New " << newFuncpr << endl);
        }

        // Remember the successor before the statement is unlinked
        AstNode* const nextp = nodep->nextp();
        // When processing statements in a procedure, unlink the current statement
        if (nodep->backp()) nodep->unlinkFrBack();

        if (domainp == m_deleteDomainp) {
            VL_DO_DANGLING(m_deleter.pushDeletep(nodep), nodep);
        } else {
            newFuncpr->addStmtsp(nodep);
            // Add in the number of nodes we're adding
            if (v3Global.opt.outputSplitCFuncs()) newStmtsr += nodep->nodeCount();
        }

        nodep = nextp;
    }

    // Put suspendable processes into individual functions on their own
    if (suspendable) newFuncpr = nullptr;

    return activep;
}

// Threaded scheduling: build a logic-only graph, partition it into mtasks, emit per-mtask
// functions, and create the AstExecGraph (appended to m_result) describing mtask dependencies.
void OrderProcess::processMTasks() {
    // For nondeterminism debug:
    V3Partition::hashGraphDebug(&m_graph, "V3Order's m_graph");

    // We already produced a graph of every var, input, and logic
    // block and all dependencies; this is 'm_graph'.
    //
    // Now, starting from m_graph, make a slightly-coarsened graph representing
    // only logic, and discarding edges we know we can ignore.
    // This is quite similar to the 'm_pomGraph' of the serial code gen:
    V3Graph logicGraph;
    {
        OrderMTaskMoveVertexMaker create_mtask_vertex(&logicGraph);
        V3OrderMoveGraphBuilder mtask_pmbg(&m_graph, &logicGraph, m_trigToSen,
                                           &create_mtask_vertex);
        mtask_pmbg.build();
    }

    // Needed? We do this for m_pomGraph in serial mode, so do it here too:
    logicGraph.removeRedundantEdgesMax(&V3GraphEdge::followAlwaysTrue);

    // Partition logicGraph into LogicMTask's. The partitioner will annotate
    // each vertex in logicGraph with a 'color' which is really an mtask ID
    // in this context.
    V3Partition partitioner{&m_graph, &logicGraph};
    V3Graph mtasks;
    partitioner.go(&mtasks);

    // NOTE(review): key/value types are missing in this copy; it is indexed by mtask id below
    std::unordered_map mtaskStates;

    // Iterate through the entire logicGraph. For each logic node,
    // attach it to a per-MTask ordered list of logic nodes.
    // This is the order we'll execute logic nodes within the MTask.
    //
    // MTasks may span scopes and domains, so sort by both here:
    GraphStream emit_logic{&logicGraph};
    const V3GraphVertex* moveVxp;
    while ((moveVxp = emit_logic.nextp())) {
        const MTaskMoveVertex* const movep = static_cast(moveVxp);
        // Only care about logic vertices
        if (!movep->logicp()) continue;

        const unsigned mtaskId = movep->color();
        UASSERT(mtaskId > 0, "Every MTaskMoveVertex should have an mtask assignment >0");

        // Add this logic to the per-mtask order
        mtaskStates[mtaskId].m_logics.push_back(movep->logicp());

        // Since we happen to be iterating over every logic node,
        // take this opportunity to annotate each AstVar with the id's
        // of mtasks that consume it and produce it. We'll use this
        // information in V3EmitC when we lay out var's in memory.
        const OrderLogicVertex* const logicp = movep->logicp();
        for (const V3GraphEdge* edgep = logicp->inBeginp(); edgep; edgep = edgep->inNextp()) {
            const OrderVarVertex* const pre_varp = edgep->fromp()->cast();
            if (!pre_varp) continue;
            AstVar* const varp = pre_varp->vscp()->varp();
            // varp depends on logicp, so logicp produces varp,
            // and vice-versa below
            varp->addProducingMTaskId(mtaskId);
        }
        for (const V3GraphEdge* edgep = logicp->outBeginp(); edgep; edgep = edgep->outNextp()) {
            const OrderVarVertex* const post_varp = edgep->top()->cast();
            if (!post_varp) continue;
            AstVar* const varp = post_varp->vscp()->varp();
            varp->addConsumingMTaskId(mtaskId);
        }
        // TODO? We ignore IO vars here, so those will have empty mtask
        // signatures. But we could also give those mtask signatures.
    }

    // Create the AstExecGraph node which represents the execution
    // of the MTask graph.
    FileLine* const rootFlp = v3Global.rootp()->fileline();
    AstExecGraph* const execGraphp = new AstExecGraph{rootFlp, m_tag};
    m_result.push_back(execGraphp);

    // Create CFuncs and bodies for each MTask.
    GraphStream emit_mtasks(&mtasks);
    const V3GraphVertex* mtaskVxp;
    while ((mtaskVxp = emit_mtasks.nextp())) {
        const AbstractLogicMTask* const mtaskp = static_cast(mtaskVxp);

        // Create a body for this mtask
        AstMTaskBody* const bodyp = new AstMTaskBody{rootFlp};
        MTaskState& state = mtaskStates[mtaskp->id()];
        state.m_mtaskBodyp = bodyp;

        // Create leaf CFunc's to run this mtask's logic,
        // and create a set of AstActive's to call those CFuncs.
        // Add the AstActive's into the AstMTaskBody.
        const AstSenTree* last_domainp = nullptr;
        AstCFunc* leafCFuncp = nullptr;
        int leafStmts = 0;
        for (const OrderLogicVertex* logicp : state.m_logics) {
            if (logicp->domainp() != last_domainp) {
                // Start a new leaf function.
                leafCFuncp = nullptr;
            }
            last_domainp = logicp->domainp();

            AstActive* const newActivep
                = processMoveOneLogic(logicp, leafCFuncp /*ref*/, leafStmts /*ref*/);
            if (newActivep) bodyp->addStmtsp(newActivep);
        }

        // Translate the LogicMTask graph into the corresponding ExecMTask
        // graph, which will outlive V3Order and persist for the remainder
        // of verilator's processing.
        // - The LogicMTask graph points to MTaskMoveVertex's
        //   and OrderLogicVertex's which are ephemeral to V3Order.
        // - The ExecMTask graph and the AstMTaskBody's produced here
        //   persist until code generation time.
        V3Graph* const depGraphp = execGraphp->depGraphp();
        state.m_execMTaskp = new ExecMTask{depGraphp, bodyp, mtaskp->id()};
        // Cross-link each ExecMTask and MTaskBody
        //  Q: Why even have two objects?
        //  A: One is an AstNode, the other is a GraphVertex,
        //     to combine them would involve multiple inheritance...
        state.m_mtaskBodyp->execMTaskp(state.m_execMTaskp);
        // Mirror each incoming mtask dependency as an edge between the ExecMTasks
        for (V3GraphEdge* inp = mtaskp->inBeginp(); inp; inp = inp->inNextp()) {
            const V3GraphVertex* fromVxp = inp->fromp();
            const AbstractLogicMTask* const fromp = static_cast(fromVxp);
            const MTaskState& fromState = mtaskStates[fromp->id()];
            new V3GraphEdge{depGraphp, fromState.m_execMTaskp, state.m_execMTaskp, 1};
        }
        execGraphp->addMTaskBodiesp(bodyp);
    }
}

//######################################################################
// OrderVisitor - Top processing

// Run the whole ordering flow on m_graph: break cycles, rank/sort, assign domains, then
// schedule either serially or as mtasks depending on 'multiThreaded'. Graph dumps are
// written between stages when graph dumping is enabled.
void OrderProcess::process(bool multiThreaded) {
    // Dump data
    if (dumpGraphLevel()) m_graph.dumpDotFilePrefixed(m_tag + "_orderg_pre");

    // Break cycles. Each strongly connected subgraph (including cutable
    // edges) will have its own color, and corresponds to a loop in the
    // original graph. However the new graph will be acyclic (the removed
    // edges are actually still there, just with weight 0).
    UINFO(2, " Acyclic and Order...\n");
    m_graph.acyclic(&V3GraphEdge::followAlwaysTrue);
    if (dumpGraphLevel()) m_graph.dumpDotFilePrefixed(m_tag + "_orderg_acyc");

    // Assign ranks so we know what to follow
    // Then, sort vertices and edges by that ordering
    m_graph.order();
    if (dumpGraphLevel()) m_graph.dumpDotFilePrefixed(m_tag + "_orderg_order");

    // Assign logic vertices to new domains
    UINFO(2, " Domains...\n");
    processDomains();
    if (dumpGraphLevel()) m_graph.dumpDotFilePrefixed(m_tag + "_orderg_domain");

    if (dumpLevel()) processEdgeReport();

    if (!multiThreaded) {
        UINFO(2, " Construct Move Graph...\n");
        processMoveBuildGraph();
        // Different prefix (ordermv) as it's not the same graph
        if (dumpGraphLevel() >= 4) m_pomGraph.dumpDotFilePrefixed(m_tag + "_ordermv_start");
        m_pomGraph.removeRedundantEdgesMax(&V3GraphEdge::followAlwaysTrue);
        if (dumpGraphLevel() >= 4) m_pomGraph.dumpDotFilePrefixed(m_tag + "_ordermv_simpl");

        UINFO(2, " Move...\n");
        processMove();
    } else {
        UINFO(2, " Set up mtasks...\n");
        processMTasks();
    }

    // Dump data
    if (dumpGraphLevel()) m_graph.dumpDotFilePrefixed(m_tag + "_orderg_done");
}

//######################################################################

// Public entry point: order the given logic and wrap the result in a new "_eval_<tag>"
// function added to the top scope.
//   netlistp         Netlist being processed
//   logic            Logic to order; deleteActives() is called on each entry before returning
//   trigToSen        Map passed through to graph building and OrderProcess
//   tag              Substring used in generated names
//   parallel         True to schedule as mtasks, false for serial scheduling
//   slow             Value passed to slow() on the generated functions
//   externalDomains  Callback supplying additional domains for a variable
// Returns the newly created function containing the ordered statements.
AstCFunc* V3Order::order(AstNetlist* netlistp,  //
                         const std::vector& logic,  //
                         const std::unordered_map& trigToSen,
                         const string& tag,  //
                         bool parallel,  //
                         bool slow,  //
                         const ExternalDomainsProvider& externalDomains) {
    // Order the code
    const std::unique_ptr graph = buildOrderGraph(netlistp, logic, trigToSen);
    // Binding the returned temporary to a const reference keeps it alive for this scope
    const auto& nodeps
        = OrderProcess::main(netlistp, *graph, trigToSen, tag, parallel, slow, externalDomains);

    // Create the result function
    AstScope* const scopeTopp = netlistp->topScopep()->scopep();
    FileLine* const flp = netlistp->fileline();
    AstCFunc* const funcp = new AstCFunc{flp, "_eval_" + tag, scopeTopp, ""};
    funcp->dontCombine(true);
    funcp->isStatic(false);
    funcp->isLoose(true);
    funcp->slow(slow);
    funcp->isConst(false);
    funcp->declPrivate(true);
    scopeTopp->addBlocksp(funcp);

    if (v3Global.opt.profExec()) {
        funcp->addStmtsp(new AstCStmt{
            flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"func " + tag + "\");\n"});
    }

    // Add ordered statements to the result function
    for (AstNode* const nodep : nodeps) funcp->addStmtsp(nodep);

    if (v3Global.opt.profExec()) {
        funcp->addStmtsp(new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPop();\n"});
    }

    // Dispose of the remnants of the inputs
    for (auto* const lbsp : logic) lbsp->deleteActives();

    // Done
    return funcp;
}