diff --git a/Changes b/Changes index c10b4aa9d..1c7cf0e4b 100644 --- a/Changes +++ b/Changes @@ -23,6 +23,7 @@ Verilator 5.009 devel * Support complicated IEEE 'for' assignments. * Support $fopen as an expression. * Support ++/-- on dotted member variables. +* Optimize static trigger evaluation (#4142). [Geza Lore, X-EPIC] * Change range order warning from LITENDIAN to ASCRANGE (#4010). [Iztok Jeras] * Change ZERODLY to a warning. * Fix random internal crashes (#666). [Dag Lem] diff --git a/include/verilated.h b/include/verilated.h index a1bad1d49..ac2d3c773 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -149,6 +149,15 @@ enum VerilatedVarFlags { VLVF_DPI_CLAY = (1 << 10) // DPI compatible C standard layout }; +//============================================================================= +// Utility functions + +template +inline constexpr size_t roundUpToMultipleOf(size_t value) { + static_assert((N & (N - 1)) == 0, "'N' must be a power of 2"); + return (value + N - 1) & ~(N - 1); +} + //========================================================================= // Mutex and threading support diff --git a/include/verilated_types.h b/include/verilated_types.h index 940aeafe9..0214e8fb4 100644 --- a/include/verilated_types.h +++ b/include/verilated_types.h @@ -79,9 +79,9 @@ extern std::string VL_TO_STRING_W(int words, const WDataInP obj); template // class VlTriggerVec final { // TODO: static assert T_size > 0, and don't generate when empty -private: + // MEMBERS - std::array m_flags; // State of the assoc array + alignas(16) std::array(T_size) / 64> m_flags; // The flags public: // CONSTRUCTOR @@ -91,10 +91,18 @@ public: // METHODS // Set all elements to false - void clear() { m_flags.fill(false); } + void clear() { m_flags.fill(0); } - // Reference to element at 'index' - bool& at(size_t index) { return m_flags.at(index); } + // Word at given 'wordIndex' + uint64_t word(size_t wordIndex) const { return m_flags[wordIndex]; } + + // Set specified flag to given value + void set(size_t index, bool value) { + uint64_t& w = m_flags[index / 64]; + const size_t bitIndex = index % 64; + w &= ~(1ULL << bitIndex); + w |= (static_cast(value) << bitIndex); + } // Return true iff at least one element is set bool any() const { @@ -104,13 +112,13 @@ public: } // Set all elements true in 'this' that are set in 'other' - void set(const VlTriggerVec& other) { + void thisOr(const VlTriggerVec& other) { for (size_t i = 0; i < m_flags.size(); ++i) m_flags[i] |= other.m_flags[i]; } // Set elements of 'this' to 'a & !b' element-wise void andNot(const VlTriggerVec& a, const VlTriggerVec& b) { - for (size_t i = 0; i < m_flags.size(); ++i) m_flags[i] = a.m_flags[i] && !b.m_flags[i]; + for (size_t i = 0; i < m_flags.size(); ++i) m_flags[i] = a.m_flags[i] & ~b.m_flags[i]; } }; diff --git a/include/verilated_vcd_c.cpp b/include/verilated_vcd_c.cpp index f1d93a183..e9efd953c 100644 --- a/include/verilated_vcd_c.cpp +++ b/include/verilated_vcd_c.cpp @@ -61,16 +61,6 @@ constexpr unsigned VL_TRACE_MAX_VCD_CODE_SIZE = 5; // Maximum length of a VCD s // cache-lines. constexpr unsigned VL_TRACE_SUFFIX_ENTRY_SIZE = 8; // Size of a suffix entry -//============================================================================= -// Utility functions: TODO: put these in a common place and share them. - -template -static size_t roundUpToMultipleOf(size_t value) { - static_assert((N & (N - 1)) == 0, "'N' must be a power of 2"); - size_t mask = N - 1; - return (value + mask) & ~mask; -} - //============================================================================= // Specialization of the generics for this trace format diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 3ebd67499..6c300058f 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -2321,7 +2321,7 @@ int AstCMethodHard::instrCount() const { if (AstBasicDType* const basicp = fromp()->dtypep()->basicp()) { // TODO: add a more structured description of library methods, rather than using string // matching. See #3715. - if (basicp->isTriggerVec() && m_name == "at") { + if (basicp->isTriggerVec() && m_name == "word") { // This is an important special case for scheduling so we compute it precisely, // it is simply a load. return INSTR_COUNT_LD; diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 232d4ebca..47f1a4b05 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -2746,52 +2746,74 @@ private: } } - struct SenItemCmp { - bool operator()(const AstSenItem* lhsp, const AstSenItem* rhsp) const { - if (lhsp->type() < rhsp->type()) return true; - if (lhsp->type() > rhsp->type()) return false; - // Looks visually better if we keep sorted by name - if (!lhsp->sensp() && rhsp->sensp()) return true; - if (lhsp->sensp() && !rhsp->sensp()) return false; - if (lhsp->varrefp() && !rhsp->varrefp()) return true; - if (!lhsp->varrefp() && rhsp->varrefp()) return false; - if (lhsp->varrefp() && rhsp->varrefp()) { - if (lhsp->varrefp()->name() < rhsp->varrefp()->name()) return true; - if (lhsp->varrefp()->name() > rhsp->varrefp()->name()) return false; + class SenItemCmp final { + static int cmp(const AstNodeExpr* ap, const AstNodeExpr* bp) { + const VNType aType = ap->type(); + const VNType bType = bp->type(); + if (aType != bType) return static_cast(bType) - static_cast(aType); + + if (const AstVarRef* const aRefp = VN_CAST(ap, VarRef)) { + const AstVarRef* const bRefp = VN_AS(bp, VarRef); + // Looks visually better if we keep sorted by name + if (aRefp->name() < bRefp->name()) return -1; + if (aRefp->name() > bRefp->name()) return 1; // But might be same name with different scopes - if (lhsp->varrefp()->varScopep() < rhsp->varrefp()->varScopep()) return true; - if (lhsp->varrefp()->varScopep() > rhsp->varrefp()->varScopep()) return false; + if (aRefp->varScopep() < bRefp->varScopep()) return -1; + if (aRefp->varScopep() > bRefp->varScopep()) return 1; // Or rarely, different data types - if (lhsp->varrefp()->dtypep() < rhsp->varrefp()->dtypep()) return true; - if (lhsp->varrefp()->dtypep() > rhsp->varrefp()->dtypep()) return false; - } else if (AstCMethodHard* const lp = VN_CAST(lhsp->sensp(), CMethodHard)) { - if (AstCMethodHard* const rp = VN_CAST(rhsp->sensp(), CMethodHard)) { - if (AstVarRef* const lRefp = VN_CAST(lp->fromp(), VarRef)) { - if (AstVarRef* const rRefp = VN_CAST(rp->fromp(), VarRef)) { - if (lRefp->name() < rRefp->name()) return true; - if (lRefp->name() > rRefp->name()) return false; - // But might be same name with different scopes - if (lRefp->varScopep() < rRefp->varScopep()) return true; - if (lRefp->varScopep() > rRefp->varScopep()) return false; - // Or rarely, different data types - if (lRefp->dtypep() < rRefp->dtypep()) return true; - if (lRefp->dtypep() > rRefp->dtypep()) return false; - } - } - if (AstConst* lConstp = VN_CAST(lp->pinsp(), Const)) { - if (AstConst* rConstp = VN_CAST(rp->pinsp(), Const)) { - if (lConstp->toUInt() < rConstp->toUInt()) return true; - if (lConstp->toUInt() > rConstp->toUInt()) return false; - } - } - } + if (aRefp->dtypep() < bRefp->dtypep()) return -1; + if (aRefp->dtypep() > bRefp->dtypep()) return 1; + return 0; } - // Sort by edge, AFTER variable, as we want multiple edges for same var adjacent. - // note the SenTree optimizer requires this order (more - // general first, less general last) - if (lhsp->edgeType() < rhsp->edgeType()) return true; - if (lhsp->edgeType() > rhsp->edgeType()) return false; - return false; + + if (const AstConst* const aConstp = VN_CAST(ap, Const)) { + const AstConst* const bConstp = VN_AS(bp, Const); + if (aConstp->toUQuad() < bConstp->toUQuad()) return -1; + if (aConstp->toUQuad() > bConstp->toUQuad()) return 1; + return 0; + } + + if (const AstNodeBiop* const aBiOpp = VN_CAST(ap, NodeBiop)) { + const AstNodeBiop* const bBiOpp = VN_AS(bp, NodeBiop); + // Compare RHSs first as LHS might be const, but the variable term should become + // adjacent for optimization if identical. + if (const int c = cmp(aBiOpp->rhsp(), bBiOpp->rhsp())) return c; + return cmp(aBiOpp->lhsp(), bBiOpp->lhsp()); + } + + if (const AstCMethodHard* const aCallp = VN_CAST(ap, CMethodHard)) { + const AstCMethodHard* const bCallp = VN_AS(bp, CMethodHard); + if (aCallp->name() < bCallp->name()) return -1; + if (aCallp->name() > bCallp->name()) return 1; + if (const int c = cmp(aCallp->fromp(), bCallp->fromp())) return c; + AstNodeExpr* aPinsp = aCallp->pinsp(); + AstNodeExpr* bPinsp = bCallp->pinsp(); + while (aPinsp && bPinsp) { + if (const int c = cmp(aPinsp, bPinsp)) return c; + aPinsp = VN_AS(aPinsp->nextp(), NodeExpr); + bPinsp = VN_AS(bPinsp->nextp(), NodeExpr); + } + return aPinsp ? -1 : bPinsp ? 1 : 0; + } + + return 0; + } + + public: + bool operator()(const AstSenItem* lhsp, const AstSenItem* rhsp) const { + AstNodeExpr* const lSensp = lhsp->sensp(); + AstNodeExpr* const rSensp = rhsp->sensp(); + if (lSensp && rSensp) { + // If both terms have sensitivity expressions, recursively compare them + if (const int c = cmp(lSensp, rSensp)) return c < 0; + } else if (lSensp || rSensp) { + // Terms with sensitivity expressions come after those without + return rSensp; + } + // Finally sort by edge, AFTER variable, as we want multiple edges for same var + // adjacent. note the SenTree optimizer requires this order (more general first, + // less general last) + return lhsp->edgeType() < rhsp->edgeType(); } }; @@ -2816,9 +2838,8 @@ private: } } - // Sort the sensitivity names so "posedge a or b" and "posedge b or a" end up together. - // Also, remove duplicate assignments, and fold POS&NEGs into ANYEDGEs - // Make things a little faster; check first if we need a sort + // Pass 1: Sort the sensitivity items so "posedge a or b" and "posedge b or a" and + // similar, optimizable expressions end up next to each other. for (AstSenItem *nextp, *senp = nodep->sensesp(); senp; senp = nextp) { nextp = VN_AS(senp->nextp(), SenItem); // cppcheck-suppress unassignedVariable // cppcheck bug @@ -2838,35 +2859,53 @@ private: } } - // Pass2, remove dup edges - for (AstSenItem *nextp, *senp = nodep->sensesp(); senp; senp = nextp) { + // Pass 2, remove duplicates and simplify adjacent terms if possible + for (AstSenItem *senp = nodep->sensesp(), *nextp; senp; senp = nextp) { nextp = VN_AS(senp->nextp(), SenItem); - AstSenItem* const litemp = senp; - AstSenItem* const ritemp = nextp; - if (ritemp) { - if ((litemp->sensp() && ritemp->sensp() - && litemp->sensp()->sameGateTree(ritemp->sensp())) - || (!litemp->sensp() && !ritemp->sensp())) { - // We've sorted in the order ANY, BOTH, POS, NEG, - // so we don't need to try opposite orders - if ((litemp->edgeType() == VEdgeType::ET_POSEDGE // POS or NEG -> BOTH - && ritemp->edgeType() == VEdgeType::ET_NEGEDGE) - || (litemp->edgeType() == ritemp->edgeType()) // Identical edges - ) { - // Fix edge of old node - if (litemp->edgeType() == VEdgeType::ET_POSEDGE - && ritemp->edgeType() == VEdgeType::ET_NEGEDGE) - litemp->edgeType(VEdgeType::ET_BOTHEDGE); - // Remove redundant node - VL_DO_DANGLING(ritemp->unlinkFrBack()->deleteTree(), ritemp); - VL_DANGLING(ritemp); - // Try to collapse again - nextp = litemp; + if (!nextp) break; + AstSenItem* const lItemp = senp; + AstSenItem* const rItemp = nextp; + AstNodeExpr* const lSenp = lItemp->sensp(); + AstNodeExpr* const rSenp = rItemp->sensp(); + if (!lSenp || !rSenp) continue; + + if (lSenp->sameGateTree(rSenp)) { + // POSEDGE or NEGEDGE -> BOTHEDGE. (We've sorted POSEDGE, before NEGEDGE, so we + // do not need to test for the opposite orders.) + if (lItemp->edgeType() == VEdgeType::ET_POSEDGE + && rItemp->edgeType() == VEdgeType::ET_NEGEDGE) { + // Make both terms BOTHEDGE, the second will be removed below + lItemp->edgeType(VEdgeType::ET_BOTHEDGE); + rItemp->edgeType(VEdgeType::ET_BOTHEDGE); + } + + // Remove identical expressions + if (lItemp->edgeType() == rItemp->edgeType()) { + VL_DO_DANGLING(rItemp->unlinkFrBack()->deleteTree(), rItemp); + nextp = lItemp; + } + + continue; + } + + // Not identical terms, check if they can be combined + if (lSenp->width() != rSenp->width()) continue; + if (AstAnd* const lAndp = VN_CAST(lSenp, And)) { + if (AstAnd* const rAndp = VN_CAST(rSenp, And)) { + if (AstConst* const lConstp = VN_CAST(lAndp->lhsp(), Const)) { + if (AstConst* const rConstp = VN_CAST(rAndp->lhsp(), Const)) { + if (lAndp->rhsp()->sameTree(rAndp->rhsp())) { + const V3Number lNum{lConstp->num()}; + lConstp->num().opOr(lNum, rConstp->num()); + // Remove redundant term + VL_DO_DANGLING(rItemp->unlinkFrBack()->deleteTree(), rItemp); + nextp = lItemp; + } + } } } } } - // nodep->dumpTree("- ssou: "); } } diff --git a/src/V3Sched.cpp b/src/V3Sched.cpp index 9f8896641..f9ac9cc8e 100644 --- a/src/V3Sched.cpp +++ b/src/V3Sched.cpp @@ -314,25 +314,23 @@ struct TriggerKit { void addFirstIterationTriggerAssignment(AstVarScope* counterp, uint32_t index) const { FileLine* const flp = counterp->fileline(); AstVarRef* const vrefp = new AstVarRef{flp, m_vscp, VAccess::WRITE}; - AstCMethodHard* const callp - = new AstCMethodHard{flp, vrefp, "at", new AstConst{flp, index}}; - callp->dtypeSetBit(); - callp->pure(true); - m_funcp->stmtsp()->addHereThisAsNext(new AstAssign{ - flp, callp, - new AstEq{flp, new AstVarRef{flp, counterp, VAccess::READ}, new AstConst{flp, 0}}}); + AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "set"}; + callp->addPinsp(new AstConst{flp, index}); + callp->addPinsp( + new AstEq{flp, new AstVarRef{flp, counterp, VAccess::READ}, new AstConst{flp, 0}}); + callp->dtypeSetVoid(); + m_funcp->stmtsp()->addHereThisAsNext(callp->makeStmt()); } // Utility to set then clear the dpiExportTrigger trigger void addDpiExportTriggerAssignment(AstVarScope* dpiExportTriggerVscp, uint32_t index) const { FileLine* const flp = dpiExportTriggerVscp->fileline(); AstVarRef* const vrefp = new AstVarRef{flp, m_vscp, VAccess::WRITE}; - AstCMethodHard* const callp - = new AstCMethodHard{flp, vrefp, "at", new AstConst{flp, index}}; - callp->dtypeSetBit(); - callp->pure(true); - AstNode* stmtp - = new AstAssign{flp, callp, new AstVarRef{flp, dpiExportTriggerVscp, VAccess::READ}}; + AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "set"}; + callp->addPinsp(new AstConst{flp, index}); + callp->addPinsp(new AstVarRef{flp, dpiExportTriggerVscp, VAccess::READ}); + callp->dtypeSetVoid(); + AstNode* const stmtp = callp->makeStmt(); stmtp->addNext(new AstAssign{flp, new AstVarRef{flp, dpiExportTriggerVscp, VAccess::WRITE}, new AstConst{flp, AstConst::BitFalse{}}}); m_funcp->stmtsp()->addHereThisAsNext(stmtp); @@ -359,10 +357,15 @@ AstSenTree* createTriggerSenTree(AstNetlist* netlistp, AstVarScope* const vscp, AstTopScope* const topScopep = netlistp->topScopep(); FileLine* const flp = topScopep->fileline(); AstVarRef* const vrefp = new AstVarRef{flp, vscp, VAccess::READ}; - AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "at", new AstConst{flp, index}}; - callp->dtypeSetBit(); + const uint32_t wordIndex = index / 64; + const uint32_t bitIndex = index % 64; + AstCMethodHard* const callp + = new AstCMethodHard{flp, vrefp, "word", new AstConst{flp, wordIndex}}; + callp->dtypeSetUInt64(); callp->pure(true); - AstSenItem* const senItemp = new AstSenItem{flp, VEdgeType::ET_TRUE, callp}; + AstNodeExpr* const termp + = new AstAnd{flp, new AstConst{flp, AstConst::Unsized64{}, 1ULL << bitIndex}, callp}; + AstSenItem* const senItemp = new AstSenItem{flp, VEdgeType::ET_TRUE, termp}; AstSenTree* const resultp = new AstSenTree{flp, senItemp}; topScopep->addSenTreesp(resultp); return resultp; @@ -427,14 +430,28 @@ const TriggerKit createTriggers(AstNetlist* netlistp, AstCFunc* const initFuncp, new AstText{flp, "VL_DBG_MSGF(\" No triggers active\\n\");\n", true}); } + // Set the given trigger to the given value + const auto setTrig = [&](uint32_t index, AstNodeExpr* valp) { + AstVarRef* const vrefp = new AstVarRef{flp, vscp, VAccess::WRITE}; + AstCMethodHard* const callp = new AstCMethodHard{flp, vrefp, "set"}; + callp->addPinsp(new AstConst{flp, index}); + callp->addPinsp(valp); + callp->dtypeSetVoid(); + return callp->makeStmt(); + }; + // Create a reference to a trigger flag - const auto getTrigRef = [&](uint32_t index, VAccess access) { - AstVarRef* const vrefp = new AstVarRef{flp, vscp, access}; - AstConst* const idxp = new AstConst{flp, index}; - AstCMethodHard* callp = new AstCMethodHard{flp, vrefp, "at", idxp}; - callp->dtypeSetBit(); + const auto getTrig = [&](uint32_t index) { + AstVarRef* const vrefp = new AstVarRef{flp, vscp, VAccess::READ}; + const uint32_t wordIndex = index / 64; + const uint32_t bitIndex = index % 64; + AstCMethodHard* const callp + = new AstCMethodHard{flp, vrefp, "word", new AstConst{flp, wordIndex}}; + callp->dtypeSetUInt64(); callp->pure(true); - return callp; + AstNodeExpr* const termp + = new AstAnd{flp, new AstConst{flp, AstConst::Unsized64{}, 1ULL << bitIndex}, callp}; + return termp; }; // Add a debug dumping statement for this trigger @@ -446,7 +463,7 @@ const TriggerKit createTriggers(AstNetlist* netlistp, AstCFunc* const initFuncp, ss << "\\n\");\n"; const string message{ss.str()}; - AstIf* const ifp = new AstIf{flp, getTrigRef(index, VAccess::READ)}; + AstIf* const ifp = new AstIf{flp, getTrig(index)}; dumpp->addStmtsp(ifp); ifp->addThensp(new AstText{flp, message, true}); }; @@ -458,13 +475,13 @@ const TriggerKit createTriggers(AstNetlist* netlistp, AstCFunc* const initFuncp, // Add trigger computation uint32_t triggerNumber = extraTriggers.size(); - AstNode* initialTrigsp = nullptr; + AstNodeStmt* initialTrigsp = nullptr; for (const AstSenTree* const senTreep : senTreeps) { UASSERT_OBJ(senTreep->hasClocked() || senTreep->hasHybrid(), senTreep, "Cannot create trigger expression for non-clocked sensitivity"); - // Create the trigger AstSenTrees and associate it with the original AstSenTree - AstCMethodHard* const senp = getTrigRef(triggerNumber, VAccess::READ); + // Create the trigger AstSenTrees and associate them with the original AstSenTree + AstNodeExpr* const senp = getTrig(triggerNumber); AstSenItem* const senItemp = new AstSenItem{flp, VEdgeType::ET_TRUE, senp}; AstSenTree* const trigpSenp = new AstSenTree{flp, senItemp}; topScopep->addSenTreesp(trigpSenp); @@ -472,14 +489,12 @@ const TriggerKit createTriggers(AstNetlist* netlistp, AstCFunc* const initFuncp, // Add the trigger computation const auto& pair = senExprBuilder.build(senTreep); - funcp->addStmtsp( - new AstAssign{flp, getTrigRef(triggerNumber, VAccess::WRITE), pair.first}); + funcp->addStmtsp(setTrig(triggerNumber, pair.first)); // Add initialization time trigger if (pair.second || v3Global.opt.xInitialEdge()) { - AstNode* const assignp = new AstAssign{flp, getTrigRef(triggerNumber, VAccess::WRITE), - new AstConst{flp, 1}}; - initialTrigsp = AstNode::addNext(initialTrigsp, assignp); + initialTrigsp + = AstNode::addNext(initialTrigsp, setTrig(triggerNumber, new AstConst{flp, 1})); } // Add a debug statement for this trigger @@ -803,7 +818,7 @@ AstStmtExpr* createTriggerSetCall(FileLine* const flp, AstVarScope* const toVscp AstVarScope* const fromVscp) { AstVarRef* const lhsp = new AstVarRef{flp, toVscp, VAccess::WRITE}; AstVarRef* const argp = new AstVarRef{flp, fromVscp, VAccess::READ}; - AstCMethodHard* const callp = new AstCMethodHard{flp, lhsp, "set", argp}; + AstCMethodHard* const callp = new AstCMethodHard{flp, lhsp, "thisOr", argp}; callp->dtypeSetVoid(); return callp->makeStmt(); } diff --git a/test_regress/t/t_xml_debugcheck.out b/test_regress/t/t_xml_debugcheck.out index e99f614c6..11a9f8501 100644 --- a/test_regress/t/t_xml_debugcheck.out +++ b/test_regress/t/t_xml_debugcheck.out @@ -600,22 +600,22 @@ - - - - - - - - - - - - + + + + + + + + + + + + - + @@ -647,10 +647,13 @@ - - - - + + + + + + + @@ -673,10 +676,13 @@ - - - - + + + + + + + @@ -1505,10 +1511,13 @@ - - - - + + + + + + + @@ -1609,7 +1618,7 @@ - + @@ -1668,7 +1677,7 @@ - + @@ -1763,7 +1772,7 @@ - + @@ -1775,7 +1784,7 @@ - + @@ -1795,16 +1804,18 @@ - + + + - +