diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index 29e3876c4..72da2961f 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -659,6 +659,18 @@ Summary: .. option:: -fno-case + Rarely needed. Disable all case statement optimizations. + + Alias for all other `-fno-case-*` options. + +.. option:: -fno-case-table + + Rarely needed. Disable converting case statements into table lookups. + +.. option:: -fno-case-tree + + Rarely needed. Disable converting case statements into bitwise branch trees. + .. option:: -fno-combine .. option:: -fno-const diff --git a/src/V3AstNodeOther.h b/src/V3AstNodeOther.h index f9518bf63..e3bbcde14 100644 --- a/src/V3AstNodeOther.h +++ b/src/V3AstNodeOther.h @@ -1000,6 +1000,8 @@ public: // this matters, the caller must handle the dtype difference as appropriate. If 'mergeDType' is // false, the returned VarScope will have _->dtypep()->sameTree(initp->dtypep()) return true. AstVarScope* findConst(AstConst* initp, bool mergeDType); + // Rebuild hashes after potential removals + void reCache(); }; class AstConstraint final : public AstNode { // Constraint @@ -2136,6 +2138,7 @@ class AstVar final : public AstNode { bool m_attrFsmRegisterWrapper : 1; // connected to an fsm_register_wrapper instance bool m_attrFsmResetArc : 1; // declared with fsm_reset_arc metacomment bool m_attrFsmArcInclCond : 1; // declared with fsm_arc_include_cond metacomment + bool m_constPoolEntry : 1; // Constant pool variable bool m_fileDescr : 1; // File descriptor bool m_gotNansiType : 1; // Linker saw Non-ANSI type declaration bool m_icoMaybeWritten : 1; // Design might write this input signal - for ico change detect @@ -2199,6 +2202,7 @@ class AstVar final : public AstNode { m_attrFsmRegisterWrapper = false; m_attrFsmResetArc = false; m_attrFsmArcInclCond = false; + m_constPoolEntry = false; m_fileDescr = false; m_gotNansiType = false; m_icoMaybeWritten = false; @@ -2348,6 +2352,8 @@ public: void 
attrFsmRegisterWrapper(bool flag) { m_attrFsmRegisterWrapper = flag; } void attrFsmResetArc(bool flag) { m_attrFsmResetArc = flag; } void attrFsmArcInclCond(bool flag) { m_attrFsmArcInclCond = flag; } + bool constPoolEntry() const { return m_constPoolEntry; } + void setConstPoolEntry() { m_constPoolEntry = true; } void rand(const VRandAttr flag) { m_rand = flag; } void usedParam(bool flag) { m_usedParam = flag; } void usedLoopIdx(bool flag) { m_usedLoopIdx = flag; } diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 453c45d21..3b35037a8 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -1629,6 +1629,7 @@ AstConstPool::AstConstPool(FileLine* fl) AstVarScope* AstConstPool::createNewEntry(const string& name, AstNodeExpr* initp) { FileLine* const fl = initp->fileline(); AstVar* const varp = new AstVar{fl, VVarType::MODULETEMP, name, initp->dtypep()}; + varp->setConstPoolEntry(); varp->isConst(true); varp->isStatic(true); varp->valuep(initp->cloneTree(false)); @@ -1748,6 +1749,17 @@ AstVarScope* AstConstPool::findConst(AstConst* initp, bool mergeDType) { return varScopep; } +void AstConstPool::reCache() { + m_tables.clear(); + m_consts.clear(); + for (AstVarScope* vscp = m_scopep->varsp(); vscp; vscp = VN_CAST(vscp->nextp(), VarScope)) { + AstNode* const valuep = vscp->varp()->valuep(); + const V3Hash hash = V3Hasher::uncachedHash(valuep); + if (VN_IS(valuep, InitArray)) m_tables.emplace(hash.value(), vscp); + if (VN_IS(valuep, Const)) m_consts.emplace(hash.value(), vscp); + } +} + //====================================================================== // Per-type Debugging @@ -3198,6 +3210,7 @@ int AstVarRef::instrCount() const { } void AstVar::dump(std::ostream& str) const { this->AstNode::dump(str); + if (constPoolEntry()) str << " [CONSTPOOL]"; if (isSc()) str << " [SC]"; if (isPrimaryIO()) str << (isInout() ? " [PIO]" : (isWritable() ? 
" [PO]" : " [PI]")); if (isPrimaryClock()) str << " [PCLK]"; @@ -3239,6 +3252,7 @@ void AstVar::dump(std::ostream& str) const { void AstVar::dumpJson(std::ostream& str) const { dumpJsonStrFunc(str, origName); dumpJsonStrFunc(str, verilogName); + dumpJsonBoolFuncIf(str, constPoolEntry); dumpJsonBoolFuncIf(str, isSc); dumpJsonBoolFuncIf(str, isPrimaryIO); dumpJsonBoolFuncIf(str, isPrimaryClock); diff --git a/src/V3Case.cpp b/src/V3Case.cpp index 80dffba68..251c67202 100644 --- a/src/V3Case.cpp +++ b/src/V3Case.cpp @@ -133,6 +133,12 @@ class CaseVisitor final : public VNVisitor { constexpr static int CASE_DETAILS_MAX_WIDTH = 16; // Levels of priority to be ORed together in top IF tree constexpr static int CASE_ENCODER_GROUP_DEPTH = 8; + // Maximum size for tiny lookup tables - materialized in code + constexpr static size_t CASE_TABLE_TINY_BITS = 32; // Up to 2 instructions to materialize + // Maximum size for normal lookup tables - stored in constant pool + constexpr static size_t CASE_TABLE_MAX_BITS = 1ULL << 16; // 64Kbits / 8KBytes + // Minimum number of the branches a table must replace to be worth a load + constexpr static size_t CASE_TABLE_MIN_BRANCHES = 3; // TYPES // Record for each case value @@ -142,21 +148,49 @@ class CaseVisitor final : public VNVisitor { AstNode* stmtsp; // Statements of 'itemp' (might be nullptr if case is empty) }; + // Record for each LHS of a decoder pattern + struct LhsRecord final { + AstNodeExpr* lhsp = nullptr; // LHS of the assignment + AstNodeAssign* preDefaultp = nullptr; // Default assignment *before the case statement* + size_t nCaseAssigns = 0; // Number of AstAssigns to this LHS in case clauses + size_t nCaseAssignDlys = 0; // Number of AstAssignDlys to this LHS in case clauses + size_t offset = 0; // Offset in the table for this LHS + + static size_t s_nextId; // Static unique Id counter + size_t id = ++s_nextId; // Unique Id for sorting + }; + + // NODE STATE: + // AstVarScope::user1() -> bool: true if written to, only in 
parts of analysis phase + // STATE // Statistics tracking, as a struct so can be passed to 'const' methods struct Stats final { + VDouble0 caseTableNormal; // Cases using table method with normal table + VDouble0 caseTableTiny; // Cases using table method with tiny table VDouble0 caseFast; // Cases using fast bit tree method VDouble0 caseGeneric; // Cases using generic if/else tree method VDouble0 provenAssertions; // Assertions proven to hold } m_stats; const AstNode* m_alwaysp = nullptr; // Always in which case is located + size_t m_nTmps = 0; // Sequence numbers for temporary variables + AstScope* m_scopep = nullptr; // Current scope // STATE - per AstCase. Update by 'analyzeCase', treat 'const' otherwise bool m_caseOpaque = false; // Case statement is opaque (non-packed, or non-const conditions) + bool m_caseHasDefault = false; // Indicates the case statement has a default case + size_t m_caseNCaseItems = 0; // Number of AstCaseItems in the case statement size_t m_caseNConditions = 0; // Number of conditions in the case statement + // Map from LHSs of decoder pattern to corresponding LhsRecord. 
+ std::unordered_map, LhsRecord> m_caseLhsRecords; + // Values of 'm_caseLhsRecords' in sorted order, if case statement is a decoder pattern + std::vector m_caseDecoderRecords; + size_t m_caseDecoderEntryWidth = 0; // Width of each entry in the decoder table + size_t m_caseTableWidth = 0; // Total width of the case table - 0 means can't optimize bool m_caseDetailsValid = false; // Indicates m_caseDetails is valid struct final { bool exhaustive = false; // Proven exhaustive + bool exhaustiveOverEnumOnly = false; // Exhaustive over enum values only bool noOverlaps = false; // Proven no overlaps between cases // Map from value (index) to the CaseRecord that covers this value std::array records; @@ -189,6 +223,50 @@ class CaseVisitor final : public VNVisitor { return pairMaskBits; } + // If the given statement is an assignment that fits the decoder pattern, + // return it, otherwise return nullptr + static AstNodeAssign* checkDecoderAssign(AstNode* stmtp) { + // Only Assign and AssignDly are supported + if (!VN_IS(stmtp, Assign) && !VN_IS(stmtp, AssignDly)) return nullptr; + AstNodeAssign* const assp = VN_AS(stmtp, NodeAssign); + // Only if no timing control + if (assp->timingControlp()) return nullptr; + // Only if assigning a constant + if (!VN_IS(assp->rhsp(), Const)) return nullptr; + // Only if it's a packed value + AstNodeDType* const dtypep = assp->rhsp()->dtypep(); + if (dtypep->isString() || dtypep->isDouble()) return nullptr; + // Only if the LHS has no reads (can be relaxed, but need to prove there is no r/w hazard) + if (assp->lhsp()->exists([](AstVarRef* refp) { return refp->access().isReadOrRW(); })) { + return nullptr; + } + // This is an assignment that fits the decoder pattern + return assp; + } + + // Analyze if the given case item fits the decoder pattern, return true iff so. + // Updates 'm_caseLhsRecords'. 
+ bool analyzeDecoderCaseItem(AstCaseItem* cip) { + // AstVarScope::user1() -> bool: true if written to + const VNUser1InUse user1InUse; + for (AstNode* stmtp = cip->stmtsp(); stmtp; stmtp = stmtp->nextp()) { + // Must be an assignment that fits the decoder pattern + AstNodeAssign* const assp = checkDecoderAssign(stmtp); + if (!assp) return false; + // Must assign each LHS exactly once - RHS is Const + const bool multipleAssignments = assp->lhsp()->exists([](AstVarRef* refp) { // + return refp->varScopep()->user1SetOnce(); + }); + if (multipleAssignments) return false; + // Update LhsRecord + LhsRecord& lhsRecord = m_caseLhsRecords[*assp->lhsp()]; + if (!lhsRecord.lhsp) lhsRecord.lhsp = assp->lhsp(); + lhsRecord.nCaseAssigns += VN_IS(assp, Assign); + lhsRecord.nCaseAssignDlys += VN_IS(assp, AssignDly); + } + return true; + } + // Determine whether we should check case items are complete // Returns enum's dtype if should check, nullptr if shouldn't static const AstEnumDType* getEnumCompletionCheckDType(const AstCase* const nodep) { @@ -245,13 +323,6 @@ class CaseVisitor final : public VNVisitor { return true; } - bool checkExhaustive(AstCase* nodep) { - if (const AstEnumDType* const enump = getEnumCompletionCheckDType(nodep)) { - return checkExhaustiveEnum(nodep, enump); - } - return checkExhaustivePacked(nodep); - } - // Analyze each value in the case statement. Updates 'm_caseDetails' and issues warnings. 
void analyzeCaseDetails(AstCase* nodep) { const uint32_t numValues = 1UL << nodep->exprp()->width(); @@ -361,16 +432,136 @@ class CaseVisitor final : public VNVisitor { } // If there was no default, check exhaustiveness - m_caseDetails.exhaustive = hasDefault || checkExhaustive(nodep); + m_caseDetails.exhaustiveOverEnumOnly = false; + m_caseDetails.exhaustive = hasDefault; + if (!hasDefault) { + if (const AstEnumDType* const enump = getEnumCompletionCheckDType(nodep)) { + // Only checks enum values are covered, not all bit patterns of the case expression + const bool exhaustiveOverEnum = checkExhaustiveEnum(nodep, enump); + m_caseDetails.exhaustiveOverEnumOnly = exhaustiveOverEnum; + m_caseDetails.exhaustive = exhaustiveOverEnum; + } else { + m_caseDetails.exhaustive = checkExhaustivePacked(nodep); + } + } + // Records now valid m_caseDetailsValid = true; } + void analyzeDecoderPattern(AstCase* nodep) { + // Check each LHS record + for (auto it = m_caseLhsRecords.cbegin(); it != m_caseLhsRecords.cend();) { + const LhsRecord& lhsRecord = it->second; + + // Delete records that have no assignments in any case item (only pre-defaults) + if (!lhsRecord.nCaseAssigns && !lhsRecord.nCaseAssignDlys) { + it = m_caseLhsRecords.erase(it); + continue; + } + ++it; + + // If mixed assignments, it's not a decoder pattern + if (lhsRecord.nCaseAssigns && lhsRecord.nCaseAssignDlys) return; + + // If assigned in all branches, it's good - but only if every table entry will be + // covered, i.e. the case has a default, or is exhaustive over all bit patterns. + // Enum-only exhaustiveness is not enough: out-of-enum values leave entries + // uncovered. 
+ if (m_caseHasDefault + || (m_caseDetailsValid && m_caseDetails.exhaustive + && !m_caseDetails.exhaustiveOverEnumOnly)) { + if (lhsRecord.nCaseAssigns == m_caseNCaseItems) continue; + if (lhsRecord.nCaseAssignDlys == m_caseNCaseItems) continue; + } + + // Otherwise it needs to have a pre-default assignment + AstNode* const preDefaultp = lhsRecord.preDefaultp; + if (!preDefaultp) return; + // And the pre-default needs to be the same type + if (lhsRecord.nCaseAssigns && !VN_IS(preDefaultp, Assign)) return; + if (lhsRecord.nCaseAssignDlys && !VN_IS(preDefaultp, AssignDly)) return; + } + // All cases check out, can optimize if there are some entries left + if (m_caseLhsRecords.empty()) return; + + // Gather all the LhsRecords and sort them - there is a copy here, it's ok, won't be many + m_caseDecoderRecords.reserve(m_caseLhsRecords.size()); + for (const auto& item : m_caseLhsRecords) m_caseDecoderRecords.emplace_back(item.second); + std::sort(m_caseDecoderRecords.begin(), m_caseDecoderRecords.end(), + [](const LhsRecord& a, const LhsRecord& b) { + // Sort by width, then id + const int aWidth = a.lhsp->width(); + const int bWidth = b.lhsp->width(); + if (aWidth != bWidth) return aWidth < bWidth; + return a.id < b.id; + }); + + // We can either create a single lookup table for all LHSs, or one for each LHS. + // With a single table, we need to select out of the lookup via a temporary variable. + // With one table per LHS, we need to do multiple loads. The table is likely to incur a + // D-cache miss on large designs, so we choose single table. 
+ + const int caseWidth = nodep->exprp()->width(); + + // Safely check if table with 'entryWidth' entries would fit within 'maxWidth' bits + const auto fitsLimit = [&](size_t entryWidth, size_t maxWidth) -> bool { + size_t totalWidth = entryWidth; + // Multiply cases - iterative to avoid overflow + for (int i = 0; i < caseWidth; ++i) { + totalWidth <<= 1; + if (totalWidth > maxWidth) return false; + } + return true; + }; + + // Check if the whole table would fit in a tiny table packed tightly + m_caseDecoderEntryWidth = 0; + for (LhsRecord& lhsRecord : m_caseDecoderRecords) { + lhsRecord.offset = m_caseDecoderEntryWidth; + m_caseDecoderEntryWidth += lhsRecord.lhsp->width(); + } + // If it fits, we will pack it tightly + if (fitsLimit(m_caseDecoderEntryWidth, CASE_TABLE_TINY_BITS)) { + m_caseTableWidth = m_caseDecoderEntryWidth << caseWidth; // Can optimize + return; + } + + // Table will be biggish. To avoid expensive bit swizzling, align each entry to a + // word boundary if it would cross a word boundary. + m_caseDecoderEntryWidth = 0; + for (LhsRecord& lhsRecord : m_caseDecoderRecords) { + const size_t width = lhsRecord.lhsp->width(); + const size_t lsbWord = VL_BITWORD_E(m_caseDecoderEntryWidth); + const size_t msbWord = VL_BITWORD_E(m_caseDecoderEntryWidth + width - 1); + if (lsbWord != msbWord) { + m_caseDecoderEntryWidth = VL_WORDS_I(m_caseDecoderEntryWidth) * VL_EDATASIZE; + } + lhsRecord.offset = m_caseDecoderEntryWidth; + m_caseDecoderEntryWidth += width; + } + // Also align the whole entry width to a word boundary + m_caseDecoderEntryWidth = VL_WORDS_I(m_caseDecoderEntryWidth) * VL_EDATASIZE; + // Check the table fits max size + if (fitsLimit(m_caseDecoderEntryWidth, CASE_TABLE_MAX_BITS)) { + m_caseTableWidth = m_caseDecoderEntryWidth << caseWidth; // Can optimize + return; + } + + // Can't optimize - yet ... + } + // Analyze case statement. Updates 'm_case*' members. Reports warnings. 
void analyzeCase(AstCase* nodep) { // Reset all analysis results m_caseOpaque = false; + m_caseHasDefault = false; + m_caseNCaseItems = 0; m_caseNConditions = 0; + m_caseDecoderRecords.clear(); + m_caseDecoderEntryWidth = 0; + m_caseTableWidth = 0; + m_caseLhsRecords.clear(); m_caseDetailsValid = false; AstNode* const caseExprp = nodep->exprp(); @@ -378,14 +569,44 @@ class CaseVisitor final : public VNVisitor { // Mark opaque if not a packed value - TODO: can this be a class? if (caseExprp->isDouble() || caseExprp->isString()) m_caseOpaque = true; - // Check each condition expression + // Gather pre-default assignments of decoder pattern + { + // AstVarScope::user1() -> bool: true if written to + const VNUser1InUse user1InUse; + for (AstNode* prevp = nodep->prevp(); prevp; prevp = prevp->prevp()) { + AstNodeAssign* const assp = checkDecoderAssign(prevp); + if (!assp) break; // Stop if not a decoder assignment + // Stop if multiple assignments + const bool multipleAssignments = assp->lhsp()->exists([&](AstVarRef* refp) { // + return refp->varScopep()->user1SetOnce(); + }); + if (multipleAssignments) break; + // Store pre-default assignment + LhsRecord& lhsRecord = m_caseLhsRecords[*assp->lhsp()]; + lhsRecord.lhsp = assp->lhsp(); + lhsRecord.preDefaultp = assp; + } + } + + // Check each case item + bool canBeDecoder = true; for (AstCaseItem* cip = nodep->itemsp(); cip; cip = VN_AS(cip->nextp(), CaseItem)) { + // Check conditions for (AstNode* condp = cip->condsp(); condp; condp = condp->nextp()) { // Count conditions ++m_caseNConditions; // Mark opaque if non-constant condition - if (!VN_IS(condp, Const)) m_caseOpaque = true; + if (!VN_IS(condp, Const)) { + m_caseOpaque = true; + canBeDecoder = false; // Can't be a decoder if opaque + } } + // Check if it has a default case + if (cip->isDefault()) m_caseHasDefault = true; + // Count case items + ++m_caseNCaseItems; + // Check if it fits the decoder pattern, if still possible + if (canBeDecoder) canBeDecoder = 
analyzeDecoderCaseItem(cip); } // Nothing else to do if not a packed type, or non-const conditions @@ -393,6 +614,135 @@ class CaseVisitor final : public VNVisitor { // If small enough, analyse details if (caseExprp->width() <= CASE_DETAILS_MAX_WIDTH) analyzeCaseDetails(nodep); + + // Check if it actually fits a full decoder pattern + if (canBeDecoder) analyzeDecoderPattern(nodep); + } + + AstNodeStmt* convertCaseTable(AstCase* nodep) { + // Create the table constant + FileLine* const flp = nodep->fileline(); + AstConst* const tablep + = new AstConst{flp, AstConst::WidthedValue{}, static_cast(m_caseTableWidth), 0}; + const uint32_t tableEntries = 1U << nodep->exprp()->width(); + + // Populate the table + for (const LhsRecord& lhsRecord : m_caseDecoderRecords) { + const int lhsWidth = lhsRecord.lhsp->width(); + const int lhsOffset = lhsRecord.offset; + + // Broadcast the pre-default assignment + if (lhsRecord.preDefaultp) { + AstConst* const rhsp = VN_AS(lhsRecord.preDefaultp->rhsp(), Const); + for (uint32_t index = 0; index < tableEntries; ++index) { + const uint32_t tableOffset = index * m_caseDecoderEntryWidth + lhsOffset; + tablep->num().opSelInto(rhsp->num(), tableOffset, lhsWidth); + } + } + + // Populate table based on each case item. 
In reverse order so earlier items win + for (AstCaseItem* cip = VN_AS(nodep->itemsp()->lastp(), CaseItem); cip; + cip = VN_AS(cip->prevp(), CaseItem)) { + // Find the RHS in this case + AstConst* const rhsp = [&]() -> AstConst* { + for (AstNode* stmtp = cip->stmtsp(); stmtp; stmtp = stmtp->nextp()) { + AstNodeAssign* const ap = VN_AS(stmtp, NodeAssign); + if (lhsRecord.lhsp->sameTree(ap->lhsp())) return VN_AS(ap->rhsp(), Const); + } + // Not assigned in this case, use the pre-assigned default + return VN_AS(lhsRecord.preDefaultp->rhsp(), Const); + }(); + + // If default, broadcast it + if (cip->isDefault()) { + for (uint32_t index = 0; index < tableEntries; ++index) { + const uint32_t tableOffset = index * m_caseDecoderEntryWidth + lhsOffset; + tablep->num().opSelInto(rhsp->num(), tableOffset, lhsWidth); + } + continue; + } + + // Iterate case conditions in reverse order + for (AstConst* condp = VN_AS(cip->condsp()->lastp(), Const); condp; + condp = VN_AS(condp->prevp(), Const)) { + if (neverItem(nodep, condp)) continue; // If item never matches, ignore it + const auto& match = matchPattern(nodep, condp); + const uint32_t matchMask = match.first.toUInt(); + const uint32_t matchBits = match.second.toUInt(); + const uint32_t inverseMask = ~matchMask & ((1U << condp->width()) - 1); + // This iterates through all integers that are a subset of the inverse mask, + // i.e.: all don't care values masked out + for (uint32_t i = inverseMask; true; i = (i - 1) & inverseMask) { + const uint32_t index = i | matchBits; + const uint32_t tableOffset = index * m_caseDecoderEntryWidth + lhsOffset; + tablep->num().opSelInto(rhsp->num(), tableOffset, lhsWidth); + if (!i) break; + } + } + } + } + + // Create the table in the constant pool, unless using an inline table + AstVarScope* const tableVscp = [&]() -> AstVarScope* { + if (m_caseTableWidth <= CASE_TABLE_TINY_BITS) { + ++m_stats.caseTableTiny; + return nullptr; + } + ++m_stats.caseTableNormal; + AstVarScope* vscp = 
v3Global.rootp()->constPoolp()->findConst(tablep, true); + VL_DO_DANGLING(tablep->deleteTree(), tablep); // findConst clones + return vscp; + }(); + + // Create the lookup table reference and index + AstNodeExpr* const tableRefp + = tableVscp ? static_cast(new AstVarRef{flp, tableVscp, VAccess::READ}) + : static_cast(tablep); + AstNodeExpr* const caseExprp + = new AstExtend{flp, nodep->exprp()->cloneTreePure(false), 32}; + AstNodeExpr* const scalep + = new AstConst{flp, static_cast(m_caseDecoderEntryWidth)}; + AstNodeExpr* const tableLsbp = new AstMul{flp, scalep, caseExprp}; + + // If there is only one LHS, just use the result + if (m_caseDecoderRecords.size() == 1) { + const LhsRecord& lhsRecord = m_caseDecoderRecords[0]; + const int width = lhsRecord.lhsp->width(); + AstNodeExpr* const rhsp = new AstSel{flp, tableRefp, tableLsbp, width}; + AstNodeExpr* const lhsp = lhsRecord.lhsp->cloneTreePure(false); + if (lhsRecord.nCaseAssigns) { + return new AstAssign{flp, lhsp, rhsp}; + } else if (lhsRecord.nCaseAssignDlys) { + return new AstAssignDly{flp, lhsp, rhsp}; + } else { + nodep->v3fatalSrc("Unknown assignment type"); + } + } + + // There are multiple LHSs, store the lookup result in a temporary + const std::string name = "__VcaseTableOut" + std::to_string(m_nTmps++); + AstVarScope* const tempVscp = m_scopep->createTemp(name, m_caseDecoderEntryWidth); + AstNodeExpr* const tempWritep = new AstVarRef{flp, tempVscp, VAccess::WRITE}; + AstNodeExpr* const tableSelp + = new AstSel{flp, tableRefp, tableLsbp, static_cast(m_caseDecoderEntryWidth)}; + AstNodeStmt* const resultp = new AstAssign{flp, tempWritep, tableSelp}; + + // For each LHS, select out the result + for (const LhsRecord& lhsRecord : m_caseDecoderRecords) { + const int width = lhsRecord.lhsp->width(); + const int lsb = lhsRecord.offset; + AstNodeExpr* const tempReadp = new AstVarRef{flp, tempVscp, VAccess::READ}; + AstNodeExpr* const rhsp = new AstSel{flp, tempReadp, lsb, width}; + AstNodeExpr* const lhsp = 
lhsRecord.lhsp->cloneTreePure(false); + if (lhsRecord.nCaseAssigns) { + resultp->addNext(new AstAssign{flp, lhsp, rhsp}); + } else if (lhsRecord.nCaseAssignDlys) { + resultp->addNext(new AstAssignDly{flp, lhsp, rhsp}); + } else { + nodep->v3fatalSrc("Unknown assignment type"); + } + } + return resultp; } // TODO: should return AstNodeStmt after #6280 @@ -443,7 +793,8 @@ class CaseVisitor final : public VNVisitor { // -> tree of IF(msb, IF(msb-1, 11, 10) // IF(msb-1, 01, 00)) // TODO: should return AstNodeStmt after #6280 - AstNode* convertCaseFast(AstCase* nodep) const { + AstNode* convertCaseFast(AstCase* nodep) { + ++m_stats.caseFast; const int caseWidth = nodep->exprp()->width(); AstNode* const ifrootp = convertCaseFastRecurse(nodep->exprp(), caseWidth - 1, 0UL); return ifrootp && ifrootp->backp() ? ifrootp->cloneTree(true) : ifrootp; @@ -455,7 +806,8 @@ class CaseVisitor final : public VNVisitor { // IF((EQ (AND MASK cexpr) (AND MASK icond1) // ,istmts2, istmts3 // TODO: should return AstNodeStmt after #6280 - AstNode* convertCaseGeneric(AstCase* nodep) const { + AstNode* convertCaseGeneric(AstCase* nodep) { + ++m_stats.caseGeneric; // We'll do this in two stages. // First stage, convert the conditions to the appropriate IF AND terms. bool hasDefault = false; @@ -522,7 +874,8 @@ class CaseVisitor final : public VNVisitor { // 'Or' new term with previous terms newCondp = newCondp ? new AstLogOr{flp, newCondp, termp} : termp; } - // Replace expression in tree. Needs to be non-null, so add a constant false if needed + // Replace expression in tree. 
Needs to be non-null, so add a constant false if + // needed if (!newCondp) newCondp = new AstConst{flp, AstConst::BitFalse{}}; itemp->addCondsp(newCondp); } @@ -591,11 +944,31 @@ class CaseVisitor final : public VNVisitor { // Convert the given case statement to a representation not using AstCase // TODO: should return AstNodeStmt after #6280 - AstNode* convertCase(AstCase* nodep, Stats& stats) const { + AstNode* convertCase(AstCase* nodep) { + // Determine if we should use the lookup table method + const bool useTable = [&]() { + // Not if disabled + if (!v3Global.opt.fCaseTable()) return false; + // Not if analysis tells us we can't + if (!m_caseTableWidth) return false; + // Always if tiny - it is materialized inline, so there is no load to amortize + if (m_caseTableWidth <= CASE_TABLE_TINY_BITS) return true; + // For a normal (constant-pool) table, weigh the indexed load against the branch + // lowering it would replace. That lowering's depth is bounded by the selector + // width (a balanced bit tree tests ~one bit per level) and by the number of + // distinct values (a generic if/else does ~one compare per value). A few compares + // are cheaper than a load that is likely to be a cache miss, so only table once that + // depth is exceeded. 
+ const size_t branches = std::min(nodep->exprp()->width(), m_caseNConditions); + if (branches < CASE_TABLE_MIN_BRANCHES) return false; + return true; + }(); + if (useTable) return convertCaseTable(nodep); + // Determine if we should use the fast bitwise branching tree method const bool useFastBitTree = [&]() { // Not if disabled - if (!v3Global.opt.fCase()) return false; + if (!v3Global.opt.fCaseTree()) return false; // Can't do it without the detailed analysis if (!m_caseDetailsValid) return false; // Can't do it if not exhaustive @@ -608,13 +981,9 @@ class CaseVisitor final : public VNVisitor { // Otherwise use the bit tree return true; }(); - if (useFastBitTree) { - ++stats.caseFast; - return convertCaseFast(nodep); - } + if (useFastBitTree) return convertCaseFast(nodep); // Convert using the generic if/else tree method - ++stats.caseGeneric; // If a case statement is exhaustive, presume signals involved aren't forming a latch // TODO: this is broken, but it is as was before if (m_alwaysp && (!m_caseDetailsValid || m_caseDetails.exhaustive)) { @@ -650,14 +1019,20 @@ class CaseVisitor final : public VNVisitor { } // Convert the case statement and replace the original - if (AstNode* const replacementp = convertCase(nodep, m_stats)) { + if (AstNode* const replacementp = convertCase(nodep)) { nodep->replaceWith(replacementp); } else { nodep->unlinkFrBack(); } VL_DO_DANGLING(nodep->deleteTree(), nodep); } - //-------------------- + + void visit(AstScope* nodep) override { + VL_RESTORER(m_scopep); + m_scopep = nodep; + iterateChildren(nodep); + } + void visit(AstAlways* nodep) override { VL_RESTORER(m_alwaysp); m_alwaysp = nodep; @@ -669,12 +1044,16 @@ public: // CONSTRUCTORS explicit CaseVisitor(AstNetlist* nodep) { iterate(nodep); } ~CaseVisitor() override { + V3Stats::addStat("Optimizations, Cases table normal", m_stats.caseTableNormal); + V3Stats::addStat("Optimizations, Cases table tiny", m_stats.caseTableTiny); V3Stats::addStat("Optimizations, Cases 
parallelized", m_stats.caseFast); V3Stats::addStat("Optimizations, Cases complex", m_stats.caseGeneric); V3Stats::addStat("Optimizations, Cases proven assertions", m_stats.provenAssertions); } }; +size_t CaseVisitor::LhsRecord::s_nextId = 0; + //###################################################################### // Case class functions diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 0098f1d94..34c4f349a 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -3281,7 +3281,7 @@ class ConstVisitor final : public VNVisitor { iterateChildren(nodep); UASSERT_OBJ(nodep->varp(), nodep, "Not linked"); bool did = false; - if (m_doV && nodep->varp()->valuep() && !m_attrp) { + if (m_doV && !nodep->varp()->constPoolEntry() && nodep->varp()->valuep() && !m_attrp) { // UINFOTREE(1, valuep, "", "visitvaref"); iterateAndNextNull(nodep->varp()->valuep()); // May change nodep->varp()->valuep() AstNode* const valuep = nodep->varp()->valuep(); diff --git a/src/V3Dead.cpp b/src/V3Dead.cpp index 1a08da9e8..677f0b2aa 100644 --- a/src/V3Dead.cpp +++ b/src/V3Dead.cpp @@ -597,6 +597,7 @@ public: // We may have removed some datatypes, cleanup nodep->typeTablep()->repairCache(); VIsCached::clearCacheTree(); // Removing assignments may affect isPure + nodep->constPoolp()->reCache(); } ~DeadVisitor() override { V3Stats::addStatSum("Optimizations, deadified FTasks", m_statFTasksDeadified); diff --git a/src/V3DfgOptimizer.cpp b/src/V3DfgOptimizer.cpp index 0ad3b8aa9..07002049d 100644 --- a/src/V3DfgOptimizer.cpp +++ b/src/V3DfgOptimizer.cpp @@ -78,9 +78,15 @@ class DataflowOptimize final { if (AstVarScope* const vscp = VN_CAST(nodep, VarScope)) { const AstVar* const varp = vscp->varp(); // Force and trace have already been processed - const bool hasExtRd = varp->isPrimaryIO() || varp->isSigUserRdPublic(); - const bool hasExtWr - = (varp->isPrimaryIO() && varp->isNonOutput()) || varp->isSigUserRWPublic(); + const bool hasExtRd = // + varp->isPrimaryIO() // Top level port - readable + || 
varp->isSigUserRdPublic() // Readable by user + || varp->constPoolEntry() // Stored in AstConstPool hashmap, but read only + ; + const bool hasExtWr = // + (varp->isPrimaryIO() && varp->isNonOutput()) // Top level port - writable + || varp->isSigUserRWPublic() // Writable by user + ; if (hasExtRd) DfgVertexVar::setHasExtRdRefs(vscp); if (hasExtWr) DfgVertexVar::setHasExtWrRefs(vscp); return; diff --git a/src/V3Options.cpp b/src/V3Options.cpp index dd26e7e06..575a498f9 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1448,7 +1448,12 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, DECL_OPTION("-facyc-simp", FOnOff, &m_fAcycSimp); DECL_OPTION("-fassemble", FOnOff, &m_fAssemble); - DECL_OPTION("-fcase", FOnOff, &m_fCase); + DECL_OPTION("-fcase", CbFOnOff, [this](bool flag) { + m_fCaseTable = flag; + m_fCaseTree = flag; + }); + DECL_OPTION("-fcase-table", FOnOff, &m_fCaseTable); + DECL_OPTION("-fcase-tree", FOnOff, &m_fCaseTree); DECL_OPTION("-fcombine", FOnOff, &m_fCombine); DECL_OPTION("-fconst", FOnOff, &m_fConst); DECL_OPTION("-fconst-before-dfg", FOnOff, &m_fConstBeforeDfg); @@ -2351,7 +2356,8 @@ void V3Options::optimize(int level) { const bool flag = level > 0; m_fAcycSimp = flag; m_fAssemble = flag; - m_fCase = flag; + m_fCaseTable = flag; + m_fCaseTree = flag; m_fCombine = flag; m_fConst = flag; m_fConstBitOpTree = flag; diff --git a/src/V3Options.h b/src/V3Options.h index 56ba51f79..0dca257d7 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -392,7 +392,8 @@ private: // MEMBERS (optimizations) bool m_fAcycSimp; // main switch: -fno-acyc-simp: acyclic pre-optimizations bool m_fAssemble; // main switch: -fno-assemble: assign assemble - bool m_fCase; // main switch: -fno-case: case tree conversion + bool m_fCaseTable; // main switch: -fno-case-table: case table conversion + bool m_fCaseTree; // main switch: -fno-case-tree: case tree conversion bool m_fCombine; // main switch: -fno-combine: common icode packing bool 
m_fConst; // main switch: -fno-const: constant folding bool m_fConstBeforeDfg = true; // main switch: -fno-const-before-dfg for testing only! @@ -725,7 +726,8 @@ public: // ACCESSORS (optimization options) bool fAcycSimp() const { return m_fAcycSimp; } bool fAssemble() const { return m_fAssemble; } - bool fCase() const { return m_fCase; } + bool fCaseTable() const { return m_fCaseTable; } + bool fCaseTree() const { return m_fCaseTree; } bool fCombine() const { return m_fCombine; } bool fConst() const { return m_fConst; } bool fConstBeforeDfg() const { return m_fConstBeforeDfg; } diff --git a/test_regress/t/t_case_huge.py b/test_regress/t/t_case_huge.py index 0ac31a2f8..57891d35b 100755 --- a/test_regress/t/t_case_huge.py +++ b/test_regress/t/t_case_huge.py @@ -16,12 +16,10 @@ test.compile(verilator_flags2=["--stats", "-fno-dfg"]) test.execute() -if test.vlt: - test.file_grep(test.stats, r'Optimizations, Cases parallelized\s+(\d+)', 11) - test.file_grep(test.stats, r'Optimizations, Combined CFuncs\s+(\d+)', 8) - test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 10) -elif test.vltmt: - test.file_grep(test.stats, r'Optimizations, Combined CFuncs\s+(\d+)', 9) - test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 10) +test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 8) +test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Cases parallelized\s+(\d+)', 3) +test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 2) +test.file_grep(test.stats, r'Optimizations, Combined CFuncs\s+(\d+)', 9 if test.vltmt else 8) test.passes() diff --git a/test_regress/t/t_case_huge_nocase.py b/test_regress/t/t_case_huge_nocase.py index 2b3aad742..011bbdc67 100755 --- a/test_regress/t/t_case_huge_nocase.py +++ b/test_regress/t/t_case_huge_nocase.py @@ -16,6 +16,8 @@ test.compile(verilator_flags2=["--stats -fno-case"]) test.execute() 
+test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 0) test.file_grep(test.stats, r'Optimizations, Cases parallelized\s+(\d+)', 0) test.passes() diff --git a/test_regress/t/t_case_huge_nocase_tree.py b/test_regress/t/t_case_huge_nocase_tree.py new file mode 100755 index 000000000..2083db119 --- /dev/null +++ b/test_regress/t/t_case_huge_nocase_tree.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. +# SPDX-FileCopyrightText: 2024 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('simulator_st') +test.top_filename = 't/t_case_huge.v' + +test.compile(verilator_flags2=["--stats -fno-case-tree"]) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 8) +test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Cases parallelized\s+(\d+)', 0) + +test.passes() diff --git a/test_regress/t/t_case_table_normal.py b/test_regress/t/t_case_table_normal.py new file mode 100755 index 000000000..25746e901 --- /dev/null +++ b/test_regress/t/t_case_table_normal.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. 
+# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.compile(verilator_flags2=['--binary', '--stats']) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 8) +test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0) + +test.passes() diff --git a/test_regress/t/t_case_table_normal.v b/test_regress/t/t_case_table_normal.v new file mode 100644 index 000000000..26e32fa81 --- /dev/null +++ b/test_regress/t/t_case_table_normal.v @@ -0,0 +1,273 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain. +// SPDX-FileCopyrightText: 2026 Wilson Snyder +// SPDX-License-Identifier: CC0-1.0 +// +// Case statements that become a "normal" (constant-pool) lookup table, followed by +// cases that must not be converted to one. Each output is compared against an +// equivalent reference computed without a case statement, so the reference itself is +// never tabled. Selectors are wide enough, with enough distinct values, that the +// branch lowering they replace is deep enough to make a table worthwhile. + +// verilog_format: off +`define stop $stop +`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got=%0x exp=%0x (%s !== %s)\n", `__FILE__,`__LINE__, (gotv), (expv), `"gotv`", `"expv`"); `stop; end while(0); +// verilog_format: on + +module t; + logic clk = 1'b0; + always #5 clk = ~clk; + + logic [31:0] cyc = 0; + + // Accept A: single output, blocking assignment, all selector values covered. + logic [15:0] accept_a_out, accept_a_ref; + always_comb + case (cyc[3:0]) + 4'd0: accept_a_out = 16'h1111; + 4'd1: accept_a_out = 16'h2222; + 4'd2: accept_a_out = 16'h4444; + 4'd3: accept_a_out = 16'h8888; + default: accept_a_out = 16'h0f0f; + endcase + assign accept_a_ref = (cyc[3:0] == 4'd0) ? 16'h1111 + : (cyc[3:0] == 4'd1) ? 
16'h2222 + : (cyc[3:0] == 4'd2) ? 16'h4444 + : (cyc[3:0] == 4'd3) ? 16'h8888 : 16'h0f0f; + + // Accept B: single output, non-blocking assignment, with a default value set before + // the case and not all selector values covered. + logic [15:0] accept_b_out, accept_b_ref; + // verilator lint_off CASEINCOMPLETE + always_ff @(posedge clk) begin + accept_b_out <= 16'hffff; + case (cyc[3:0]) + 4'd0: accept_b_out <= 16'h0001; + 4'd1: accept_b_out <= 16'h0002; + 4'd2: accept_b_out <= 16'h0004; + 4'd3: accept_b_out <= 16'h0008; + endcase + end + // verilator lint_on CASEINCOMPLETE + always_ff @(posedge clk) + accept_b_ref <= (cyc[3:0] == 4'd0) ? 16'h0001 + : (cyc[3:0] == 4'd1) ? 16'h0002 + : (cyc[3:0] == 4'd2) ? 16'h0004 + : (cyc[3:0] == 4'd3) ? 16'h0008 : 16'hffff; + + // Accept C: three outputs, blocking assignment, with a default branch. + logic [11:0] accept_c_out_0, accept_c_ref_0; + logic [11:0] accept_c_out_1, accept_c_ref_1; + logic [11:0] accept_c_out_2, accept_c_ref_2; + always_comb + case (cyc[3:0]) + 4'd0: begin accept_c_out_0 = 12'h001; accept_c_out_1 = 12'h010; accept_c_out_2 = 12'h100; end + 4'd1: begin accept_c_out_0 = 12'h002; accept_c_out_1 = 12'h020; accept_c_out_2 = 12'h200; end + 4'd2: begin accept_c_out_0 = 12'h004; accept_c_out_1 = 12'h040; accept_c_out_2 = 12'h400; end + 4'd3: begin accept_c_out_0 = 12'h008; accept_c_out_1 = 12'h080; accept_c_out_2 = 12'h800; end + default: begin accept_c_out_0 = 12'h000; accept_c_out_1 = 12'h0ff; accept_c_out_2 = 12'hfff; end + endcase + assign accept_c_ref_0 = (cyc[3:0] == 4'd0) ? 12'h001 : (cyc[3:0] == 4'd1) ? 12'h002 + : (cyc[3:0] == 4'd2) ? 12'h004 : (cyc[3:0] == 4'd3) ? 12'h008 : 12'h000; + assign accept_c_ref_1 = (cyc[3:0] == 4'd0) ? 12'h010 : (cyc[3:0] == 4'd1) ? 12'h020 + : (cyc[3:0] == 4'd2) ? 12'h040 : (cyc[3:0] == 4'd3) ? 12'h080 : 12'h0ff; + assign accept_c_ref_2 = (cyc[3:0] == 4'd0) ? 12'h100 : (cyc[3:0] == 4'd1) ? 12'h200 + : (cyc[3:0] == 4'd2) ? 12'h400 : (cyc[3:0] == 4'd3) ? 
12'h800 : 12'hfff; + + // Accept D: two outputs, non-blocking assignment, empty default branch, with default + // values set before the case. + logic [15:0] accept_d_out_0, accept_d_ref_0; + logic [15:0] accept_d_out_1, accept_d_ref_1; + always_ff @(posedge clk) begin + accept_d_out_0 <= 16'h0000; + accept_d_out_1 <= 16'hffff; + case (cyc[3:0]) + 4'd0: begin accept_d_out_0 <= 16'h0001; accept_d_out_1 <= 16'h0010; end + 4'd1: begin accept_d_out_0 <= 16'h0002; accept_d_out_1 <= 16'h0020; end + 4'd2: begin accept_d_out_0 <= 16'h0004; accept_d_out_1 <= 16'h0040; end + 4'd3: begin accept_d_out_0 <= 16'h0008; accept_d_out_1 <= 16'h0080; end + default: begin end + endcase + end + always_ff @(posedge clk) begin + accept_d_ref_0 <= (cyc[3:0] == 4'd0) ? 16'h0001 : (cyc[3:0] == 4'd1) ? 16'h0002 + : (cyc[3:0] == 4'd2) ? 16'h0004 : (cyc[3:0] == 4'd3) ? 16'h0008 : 16'h0000; + accept_d_ref_1 <= (cyc[3:0] == 4'd0) ? 16'h0010 : (cyc[3:0] == 4'd1) ? 16'h0020 + : (cyc[3:0] == 4'd2) ? 16'h0040 : (cyc[3:0] == 4'd3) ? 16'h0080 : 16'hffff; + end + + // Accept E: casez with don't-care bits. + logic [15:0] accept_e_out, accept_e_ref; + always_comb + casez (cyc[3:0]) + 4'b00??: accept_e_out = 16'haaaa; + 4'b01??: accept_e_out = 16'hbbbb; + 4'b10??: accept_e_out = 16'hcccc; + 4'b11??: accept_e_out = 16'hdddd; + endcase + assign accept_e_ref = (cyc[3:2] == 2'd0) ? 16'haaaa : (cyc[3:2] == 2'd1) ? 16'hbbbb + : (cyc[3:2] == 2'd2) ? 16'hcccc : 16'hdddd; + + // Accept F: an item that can never match, and an item listing multiple values. + logic [15:0] accept_f_out, accept_f_ref; + // verilator lint_off CASEWITHX + always_comb + casez (cyc[3:0]) + 4'bxxx0: accept_f_out = 16'h0000; // X can never match in 2-state + 4'b0001, 4'b0011, 4'b0101: accept_f_out = 16'h5555; // lists three values + default: accept_f_out = 16'h9999; + endcase + // verilator lint_on CASEWITHX + assign accept_f_ref = (cyc[3:0] == 4'd1 || cyc[3:0] == 4'd3 || cyc[3:0] == 4'd5) + ? 
16'h5555 : 16'h9999; + + // Accept G: items assign different subsets of two outputs, with default values (and an + // unrelated output) set before the case. + logic [15:0] accept_g_out_0, accept_g_ref_0; + logic [15:0] accept_g_out_1, accept_g_ref_1; + logic [15:0] accept_g_out_2, accept_g_ref_2; + always_comb begin + accept_g_out_0 = 16'h0000; + accept_g_out_1 = 16'hffff; + accept_g_out_2 = 16'h3333; // not assigned in the case + case (cyc[3:0]) + 4'd0: accept_g_out_0 = 16'h0001; + 4'd1: accept_g_out_1 = 16'h0002; + 4'd2: begin accept_g_out_0 = 16'h0004; accept_g_out_1 = 16'h0008; end + 4'd3: accept_g_out_0 = 16'h0010; + default: ; + endcase + end + assign accept_g_ref_0 = (cyc[3:0] == 4'd0) ? 16'h0001 : (cyc[3:0] == 4'd2) ? 16'h0004 + : (cyc[3:0] == 4'd3) ? 16'h0010 : 16'h0000; + assign accept_g_ref_1 = (cyc[3:0] == 4'd1) ? 16'h0002 : (cyc[3:0] == 4'd2) ? 16'h0008 : 16'hffff; + assign accept_g_ref_2 = 16'h3333; + + // Accept H: unique0 enum case; the selector may hold an out-of-range value. + typedef enum logic [3:0] {NE0, NE1, NE2, NE3, NE4} ne_t; + ne_t accept_h_in; + assign accept_h_in = ne_t'(cyc[3:0]); + logic [15:0] accept_h_out, accept_h_ref; + always_comb begin + accept_h_out = 16'hffff; + unique0 case (accept_h_in) + NE0: accept_h_out = 16'h0001; + NE1: accept_h_out = 16'h0002; + NE2: accept_h_out = 16'h0003; + NE3: accept_h_out = 16'h0004; + NE4: accept_h_out = 16'h0005; + endcase + end + assign accept_h_ref = (cyc[3:0] == 4'd0) ? 16'h0001 : (cyc[3:0] == 4'd1) ? 16'h0002 + : (cyc[3:0] == 4'd2) ? 16'h0003 : (cyc[3:0] == 4'd3) ? 16'h0004 + : (cyc[3:0] == 4'd4) ? 16'h0005 : 16'hffff; + + // The cases below are intentionally NOT converted to a lookup table. + + // Reject A: too few distinct values, so the branch lowering is cheaper than a load. 
+ logic [15:0] reject_a_out, reject_a_ref; + always_comb + case (cyc[3:0]) + 4'd0: reject_a_out = 16'h0001; + 4'd1: reject_a_out = 16'h0002; + default: reject_a_out = 16'h00ff; + endcase + assign reject_a_ref = (cyc[3:0] == 4'd0) ? 16'h0001 : (cyc[3:0] == 4'd1) ? 16'h0002 : 16'h00ff; + + // Reject B: a one-bit selector, too shallow to be worth a load. + logic [19:0] reject_b_out, reject_b_ref; + always_comb + case (cyc[0]) + 1'b0: reject_b_out = 20'h00001; + 1'b1: reject_b_out = 20'h00002; + default: reject_b_out = 20'h00000; + endcase + assign reject_b_ref = cyc[0] ? 20'h00002 : 20'h00001; + + // Reject C: a 12-bit selector, too wide to table. + logic [15:0] reject_c_out, reject_c_ref; + always_comb + case (cyc[11:0]) + 12'd0: reject_c_out = 16'h0001; + 12'd1: reject_c_out = 16'h0002; + 12'd2: reject_c_out = 16'h0004; + default: reject_c_out = 16'h0000; + endcase + assign reject_c_ref = (cyc[11:0] == 12'd0) ? 16'h0001 + : (cyc[11:0] == 12'd1) ? 16'h0002 + : (cyc[11:0] == 12'd2) ? 16'h0004 : 16'h0000; + + // Reject D: a 17-bit selector, too wide to table. + logic [16:0] reject_d_in; + assign reject_d_in = cyc[16:0]; + logic [15:0] reject_d_out, reject_d_ref; + // verilator lint_off CASEINCOMPLETE + always_comb begin + reject_d_out = 16'hbeef; + case (reject_d_in) + 17'd0: reject_d_out = 16'h0001; + 17'd1: reject_d_out = 16'h0002; + 17'd2: reject_d_out = 16'h0004; + endcase + end + // verilator lint_on CASEINCOMPLETE + assign reject_d_ref = (reject_d_in == 17'd0) ? 16'h0001 + : (reject_d_in == 17'd1) ? 16'h0002 + : (reject_d_in == 17'd2) ? 16'h0004 : 16'hbeef; + + // Reject E: a whole output and a sub-range of it assigned in different items. 
+ logic [7:0] reject_e_out, reject_e_ref; + always_comb begin + reject_e_out = 8'h00; + reject_e_out[3:0] = 4'h0; + case (cyc[1:0]) + 2'b00: reject_e_out = 8'haa; // assigns the whole output + 2'b01: reject_e_out[3:0] = 4'h5; // assigns a sub-range of the same output + default: ; + endcase + end + assign reject_e_ref = (cyc[1:0] == 2'd0) ? 8'haa : (cyc[1:0] == 2'd1) ? 8'h05 : 8'h00; + + // Reject F: a sub-range's default value is overwritten by a later whole-output default + // before the case, so the sub-range's pre-case value is set elsewhere. + logic [31:0] reject_f_out, reject_f_ref; + always_comb begin + reject_f_out[15:0] = 16'h0005; // farther default for the sub-range + reject_f_out = 32'h0; // closer whole-output default overwrites the sub-range to 0 + case (cyc[1:0]) + 2'b00: reject_f_out[15:0] = 16'habcd; // only the sub-range is assigned in the case + default: ; + endcase + end + assign reject_f_ref = (cyc[1:0] == 2'd0) ? 32'h0000abcd : 32'h00000000; + + // Test driver/checker + always @(posedge clk) begin + `checkh(accept_a_out, accept_a_ref); + `checkh(accept_b_out, accept_b_ref); + `checkh(accept_c_out_0, accept_c_ref_0); + `checkh(accept_c_out_1, accept_c_ref_1); + `checkh(accept_c_out_2, accept_c_ref_2); + `checkh(accept_d_out_0, accept_d_ref_0); + `checkh(accept_d_out_1, accept_d_ref_1); + `checkh(accept_e_out, accept_e_ref); + `checkh(accept_f_out, accept_f_ref); + `checkh(accept_g_out_0, accept_g_ref_0); + `checkh(accept_g_out_1, accept_g_ref_1); + `checkh(accept_g_out_2, accept_g_ref_2); + `checkh(accept_h_out, accept_h_ref); + `checkh(reject_a_out, reject_a_ref); + `checkh(reject_b_out, reject_b_ref); + `checkh(reject_c_out, reject_c_ref); + `checkh(reject_d_out, reject_d_ref); + `checkh(reject_e_out, reject_e_ref); + `checkh(reject_f_out, reject_f_ref); + + cyc <= cyc + 32'd1; + if (cyc == 32'd32) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end +endmodule diff --git a/test_regress/t/t_case_table_normal_off.py 
b/test_regress/t/t_case_table_normal_off.py new file mode 100755 index 000000000..7497fbc92 --- /dev/null +++ b/test_regress/t/t_case_table_normal_off.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. +# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.top_filename = "t/t_case_table_normal.v" + +test.compile(verilator_flags2=['--binary', '--stats', '-fno-case-table']) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 0) + +test.passes() diff --git a/test_regress/t/t_case_table_tiny.py b/test_regress/t/t_case_table_tiny.py new file mode 100755 index 000000000..55d103058 --- /dev/null +++ b/test_regress/t/t_case_table_tiny.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. 
+# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.compile(verilator_flags2=['--binary', '--stats']) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 11) +test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 1) + +test.passes() diff --git a/test_regress/t/t_case_table_tiny.v b/test_regress/t/t_case_table_tiny.v new file mode 100644 index 000000000..dd3c6fb5b --- /dev/null +++ b/test_regress/t/t_case_table_tiny.v @@ -0,0 +1,369 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain. +// SPDX-FileCopyrightText: 2026 Wilson Snyder +// SPDX-License-Identifier: CC0-1.0 +// +// Case statements that become a "tiny" lookup table, followed by cases that must +// not be converted to one. Each output is compared against an equivalent reference +// computed without a case statement, so the reference itself is never tabled. + +// verilog_format: off +`define stop $stop +`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got=%0x exp=%0x (%s !== %s)\n", `__FILE__,`__LINE__, (gotv), (expv), `"gotv`", `"expv`"); `stop; end while(0); +`define checkr(gotv,expv) do if ((gotv) != (expv)) begin $write("%%Error: %s:%0d: got=%f exp=%f\n", `__FILE__,`__LINE__, (gotv), (expv)); `stop; end while(0); +`define checks(gotv,expv) do if ((gotv) != (expv)) begin $write("%%Error: %s:%0d: got='%s' exp='%s'\n", `__FILE__,`__LINE__, (gotv), (expv)); `stop; end while(0); +// verilog_format: on + +module t; + logic clk = 1'b0; + always #5 clk = ~clk; + + logic [31:0] cyc = 0; + + // Accept A: single output, blocking assignment, all selector values covered. 
+ wire [2:0] accept_a_in = cyc[2:0]; + logic [3:0] accept_a_out, accept_a_ref; + always_comb + case (accept_a_in) + 3'd0: accept_a_out = 4'd3; + 3'd1: accept_a_out = 4'd4; + 3'd2: accept_a_out = 4'd5; + 3'd3: accept_a_out = 4'd6; + 3'd4: accept_a_out = 4'd7; + 3'd5: accept_a_out = 4'd8; + 3'd6: accept_a_out = 4'd9; + 3'd7: accept_a_out = 4'd10; + endcase + assign accept_a_ref = 4'd3 + {1'b0, accept_a_in}; + + // Accept B: single output, non-blocking assignment, with a default value set before + // the case and not all selector values covered. + logic [3:0] accept_b_out, accept_b_ref; + // verilator lint_off CASEINCOMPLETE + always_ff @(posedge clk) begin + accept_b_out <= 4'hf; + case (cyc[1:0]) + 2'b00: accept_b_out <= 4'h1; + 2'b01: accept_b_out <= 4'h2; + endcase + end + // verilator lint_on CASEINCOMPLETE + always_ff @(posedge clk) + accept_b_ref <= (cyc[1:0] == 2'b00) ? 4'h1 : (cyc[1:0] == 2'b01) ? 4'h2 : 4'hf; + + // Accept C: two outputs of different widths, blocking assignment, with a default branch. + logic [2:0] accept_c_out_0, accept_c_ref_0; + logic [3:0] accept_c_out_1, accept_c_ref_1; + always_comb + case (cyc[1:0]) + 2'b00: begin accept_c_out_0 = 3'd1; accept_c_out_1 = 4'd6; end + 2'b01: begin accept_c_out_0 = 3'd2; accept_c_out_1 = 4'd5; end + default: begin accept_c_out_0 = 3'd0; accept_c_out_1 = 4'd7; end + endcase + assign accept_c_ref_0 = (cyc[1:0] == 2'b00) ? 3'd1 : (cyc[1:0] == 2'b01) ? 3'd2 : 3'd0; + assign accept_c_ref_1 = (cyc[1:0] == 2'b00) ? 4'd6 : (cyc[1:0] == 2'b01) ? 4'd5 : 4'd7; + + // Accept D: two outputs, non-blocking assignment, empty default branch, with default + // values set before the case. 
+ logic [2:0] accept_d_out_0, accept_d_ref_0; + logic [2:0] accept_d_out_1, accept_d_ref_1; + always_ff @(posedge clk) begin + accept_d_out_0 <= 3'd0; + accept_d_out_1 <= 3'd7; + case (cyc[1:0]) + 2'b00: begin accept_d_out_0 <= 3'd1; accept_d_out_1 <= 3'd6; end + 2'b01: begin accept_d_out_0 <= 3'd2; accept_d_out_1 <= 3'd5; end + default: begin end + endcase + end + always_ff @(posedge clk) begin + accept_d_ref_0 <= (cyc[1:0] == 2'b00) ? 3'd1 : (cyc[1:0] == 2'b01) ? 3'd2 : 3'd0; + accept_d_ref_1 <= (cyc[1:0] == 2'b00) ? 3'd6 : (cyc[1:0] == 2'b01) ? 3'd5 : 3'd7; + end + + // Accept E: casez with a don't-care bit. + logic [3:0] accept_e_out, accept_e_ref; + always_comb + casez (cyc[1:0]) + 2'b1?: accept_e_out = 4'ha; + 2'b0?: accept_e_out = 4'hb; + endcase + assign accept_e_ref = cyc[1] ? 4'ha : 4'hb; + + // Accept F: an item that can never match, and an item listing multiple values. + logic [3:0] accept_f_out, accept_f_ref; + // verilator lint_off CASEWITHX + always_comb + casez (cyc[1:0]) + 2'bx0: accept_f_out = 4'h0; // X can never match in 2-state + 2'b01, 2'b11: accept_f_out = 4'h5; // lists two values + default: accept_f_out = 4'h9; + endcase + // verilator lint_on CASEWITHX + assign accept_f_ref = (cyc[1:0] == 2'b01 || cyc[1:0] == 2'b11) ? 4'h5 : 4'h9; + + // Accept G: items assign different subsets of two outputs, with default values (and an + // unrelated output) set before the case. + logic [3:0] accept_g_out_0, accept_g_ref_0; + logic [3:0] accept_g_out_1, accept_g_ref_1; + logic [3:0] accept_g_out_2, accept_g_ref_2; + // verilator lint_off CASEINCOMPLETE + always_comb begin + accept_g_out_0 = 4'h0; + accept_g_out_1 = 4'hf; + accept_g_out_2 = 4'h3; // not assigned in the case + case (cyc[1:0]) + 2'b00: accept_g_out_0 = 4'h1; + 2'b01: accept_g_out_1 = 4'h2; + endcase + end + // verilator lint_on CASEINCOMPLETE + assign accept_g_ref_0 = (cyc[1:0] == 2'b00) ? 4'h1 : 4'h0; + assign accept_g_ref_1 = (cyc[1:0] == 2'b01) ? 
4'h2 : 4'hf; + assign accept_g_ref_2 = 4'h3; + + // Accept H: single output, non-blocking assignment, all selector values covered. + logic [3:0] accept_h_out, accept_h_ref; + always_ff @(posedge clk) + case (cyc[1:0]) + 2'b00: accept_h_out <= 4'h1; + 2'b01: accept_h_out <= 4'h2; + 2'b10: accept_h_out <= 4'h4; + 2'b11: accept_h_out <= 4'h8; + endcase + always_ff @(posedge clk) + accept_h_ref <= 4'h1 << cyc[1:0]; + + // Accept I: unique0 enum case; the selector may hold an out-of-range value. + typedef enum logic [1:0] {E0, E1, E2} e_t; + e_t accept_i_in; + assign accept_i_in = e_t'(cyc[1:0]); + logic [3:0] accept_i_out, accept_i_ref; + always_comb begin + accept_i_out = 4'hf; + unique0 case (accept_i_in) + E0: accept_i_out = 4'h1; + E1: accept_i_out = 4'h2; + E2: accept_i_out = 4'h3; + endcase + end + assign accept_i_ref = (cyc[1:0] == 2'd0) ? 4'h1 + : (cyc[1:0] == 2'd1) ? 4'h2 + : (cyc[1:0] == 2'd2) ? 4'h3 : 4'hf; + + // Accept J: wide output, materialized as a normal (not tiny) lookup table. + logic [8:0] accept_j_out, accept_j_ref; + always_comb + case (cyc[3:0]) + 4'd0: accept_j_out = 9'h001; + 4'd1: accept_j_out = 9'h002; + 4'd2: accept_j_out = 9'h004; + 4'd3: accept_j_out = 9'h008; + default: accept_j_out = 9'h010; + endcase + assign accept_j_ref = (cyc[3:0] < 4'd4) ? (9'h1 << cyc[3:0]) : 9'h010; + + // Accept K: a non-constant assignment precedes the case. + logic [3:0] accept_k_out_0, accept_k_ref_0; + logic [3:0] accept_k_out_1, accept_k_ref_1; + always_comb begin + accept_k_out_1 = cyc[3:0] ^ 4'ha; // non-constant value + case (cyc[1:0]) + 2'b00: accept_k_out_0 = 4'h1; + 2'b01: accept_k_out_0 = 4'h2; + 2'b10: accept_k_out_0 = 4'h4; + 2'b11: accept_k_out_0 = 4'h8; + endcase + end + assign accept_k_ref_0 = 4'h1 << cyc[1:0]; + assign accept_k_ref_1 = cyc[3:0] ^ 4'ha; + + // Accept L: the same output is given a default value twice before the case. 
+ logic [3:0] accept_l_out, accept_l_ref; + // verilator lint_off CASEINCOMPLETE + always_comb begin + accept_l_out = 4'h1; + accept_l_out = 4'h6; // assigned a second time before the case + case (cyc[1:0]) + 2'b00: accept_l_out = 4'h2; + 2'b01: accept_l_out = 4'h3; + endcase + end + // verilator lint_on CASEINCOMPLETE + assign accept_l_ref = (cyc[1:0] == 2'd0) ? 4'h2 : (cyc[1:0] == 2'd1) ? 4'h3 : 4'h6; + + // The cases below are intentionally NOT converted to a lookup table. + + // Reject A: an item whose body is not a simple assignment. + logic [3:0] reject_a_out, reject_a_ref; + always_comb begin + reject_a_out = 4'h0; + case (cyc[1:0]) + 2'b00: reject_a_out = 4'h1; + 2'b01: if (cyc[0]) reject_a_out = 4'h2; // not a simple assignment + default: reject_a_out = 4'h3; + endcase + end + assign reject_a_ref = (cyc[1:0] == 2'd0) ? 4'h1 : (cyc[1:0] == 2'd1) ? 4'h2 : 4'h3; + + // Reject B: an item assigns through a variable bit-select (the index is read). + logic [3:0] reject_b_out, reject_b_ref; + always_comb begin + reject_b_out = 4'h0; + case (cyc[1:0]) + 2'b00: reject_b_out[cyc[1:0]] = 1'b1; + default: reject_b_out = 4'h5; + endcase + end + assign reject_b_ref = (cyc[1:0] == 2'd0) ? 4'h1 : 4'h5; + + // Reject C: an item assigns the same output twice. + logic [3:0] reject_c_out, reject_c_ref; + always_comb begin + reject_c_out = 4'h0; + case (cyc[1:0]) + 2'b00: begin reject_c_out = 4'h1; reject_c_out = 4'h2; end + default: reject_c_out = 4'h3; + endcase + end + assign reject_c_ref = (cyc[1:0] == 2'd0) ? 4'h2 : 4'h3; + + // Reject D: a non-constant case-item value. + logic [1:0] reject_d_in; + assign reject_d_in = cyc[1:0]; + logic [3:0] reject_d_out, reject_d_ref; + always_comb begin + reject_d_out = 4'h0; + case (cyc[1:0]) + reject_d_in: reject_d_out = 4'h7; // non-constant item value + default: reject_d_out = 4'h9; + endcase + end + assign reject_d_ref = 4'h7; // reject_d_in always equals the case expression + + // Reject E: all items are empty. 
+ logic [3:0] reject_e_out, reject_e_ref; + always_comb begin + reject_e_out = 4'h7; + case (cyc[2:0]) + 3'd0: ; + 3'd1: ; + 3'd2: ; + 3'd3: ; + 3'd4: ; + 3'd5: ; + 3'd6: ; + 3'd7: ; + endcase + end + assign reject_e_ref = 4'h7; + + // Reject F: an item uses a delayed (intra-assignment) assignment. + logic [3:0] reject_f_out, reject_f_ref; + always_ff @(posedge clk) + case (cyc[1:0]) + 2'b00: reject_f_out <= #1 4'h1; // delayed assignment + default: reject_f_out <= 4'h2; + endcase + always_ff @(posedge clk) + if (cyc[1:0] == 2'b00) reject_f_ref <= #1 4'h1; + else reject_f_ref <= 4'h2; + + // Reject G: an output assigned with both blocking and non-blocking assignments. The three + // variants exercise the distinct ways the assignment kinds conflict. The deliberate + // mixing warnings are waived. + // verilator lint_off BLKANDNBLK + // verilator lint_off COMBDLY + // verilator lint_off CASEINCOMPLETE + // Variant 0: an item mixes a blocking and a non-blocking assignment to the same output. + logic [3:0] reject_g_out_0, reject_g_ref_0; + always_comb + case (cyc[1:0]) + 2'b00: reject_g_out_0 = 4'h1; + 2'b01: reject_g_out_0 <= 4'h2; + default: reject_g_out_0 = 4'h3; + endcase + assign reject_g_ref_0 = (cyc[1:0] == 2'b00) ? 4'h1 : (cyc[1:0] == 2'b01) ? 4'h2 : 4'h3; + // Variant 1: blocking items, but the pre-case default is a non-blocking assignment. + logic [3:0] reject_g_out_1, reject_g_ref_1; + always_comb begin + reject_g_out_1 <= 4'h0; + case (cyc[1:0]) + 2'b00: reject_g_out_1 = 4'h1; + 2'b01: reject_g_out_1 = 4'h2; + endcase + end + assign reject_g_ref_1 = (cyc[1:0] == 2'b00) ? 4'h1 : (cyc[1:0] == 2'b01) ? 4'h2 : 4'h0; + // Variant 2: non-blocking items, but the pre-case default is a blocking assignment. + logic [3:0] reject_g_out_2, reject_g_ref_2; + always_comb begin + reject_g_out_2 = 4'h0; + case (cyc[1:0]) + 2'b00: reject_g_out_2 <= 4'h1; + 2'b01: reject_g_out_2 <= 4'h2; + endcase + end + assign reject_g_ref_2 = (cyc[1:0] == 2'b00) ? 
4'h1 : (cyc[1:0] == 2'b01) ? 4'h2 : 4'h0; + // verilator lint_on CASEINCOMPLETE + // verilator lint_on COMBDLY + // verilator lint_on BLKANDNBLK + + // Reject H: items assign a real (non-packed) output. + real reject_h_out, reject_h_ref; + always_comb + case (cyc[1:0]) + 2'b00: reject_h_out = 1.5; + 2'b01: reject_h_out = 2.5; + default: reject_h_out = 9.0; + endcase + always_comb reject_h_ref = (cyc[1:0] == 2'b00) ? 1.5 : (cyc[1:0] == 2'b01) ? 2.5 : 9.0; + + // Reject I: items assign a string (non-packed) output. + string reject_i_out, reject_i_ref; + always_comb + case (cyc[1:0]) + 2'b00: reject_i_out = "zero"; + 2'b01: reject_i_out = "one"; + default: reject_i_out = "other"; + endcase + always_comb reject_i_ref = (cyc[1:0] == 2'b00) ? "zero" : (cyc[1:0] == 2'b01) ? "one" : "other"; + + // Test driver/checker + always @(posedge clk) begin + `checkh(accept_a_out, accept_a_ref); + `checkh(accept_b_out, accept_b_ref); + `checkh(accept_c_out_0, accept_c_ref_0); + `checkh(accept_c_out_1, accept_c_ref_1); + `checkh(accept_d_out_0, accept_d_ref_0); + `checkh(accept_d_out_1, accept_d_ref_1); + `checkh(accept_e_out, accept_e_ref); + `checkh(accept_f_out, accept_f_ref); + `checkh(accept_g_out_0, accept_g_ref_0); + `checkh(accept_g_out_1, accept_g_ref_1); + `checkh(accept_g_out_2, accept_g_ref_2); + `checkh(accept_h_out, accept_h_ref); + `checkh(accept_i_out, accept_i_ref); + `checkh(accept_j_out, accept_j_ref); + `checkh(accept_k_out_0, accept_k_ref_0); + `checkh(accept_k_out_1, accept_k_ref_1); + `checkh(accept_l_out, accept_l_ref); + `checkh(reject_a_out, reject_a_ref); + `checkh(reject_b_out, reject_b_ref); + `checkh(reject_c_out, reject_c_ref); + `checkh(reject_d_out, reject_d_ref); + `checkh(reject_e_out, reject_e_ref); + `checkh(reject_f_out, reject_f_ref); + `checkh(reject_g_out_0, reject_g_ref_0); + `checkh(reject_g_out_1, reject_g_ref_1); + `checkh(reject_g_out_2, reject_g_ref_2); + `checkr(reject_h_out, reject_h_ref); + `checks(reject_i_out, reject_i_ref); + + cyc 
<= cyc + 32'd1; + if (cyc == 32'd32) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end +endmodule diff --git a/test_regress/t/t_case_table_tiny_off.py b/test_regress/t/t_case_table_tiny_off.py new file mode 100755 index 000000000..8890dcd07 --- /dev/null +++ b/test_regress/t/t_case_table_tiny_off.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. +# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.top_filename = "t/t_case_table_tiny.v" + +test.compile(verilator_flags2=['--binary', '--stats', '-fno-case-table']) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0) +test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 0) + +test.passes() diff --git a/test_regress/t/t_dfg_constpool_unused.py b/test_regress/t/t_dfg_constpool_unused.py new file mode 100755 index 000000000..d8194a13e --- /dev/null +++ b/test_regress/t/t_dfg_constpool_unused.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. 
+# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.compile(verilator_flags2=['--binary', '--stats']) + +test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 1) +test.file_grep(test.stats, r'Optimizations, DFG, Peephole, remove var\s+(\d+)', 2) +test.file_grep_not(test.stats, r'ConstPool, Constants emitted') # Removed by V3Dead later + +test.passes() diff --git a/test_regress/t/t_dfg_constpool_unused.v b/test_regress/t/t_dfg_constpool_unused.v new file mode 100644 index 000000000..339346ffa --- /dev/null +++ b/test_regress/t/t_dfg_constpool_unused.v @@ -0,0 +1,44 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain. +// SPDX-FileCopyrightText: 2026 Wilson Snyder +// SPDX-License-Identifier: CC0-1.0 + +// verilog_format: off +`define stop $stop +`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got=%0x exp=%0x (%s !== %s)\n", `__FILE__,`__LINE__, (gotv), (expv), `"gotv`", `"expv`"); `stop; end while(0); +// verilog_format: on + +module t; + logic clk = 1'b0; + always #5 clk = ~clk; + + logic [31:0] cyc = 0; + + // Converted to case table in const pool, but proven unused by Dfg + logic [15:0] out; + always_comb begin + case (cyc[3:0]) + 4'd0: out = 16'h1111; + 4'd1: out = 16'h2222; + 4'd2: out = 16'h4444; + 4'd3: out = 16'h8888; + default: out = 16'h0f0f; + endcase + end + + // Complicated way to write constant 0 that only Dfg can decipher + wire [63:0] convoluted_zero = (({64{cyc[0]}} & ~{64{cyc[0]}})); + + wire logic [15:0] zero = &convoluted_zero ? 
out : 16'd0; + + // Test driver/checker + always @(posedge clk) begin + `checkh(zero, 16'd0); + cyc <= cyc + 32'd1; + if (cyc == 32'd32) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end +endmodule diff --git a/test_regress/t/t_opt_table_enum.py b/test_regress/t/t_opt_table_enum.py index 5908d7cde..561f0c766 100755 --- a/test_regress/t/t_opt_table_enum.py +++ b/test_regress/t/t_opt_table_enum.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git a/test_regress/t/t_opt_table_packed_array.py b/test_regress/t/t_opt_table_packed_array.py index 5908d7cde..561f0c766 100755 --- a/test_regress/t/t_opt_table_packed_array.py +++ b/test_regress/t/t_opt_table_packed_array.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git a/test_regress/t/t_opt_table_real.py b/test_regress/t/t_opt_table_real.py index 5908d7cde..561f0c766 100755 --- a/test_regress/t/t_opt_table_real.py +++ b/test_regress/t/t_opt_table_real.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git a/test_regress/t/t_opt_table_same.py b/test_regress/t/t_opt_table_same.py index 2cee4586a..a51a48a73 100755 --- a/test_regress/t/t_opt_table_same.py +++ b/test_regress/t/t_opt_table_same.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats"]) 
+test.compile(verilator_flags2=["--stats", "-fno-case-table"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 2) diff --git a/test_regress/t/t_opt_table_signed.py b/test_regress/t/t_opt_table_signed.py index 5908d7cde..561f0c766 100755 --- a/test_regress/t/t_opt_table_signed.py +++ b/test_regress/t/t_opt_table_signed.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git a/test_regress/t/t_opt_table_string.py b/test_regress/t/t_opt_table_string.py index 5908d7cde..561f0c766 100755 --- a/test_regress/t/t_opt_table_string.py +++ b/test_regress/t/t_opt_table_string.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git a/test_regress/t/t_opt_table_struct.py b/test_regress/t/t_opt_table_struct.py index 5908d7cde..561f0c766 100755 --- a/test_regress/t/t_opt_table_struct.py +++ b/test_regress/t/t_opt_table_struct.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1)