diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index 2be7d5bf4..4e302f06d 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -664,6 +664,10 @@ Summary: Alias for all other `-fno-case-*` options. +.. option:: -fno-case-decoder + + Rarely needed. Disable converting case statements into decoder tables. + .. option:: -fno-case-table Rarely needed. Disable converting case statements into table lookups. diff --git a/include/verilated_funcs.h b/include/verilated_funcs.h index 8aebd1e34..d69bca256 100644 --- a/include/verilated_funcs.h +++ b/include/verilated_funcs.h @@ -269,6 +269,41 @@ static inline VlQueue VL_CVT_UNPACK_TO_Q(const VlUnpacked& q) VL_ return ret; } +// Masked match functions +static inline IData VL_MATCHMASKED_I(int, IData lhs, WDataInP matchp) VL_PURE { + size_t i = 0; + while (true) { + const IData mask = matchp[i * 2]; + const IData bits = matchp[i * 2 + 1]; + if ((mask & lhs) == bits) break; + ++i; + } + return i; +} +static inline IData VL_MATCHMASKED_Q(int, QData lhs, WDataInP matchp) VL_PURE { + size_t i = 0; + while (true) { + const QData mask = VL_SET_QW(matchp + i * 4); + const QData bits = VL_SET_QW(matchp + i * 4 + 2); + if ((mask & lhs) == bits) break; + ++i; + } + return i; +} +static inline IData VL_MATCHMASKED_W(int lbits, WDataInP lhsp, WDataInP matchp) VL_MT_SAFE { + const int iwords = VL_WORDS_I(lbits); + size_t i = 0; + while (true) { + const WDataInP maskp = matchp + (i * iwords * 2); + const WDataInP bitsp = matchp + (i * iwords * 2 + iwords); + EData diff = 0; + for (int j = 0; j < iwords; ++j) diff |= (maskp[j] & lhsp[j]) ^ bitsp[j]; + if (!diff) break; + ++i; + } + return i; +} + // Return double from lhs (numeric) unsigned double VL_ITOR_D_W(int lbits, WDataInP const lwp) VL_PURE; static inline double VL_ITOR_D_I(int, IData lhs) VL_PURE { diff --git a/src/V3AstInlines.h b/src/V3AstInlines.h index 8a4477060..6ef16eec6 100644 --- a/src/V3AstInlines.h +++ b/src/V3AstInlines.h @@ 
-163,6 +163,13 @@ bool AstVar::sameNode(const AstNode* samep) const { return m_name == asamep->m_name && varType() == asamep->varType(); } +AstMatchMasked::AstMatchMasked(FileLine* fl, AstNodeExpr* lhsp, AstVarScope* matchp) + : ASTGEN_SUPER_MatchMasked(fl) { + this->lhsp(lhsp); + this->matchp(new AstVarRef{fl, matchp, VAccess::READ}); + dtypeSetUInt32(); +} + AstVarRef::AstVarRef(FileLine* fl, AstVar* varp, const VAccess& access) : ASTGEN_SUPER_VarRef(fl, varp, access) { if (v3Global.assertDTypesResolved()) { diff --git a/src/V3AstNodeExpr.h b/src/V3AstNodeExpr.h index f6e0d98f5..91d689227 100644 --- a/src/V3AstNodeExpr.h +++ b/src/V3AstNodeExpr.h @@ -1855,6 +1855,22 @@ public: bool index() const { return m_index; } bool isExprCoverageEligible() const override { return false; } }; +class AstMatchMasked final : public AstNodeExpr { + // This is a non-source construct, created internally to represent + // some case statements. It is a '(mask & _) == bits' matching loop + // where {mask, bits} pairs are packed into a single wide 'matchp', + // and the result is the index of the first matching entry. + // See VL_MATCHMASKED_* runtime functions. 
+ // @astgen op1 := lhsp : AstNodeExpr + // @astgen op2 := matchp : AstVarRef +public: + inline AstMatchMasked(FileLine* fl, AstNodeExpr* lhsp, AstVarScope* matchp); + ASTGEN_MEMBERS_AstMatchMasked; + string emitVerilog() override { V3ERROR_NA_RETURN(""); } + string emitC() override { return "VL_MATCHMASKED_%lq(%lw, %li, %ri)"; } + bool cleanOut() const override { return true; } + static uint32_t fold(const V3Number& lhs, AstVar* matchVarp); +}; class AstMatches final : public AstNodeExpr { // "matches" operator: "expr matches pattern" // @astgen op1 := lhsp : AstNodeExpr // Expression to match diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 3b35037a8..85b077053 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -546,6 +546,26 @@ AstConst* AstConst::parseParamLiteral(FileLine* fl, const string& literal) { string AstConstraintRef::name() const { return constrp()->name(); } +uint32_t AstMatchMasked::fold(const V3Number& lhs, AstVar* matchVarp) { + const V3Number& numTable = VN_AS(matchVarp->valuep(), Const)->num(); + V3Number numMask{matchVarp, lhs.width(), 0}; + V3Number numBits{matchVarp, lhs.width(), 0}; + V3Number numAnd{matchVarp, lhs.width(), 0}; + const int width = lhs.width(); + const int entryWidth = VL_WORDS_I(width) * VL_EDATASIZE; + uint32_t i = 0; + while (true) { + const int lsb = 2 * i * entryWidth; + const int msb = lsb + width - 1; + numMask.opSel(numTable, msb, lsb); + numBits.opSel(numTable, msb + entryWidth, lsb + entryWidth); + numAnd.opAnd(numMask, lhs); + if (numAnd.isCaseEq(numBits)) break; + ++i; + } + return i; +} + AstNetlist::AstNetlist() : ASTGEN_SUPER_Netlist(new FileLine{FileLine::builtInFilename()}) , m_typeTablep{new AstTypeTable{fileline()}} diff --git a/src/V3Case.cpp b/src/V3Case.cpp index 251c67202..c4d4fbc21 100644 --- a/src/V3Case.cpp +++ b/src/V3Case.cpp @@ -166,6 +166,7 @@ class CaseVisitor final : public VNVisitor { // STATE // Statistics tracking, as a struct so can be passed to 'const' methods struct 
Stats final { + VDouble0 caseDecoder; // Cases using decoder method VDouble0 caseTableNormal; // Cases using table method with normal table VDouble0 caseTableTiny; // Cases using table method with tiny table VDouble0 caseFast; // Cases using fast bit tree method @@ -540,15 +541,14 @@ class CaseVisitor final : public VNVisitor { lhsRecord.offset = m_caseDecoderEntryWidth; m_caseDecoderEntryWidth += width; } - // Also align the whole entry width to a word boundary - m_caseDecoderEntryWidth = VL_WORDS_I(m_caseDecoderEntryWidth) * VL_EDATASIZE; // Check the table fits max size - if (fitsLimit(m_caseDecoderEntryWidth, CASE_TABLE_MAX_BITS)) { - m_caseTableWidth = m_caseDecoderEntryWidth << caseWidth; // Can optimize + const size_t alignedEntryWidth = VL_WORDS_I(m_caseDecoderEntryWidth) * VL_EDATASIZE; + if (fitsLimit(alignedEntryWidth, CASE_TABLE_MAX_BITS)) { + m_caseTableWidth = alignedEntryWidth << caseWidth; // Can optimize return; } - // Can't optimize - yet ... + // Can optimize as AstMatchMasked, no other info needed } // Analyze case statement. Updates 'm_case*' members. Reports warnings. @@ -593,8 +593,8 @@ class CaseVisitor final : public VNVisitor { for (AstCaseItem* cip = nodep->itemsp(); cip; cip = VN_AS(cip->nextp(), CaseItem)) { // Check conditions for (AstNode* condp = cip->condsp(); condp; condp = condp->nextp()) { - // Count conditions - ++m_caseNConditions; + // Count conditions that can actually match. 
+ if (!neverItem(nodep, VN_AS(condp, NodeExpr))) ++m_caseNConditions; // Mark opaque if non-constant condition if (!VN_IS(condp, Const)) { m_caseOpaque = true; @@ -619,6 +619,50 @@ class CaseVisitor final : public VNVisitor { if (canBeDecoder) analyzeDecoderPattern(nodep); } + AstNodeStmt* connectDecoderOutputs(AstCase* nodep, AstNodeExpr* exprp, + const char* tmpPrefixp) { + FileLine* const flp = nodep->fileline(); + + // If there is only one LHS, just use the result + if (m_caseDecoderRecords.size() == 1) { + const LhsRecord& lhsRecord = m_caseDecoderRecords[0]; + const int width = lhsRecord.lhsp->width(); + AstNodeExpr* const rhsp + = exprp->width() == width ? exprp : new AstSel{flp, exprp, 0, width}; + AstNodeExpr* const lhsp = lhsRecord.lhsp->cloneTreePure(false); + if (lhsRecord.nCaseAssigns) { + return new AstAssign{flp, lhsp, rhsp}; + } else if (lhsRecord.nCaseAssignDlys) { + return new AstAssignDly{flp, lhsp, rhsp}; + } else { + nodep->v3fatalSrc("Unknown assignment type"); + } + } + + // There are multiple LHSs, store the lookup result in a temporary + const std::string name = tmpPrefixp + std::to_string(m_nTmps++); + AstVarScope* const tempVscp = m_scopep->createTemp(name, m_caseDecoderEntryWidth); + AstNodeExpr* const tempWritep = new AstVarRef{flp, tempVscp, VAccess::WRITE}; + AstNodeStmt* const resultp = new AstAssign{flp, tempWritep, exprp}; + + // For each LHS, select out the result + for (const LhsRecord& lhsRecord : m_caseDecoderRecords) { + const int width = lhsRecord.lhsp->width(); + const int lsb = lhsRecord.offset; + AstNodeExpr* const tempReadp = new AstVarRef{flp, tempVscp, VAccess::READ}; + AstNodeExpr* const rhsp = new AstSel{flp, tempReadp, lsb, width}; + AstNodeExpr* const lhsp = lhsRecord.lhsp->cloneTreePure(false); + if (lhsRecord.nCaseAssigns) { + resultp->addNext(new AstAssign{flp, lhsp, rhsp}); + } else if (lhsRecord.nCaseAssignDlys) { + resultp->addNext(new AstAssignDly{flp, lhsp, rhsp}); + } else { + nodep->v3fatalSrc("Unknown 
assignment type"); + } + } + return resultp; + } + AstNodeStmt* convertCaseTable(AstCase* nodep) { // Create the table constant FileLine* const flp = nodep->fileline(); @@ -626,7 +670,17 @@ class CaseVisitor final : public VNVisitor { = new AstConst{flp, AstConst::WidthedValue{}, static_cast(m_caseTableWidth), 0}; const uint32_t tableEntries = 1U << nodep->exprp()->width(); - // Populate the table + const bool isTinyTable = m_caseTableWidth <= CASE_TABLE_TINY_BITS; + if (isTinyTable) { + ++m_stats.caseTableTiny; + } else { + ++m_stats.caseTableNormal; + } + + // Populate the table. Align entries to a word boundary to avoid bit swizzling at runtime. + const uint32_t entryWidth = isTinyTable + ? m_caseDecoderEntryWidth + : VL_WORDS_I(m_caseDecoderEntryWidth) * VL_EDATASIZE; for (const LhsRecord& lhsRecord : m_caseDecoderRecords) { const int lhsWidth = lhsRecord.lhsp->width(); const int lhsOffset = lhsRecord.offset; @@ -635,7 +689,7 @@ class CaseVisitor final : public VNVisitor { if (lhsRecord.preDefaultp) { AstConst* const rhsp = VN_AS(lhsRecord.preDefaultp->rhsp(), Const); for (uint32_t index = 0; index < tableEntries; ++index) { - const uint32_t tableOffset = index * m_caseDecoderEntryWidth + lhsOffset; + const uint32_t tableOffset = index * entryWidth + lhsOffset; tablep->num().opSelInto(rhsp->num(), tableOffset, lhsWidth); } } @@ -656,7 +710,7 @@ class CaseVisitor final : public VNVisitor { // If default, broadcast it if (cip->isDefault()) { for (uint32_t index = 0; index < tableEntries; ++index) { - const uint32_t tableOffset = index * m_caseDecoderEntryWidth + lhsOffset; + const uint32_t tableOffset = index * entryWidth + lhsOffset; tablep->num().opSelInto(rhsp->num(), tableOffset, lhsWidth); } continue; @@ -674,7 +728,7 @@ class CaseVisitor final : public VNVisitor { // i.e.: all don't care values masked out for (uint32_t i = inverseMask; true; i = (i - 1) & inverseMask) { const uint32_t index = i | matchBits; - const uint32_t tableOffset = index * 
m_caseDecoderEntryWidth + lhsOffset; + const uint32_t tableOffset = index * entryWidth + lhsOffset; tablep->num().opSelInto(rhsp->num(), tableOffset, lhsWidth); if (!i) break; } @@ -684,11 +738,7 @@ class CaseVisitor final : public VNVisitor { // Create the table in the constant pool, unless using an inline table AstVarScope* const tableVscp = [&]() -> AstVarScope* { - if (m_caseTableWidth <= CASE_TABLE_TINY_BITS) { - ++m_stats.caseTableTiny; - return nullptr; - } - ++m_stats.caseTableNormal; + if (isTinyTable) return nullptr; AstVarScope* vscp = v3Global.rootp()->constPoolp()->findConst(tablep, true); VL_DO_DANGLING(tablep->deleteTree(), tablep); // findConst clones return vscp; @@ -700,49 +750,121 @@ class CaseVisitor final : public VNVisitor { : static_cast(tablep); AstNodeExpr* const caseExprp = new AstExtend{flp, nodep->exprp()->cloneTreePure(false), 32}; - AstNodeExpr* const scalep - = new AstConst{flp, static_cast(m_caseDecoderEntryWidth)}; + AstNodeExpr* const scalep = new AstConst{flp, entryWidth}; AstNodeExpr* const tableLsbp = new AstMul{flp, scalep, caseExprp}; - - // If there is only one LHS, just use the result - if (m_caseDecoderRecords.size() == 1) { - const LhsRecord& lhsRecord = m_caseDecoderRecords[0]; - const int width = lhsRecord.lhsp->width(); - AstNodeExpr* const rhsp = new AstSel{flp, tableRefp, tableLsbp, width}; - AstNodeExpr* const lhsp = lhsRecord.lhsp->cloneTreePure(false); - if (lhsRecord.nCaseAssigns) { - return new AstAssign{flp, lhsp, rhsp}; - } else if (lhsRecord.nCaseAssignDlys) { - return new AstAssignDly{flp, lhsp, rhsp}; - } else { - nodep->v3fatalSrc("Unknown assignment type"); - } - } - - // There are multiple LHSs, store the lookup result in a temporary - const std::string name = "__VcaseTableOut" + std::to_string(m_nTmps++); - AstVarScope* const tempVscp = m_scopep->createTemp(name, m_caseDecoderEntryWidth); - AstNodeExpr* const tempWritep = new AstVarRef{flp, tempVscp, VAccess::WRITE}; AstNodeExpr* const tableSelp = new 
AstSel{flp, tableRefp, tableLsbp, static_cast(m_caseDecoderEntryWidth)}; - AstNodeStmt* const resultp = new AstAssign{flp, tempWritep, tableSelp}; - // For each LHS, select out the result - for (const LhsRecord& lhsRecord : m_caseDecoderRecords) { - const int width = lhsRecord.lhsp->width(); - const int lsb = lhsRecord.offset; - AstNodeExpr* const tempReadp = new AstVarRef{flp, tempVscp, VAccess::READ}; - AstNodeExpr* const rhsp = new AstSel{flp, tempReadp, lsb, width}; - AstNodeExpr* const lhsp = lhsRecord.lhsp->cloneTreePure(false); - if (lhsRecord.nCaseAssigns) { - resultp->addNext(new AstAssign{flp, lhsp, rhsp}); - } else if (lhsRecord.nCaseAssignDlys) { - resultp->addNext(new AstAssignDly{flp, lhsp, rhsp}); - } else { - nodep->v3fatalSrc("Unknown assignment type"); + // Connect outputs + return connectDecoderOutputs(nodep, tableSelp, "__VcaseTableOut"); + } + + AstNodeStmt* convertCaseDecoder(AstCase* nodep) { + ++m_stats.caseDecoder; + + FileLine* const flp = nodep->fileline(); + + // Gather all the case conditions, paired with their statements. A 'nullptr' condition + // matches anything (the default case, or the catch-all added below). + std::vector> clauses; // (condition, item statements) + clauses.reserve(m_caseNConditions + 1); + for (AstCaseItem* cip = nodep->itemsp(); cip; cip = VN_AS(cip->nextp(), CaseItem)) { + if (cip->isDefault()) { + clauses.emplace_back(nullptr, cip->stmtsp()); + continue; + } + for (AstNode* condp = cip->condsp(); condp; condp = condp->nextp()) { + AstConst* const iconstp = VN_AS(condp, Const); + // Skip items that can never match in 2-state simulation (e.g. X in casez) + if (neverItem(nodep, iconstp)) continue; + clauses.emplace_back(iconstp, cip->stmtsp()); } } - return resultp; + // If the case has no default item and is not provably exhaustive, unmatched selector + // values fall back to the pre-defaults. Represent that with a catch-all clause (null + // condition and no statements, so every LHS uses its pre-default). 
'analyzeDecoderPattern' + // guarantees every LHS has a pre-default in this case. + const bool provenExhaustive = m_caseDetailsValid && m_caseDetails.exhaustive + && !m_caseDetails.exhaustiveOverEnumOnly; + if (clauses.back().first && !provenExhaustive) clauses.emplace_back(nullptr, nullptr); + + // Number of entries in decoder table + const int decoderEnries = clauses.size(); + + // Build the match table: a {matchBits, matchMask} packed pair per clause, shared by all + // LHSs. Each field is rounded up to a whole EDATA word boundary to avoid bit swizzling + // at runtime. We use a packed value so the runtime function can take a VlWide pointer + // without templating on the array size. + const int condWidth = nodep->exprp()->width(); + const int matchWidth = 2 * VL_WORDS_I(condWidth) * VL_EDATASIZE; + AstConst* const matchp + = new AstConst{flp, AstConst::WidthedValue{}, decoderEnries * matchWidth, 0}; + for (int i = 0; i < decoderEnries; ++i) { + const int entryLsb = i * matchWidth; + + // If the entry has a condition, use its match bits and mask + if (AstConst* const condp = clauses[i].first) { + const auto& match = matchPattern(nodep, condp); + matchp->num().opSelInto(match.first, entryLsb, condWidth); + matchp->num().opSelInto(match.second, entryLsb + matchWidth / 2, condWidth); + continue; + } + + // Otherwise use zero for mask and bits, which matches anything + V3Number numZero{flp, condWidth, 0}; + matchp->num().opSelInto(numZero, entryLsb, condWidth); + matchp->num().opSelInto(numZero, entryLsb + matchWidth / 2, condWidth); + } + + // Create the table initializer + AstRange* const rangep = new AstRange{flp, decoderEnries - 1, 0}; + AstNodeDType* const entryDtypep = nodep->findBitDType( + m_caseDecoderEntryWidth, m_caseDecoderEntryWidth, VSigning::UNSIGNED); + AstNodeDType* const tableDtypep = new AstUnpackArrayDType{flp, entryDtypep, rangep}; + v3Global.rootp()->typeTablep()->addTypesp(tableDtypep); + AstInitArray* const tablep = new AstInitArray{flp, 
tableDtypep, nullptr}; + + // Build a single value table for all LHSs: one entry per clause, packing each LHS's value + // at its offset. The entry width is the packing computed by 'analyzeDecoderPattern' + // ('m_caseDecoderEntryWidth'), used as-is for each unpacked array element. + for (int i = 0; i < decoderEnries; ++i) { + AstNode* const stmtsp = clauses[i].second; + AstConst* const entryp = new AstConst{flp, AstConst::WidthedValue{}, + static_cast(m_caseDecoderEntryWidth), 0}; + for (const LhsRecord& lhsRecord : m_caseDecoderRecords) { + AstNodeExpr* const lhsp = lhsRecord.lhsp; + // Find the value assigned to this LHS in the clause's statements + AstConst* valConstp = nullptr; + for (AstNode* stmtp = stmtsp; stmtp; stmtp = stmtp->nextp()) { + AstNodeAssign* const assignp = VN_AS(stmtp, NodeAssign); + if (!lhsp->sameTree(assignp->lhsp())) continue; + valConstp = VN_AS(assignp->rhsp(), Const); + break; + } + // Not assigned in this clause, so use the pre-assigned default + if (!valConstp) { + UASSERT_OBJ(lhsRecord.preDefaultp, nodep, + "Decoder LHS unassigned in case item without a pre-default"); + valConstp = VN_AS(lhsRecord.preDefaultp->rhsp(), Const); + } + entryp->num().opSelInto(valConstp->num(), lhsRecord.offset, lhsp->width()); + } + tablep->addIndexValuep(i, entryp); + } + + // Create the tables + AstVarScope* const matchVscp = v3Global.rootp()->constPoolp()->findConst(matchp, true); + AstVarScope* const tableVscp = v3Global.rootp()->constPoolp()->findTable(tablep); + VL_DO_DANGLING(matchp->deleteTree(), matchp); + VL_DO_DANGLING(tablep->deleteTree(), tablep); + + // AstMatchMasked produces the index of the matching entry + AstNodeExpr* const tableRefp = new AstVarRef{flp, tableVscp, VAccess::READ}; + AstNodeExpr* const caseExprp = nodep->exprp()->cloneTreePure(false); + AstMatchMasked* const indexp = new AstMatchMasked{flp, caseExprp, matchVscp}; + AstNodeExpr* const entryp = new AstArraySel{flp, tableRefp, indexp}; + + return 
connectDecoderOutputs(nodep, entryp, "__VcaseDecoderOut"); } // TODO: should return AstNodeStmt after #6280 @@ -965,6 +1087,20 @@ class CaseVisitor final : public VNVisitor { }(); if (useTable) return convertCaseTable(nodep); + // Determine if we should use the decoder method. + const bool useDecoder = [&]() { + // Not if disabled + if (!v3Global.opt.fCaseDecoder()) return false; + // Not if not a decoder pattern + if (m_caseDecoderRecords.empty()) return false; + // Only worth it once the branch lowering it would replace is deep enough (see + // useTable) + const size_t branches = std::min(nodep->exprp()->width(), m_caseNConditions); + if (branches < CASE_TABLE_MIN_BRANCHES) return false; + return true; + }(); + if (useDecoder) return convertCaseDecoder(nodep); + // Determine if we should use the fast bitwise branching tree method const bool useFastBitTree = [&]() { // Not if disabled @@ -1044,6 +1180,7 @@ public: // CONSTRUCTORS explicit CaseVisitor(AstNetlist* nodep) { iterate(nodep); } ~CaseVisitor() override { + V3Stats::addStat("Optimizations, Cases decoder", m_stats.caseDecoder); V3Stats::addStat("Optimizations, Cases table normal", m_stats.caseTableNormal); V3Stats::addStat("Optimizations, Cases table tiny", m_stats.caseTableTiny); V3Stats::addStat("Optimizations, Cases parallelized", m_stats.caseFast); diff --git a/src/V3Clean.cpp b/src/V3Clean.cpp index 2c326bbb7..07920f8de 100644 --- a/src/V3Clean.cpp +++ b/src/V3Clean.cpp @@ -232,6 +232,11 @@ class CleanVisitor final : public VNVisitor { ensureClean(nodep->rhsp()); setClean(nodep, true); } + void visit(AstMatchMasked* nodep) override { + iterateChildren(nodep); + ensureClean(nodep->lhsp()); + setClean(nodep, true); + } void visit(AstSel* nodep) override { operandBiop(nodep); setClean(nodep, nodep->cleanOut()); diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 34c4f349a..739313ff8 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -3019,6 +3019,15 @@ class ConstVisitor final : public VNVisitor { } 
} void visit(AstClassOrPackageRef* nodep) override { iterateChildren(nodep); } + + void visit(AstMatchMasked* nodep) override { + // Do not iterate the tables, they must be constant pool entries + iterate(nodep->lhsp()); + if (AstConst* const constp = VN_CAST(nodep->lhsp(), Const)) { + replaceNum(nodep, AstMatchMasked::fold(constp->num(), nodep->matchp()->varp())); + } + } + void visit(AstPin* nodep) override { iterateChildren(nodep); } void replaceLogEq(AstLogEq* nodep) { @@ -3281,7 +3290,8 @@ class ConstVisitor final : public VNVisitor { iterateChildren(nodep); UASSERT_OBJ(nodep->varp(), nodep, "Not linked"); bool did = false; - if (m_doV && !nodep->varp()->constPoolEntry() && nodep->varp()->valuep() && !m_attrp) { + if (m_doV && (!nodep->varp()->constPoolEntry() || m_selp) && nodep->varp()->valuep() + && !m_attrp) { // UINFOTREE(1, valuep, "", "visitvaref"); iterateAndNextNull(nodep->varp()->valuep()); // May change nodep->varp()->valuep() AstNode* const valuep = nodep->varp()->valuep(); diff --git a/src/V3Dfg.cpp b/src/V3Dfg.cpp index 637cf184f..cb18ba704 100644 --- a/src/V3Dfg.cpp +++ b/src/V3Dfg.cpp @@ -96,6 +96,11 @@ std::unique_ptr DfgGraph::clone() const { vtxp2clonep.emplace(&vtx, cp); break; } // LCOV_EXCL_STOP + case VDfgType::MatchMasked: { + DfgMatchMasked* const cp = new DfgMatchMasked{*clonep, vtx.fileline(), vtx.dtype()}; + vtxp2clonep.emplace(&vtx, cp); + break; + } case VDfgType::Sel: { DfgSel* const cp = new DfgSel{*clonep, vtx.fileline(), vtx.dtype()}; cp->lsb(vtx.as()->lsb()); @@ -672,6 +677,15 @@ void DfgVertex::typeCheck(const DfgGraph& dfg) const { CHECK(v.dtype() == DfgDataType::select(v.srcp()->dtype(), v.lsb(), v.size()), "sel"); return; } + case VDfgType::MatchMasked: { + const DfgMatchMasked& v = *as(); + CHECK(v.isPacked(), "Should be Packed type"); + CHECK(v.size() == 32U, "Should yield a 32-bit result"); + CHECK(v.lhsp()->isPacked(), "Lhs should be packed"); + CHECK(v.matchp()->isPacked(), "Match should be Packed type"); + 
CHECK(v.matchp()->is(), "Match should be a variable"); + return; + } case VDfgType::Mux: { const DfgMux& v = *as(); CHECK(v.isPacked(), "Should be Packed type"); diff --git a/src/V3Dfg.h b/src/V3Dfg.h index 95320daad..b93c1c716 100644 --- a/src/V3Dfg.h +++ b/src/V3Dfg.h @@ -817,8 +817,11 @@ bool DfgVertex::isCheaperThanLoad() const { if (is()) return true; // Variables if (is()) return true; - // Array sels are just address computation - if (is()) return true; + // Array sels are just address computation, but the address itself can be expensive + if (const DfgArraySel* aselp = cast()) { + if (aselp->bitp()->is()) return false; + return true; + } // Small select from variable if (const DfgSel* const selp = cast()) { if (!selp->fromp()->is()) return false; diff --git a/src/V3DfgBreakCycles.cpp b/src/V3DfgBreakCycles.cpp index 73789cb9b..ecdcc875d 100644 --- a/src/V3DfgBreakCycles.cpp +++ b/src/V3DfgBreakCycles.cpp @@ -332,12 +332,25 @@ class TraceDriver final : public DfgVisitor { } void visit(DfgArraySel* vtxp) override { - // Only constant select - const DfgConst* const idxp = vtxp->bitp()->cast(); - if (!idxp) return; // From a variable const DfgVarArray* varp = vtxp->fromp()->cast(); if (!varp) return; + + // If index is not constant, independence was proven only if the 'fromp' is + // independent, so no need to trace that + if (!vtxp->bitp()->is()) { + DfgArraySel* const resp = make(vtxp, vtxp->width()); + resp->fromp(vtxp->fromp()); + resp->bitp(trace(vtxp->bitp(), vtxp->bitp()->width() - 1, 0)); + DfgSel* const selp = make(vtxp, m_msb - m_lsb + 1); + selp->fromp(resp); + selp->lsb(m_lsb); + SET_RESULT(selp); + return; + } + + // Trace the relevant driver based on the static index + const DfgConst* const idxp = vtxp->bitp()->as(); UASSERT_OBJ(!varp->isVolatile(), vtxp, "Should not trace through volatile VarArray"); // Skip through intermediate variables while (varp->srcp() && varp->srcp()->is()) { @@ -589,6 +602,16 @@ class TraceDriver final : public DfgVisitor 
{ SET_RESULT(resp); } + void visit(DfgMatchMasked* vtxp) override { + DfgMatchMasked* const resp = make(vtxp, vtxp->width()); + resp->lhsp(trace(vtxp->lhsp(), vtxp->lhsp()->width() - 1, 0)); + resp->matchp(trace(vtxp->matchp(), vtxp->matchp()->width() - 1, 0)); + DfgSel* const selp = make(vtxp, m_msb - m_lsb + 1); + selp->fromp(resp); + selp->lsb(m_lsb); + SET_RESULT(selp); + } + #undef SET_RESULT public: @@ -721,12 +744,23 @@ class IndependentBits final : public DfgVisitor { } void visit(DfgArraySel* vtxp) override { - // Only constant select - const DfgConst* const idxp = vtxp->bitp()->cast(); - if (!idxp) return; // From a variable const DfgVarArray* varp = vtxp->fromp()->cast(); if (!varp) return; + + // If index is not constant, independent only if the variable index + // is independent and the array is independent. We don't track arrays, + // so we will assume an array is only independent if it has no drivers + // in the graph. TODO: could check all drivers. + if (!vtxp->bitp()->is()) { + if (MASK(vtxp->bitp()).isEqAllOnes() && !varp->srcp() && !varp->defaultp()) { + MASK(vtxp).setAllBits1(); + } + return; + } + + // Trace the relevant driver based on the static index + const DfgConst* const idxp = vtxp->bitp()->as(); // We cannot trace through a volatile variable, so pretend all bits are dependent if (varp->isVolatile()) return; // Skip through intermediate variables @@ -954,6 +988,12 @@ class IndependentBits final : public DfgVisitor { } } + void visit(DfgMatchMasked* vtxp) override { + if (MASK(vtxp->lhsp()).isEqAllOnes() && MASK(vtxp->matchp()).isEqAllOnes()) { // + MASK(vtxp).setAllBits1(); + } + } + #undef MASK // Enqueue sinks of vertex to work list for traversal - only called from constructor diff --git a/src/V3DfgCse.cpp b/src/V3DfgCse.cpp index 7a60b4be7..c1b3862e4 100644 --- a/src/V3DfgCse.cpp +++ b/src/V3DfgCse.cpp @@ -72,6 +72,7 @@ class V3DfgCse final { } // Vertices with no internal information + case VDfgType::MatchMasked: case VDfgType::Mux: 
case VDfgType::UnitArray: return V3Hash{}; @@ -197,6 +198,7 @@ class V3DfgCse final { } // Vertices with no internal information + case VDfgType::MatchMasked: case VDfgType::Mux: case VDfgType::UnitArray: return true; diff --git a/src/V3DfgDfgToAst.cpp b/src/V3DfgDfgToAst.cpp index ca349ccb3..b2af96461 100644 --- a/src/V3DfgDfgToAst.cpp +++ b/src/V3DfgDfgToAst.cpp @@ -234,6 +234,12 @@ class DfgToAstVisitor final : DfgVisitor { AstVar* const varp = sinkp->as()->vscp()->varp(); m_resultp = new AstCReset{vtxp->fileline(), varp, false}; } + void visit(DfgMatchMasked* vtxp) override { + FileLine* const flp = vtxp->fileline(); + AstNodeExpr* const lhsp = convertDfgVertexToAstNodeExpr(vtxp->lhsp()); + AstVarScope* const matchp = vtxp->matchp()->as()->vscp(); + m_resultp = new AstMatchMasked{flp, lhsp, matchp}; + } void visit(DfgRep* vtxp) override { FileLine* const flp = vtxp->fileline(); diff --git a/src/V3DfgPeephole.cpp b/src/V3DfgPeephole.cpp index 3f08f490f..de92790a8 100644 --- a/src/V3DfgPeephole.cpp +++ b/src/V3DfgPeephole.cpp @@ -2751,6 +2751,16 @@ class V3DfgPeephole final : public DfgVisitor { } } + void visit(DfgMatchMasked* const vtxp) override { + if (DfgConst* const constp = vtxp->lhsp()->cast()) { + APPLYING(FOLD_MATCHMASKED) { + AstVar* const matchVarp = vtxp->matchp()->as()->vscp()->varp(); + replace(makeI32(vtxp->fileline(), AstMatchMasked::fold(constp->num(), matchVarp))); + return; + } + } + } + //========================================================================= // DfgVertexTernary //========================================================================= diff --git a/src/V3DfgPeepholePatterns.h b/src/V3DfgPeepholePatterns.h index 52d5717fe..1c9de14e2 100644 --- a/src/V3DfgPeepholePatterns.h +++ b/src/V3DfgPeepholePatterns.h @@ -31,6 +31,7 @@ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_ASSOC_BINARY_LHS_OF_RHS) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_ASSOC_BINARY_RHS_OF_LHS) \ 
_FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_BINARY) \ + _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_MATCHMASKED) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_MUX_FROM_ONES) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_MUX_FROM_ZERO) \ _FOR_EACH_DFG_PEEPHOLE_OPTIMIZATION_APPLY(macro, FOLD_REP) \ diff --git a/src/V3DfgSynthesize.cpp b/src/V3DfgSynthesize.cpp index 706c9c0c9..c0f5bdc99 100644 --- a/src/V3DfgSynthesize.cpp +++ b/src/V3DfgSynthesize.cpp @@ -402,6 +402,29 @@ class AstToDfgConverter final : public VNVisitor { DfgVertex* const vtxp = make(nodep->fileline(), *dtypep); nodep->user2p(vtxp); } + void visit(AstMatchMasked* nodep) override { + UASSERT_OBJ(m_converting, nodep, "AstToDfg visit called without m_converting"); + UASSERT_OBJ(!nodep->user2p(), nodep, "Already has Dfg vertex"); + if (unhandled(nodep)) return; + + const DfgDataType* const dtypep = DfgDataType::fromAst(nodep->dtypep()); + if (!dtypep) { + m_foundUnhandled = true; + ++m_ctx.m_conv.nonRepDType; + return; + } + + iterate(nodep->lhsp()); + if (m_foundUnhandled) return; + iterate(nodep->matchp()); + if (m_foundUnhandled) return; + + FileLine* const flp = nodep->fileline(); + DfgMatchMasked* const vtxp = make(flp, *dtypep); + vtxp->lhsp(nodep->lhsp()->user2u().to()); + vtxp->matchp(nodep->matchp()->user2u().to()); + nodep->user2p(vtxp); + } void visit(AstReplicate* nodep) override { UASSERT_OBJ(m_converting, nodep, "AstToDfg visit called without m_converting"); UASSERT_OBJ(!nodep->user2p(), nodep, "Already has Dfg vertex"); diff --git a/src/V3DfgVertices.h b/src/V3DfgVertices.h index f8ca8e9c8..d932de1a3 100644 --- a/src/V3DfgVertices.h +++ b/src/V3DfgVertices.h @@ -329,6 +329,21 @@ public: ASTGEN_MEMBERS_DfgVertexBinary; }; +class DfgMatchMasked final : public DfgVertexBinary { + // Dfg equivalent of AstMatchMasked +public: + DfgMatchMasked(DfgGraph& dfg, FileLine* flp, const DfgDataType& dtype) + : DfgVertexBinary{dfg, dfgType(), flp, dtype} {} + 
ASTGEN_MEMBERS_DfgMatchMasked; + + DfgVertex* lhsp() const { return inputp(0); } + void lhsp(DfgVertex* vtxp) { inputp(0, vtxp); } + DfgVertex* matchp() const { return inputp(1); } + void matchp(DfgVertex* vtxp) { inputp(1, vtxp); } + + std::string srcName(size_t idx) const override { return idx ? "matchp" : "lhsp"; } +}; + class DfgMux final : public DfgVertexBinary { // AstSel is binary, but 'lsbp' is very often constant. As AstSel is fairly // common, we special case as a DfgSel for the constant 'lsbp', and as diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h index 969e2fd0b..5e527870c 100644 --- a/src/V3EmitCFunc.h +++ b/src/V3EmitCFunc.h @@ -1590,6 +1590,9 @@ public: puts(")"); } } + void visit(AstMatchMasked* nodep) override { + emitOpName(nodep, nodep->emitC(), nodep->lhsp(), nodep->matchp(), nullptr); + } void visit(AstMemberSel* nodep) override { iterateAndNextConstNull(nodep->fromp()); putnbs(nodep, "->"); diff --git a/src/V3Gate.cpp b/src/V3Gate.cpp index b96c892f2..e41b61daf 100644 --- a/src/V3Gate.cpp +++ b/src/V3Gate.cpp @@ -471,6 +471,11 @@ class GateOkVisitor final : public VNVisitorConst { // assign to get randomization etc clearSimple("CReset"); } + void visit(AstMatchMasked* nodep) override { + if (!m_isSimple) return; + // This node can be expensive + clearSimple("MatchMasked"); + } //-------------------- void visit(AstNode* nodep) override { if (!m_isSimple) return; // Fastpath diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 575a498f9..22f96ca7e 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1449,9 +1449,11 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, DECL_OPTION("-facyc-simp", FOnOff, &m_fAcycSimp); DECL_OPTION("-fassemble", FOnOff, &m_fAssemble); DECL_OPTION("-fcase", CbFOnOff, [this](bool flag) { + m_fCaseDecoder = flag; m_fCaseTable = flag; m_fCaseTree = flag; }); + DECL_OPTION("-fcase-decoder", FOnOff, &m_fCaseDecoder); DECL_OPTION("-fcase-table", FOnOff, &m_fCaseTable); 
DECL_OPTION("-fcase-tree", FOnOff, &m_fCaseTree); DECL_OPTION("-fcombine", FOnOff, &m_fCombine); @@ -2356,6 +2358,7 @@ void V3Options::optimize(int level) { const bool flag = level > 0; m_fAcycSimp = flag; m_fAssemble = flag; + m_fCaseDecoder = flag; m_fCaseTable = flag; m_fCaseTree = flag; m_fCombine = flag; diff --git a/src/V3Options.h b/src/V3Options.h index 0dca257d7..71a32c31b 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -392,6 +392,7 @@ private: // MEMBERS (optimizations) bool m_fAcycSimp; // main switch: -fno-acyc-simp: acyclic pre-optimizations bool m_fAssemble; // main switch: -fno-assemble: assign assemble + bool m_fCaseDecoder; // main switch: -fno-case-decoder: case decoder conversion bool m_fCaseTable; // main switch: -fno-case-table: case table conversion bool m_fCaseTree; // main switch: -fno-case-tree: case tree conversion bool m_fCombine; // main switch: -fno-combine: common icode packing @@ -726,6 +727,7 @@ public: // ACCESSORS (optimization options) bool fAcycSimp() const { return m_fAcycSimp; } bool fAssemble() const { return m_fAssemble; } + bool fCaseDecoder() const { return m_fCaseDecoder; } bool fCaseTable() const { return m_fCaseTable; } bool fCaseTree() const { return m_fCaseTree; } bool fCombine() const { return m_fCombine; } diff --git a/src/V3Premit.cpp b/src/V3Premit.cpp index 50c23da38..9fcd89ddf 100644 --- a/src/V3Premit.cpp +++ b/src/V3Premit.cpp @@ -347,6 +347,14 @@ class PremitVisitor final : public VNVisitor { } checkNode(nodep); } + void visit(AstMatchMasked* nodep) override { + iterateChildren(nodep); + if (!nodep->user1SetOnce()) { + // Don't want this replicated by V3Expand + AstVar* const varp = createTemp(nodep); + varp->noSubst(true); // Do not re-inline in V3Subst + } + } void visit(AstCond* nodep) override { // Convert AstCond to AstIf in order to avoid evaluating // sub-expressions in both branches unconditionally. 
diff --git a/test_regress/t/t_case_decoder.py b/test_regress/t/t_case_decoder.py new file mode 100755 index 000000000..4265c138b --- /dev/null +++ b/test_regress/t/t_case_decoder.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. +# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.compile(verilator_flags2=['--binary', '--stats']) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, Cases decoder\s+(\d+)', 17) + +test.passes() diff --git a/test_regress/t/t_case_decoder.v b/test_regress/t/t_case_decoder.v new file mode 100644 index 000000000..6d5c779c2 --- /dev/null +++ b/test_regress/t/t_case_decoder.v @@ -0,0 +1,524 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain. +// SPDX-FileCopyrightText: 2026 Wilson Snyder +// SPDX-License-Identifier: CC0-1.0 +// +// Case statements that become a "decoder" (the selector is matched against a packed constant +// table at runtime), followed by cases that must not be converted to one. Each output is +// compared against an equivalent reference computed without a case statement, so the reference +// itself is never converted. + +// verilog_format: off +`define stop $stop +`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got=%0x exp=%0x (%s !== %s)\n", `__FILE__,`__LINE__, (gotv), (expv), `"gotv`", `"expv`"); `stop; end while(0); +// verilog_format: on + +module t; + logic clk = 1'b0; + always #5 clk = ~clk; + + logic [31:0] cyc = 0; + + // Accept A: a 31-bit (I) selector decoded into outputs of three widths (I/Q/W result). 
+ wire [30:0] accept_a_in = 31'b1 << cyc[3:0]; + logic [ 5:0] accept_a_out_0, accept_a_ref_0; + logic [ 55:0] accept_a_out_1, accept_a_ref_1; + logic [142:0] accept_a_out_2, accept_a_ref_2; + always_comb begin + casez (accept_a_in) + 31'b???????_????????_????????_???????1 : accept_a_out_0 = 6'd00; + 31'b???????_????????_????????_??????1? : accept_a_out_0 = 6'd01; + 31'b???????_????????_????????_?????1?? : accept_a_out_0 = 6'd02; + 31'b???????_????????_????????_????1??? : accept_a_out_0 = 6'd03; + 31'b???????_????????_????????_???1???? : accept_a_out_0 = 6'd04; + 31'b???????_????????_????????_??1????? : accept_a_out_0 = 6'd05; + 31'b???????_????????_????????_?1?????? : accept_a_out_0 = 6'd06; + 31'b???????_????????_????????_1??????? : accept_a_out_0 = 6'd07; + 31'b???????_????????_???????1_???????? : accept_a_out_0 = 6'd08; + 31'b???????_????????_??????1?_???????? : accept_a_out_0 = 6'd09; + 31'b???????_????????_?????1??_???????? : accept_a_out_0 = 6'd10; + 31'b???????_????????_????1???_???????? : accept_a_out_0 = 6'd11; + 31'b???????_????????_???1????_???????? : accept_a_out_0 = 6'd12; + 31'b???????_????????_??1?????_???????? : accept_a_out_0 = 6'd13; + 31'b???????_????????_?1??????_???????? : accept_a_out_0 = 6'd14; + 31'b???????_????????_1???????_???????? : accept_a_out_0 = 6'd15; + 31'b???????_???????1_????????_???????? : accept_a_out_0 = 6'd16; + 31'b???????_??????1?_????????_???????? : accept_a_out_0 = 6'd17; + 31'b???????_?????1??_????????_???????? : accept_a_out_0 = 6'd18; + 31'b???????_????1???_????????_???????? : accept_a_out_0 = 6'd19; + 31'b???????_???1????_????????_???????? : accept_a_out_0 = 6'd20; + 31'b???????_??1?????_????????_???????? : accept_a_out_0 = 6'd21; + 31'b???????_?1??????_????????_???????? : accept_a_out_0 = 6'd22; + 31'b???????_1???????_????????_???????? : accept_a_out_0 = 6'd23; + 31'b??????1_????????_????????_???????? : accept_a_out_0 = 6'd24; + 31'b?????1?_????????_????????_???????? 
: accept_a_out_0 = 6'd25; + 31'b????1??_????????_????????_???????? : accept_a_out_0 = 6'd26; + 31'b???1???_????????_????????_???????? : accept_a_out_0 = 6'd27; + 31'b??1????_????????_????????_???????? : accept_a_out_0 = 6'd28; + 31'b?1?????_????????_????????_???????? : accept_a_out_0 = 6'd29; + 31'b1??????_????????_????????_???????? : accept_a_out_0 = 6'd30; + default: accept_a_out_0 = '1; + endcase + casez (accept_a_in) + 31'b???????_????????_????????_???????1 : accept_a_out_1 = 56'd0000; + 31'b???????_????????_????????_??????1? : accept_a_out_1 = 56'd0100; + 31'b???????_????????_????????_?????1?? : accept_a_out_1 = 56'd0200; + 31'b???????_????????_????????_????1??? : accept_a_out_1 = 56'd0300; + 31'b???????_????????_????????_???1???? : accept_a_out_1 = 56'd0400; + 31'b???????_????????_????????_??1????? : accept_a_out_1 = 56'd0500; + 31'b???????_????????_????????_?1?????? : accept_a_out_1 = 56'd0600; + 31'b???????_????????_????????_1??????? : accept_a_out_1 = 56'd0700; + 31'b???????_????????_???????1_???????? : accept_a_out_1 = 56'd0800; + 31'b???????_????????_??????1?_???????? : accept_a_out_1 = 56'd0900; + 31'b???????_????????_?????1??_???????? : accept_a_out_1 = 56'd1000; + 31'b???????_????????_????1???_???????? : accept_a_out_1 = 56'd1100; + 31'b???????_????????_???1????_???????? : accept_a_out_1 = 56'd1200; + 31'b???????_????????_??1?????_???????? : accept_a_out_1 = 56'd1300; + 31'b???????_????????_?1??????_???????? : accept_a_out_1 = 56'd1400; + 31'b???????_????????_1???????_???????? : accept_a_out_1 = 56'd1500; + 31'b???????_???????1_????????_???????? : accept_a_out_1 = 56'd1600; + 31'b???????_??????1?_????????_???????? : accept_a_out_1 = 56'd1700; + 31'b???????_?????1??_????????_???????? : accept_a_out_1 = 56'd1800; + 31'b???????_????1???_????????_???????? : accept_a_out_1 = 56'd1900; + 31'b???????_???1????_????????_???????? : accept_a_out_1 = 56'd2000; + 31'b???????_??1?????_????????_???????? 
: accept_a_out_1 = 56'd2100; + 31'b???????_?1??????_????????_???????? : accept_a_out_1 = 56'd2200; + 31'b???????_1???????_????????_???????? : accept_a_out_1 = 56'd2300; + 31'b??????1_????????_????????_???????? : accept_a_out_1 = 56'd2400; + 31'b?????1?_????????_????????_???????? : accept_a_out_1 = 56'd2500; + 31'b????1??_????????_????????_???????? : accept_a_out_1 = 56'd2600; + 31'b???1???_????????_????????_???????? : accept_a_out_1 = 56'd2700; + 31'b??1????_????????_????????_???????? : accept_a_out_1 = 56'd2800; + 31'b?1?????_????????_????????_???????? : accept_a_out_1 = 56'd2900; + 31'b1??????_????????_????????_???????? : accept_a_out_1 = 56'd3000; + default: accept_a_out_1 = '1; + endcase + casez (accept_a_in) + 31'b???????_????????_????????_???????1 : accept_a_out_2 = 143'd0000000000; + 31'b???????_????????_????????_??????1? : accept_a_out_2 = 143'd0100000000; + 31'b???????_????????_????????_?????1?? : accept_a_out_2 = 143'd0200000000; + 31'b???????_????????_????????_????1??? : accept_a_out_2 = 143'd0300000000; + 31'b???????_????????_????????_???1???? : accept_a_out_2 = 143'd0400000000; + 31'b???????_????????_????????_??1????? : accept_a_out_2 = 143'd0500000000; + 31'b???????_????????_????????_?1?????? : accept_a_out_2 = 143'd0600000000; + 31'b???????_????????_????????_1??????? : accept_a_out_2 = 143'd0700000000; + 31'b???????_????????_???????1_???????? : accept_a_out_2 = 143'd0800000000; + 31'b???????_????????_??????1?_???????? : accept_a_out_2 = 143'd0900000000; + 31'b???????_????????_?????1??_???????? : accept_a_out_2 = 143'd1000000000; + 31'b???????_????????_????1???_???????? : accept_a_out_2 = 143'd1100000000; + 31'b???????_????????_???1????_???????? : accept_a_out_2 = 143'd1200000000; + 31'b???????_????????_??1?????_???????? : accept_a_out_2 = 143'd1300000000; + 31'b???????_????????_?1??????_???????? : accept_a_out_2 = 143'd1400000000; + 31'b???????_????????_1???????_???????? : accept_a_out_2 = 143'd1500000000; + 31'b???????_???????1_????????_???????? 
: accept_a_out_2 = 143'd1600000000; + 31'b???????_??????1?_????????_???????? : accept_a_out_2 = 143'd1700000000; + 31'b???????_?????1??_????????_???????? : accept_a_out_2 = 143'd1800000000; + 31'b???????_????1???_????????_???????? : accept_a_out_2 = 143'd1900000000; + 31'b???????_???1????_????????_???????? : accept_a_out_2 = 143'd2000000000; + 31'b???????_??1?????_????????_???????? : accept_a_out_2 = 143'd2100000000; + 31'b???????_?1??????_????????_???????? : accept_a_out_2 = 143'd2200000000; + 31'b???????_1???????_????????_???????? : accept_a_out_2 = 143'd2300000000; + 31'b??????1_????????_????????_???????? : accept_a_out_2 = 143'd2400000000; + 31'b?????1?_????????_????????_???????? : accept_a_out_2 = 143'd2500000000; + 31'b????1??_????????_????????_???????? : accept_a_out_2 = 143'd2600000000; + 31'b???1???_????????_????????_???????? : accept_a_out_2 = 143'd2700000000; + 31'b??1????_????????_????????_???????? : accept_a_out_2 = 143'd2800000000; + 31'b?1?????_????????_????????_???????? : accept_a_out_2 = 143'd2900000000; + 31'b1??????_????????_????????_???????? : accept_a_out_2 = 143'd3000000000; + default: accept_a_out_2 = '1; + endcase + end + assign accept_a_ref_0 = 6'(cyc[3:0]); + assign accept_a_ref_1 = 56'(cyc[3:0]) * 56'd100; + assign accept_a_ref_2 = 143'(cyc[3:0]) * 143'd100000000; + + // Accept B: a 40-bit (Q) selector decoded into outputs of three widths (I/Q/W result). + wire [39:0] accept_b_in = 40'b1 << cyc[5:1]; + logic [ 5:0] accept_b_out_0, accept_b_ref_0; + logic [ 55:0] accept_b_out_1, accept_b_ref_1; + logic [142:0] accept_b_out_2, accept_b_ref_2; + always_comb begin + casez (accept_b_in) + 40'b????????_????????_????????_????????_???????1 : accept_b_out_0 = 6'd00; + 40'b????????_????????_????????_????????_??????1? : accept_b_out_0 = 6'd01; + 40'b????????_????????_????????_????????_?????1?? : accept_b_out_0 = 6'd02; + 40'b????????_????????_????????_????????_????1??? : accept_b_out_0 = 6'd03; + 40'b????????_????????_????????_????????_???1???? 
: accept_b_out_0 = 6'd04; + 40'b????????_????????_????????_????????_??1????? : accept_b_out_0 = 6'd05; + 40'b????????_????????_????????_????????_?1?????? : accept_b_out_0 = 6'd06; + 40'b????????_????????_????????_????????_1??????? : accept_b_out_0 = 6'd07; + 40'b????????_????????_????????_???????1_???????? : accept_b_out_0 = 6'd08; + 40'b????????_????????_????????_??????1?_???????? : accept_b_out_0 = 6'd09; + 40'b????????_????????_????????_?????1??_???????? : accept_b_out_0 = 6'd10; + 40'b????????_????????_????????_????1???_???????? : accept_b_out_0 = 6'd11; + 40'b????????_????????_????????_???1????_???????? : accept_b_out_0 = 6'd12; + 40'b????????_????????_????????_??1?????_???????? : accept_b_out_0 = 6'd13; + 40'b????????_????????_????????_?1??????_???????? : accept_b_out_0 = 6'd14; + 40'b????????_????????_????????_1???????_???????? : accept_b_out_0 = 6'd15; + 40'b????????_????????_???????1_????????_???????? : accept_b_out_0 = 6'd16; + 40'b????????_????????_??????1?_????????_???????? : accept_b_out_0 = 6'd17; + 40'b????????_????????_?????1??_????????_???????? : accept_b_out_0 = 6'd18; + 40'b????????_????????_????1???_????????_???????? : accept_b_out_0 = 6'd19; + 40'b????????_????????_???1????_????????_???????? : accept_b_out_0 = 6'd20; + 40'b????????_????????_??1?????_????????_???????? : accept_b_out_0 = 6'd21; + 40'b????????_????????_?1??????_????????_???????? : accept_b_out_0 = 6'd22; + 40'b????????_????????_1???????_????????_???????? : accept_b_out_0 = 6'd23; + 40'b????????_???????1_????????_????????_???????? : accept_b_out_0 = 6'd24; + 40'b????????_??????1?_????????_????????_???????? : accept_b_out_0 = 6'd25; + 40'b????????_?????1??_????????_????????_???????? : accept_b_out_0 = 6'd26; + 40'b????????_????1???_????????_????????_???????? : accept_b_out_0 = 6'd27; + 40'b????????_???1????_????????_????????_???????? : accept_b_out_0 = 6'd28; + 40'b????????_??1?????_????????_????????_???????? 
: accept_b_out_0 = 6'd29; + 40'b????????_?1??????_????????_????????_???????? : accept_b_out_0 = 6'd30; + 40'b????????_1???????_????????_????????_???????? : accept_b_out_0 = 6'd31; + 40'b???????1_????????_????????_????????_???????? : accept_b_out_0 = 6'd32; + 40'b??????1?_????????_????????_????????_???????? : accept_b_out_0 = 6'd33; + 40'b?????1??_????????_????????_????????_???????? : accept_b_out_0 = 6'd34; + 40'b????1???_????????_????????_????????_???????? : accept_b_out_0 = 6'd35; + 40'b???1????_????????_????????_????????_???????? : accept_b_out_0 = 6'd36; + 40'b??1?????_????????_????????_????????_???????? : accept_b_out_0 = 6'd37; + 40'b?1??????_????????_????????_????????_???????? : accept_b_out_0 = 6'd38; + 40'b1???????_????????_????????_????????_???????? : accept_b_out_0 = 6'd39; + default: accept_b_out_0 = '1; + endcase + casez (accept_b_in) + 40'b????????_????????_????????_????????_???????1 : accept_b_out_1 = 56'd0000; + 40'b????????_????????_????????_????????_??????1? : accept_b_out_1 = 56'd0100; + 40'b????????_????????_????????_????????_?????1?? : accept_b_out_1 = 56'd0200; + 40'b????????_????????_????????_????????_????1??? : accept_b_out_1 = 56'd0300; + 40'b????????_????????_????????_????????_???1???? : accept_b_out_1 = 56'd0400; + 40'b????????_????????_????????_????????_??1????? : accept_b_out_1 = 56'd0500; + 40'b????????_????????_????????_????????_?1?????? : accept_b_out_1 = 56'd0600; + 40'b????????_????????_????????_????????_1??????? : accept_b_out_1 = 56'd0700; + 40'b????????_????????_????????_???????1_???????? : accept_b_out_1 = 56'd0800; + 40'b????????_????????_????????_??????1?_???????? : accept_b_out_1 = 56'd0900; + 40'b????????_????????_????????_?????1??_???????? : accept_b_out_1 = 56'd1000; + 40'b????????_????????_????????_????1???_???????? : accept_b_out_1 = 56'd1100; + 40'b????????_????????_????????_???1????_???????? : accept_b_out_1 = 56'd1200; + 40'b????????_????????_????????_??1?????_???????? 
: accept_b_out_1 = 56'd1300; + 40'b????????_????????_????????_?1??????_???????? : accept_b_out_1 = 56'd1400; + 40'b????????_????????_????????_1???????_???????? : accept_b_out_1 = 56'd1500; + 40'b????????_????????_???????1_????????_???????? : accept_b_out_1 = 56'd1600; + 40'b????????_????????_??????1?_????????_???????? : accept_b_out_1 = 56'd1700; + 40'b????????_????????_?????1??_????????_???????? : accept_b_out_1 = 56'd1800; + 40'b????????_????????_????1???_????????_???????? : accept_b_out_1 = 56'd1900; + 40'b????????_????????_???1????_????????_???????? : accept_b_out_1 = 56'd2000; + 40'b????????_????????_??1?????_????????_???????? : accept_b_out_1 = 56'd2100; + 40'b????????_????????_?1??????_????????_???????? : accept_b_out_1 = 56'd2200; + 40'b????????_????????_1???????_????????_???????? : accept_b_out_1 = 56'd2300; + 40'b????????_???????1_????????_????????_???????? : accept_b_out_1 = 56'd2400; + 40'b????????_??????1?_????????_????????_???????? : accept_b_out_1 = 56'd2500; + 40'b????????_?????1??_????????_????????_???????? : accept_b_out_1 = 56'd2600; + 40'b????????_????1???_????????_????????_???????? : accept_b_out_1 = 56'd2700; + 40'b????????_???1????_????????_????????_???????? : accept_b_out_1 = 56'd2800; + 40'b????????_??1?????_????????_????????_???????? : accept_b_out_1 = 56'd2900; + 40'b????????_?1??????_????????_????????_???????? : accept_b_out_1 = 56'd3000; + 40'b????????_1???????_????????_????????_???????? : accept_b_out_1 = 56'd3100; + 40'b???????1_????????_????????_????????_???????? : accept_b_out_1 = 56'd3200; + 40'b??????1?_????????_????????_????????_???????? : accept_b_out_1 = 56'd3300; + 40'b?????1??_????????_????????_????????_???????? : accept_b_out_1 = 56'd3400; + 40'b????1???_????????_????????_????????_???????? : accept_b_out_1 = 56'd3500; + 40'b???1????_????????_????????_????????_???????? : accept_b_out_1 = 56'd3600; + 40'b??1?????_????????_????????_????????_???????? : accept_b_out_1 = 56'd3700; + 40'b?1??????_????????_????????_????????_???????? 
: accept_b_out_1 = 56'd3800; + 40'b1???????_????????_????????_????????_???????? : accept_b_out_1 = 56'd3900; + default: accept_b_out_1 = '1; + endcase + casez (accept_b_in) + 40'b????????_????????_????????_????????_???????1 : accept_b_out_2 = 143'd0000000000; + 40'b????????_????????_????????_????????_??????1? : accept_b_out_2 = 143'd0100000000; + 40'b????????_????????_????????_????????_?????1?? : accept_b_out_2 = 143'd0200000000; + 40'b????????_????????_????????_????????_????1??? : accept_b_out_2 = 143'd0300000000; + 40'b????????_????????_????????_????????_???1???? : accept_b_out_2 = 143'd0400000000; + 40'b????????_????????_????????_????????_??1????? : accept_b_out_2 = 143'd0500000000; + 40'b????????_????????_????????_????????_?1?????? : accept_b_out_2 = 143'd0600000000; + 40'b????????_????????_????????_????????_1??????? : accept_b_out_2 = 143'd0700000000; + 40'b????????_????????_????????_???????1_???????? : accept_b_out_2 = 143'd0800000000; + 40'b????????_????????_????????_??????1?_???????? : accept_b_out_2 = 143'd0900000000; + 40'b????????_????????_????????_?????1??_???????? : accept_b_out_2 = 143'd1000000000; + 40'b????????_????????_????????_????1???_???????? : accept_b_out_2 = 143'd1100000000; + 40'b????????_????????_????????_???1????_???????? : accept_b_out_2 = 143'd1200000000; + 40'b????????_????????_????????_??1?????_???????? : accept_b_out_2 = 143'd1300000000; + 40'b????????_????????_????????_?1??????_???????? : accept_b_out_2 = 143'd1400000000; + 40'b????????_????????_????????_1???????_???????? : accept_b_out_2 = 143'd1500000000; + 40'b????????_????????_???????1_????????_???????? : accept_b_out_2 = 143'd1600000000; + 40'b????????_????????_??????1?_????????_???????? : accept_b_out_2 = 143'd1700000000; + 40'b????????_????????_?????1??_????????_???????? : accept_b_out_2 = 143'd1800000000; + 40'b????????_????????_????1???_????????_???????? : accept_b_out_2 = 143'd1900000000; + 40'b????????_????????_???1????_????????_???????? 
: accept_b_out_2 = 143'd2000000000; + 40'b????????_????????_??1?????_????????_???????? : accept_b_out_2 = 143'd2100000000; + 40'b????????_????????_?1??????_????????_???????? : accept_b_out_2 = 143'd2200000000; + 40'b????????_????????_1???????_????????_???????? : accept_b_out_2 = 143'd2300000000; + 40'b????????_???????1_????????_????????_???????? : accept_b_out_2 = 143'd2400000000; + 40'b????????_??????1?_????????_????????_???????? : accept_b_out_2 = 143'd2500000000; + 40'b????????_?????1??_????????_????????_???????? : accept_b_out_2 = 143'd2600000000; + 40'b????????_????1???_????????_????????_???????? : accept_b_out_2 = 143'd2700000000; + 40'b????????_???1????_????????_????????_???????? : accept_b_out_2 = 143'd2800000000; + 40'b????????_??1?????_????????_????????_???????? : accept_b_out_2 = 143'd2900000000; + 40'b????????_?1??????_????????_????????_???????? : accept_b_out_2 = 143'd3000000000; + 40'b????????_1???????_????????_????????_???????? : accept_b_out_2 = 143'd3100000000; + 40'b???????1_????????_????????_????????_???????? : accept_b_out_2 = 143'd3200000000; + 40'b??????1?_????????_????????_????????_???????? : accept_b_out_2 = 143'd3300000000; + 40'b?????1??_????????_????????_????????_???????? : accept_b_out_2 = 143'd3400000000; + 40'b????1???_????????_????????_????????_???????? : accept_b_out_2 = 143'd3500000000; + 40'b???1????_????????_????????_????????_???????? : accept_b_out_2 = 143'd3600000000; + 40'b??1?????_????????_????????_????????_???????? : accept_b_out_2 = 143'd3700000000; + 40'b?1??????_????????_????????_????????_???????? : accept_b_out_2 = 143'd3800000000; + 40'b1???????_????????_????????_????????_???????? : accept_b_out_2 = 143'd3900000000; + default: accept_b_out_2 = '1; + endcase + end + assign accept_b_ref_0 = 6'(cyc[5:1]); + assign accept_b_ref_1 = 56'(cyc[5:1]) * 56'd100; + assign accept_b_ref_2 = 143'(cyc[5:1]) * 143'd100000000; + + // Accept C: a 155-bit (W) selector decoded into outputs of three widths (I/Q/W result). 
+ wire [154:0] accept_c_in = 155'b1 << cyc[5:0]; + logic [ 5:0] accept_c_out_0, accept_c_ref_0; + logic [ 55:0] accept_c_out_1, accept_c_ref_1; + logic [142:0] accept_c_out_2, accept_c_ref_2; + always_comb begin + casez (accept_c_in) + 155'b????????_????????_????????_????????_???????1 : accept_c_out_0 = 6'd00; + 155'b????????_????????_????????_????????_??????1? : accept_c_out_0 = 6'd01; + 155'b????????_????????_????????_????????_?????1?? : accept_c_out_0 = 6'd02; + 155'b????????_????????_????????_????????_????1??? : accept_c_out_0 = 6'd03; + 155'b????????_????????_????????_????????_???1???? : accept_c_out_0 = 6'd04; + 155'b????????_????????_????????_????????_??1????? : accept_c_out_0 = 6'd05; + 155'b????????_????????_????????_????????_?1?????? : accept_c_out_0 = 6'd06; + 155'b????????_????????_????????_????????_1??????? : accept_c_out_0 = 6'd07; + 155'b????????_????????_????????_???????1_???????? : accept_c_out_0 = 6'd08; + 155'b????????_????????_????????_??????1?_???????? : accept_c_out_0 = 6'd09; + 155'b????????_????????_????????_?????1??_???????? : accept_c_out_0 = 6'd10; + default: accept_c_out_0 = '1; + endcase + casez (accept_c_in) + 155'b????????_????????_????????_????????_???????1 : accept_c_out_1 = 56'd0000; + 155'b????????_????????_????????_????????_??????1? : accept_c_out_1 = 56'd0100; + 155'b????????_????????_????????_????????_?????1?? : accept_c_out_1 = 56'd0200; + 155'b????????_????????_????????_????????_????1??? : accept_c_out_1 = 56'd0300; + 155'b????????_????????_????????_????????_???1???? : accept_c_out_1 = 56'd0400; + 155'b????????_????????_????????_????????_??1????? : accept_c_out_1 = 56'd0500; + 155'b????????_????????_????????_????????_?1?????? : accept_c_out_1 = 56'd0600; + 155'b????????_????????_????????_????????_1??????? : accept_c_out_1 = 56'd0700; + 155'b????????_????????_????????_???????1_???????? : accept_c_out_1 = 56'd0800; + 155'b????????_????????_????????_??????1?_???????? 
: accept_c_out_1 = 56'd0900; + 155'b????????_????????_????????_?????1??_???????? : accept_c_out_1 = 56'd1000; + default: accept_c_out_1 = '1; + endcase + casez (accept_c_in) + 155'b????????_????????_????????_????????_???????1 : accept_c_out_2 = 143'd0000000000; + 155'b????????_????????_????????_????????_??????1? : accept_c_out_2 = 143'd0100000000; + 155'b????????_????????_????????_????????_?????1?? : accept_c_out_2 = 143'd0200000000; + 155'b????????_????????_????????_????????_????1??? : accept_c_out_2 = 143'd0300000000; + 155'b????????_????????_????????_????????_???1???? : accept_c_out_2 = 143'd0400000000; + 155'b????????_????????_????????_????????_??1????? : accept_c_out_2 = 143'd0500000000; + 155'b????????_????????_????????_????????_?1?????? : accept_c_out_2 = 143'd0600000000; + 155'b????????_????????_????????_????????_1??????? : accept_c_out_2 = 143'd0700000000; + 155'b????????_????????_????????_???????1_???????? : accept_c_out_2 = 143'd0800000000; + 155'b????????_????????_????????_??????1?_???????? : accept_c_out_2 = 143'd0900000000; + 155'b????????_????????_????????_?????1??_???????? : accept_c_out_2 = 143'd1000000000; + default: accept_c_out_2 = '1; + endcase + end + assign accept_c_ref_0 = cyc[5:0] <= 6'd10 ? 6'(cyc[5:0]) : ~6'd0; + assign accept_c_ref_1 = cyc[5:0] <= 6'd10 ? 56'(cyc[5:0]) * 56'd100 : ~56'd0; + assign accept_c_ref_2 = cyc[5:0] <= 6'd10 ? 143'(cyc[5:0]) * 143'd100000000 : ~143'd0; + + // Accept D: the default value is set before the case, with no default item. + wire [39:0] accept_d_in = 40'b1 << cyc[4:0]; + logic [4:0] accept_d_out, accept_d_ref; + always_comb begin + accept_d_out = '1; + casez (accept_d_in) + 40'b????????_????????_????????_????????_???????1 : accept_d_out = 5'd0; + 40'b????????_????????_????????_????????_??????1? : accept_d_out = 5'd1; + 40'b????????_????????_????????_????????_?????1?? : accept_d_out = 5'd2; + 40'b????????_????????_????????_????????_????1??? 
: accept_d_out = 5'd3; + endcase + end + assign accept_d_ref = cyc[4:0] <= 5'd3 ? 5'(cyc[4:0]) : 5'h1f; + + // Accept E: an empty default item. + wire [39:0] accept_e_in = 40'b1 << cyc[4:0]; + logic [4:0] accept_e_out, accept_e_ref; + always_comb begin + accept_e_out = '1; + casez (accept_e_in) + 40'b????????_????????_????????_????????_???????1 : accept_e_out = 5'd0; + 40'b????????_????????_????????_????????_??????1? : accept_e_out = 5'd1; + 40'b????????_????????_????????_????????_?????1?? : accept_e_out = 5'd2; + 40'b????????_????????_????????_????????_????1??? : accept_e_out = 5'd3; + default: begin end + endcase + end + assign accept_e_ref = cyc[4:0] <= 5'd3 ? 5'(cyc[4:0]) : 5'h1f; + + // Accept F: an item with an empty body (output keeps its pre-case default). + wire [39:0] accept_f_in = 40'b1 << cyc[4:0]; + logic [4:0] accept_f_out, accept_f_ref; + always_comb begin + accept_f_out = '1; + casez (accept_f_in) + 40'b????????_????????_????????_????????_???1???? : begin end + 40'b????????_????????_????????_????????_???????1 : accept_f_out = 5'd0; + 40'b????????_????????_????????_????????_??????1? : accept_f_out = 5'd1; + 40'b????????_????????_????????_????????_?????1?? : accept_f_out = 5'd2; + 40'b????????_????????_????????_????????_????1??? : accept_f_out = 5'd3; + default: begin end + endcase + end + assign accept_f_ref = cyc[4:0] <= 5'd3 ? 5'(cyc[4:0]) : 5'h1f; + + // Accept G: non-blocking assignments. + wire [23:0] accept_g_in = 24'b1 << cyc[4:0]; + logic [5:0] accept_g_out, accept_g_ref; + always_ff @(posedge clk) begin + accept_g_out <= '1; + casez (accept_g_in) + 24'b????????_????????_???????1 : accept_g_out <= 6'd0; + 24'b????????_????????_??????1? : accept_g_out <= 6'd1; + 24'b????????_????????_?????1?? : accept_g_out <= 6'd2; + 24'b????????_????????_????1??? : accept_g_out <= 6'd3; + endcase + end + always_ff @(posedge clk) + accept_g_ref <= cyc[4:0] == 5'd0 ? 6'd0 : cyc[4:0] == 5'd1 ? 6'd1 + : cyc[4:0] == 5'd2 ? 6'd2 : cyc[4:0] == 5'd3 ? 
6'd3 : ~6'd0; + + // Accept H: multiple outputs from one decoder, some items assigning only a subset. + wire [23:0] accept_h_in = 24'b1 << cyc[4:0]; + logic [ 5:0] accept_h_out_0, accept_h_ref_0; + logic [11:0] accept_h_out_1, accept_h_ref_1; + always_comb begin + accept_h_out_0 = '1; + accept_h_out_1 = '1; + casez (accept_h_in) + 24'b????????_????????_???????1 : begin accept_h_out_0 = 6'd0; accept_h_out_1 = 12'h001; end + 24'b????????_????????_??????1? : accept_h_out_0 = 6'd1; + 24'b????????_????????_?????1?? : accept_h_out_1 = 12'h004; + 24'b????????_????????_????1??? : begin accept_h_out_0 = 6'd3; accept_h_out_1 = 12'h008; end + endcase + end + assign accept_h_ref_0 = cyc[4:0] == 5'd0 ? 6'd0 : cyc[4:0] == 5'd1 ? 6'd1 + : cyc[4:0] == 5'd3 ? 6'd3 : ~6'd0; + assign accept_h_ref_1 = cyc[4:0] == 5'd0 ? 12'h001 : cyc[4:0] == 5'd2 ? 12'h004 + : cyc[4:0] == 5'd3 ? 12'h008 : ~12'd0; + + // Accept I: multiple outputs from one decoder, non-blocking assignments. + wire [23:0] accept_i_in = 24'b1 << cyc[4:0]; + logic [ 5:0] accept_i_out_0, accept_i_ref_0; + logic [11:0] accept_i_out_1, accept_i_ref_1; + always_ff @(posedge clk) begin + accept_i_out_0 <= '1; + accept_i_out_1 <= '1; + casez (accept_i_in) + 24'b????????_????????_???????1 : begin accept_i_out_0 <= 6'd0; accept_i_out_1 <= 12'h001; end + 24'b????????_????????_??????1? : begin accept_i_out_0 <= 6'd1; accept_i_out_1 <= 12'h002; end + 24'b????????_????????_?????1?? : begin accept_i_out_0 <= 6'd2; accept_i_out_1 <= 12'h004; end + 24'b????????_????????_????1??? : begin accept_i_out_0 <= 6'd3; accept_i_out_1 <= 12'h008; end + endcase + end + always_ff @(posedge clk) begin + accept_i_ref_0 <= cyc[4:0] == 5'd0 ? 6'd0 : cyc[4:0] == 5'd1 ? 6'd1 + : cyc[4:0] == 5'd2 ? 6'd2 : cyc[4:0] == 5'd3 ? 6'd3 : ~6'd0; + accept_i_ref_1 <= cyc[4:0] == 5'd0 ? 12'h001 : cyc[4:0] == 5'd1 ? 12'h002 + : cyc[4:0] == 5'd2 ? 12'h004 : cyc[4:0] == 5'd3 ? 
12'h008 : ~12'd0; + end + + // Accept J: an item that can never match in 2-state (an X in casez) is skipped. + // verilator lint_off CASEWITHX + wire [23:0] accept_j_in = 24'b1 << cyc[4:0]; + logic [5:0] accept_j_out, accept_j_ref; + always_comb begin + accept_j_out = '1; + casez (accept_j_in) + 24'b????????_????????_???????x : accept_j_out = 6'd9; // X never matches in 2-state, skipped + 24'b????????_????????_??????1? : accept_j_out = 6'd1; + 24'b????????_????????_?????1?? : accept_j_out = 6'd2; + 24'b????????_????????_????1??? : accept_j_out = 6'd3; + endcase + end + // verilator lint_on CASEWITHX + assign accept_j_ref = cyc[4:0] == 5'd1 ? 6'd1 : cyc[4:0] == 5'd2 ? 6'd2 + : cyc[4:0] == 5'd3 ? 6'd3 : ~6'd0; + + // Accept K: Will constant fold after converting to decoder + wire [23:0] accept_k_in; + logic [5:0] accept_k_out, accept_k_ref; + always_ff @(posedge clk) begin + accept_k_out <= '1; + casez (accept_k_in) + 24'b????????_????????_???????1 : accept_k_out <= 6'd0; + 24'b????????_????????_??????1? : accept_k_out <= 6'd1; + 24'b????????_????????_?????1?? : accept_k_out <= 6'd2; + 24'b????????_????????_????1??? : accept_k_out <= 6'd3; + endcase + end + always_ff @(posedge clk) + accept_k_ref <= 6'd2; + assign accept_k_in = 24'b100; + + // The cases below are intentionally NOT converted to a decoder. + + // Reject A: too few conditions to be worth the indexed load. + wire [23:0] reject_a_in = 24'b1 << cyc[4:0]; + logic [5:0] reject_a_out, reject_a_ref; + always_comb begin + reject_a_out = '1; + casez (reject_a_in) + 24'b????????_????????_???????1 : reject_a_out = 6'd0; + 24'b????????_????????_??????1? : reject_a_out = 6'd1; + endcase + end + assign reject_a_ref = cyc[4:0] == 5'd0 ? 6'd0 : cyc[4:0] == 5'd1 ? 6'd1 : ~6'd0; + + // Reject B: every condition contains an X, so none can ever match in 2-state. The case has no + // matchable branches, so it must not be treated as a decoder - it just keeps the + // pre-case default. 
+ // verilator lint_off CASEWITHX + wire [23:0] reject_b_in = 24'b1 << cyc[4:0]; + logic [5:0] reject_b_out, reject_b_ref; + always_comb begin + reject_b_out = 6'h2a; + casez (reject_b_in) + 24'b????????_????????_???????x : reject_b_out = 6'd0; + 24'b????????_????????_??????x? : reject_b_out = 6'd1; + 24'b????????_????????_?????x?? : reject_b_out = 6'd2; + endcase + end + // verilator lint_on CASEWITHX + assign reject_b_ref = 6'h2a; // No condition can ever match, so always the pre-default + + // Test driver/checker + always @(posedge clk) begin + `checkh(accept_a_out_0, accept_a_ref_0); + `checkh(accept_a_out_1, accept_a_ref_1); + `checkh(accept_a_out_2, accept_a_ref_2); + `checkh(accept_b_out_0, accept_b_ref_0); + `checkh(accept_b_out_1, accept_b_ref_1); + `checkh(accept_b_out_2, accept_b_ref_2); + `checkh(accept_c_out_0, accept_c_ref_0); + `checkh(accept_c_out_1, accept_c_ref_1); + `checkh(accept_c_out_2, accept_c_ref_2); + `checkh(accept_d_out, accept_d_ref); + `checkh(accept_e_out, accept_e_ref); + `checkh(accept_f_out, accept_f_ref); + `checkh(accept_g_out, accept_g_ref); + `checkh(accept_h_out_0, accept_h_ref_0); + `checkh(accept_h_out_1, accept_h_ref_1); + `checkh(accept_i_out_0, accept_i_ref_0); + `checkh(accept_i_out_1, accept_i_ref_1); + `checkh(accept_j_out, accept_j_ref); + `checkh(accept_k_out, accept_k_ref); + `checkh(reject_a_out, reject_a_ref); + `checkh(reject_b_out, reject_b_ref); + + cyc <= cyc + 32'd1; + if (cyc[16]) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end +endmodule diff --git a/test_regress/t/t_case_decoder_off.py b/test_regress/t/t_case_decoder_off.py new file mode 100755 index 000000000..abc61d879 --- /dev/null +++ b/test_regress/t/t_case_decoder_off.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or 
the Perl Artistic License Version 2.0. +# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.top_filename = "t/t_case_decoder.v" + +test.compile(verilator_flags2=['--binary', '--stats', '-fno-case-decoder']) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, Cases decoder\s+(\d+)', 0) + +test.passes() diff --git a/test_regress/t/t_dfg_break_cycles.v b/test_regress/t/t_dfg_break_cycles.v index 80c5aad04..ae7e6d781 100644 --- a/test_regress/t/t_dfg_break_cycles.v +++ b/test_regress/t/t_dfg_break_cycles.v @@ -436,4 +436,86 @@ module t ( assign VOLATILE_ARRAY_IN_CYCLE_1 = volatile_array_in_cycle_1a[1]; // verilator lint_on + ////////////////////////////////////////////////////////////////////////// + // Match masked + ////////////////////////////////////////////////////////////////////////// + + logic [63:0] match_masked; // UNOPTFLAT + always_comb begin + casez (rand_a[31:0]) + 32'b????????_????????_????????_???????1 : match_masked[31:0] = 32'd00; + 32'b????????_????????_????????_??????1? : match_masked[31:0] = 32'd01; + 32'b????????_????????_????????_?????1?? : match_masked[31:0] = 32'd02; + 32'b????????_????????_????????_????1??? : match_masked[31:0] = 32'd03; + 32'b????????_????????_????????_???1???? : match_masked[31:0] = 32'd04; + 32'b????????_????????_????????_??1????? : match_masked[31:0] = 32'd05; + 32'b????????_????????_????????_?1?????? : match_masked[31:0] = 32'd06; + 32'b????????_????????_????????_1??????? : match_masked[31:0] = 32'd07; + 32'b????????_????????_???????1_???????? : match_masked[31:0] = 32'd08; + 32'b????????_????????_??????1?_???????? : match_masked[31:0] = 32'd09; + 32'b????????_????????_?????1??_???????? : match_masked[31:0] = 32'd10; + 32'b????????_????????_????1???_???????? : match_masked[31:0] = 32'd11; + 32'b????????_????????_???1????_???????? 
: match_masked[31:0] = 32'd12; + 32'b????????_????????_??1?????_???????? : match_masked[31:0] = 32'd13; + 32'b????????_????????_?1??????_???????? : match_masked[31:0] = 32'd14; + 32'b????????_????????_1???????_???????? : match_masked[31:0] = 32'd15; + 32'b????????_???????1_????????_???????? : match_masked[31:0] = 32'd16; + 32'b????????_??????1?_????????_???????? : match_masked[31:0] = 32'd17; + 32'b????????_?????1??_????????_???????? : match_masked[31:0] = 32'd18; + 32'b????????_????1???_????????_???????? : match_masked[31:0] = 32'd19; + 32'b????????_???1????_????????_???????? : match_masked[31:0] = 32'd20; + 32'b????????_??1?????_????????_???????? : match_masked[31:0] = 32'd21; + 32'b????????_?1??????_????????_???????? : match_masked[31:0] = 32'd22; + 32'b????????_1???????_????????_???????? : match_masked[31:0] = 32'd23; + 32'b???????1_????????_????????_???????? : match_masked[31:0] = 32'd24; + 32'b??????1?_????????_????????_???????? : match_masked[31:0] = 32'd25; + 32'b?????1??_????????_????????_???????? : match_masked[31:0] = 32'd26; + 32'b????1???_????????_????????_???????? : match_masked[31:0] = 32'd27; + 32'b???1????_????????_????????_???????? : match_masked[31:0] = 32'd28; + 32'b??1?????_????????_????????_???????? : match_masked[31:0] = 32'd29; + 32'b?1??????_????????_????????_???????? : match_masked[31:0] = 32'd30; + 32'b1???????_????????_????????_???????? 
: match_masked[31:0] = 32'd31; + default : match_masked[31:0] = '1; + endcase + end + always_comb begin + casez (match_masked[31:0]) + 32'd00 : match_masked[63:32] = 32'b00000000_00000000_00000000_00000001; + 32'd01 : match_masked[63:32] = 32'b00000000_00000000_00000000_00000010; + 32'd02 : match_masked[63:32] = 32'b00000000_00000000_00000000_00000100; + 32'd03 : match_masked[63:32] = 32'b00000000_00000000_00000000_00001000; + 32'd04 : match_masked[63:32] = 32'b00000000_00000000_00000000_00010000; + 32'd05 : match_masked[63:32] = 32'b00000000_00000000_00000000_00100000; + 32'd06 : match_masked[63:32] = 32'b00000000_00000000_00000000_01000000; + 32'd07 : match_masked[63:32] = 32'b00000000_00000000_00000000_10000000; + 32'd08 : match_masked[63:32] = 32'b00000000_00000000_00000001_00000000; + 32'd09 : match_masked[63:32] = 32'b00000000_00000000_00000010_00000000; + 32'd10 : match_masked[63:32] = 32'b00000000_00000000_00000100_00000000; + 32'd11 : match_masked[63:32] = 32'b00000000_00000000_00001000_00000000; + 32'd12 : match_masked[63:32] = 32'b00000000_00000000_00010000_00000000; + 32'd13 : match_masked[63:32] = 32'b00000000_00000000_00100000_00000000; + 32'd14 : match_masked[63:32] = 32'b00000000_00000000_01000000_00000000; + 32'd15 : match_masked[63:32] = 32'b00000000_00000000_10000000_00000000; + 32'd16 : match_masked[63:32] = 32'b00000000_00000001_00000000_00000000; + 32'd17 : match_masked[63:32] = 32'b00000000_00000010_00000000_00000000; + 32'd18 : match_masked[63:32] = 32'b00000000_00000100_00000000_00000000; + 32'd19 : match_masked[63:32] = 32'b00000000_00001000_00000000_00000000; + 32'd20 : match_masked[63:32] = 32'b00000000_00010000_00000000_00000000; + 32'd21 : match_masked[63:32] = 32'b00000000_00100000_00000000_00000000; + 32'd22 : match_masked[63:32] = 32'b00000000_01000000_00000000_00000000; + 32'd23 : match_masked[63:32] = 32'b00000000_10000000_00000000_00000000; + 32'd24 : match_masked[63:32] = 32'b00000001_00000000_00000000_00000000; + 32'd25 : 
match_masked[63:32] = 32'b00000010_00000000_00000000_00000000; + 32'd26 : match_masked[63:32] = 32'b00000100_00000000_00000000_00000000; + 32'd27 : match_masked[63:32] = 32'b00001000_00000000_00000000_00000000; + 32'd28 : match_masked[63:32] = 32'b00010000_00000000_00000000_00000000; + 32'd29 : match_masked[63:32] = 32'b00100000_00000000_00000000_00000000; + 32'd30 : match_masked[63:32] = 32'b01000000_00000000_00000000_00000000; + 32'd31 : match_masked[63:32] = 32'b10000000_00000000_00000000_00000000; + default: match_masked[63:32] = 32'b00000000_00000000_00000000_00000000; + endcase + end + `signal(MATCH_MASKED, 64); + assign MATCH_MASKED = match_masked; + endmodule diff --git a/test_regress/t/t_dfg_peephole.v b/test_regress/t/t_dfg_peephole.v index 98d9146be..3e393a297 100644 --- a/test_regress/t/t_dfg_peephole.v +++ b/test_regress/t/t_dfg_peephole.v @@ -136,12 +136,12 @@ module t ( `signal(FOLD_SEL, const_a[3:1]); - int fold_arraysel_table_one; - ffs ffs_a(convoluted_zero[0] ? 8'hff: 8'd2, fold_arraysel_table_one); - int fold_arraysel_table_two; - ffs ffs_b(convoluted_zero[1] ? 8'hff: 8'd7, fold_arraysel_table_two); - `signal(FOLD_ARRAYSEL_TABLE_ONE, fold_arraysel_table_one); - `signal(FOLD_ARRAYSEL_TABLE_TWO, fold_arraysel_table_two); + int fold_arraysel_table; + ffs ffs_a(convoluted_zero[0] ? 20'hff: 20'd2, fold_arraysel_table); + int fold_matchmasked; + ffs ffs_b(convoluted_zero[1] ? 
20'hff: 20'd7, fold_matchmasked); + `signal(FOLD_ARRAYSEL_TABLE, fold_arraysel_table); + `signal(FOLD_MATCHMASKED, fold_matchmasked); `signal(SWAP_CONST_IN_COMMUTATIVE_BINARY, rand_a + const_a); `signal(SWAP_NOT_IN_COMMUTATIVE_BINARY, rand_a + ~rand_a); @@ -439,23 +439,33 @@ module t ( endmodule module ffs( - input logic [7:0] i, + input logic [19:0] i, output int o ); // V3Table will convert this always_comb begin - // verilator lint_off CASEOVERLAP casez (i) - 8'b1???????: o = 7; - 8'b?1??????: o = 6; - 8'b??1?????: o = 5; - 8'b???1????: o = 4; - 8'b????1???: o = 3; - 8'b?????1??: o = 2; - 8'b??????1?: o = 1; - 8'b???????1: o = 0; - 8'b00000000: o = -1; + 20'b1???????????????????: o = 19; + 20'b?1??????????????????: o = 18; + 20'b??1?????????????????: o = 17; + 20'b???1????????????????: o = 16; + 20'b????1???????????????: o = 15; + 20'b?????1??????????????: o = 14; + 20'b??????1?????????????: o = 13; + 20'b???????1????????????: o = 12; + 20'b????????1???????????: o = 11; + 20'b?????????1??????????: o = 10; + 20'b??????????1?????????: o = 9; + 20'b???????????1????????: o = 8; + 20'b????????????1???????: o = 7; + 20'b?????????????1??????: o = 6; + 20'b??????????????1?????: o = 5; + 20'b???????????????1????: o = 4; + 20'b????????????????1???: o = 3; + 20'b?????????????????1??: o = 2; + 20'b??????????????????1?: o = 1; + 20'b???????????????????1: o = 0; + default: o = 32'hffffffff; endcase - // verilator lint_on CASEOVERLAP end endmodule diff --git a/test_regress/t/t_opt_table_enum.py b/test_regress/t/t_opt_table_enum.py index 561f0c766..00a2e3a5a 100755 --- a/test_regress/t/t_opt_table_enum.py +++ b/test_regress/t/t_opt_table_enum.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats", "-fno-case-table"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table", "-fno-case-decoder"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git 
a/test_regress/t/t_opt_table_packed_array.py b/test_regress/t/t_opt_table_packed_array.py index 561f0c766..00a2e3a5a 100755 --- a/test_regress/t/t_opt_table_packed_array.py +++ b/test_regress/t/t_opt_table_packed_array.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats", "-fno-case-table"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table", "-fno-case-decoder"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git a/test_regress/t/t_opt_table_real.py b/test_regress/t/t_opt_table_real.py index 561f0c766..00a2e3a5a 100755 --- a/test_regress/t/t_opt_table_real.py +++ b/test_regress/t/t_opt_table_real.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats", "-fno-case-table"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table", "-fno-case-decoder"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git a/test_regress/t/t_opt_table_same.py b/test_regress/t/t_opt_table_same.py index a51a48a73..8f09062b6 100755 --- a/test_regress/t/t_opt_table_same.py +++ b/test_regress/t/t_opt_table_same.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats", "-fno-case-table"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table", "-fno-case-decoder"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 2) diff --git a/test_regress/t/t_opt_table_signed.py b/test_regress/t/t_opt_table_signed.py index 561f0c766..00a2e3a5a 100755 --- a/test_regress/t/t_opt_table_signed.py +++ b/test_regress/t/t_opt_table_signed.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats", "-fno-case-table"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table", "-fno-case-decoder"]) if test.vlt_all: 
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git a/test_regress/t/t_opt_table_string.py b/test_regress/t/t_opt_table_string.py index 561f0c766..00a2e3a5a 100755 --- a/test_regress/t/t_opt_table_string.py +++ b/test_regress/t/t_opt_table_string.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats", "-fno-case-table"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table", "-fno-case-decoder"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1) diff --git a/test_regress/t/t_opt_table_struct.py b/test_regress/t/t_opt_table_struct.py index 561f0c766..00a2e3a5a 100755 --- a/test_regress/t/t_opt_table_struct.py +++ b/test_regress/t/t_opt_table_struct.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=["--stats", "-fno-case-table"]) +test.compile(verilator_flags2=["--stats", "-fno-case-table", "-fno-case-decoder"]) if test.vlt_all: test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1)