Optimize decoder case statements into lookup tables (#7795)
Recognize "decoder" case statements (where every case item only assigns constants to a fixed set of left-hand sides) and replace them with a single packed constant lookup table indexed by the case expression. Small tables are materialized inline in the generated code, and are always optimized. Larger ones are placed in the constant pool and only optimized if deemed beneficial over branches. While this slightly conflicts with V3Table, and is not worth that much on its own, there will be a follow-up patch that converts more cases of this form, which will be much more valuable. This patch does the necessary analysis and the simple table conversion when possible. Split -fcase into -fcase-table (this new conversion) and -fcase-tree (the existing bitwise branch-tree conversion); -fno-case is now an alias for disabling both. Default branches, assignments preceding the case (used as default values), casez wildcards, multiple and partial left-hand sides, and both blocking and non-blocking assignments are handled. Cases that cannot be safely tabled (e.g. non-exhaustive with no default, overlapping writes to one variable, or mixed blocking/non-blocking assignments) fall back to the existing if/else lowering. Consequently, re-inlining of constant pool variables in V3Const is disabled, and the constant pool hash is rebuilt in V3Dead (previously we didn't create constant pool entries early enough for this to matter).
This commit is contained in:
parent
3a4377d39e
commit
5712f9b614
|
|
@ -659,6 +659,18 @@ Summary:
|
|||
|
||||
.. option:: -fno-case
|
||||
|
||||
Rarely needed. Disable all case statement optimizations.
|
||||
|
||||
Alias for all other `-fno-case-*` options.
|
||||
|
||||
.. option:: -fno-case-table
|
||||
|
||||
Rarely needed. Disable converting case statements into table lookups.
|
||||
|
||||
.. option:: -fno-case-tree
|
||||
|
||||
Rarely needed. Disable converting case statements into bitwise branch trees.
|
||||
|
||||
.. option:: -fno-combine
|
||||
|
||||
.. option:: -fno-const
|
||||
|
|
|
|||
|
|
@ -1000,6 +1000,8 @@ public:
|
|||
// this matters, the caller must handle the dtype difference as appropriate. If 'mergeDType' is
|
||||
// false, the returned VarScope will have _->dtypep()->sameTree(initp->dtypep()) return true.
|
||||
AstVarScope* findConst(AstConst* initp, bool mergeDType);
|
||||
// Rebuild hashes after potential removals
|
||||
void reCache();
|
||||
};
|
||||
class AstConstraint final : public AstNode {
|
||||
// Constraint
|
||||
|
|
@ -2136,6 +2138,7 @@ class AstVar final : public AstNode {
|
|||
bool m_attrFsmRegisterWrapper : 1; // connected to an fsm_register_wrapper instance
|
||||
bool m_attrFsmResetArc : 1; // declared with fsm_reset_arc metacomment
|
||||
bool m_attrFsmArcInclCond : 1; // declared with fsm_arc_include_cond metacomment
|
||||
bool m_constPoolEntry : 1; // Constant pool variable
|
||||
bool m_fileDescr : 1; // File descriptor
|
||||
bool m_gotNansiType : 1; // Linker saw Non-ANSI type declaration
|
||||
bool m_icoMaybeWritten : 1; // Design might write this input signal - for ico change detect
|
||||
|
|
@ -2199,6 +2202,7 @@ class AstVar final : public AstNode {
|
|||
m_attrFsmRegisterWrapper = false;
|
||||
m_attrFsmResetArc = false;
|
||||
m_attrFsmArcInclCond = false;
|
||||
m_constPoolEntry = false;
|
||||
m_fileDescr = false;
|
||||
m_gotNansiType = false;
|
||||
m_icoMaybeWritten = false;
|
||||
|
|
@ -2348,6 +2352,8 @@ public:
|
|||
void attrFsmRegisterWrapper(bool flag) { m_attrFsmRegisterWrapper = flag; }
|
||||
void attrFsmResetArc(bool flag) { m_attrFsmResetArc = flag; }
|
||||
void attrFsmArcInclCond(bool flag) { m_attrFsmArcInclCond = flag; }
|
||||
// True if this variable has been flagged as a constant pool entry
bool constPoolEntry() const { return m_constPoolEntry; }
|
||||
// Flag this variable as a constant pool entry. One-way: there is no accessor here to clear it
void setConstPoolEntry() { m_constPoolEntry = true; }
|
||||
void rand(const VRandAttr flag) { m_rand = flag; }
|
||||
void usedParam(bool flag) { m_usedParam = flag; }
|
||||
void usedLoopIdx(bool flag) { m_usedLoopIdx = flag; }
|
||||
|
|
|
|||
|
|
@ -1629,6 +1629,7 @@ AstConstPool::AstConstPool(FileLine* fl)
|
|||
AstVarScope* AstConstPool::createNewEntry(const string& name, AstNodeExpr* initp) {
|
||||
FileLine* const fl = initp->fileline();
|
||||
AstVar* const varp = new AstVar{fl, VVarType::MODULETEMP, name, initp->dtypep()};
|
||||
varp->setConstPoolEntry();
|
||||
varp->isConst(true);
|
||||
varp->isStatic(true);
|
||||
varp->valuep(initp->cloneTree(false));
|
||||
|
|
@ -1748,6 +1749,17 @@ AstVarScope* AstConstPool::findConst(AstConst* initp, bool mergeDType) {
|
|||
return varScopep;
|
||||
}
|
||||
|
||||
void AstConstPool::reCache() {
// Rebuild the 'm_tables' and 'm_consts' hash lookups from the variables currently
// present under the pool scope. Intended to be called after entries may have been removed.
|
||||
// Discard all existing lookup entries first
m_tables.clear();
|
||||
m_consts.clear();
|
||||
// Walk every AstVarScope under 'm_scopep' and re-hash the value of its variable
for (AstVarScope* vscp = m_scopep->varsp(); vscp; vscp = VN_CAST(vscp->nextp(), VarScope)) {
|
||||
AstNode* const valuep = vscp->varp()->valuep();
|
||||
const V3Hash hash = V3Hasher::uncachedHash(valuep);
|
||||
// AstInitArray values are indexed in 'm_tables', AstConst values in 'm_consts'
if (VN_IS(valuep, InitArray)) m_tables.emplace(hash.value(), vscp);
|
||||
if (VN_IS(valuep, Const)) m_consts.emplace(hash.value(), vscp);
|
||||
}
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// Per-type Debugging
|
||||
|
||||
|
|
@ -3198,6 +3210,7 @@ int AstVarRef::instrCount() const {
|
|||
}
|
||||
void AstVar::dump(std::ostream& str) const {
|
||||
this->AstNode::dump(str);
|
||||
if (constPoolEntry()) str << " [CONSTPOOL]";
|
||||
if (isSc()) str << " [SC]";
|
||||
if (isPrimaryIO()) str << (isInout() ? " [PIO]" : (isWritable() ? " [PO]" : " [PI]"));
|
||||
if (isPrimaryClock()) str << " [PCLK]";
|
||||
|
|
@ -3239,6 +3252,7 @@ void AstVar::dump(std::ostream& str) const {
|
|||
void AstVar::dumpJson(std::ostream& str) const {
|
||||
dumpJsonStrFunc(str, origName);
|
||||
dumpJsonStrFunc(str, verilogName);
|
||||
dumpJsonBoolFuncIf(str, constPoolEntry);
|
||||
dumpJsonBoolFuncIf(str, isSc);
|
||||
dumpJsonBoolFuncIf(str, isPrimaryIO);
|
||||
dumpJsonBoolFuncIf(str, isPrimaryClock);
|
||||
|
|
|
|||
423
src/V3Case.cpp
423
src/V3Case.cpp
|
|
@ -133,6 +133,12 @@ class CaseVisitor final : public VNVisitor {
|
|||
constexpr static int CASE_DETAILS_MAX_WIDTH = 16;
|
||||
// Levels of priority to be ORed together in top IF tree
|
||||
constexpr static int CASE_ENCODER_GROUP_DEPTH = 8;
|
||||
// Maximum size for tiny lookup tables - materialized in code
|
||||
constexpr static size_t CASE_TABLE_TINY_BITS = 32; // Up to 2 instructions to materialize
|
||||
// Maximum size for normal lookup tables - stored in constant pool
|
||||
constexpr static size_t CASE_TABLE_MAX_BITS = 1ULL << 16; // 64Kbits / 8KBytes
|
||||
// Minimum number of branches a table must replace to be worth a load
|
||||
constexpr static size_t CASE_TABLE_MIN_BRANCHES = 3;
|
||||
|
||||
// TYPES
|
||||
// Record for each case value
|
||||
|
|
@ -142,21 +148,49 @@ class CaseVisitor final : public VNVisitor {
|
|||
AstNode* stmtsp; // Statements of 'itemp' (might be nullptr if case is empty)
|
||||
};
|
||||
|
||||
// Record for each LHS of a decoder pattern
|
||||
struct LhsRecord final {
// Filled in by 'analyzeCase' (pre-defaults) and 'analyzeDecoderCaseItem' (case clause
// assignments); 'offset' is computed by 'analyzeDecoderPattern'.
|
||||
AstNodeExpr* lhsp = nullptr; // LHS of the assignment
|
||||
AstNodeAssign* preDefaultp = nullptr; // Default assignment *before the case statement*
|
||||
size_t nCaseAssigns = 0; // Number of AstAssigns to this LHS in case clauses
|
||||
size_t nCaseAssignDlys = 0; // Number of AstAssignDlys to this LHS in case clauses
|
||||
size_t offset = 0; // Offset in the table for this LHS (bit offset within one table entry)
|
||||
|
||||
static size_t s_nextId; // Static unique Id counter
|
||||
// Taken from the counter on default construction; used as the tie-breaker when sorting
// records of equal width, which keeps the ordering independent of hash map iteration order
size_t id = ++s_nextId; // Unique Id for sorting
|
||||
};
|
||||
|
||||
// NODE STATE:
|
||||
// AstVarScope::user1() -> bool: true if written to, only in parts of analysis phase
|
||||
|
||||
// STATE
|
||||
// Statistics tracking, as a struct so can be passed to 'const' methods
|
||||
struct Stats final {
// Counters are incremented by the conversion methods and reported from the destructor
|
||||
VDouble0 caseTableNormal; // Cases using table method with normal table
|
||||
VDouble0 caseTableTiny; // Cases using table method with tiny table
|
||||
VDouble0 caseFast; // Cases using fast bit tree method
|
||||
VDouble0 caseGeneric; // Cases using generic if/else tree method
|
||||
VDouble0 provenAssertions; // Assertions proven to hold
|
||||
} m_stats;
|
||||
const AstNode* m_alwaysp = nullptr; // Always in which case is located
|
||||
size_t m_nTmps = 0; // Sequence numbers for temporary variables
|
||||
AstScope* m_scopep = nullptr; // Current scope
|
||||
|
||||
// STATE - per AstCase. Update by 'analyzeCase', treat 'const' otherwise
|
||||
bool m_caseOpaque = false; // Case statement is opaque (non-packed, or non-const conditions)
|
||||
bool m_caseHasDefault = false; // Indicates the case statement has a default case
|
||||
size_t m_caseNCaseItems = 0; // Number of AstCaseItems in the case statement
|
||||
size_t m_caseNConditions = 0; // Number of conditions in the case statement
|
||||
// Map from LHSs of decoder pattern to corresponding LhsRecord.
|
||||
std::unordered_map<VNRef<AstNodeExpr>, LhsRecord> m_caseLhsRecords;
|
||||
// Values of 'm_caseLhsRecords' in sorted order, if case statement is a decoder pattern
|
||||
std::vector<LhsRecord> m_caseDecoderRecords;
|
||||
size_t m_caseDecoderEntryWidth = 0; // Width of each entry in the decoder table
|
||||
size_t m_caseTableWidth = 0; // Total width of the case table - 0 means can't optimize
|
||||
bool m_caseDetailsValid = false; // Indicates m_caseDetails is valid
|
||||
struct final {
|
||||
bool exhaustive = false; // Proven exhaustive
|
||||
bool exhaustiveOverEnumOnly = false; // Exhaustive over enum values only
|
||||
bool noOverlaps = false; // Proven no overlaps between cases
|
||||
// Map from value (index) to the CaseRecord that covers this value
|
||||
std::array<CaseRecord, 1U << CASE_DETAILS_MAX_WIDTH> records;
|
||||
|
|
@ -189,6 +223,50 @@ class CaseVisitor final : public VNVisitor {
|
|||
return pairMaskBits;
|
||||
}
|
||||
|
||||
// If the given statement is an assignment that fits the decoder pattern,
|
||||
// return it, otherwise return nullptr
|
||||
static AstNodeAssign* checkDecoderAssign(AstNode* stmtp) {
// All checks below are purely structural; nothing is modified. The result is 'stmtp'
// itself viewed as an AstNodeAssign, or nullptr as soon as any requirement fails.
|
||||
// Only Assign and AssignDly are supported
|
||||
if (!VN_IS(stmtp, Assign) && !VN_IS(stmtp, AssignDly)) return nullptr;
|
||||
AstNodeAssign* const assp = VN_AS(stmtp, NodeAssign);
|
||||
// Only if no timing control
|
||||
if (assp->timingControlp()) return nullptr;
|
||||
// Only if assigning a constant
|
||||
if (!VN_IS(assp->rhsp(), Const)) return nullptr;
|
||||
// Only if it's a packed value (string and double constants cannot be packed into the table)
|
||||
AstNodeDType* const dtypep = assp->rhsp()->dtypep();
|
||||
if (dtypep->isString() || dtypep->isDouble()) return nullptr;
|
||||
// Only if the LHS has no reads (can be relaxed, but need to prove there is no r/w hazard)
// E.g. a variable index in a select on the LHS would be such a read reference
|
||||
if (assp->lhsp()->exists([](AstVarRef* refp) { return refp->access().isReadOrRW(); })) {
|
||||
return nullptr;
|
||||
}
|
||||
// This is an assignment that fits the decoder pattern
|
||||
return assp;
|
||||
}
|
||||
|
||||
// Analyze if the given case item fits the decoder pattern, return true iff so.
|
||||
// Updates 'm_caseLhsRecords'.
|
||||
bool analyzeDecoderCaseItem(AstCaseItem* cip) {
// Note: returns false at the first non-conforming statement, at which point
// 'm_caseLhsRecords' may already have been updated for the earlier statements of 'cip'.
|
||||
// AstVarScope::user1() -> bool: true if written to
// Scoped to this call, so the 'written' marks are tracked per case item
// NOTE(review): presumably VNUser1InUse clears all user1 values on construction - confirm
|
||||
const VNUser1InUse user1InUse;
|
||||
for (AstNode* stmtp = cip->stmtsp(); stmtp; stmtp = stmtp->nextp()) {
|
||||
// Must be an assignment that fits the decoder pattern
|
||||
AstNodeAssign* const assp = checkDecoderAssign(stmtp);
|
||||
if (!assp) return false;
|
||||
// Must assign each LHS exactly once - RHS is Const
// Tracked per variable (not per LHS expression), so two partial writes to the same
// variable within one case item are also rejected
// NOTE(review): presumably user1SetOnce() returns the old mark and then sets it - confirm
|
||||
const bool multipleAssignments = assp->lhsp()->exists([](AstVarRef* refp) { //
|
||||
return refp->varScopep()->user1SetOnce();
|
||||
});
|
||||
if (multipleAssignments) return false;
|
||||
// Update LhsRecord (creates the record on first use of this LHS expression)
|
||||
LhsRecord& lhsRecord = m_caseLhsRecords[*assp->lhsp()];
|
||||
if (!lhsRecord.lhsp) lhsRecord.lhsp = assp->lhsp();
|
||||
// 'assp' is either an Assign or an AssignDly, so exactly one counter is bumped
lhsRecord.nCaseAssigns += VN_IS(assp, Assign);
|
||||
lhsRecord.nCaseAssignDlys += VN_IS(assp, AssignDly);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Determine whether we should check case items are complete
|
||||
// Returns enum's dtype if should check, nullptr if shouldn't
|
||||
static const AstEnumDType* getEnumCompletionCheckDType(const AstCase* const nodep) {
|
||||
|
|
@ -245,13 +323,6 @@ class CaseVisitor final : public VNVisitor {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool checkExhaustive(AstCase* nodep) {
|
||||
if (const AstEnumDType* const enump = getEnumCompletionCheckDType(nodep)) {
|
||||
return checkExhaustiveEnum(nodep, enump);
|
||||
}
|
||||
return checkExhaustivePacked(nodep);
|
||||
}
|
||||
|
||||
// Analyze each value in the case statement. Updates 'm_caseDetails' and issues warnings.
|
||||
void analyzeCaseDetails(AstCase* nodep) {
|
||||
const uint32_t numValues = 1UL << nodep->exprp()->width();
|
||||
|
|
@ -361,16 +432,136 @@ class CaseVisitor final : public VNVisitor {
|
|||
}
|
||||
|
||||
// If there was no default, check exhaustiveness
|
||||
m_caseDetails.exhaustive = hasDefault || checkExhaustive(nodep);
|
||||
m_caseDetails.exhaustiveOverEnumOnly = false;
|
||||
m_caseDetails.exhaustive = hasDefault;
|
||||
if (!hasDefault) {
|
||||
if (const AstEnumDType* const enump = getEnumCompletionCheckDType(nodep)) {
|
||||
// Only checks enum values are covered, not all bit patterns of the case expression
|
||||
const bool exhaustiveOverEnum = checkExhaustiveEnum(nodep, enump);
|
||||
m_caseDetails.exhaustiveOverEnumOnly = exhaustiveOverEnum;
|
||||
m_caseDetails.exhaustive = exhaustiveOverEnum;
|
||||
} else {
|
||||
m_caseDetails.exhaustive = checkExhaustivePacked(nodep);
|
||||
}
|
||||
}
|
||||
|
||||
// Records now valid
|
||||
m_caseDetailsValid = true;
|
||||
}
|
||||
|
||||
void analyzeDecoderPattern(AstCase* nodep) {
// Decide whether the case statement as a whole fits the decoder pattern, based on the
// LhsRecords gathered earlier. On success fills 'm_caseDecoderRecords' (with table offsets),
// 'm_caseDecoderEntryWidth' and 'm_caseTableWidth'. On failure returns early, leaving
// 'm_caseTableWidth' at its prior value, which 'analyzeCase' resets to 0 before analysis.
|
||||
// Check each LHS record
|
||||
for (auto it = m_caseLhsRecords.cbegin(); it != m_caseLhsRecords.cend();) {
|
||||
const LhsRecord& lhsRecord = it->second;
|
||||
|
||||
// Delete records that have no assignments in any case item (only pre-defaults)
|
||||
if (!lhsRecord.nCaseAssigns && !lhsRecord.nCaseAssignDlys) {
|
||||
it = m_caseLhsRecords.erase(it);
|
||||
continue;
|
||||
}
|
||||
// Advance now, so the 'continue' statements below need not; 'lhsRecord' still refers
// to the element being checked
++it;
|
||||
|
||||
// If mixed assignments, it's not a decoder pattern
|
||||
if (lhsRecord.nCaseAssigns && lhsRecord.nCaseAssignDlys) return;
|
||||
|
||||
// If assigned in all branches, it's good - but only if every table entry will be
|
||||
// covered, i.e. the case has a default, or is exhaustive over all bit patterns.
|
||||
// Enum-only exhaustiveness is not enough: out-of-enum values leave entries
|
||||
// uncovered.
|
||||
if (m_caseHasDefault
|
||||
|| (m_caseDetailsValid && m_caseDetails.exhaustive
|
||||
&& !m_caseDetails.exhaustiveOverEnumOnly)) {
|
||||
if (lhsRecord.nCaseAssigns == m_caseNCaseItems) continue;
|
||||
if (lhsRecord.nCaseAssignDlys == m_caseNCaseItems) continue;
|
||||
}
|
||||
|
||||
// Otherwise it needs to have a pre-default assignment
|
||||
AstNode* const preDefaultp = lhsRecord.preDefaultp;
|
||||
if (!preDefaultp) return;
|
||||
// And the pre-default needs to be the same type
|
||||
if (lhsRecord.nCaseAssigns && !VN_IS(preDefaultp, Assign)) return;
|
||||
if (lhsRecord.nCaseAssignDlys && !VN_IS(preDefaultp, AssignDly)) return;
|
||||
}
|
||||
// All cases check out, can optimize if there are some entries left
|
||||
if (m_caseLhsRecords.empty()) return;
|
||||
|
||||
// Gather all the LhsRecords and sort them - there is a copy here, it's ok, won't be many
|
||||
m_caseDecoderRecords.reserve(m_caseLhsRecords.size());
|
||||
for (const auto& item : m_caseLhsRecords) m_caseDecoderRecords.emplace_back(item.second);
|
||||
std::sort(m_caseDecoderRecords.begin(), m_caseDecoderRecords.end(),
|
||||
[](const LhsRecord& a, const LhsRecord& b) {
|
||||
// Sort by width, then id
|
||||
const int aWidth = a.lhsp->width();
|
||||
const int bWidth = b.lhsp->width();
|
||||
if (aWidth != bWidth) return aWidth < bWidth;
|
||||
return a.id < b.id;
|
||||
});
|
||||
|
||||
// We can either create a single lookup table for all LHSs, or one for each LHS.
|
||||
// With a single table, we need to select out of the lookup via a temporary variable.
|
||||
// With one table per LHS, we need to do multiple loads. The table is likely to incur a
|
||||
// D-cache miss on large designs, so we choose single table.
|
||||
|
||||
const int caseWidth = nodep->exprp()->width();
|
||||
|
||||
// Safely check if a table of 2^caseWidth entries, each 'entryWidth' bits wide, would fit
// within 'maxWidth' bits
|
||||
const auto fitsLimit = [&](size_t entryWidth, size_t maxWidth) -> bool {
|
||||
size_t totalWidth = entryWidth;
|
||||
// Multiply cases - iterative to avoid overflow
|
||||
for (int i = 0; i < caseWidth; ++i) {
|
||||
totalWidth <<= 1;
|
||||
if (totalWidth > maxWidth) return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
// Check if the whole table would fit in a tiny table packed tightly
|
||||
m_caseDecoderEntryWidth = 0;
|
||||
for (LhsRecord& lhsRecord : m_caseDecoderRecords) {
|
||||
lhsRecord.offset = m_caseDecoderEntryWidth;
|
||||
m_caseDecoderEntryWidth += lhsRecord.lhsp->width();
|
||||
}
|
||||
// If it fits, we will pack it tightly
|
||||
if (fitsLimit(m_caseDecoderEntryWidth, CASE_TABLE_TINY_BITS)) {
|
||||
m_caseTableWidth = m_caseDecoderEntryWidth << caseWidth; // Can optimize
|
||||
return;
|
||||
}
|
||||
|
||||
// Table will be biggish. To avoid expensive bit swizzling, align each entry to a
|
||||
// word boundary if it would cross a word boundary.
// The offsets computed for the tight packing above are recomputed from scratch here.
|
||||
m_caseDecoderEntryWidth = 0;
|
||||
for (LhsRecord& lhsRecord : m_caseDecoderRecords) {
|
||||
const size_t width = lhsRecord.lhsp->width();
|
||||
const size_t lsbWord = VL_BITWORD_E(m_caseDecoderEntryWidth);
|
||||
const size_t msbWord = VL_BITWORD_E(m_caseDecoderEntryWidth + width - 1);
|
||||
if (lsbWord != msbWord) {
|
||||
m_caseDecoderEntryWidth = VL_WORDS_I(m_caseDecoderEntryWidth) * VL_EDATASIZE;
|
||||
}
|
||||
lhsRecord.offset = m_caseDecoderEntryWidth;
|
||||
m_caseDecoderEntryWidth += width;
|
||||
}
|
||||
// Also align the whole entry width to a word boundary
|
||||
m_caseDecoderEntryWidth = VL_WORDS_I(m_caseDecoderEntryWidth) * VL_EDATASIZE;
|
||||
// Check the table fits max size
|
||||
if (fitsLimit(m_caseDecoderEntryWidth, CASE_TABLE_MAX_BITS)) {
|
||||
m_caseTableWidth = m_caseDecoderEntryWidth << caseWidth; // Can optimize
|
||||
return;
|
||||
}
|
||||
|
||||
// Can't optimize - yet ...
|
||||
}
|
||||
|
||||
// Analyze case statement. Updates 'm_case*' members. Reports warnings.
|
||||
void analyzeCase(AstCase* nodep) {
|
||||
// Reset all analysis results
|
||||
m_caseOpaque = false;
|
||||
m_caseHasDefault = false;
|
||||
m_caseNCaseItems = 0;
|
||||
m_caseNConditions = 0;
|
||||
m_caseDecoderRecords.clear();
|
||||
m_caseDecoderEntryWidth = 0;
|
||||
m_caseTableWidth = 0;
|
||||
m_caseLhsRecords.clear();
|
||||
m_caseDetailsValid = false;
|
||||
|
||||
AstNode* const caseExprp = nodep->exprp();
|
||||
|
|
@ -378,14 +569,44 @@ class CaseVisitor final : public VNVisitor {
|
|||
// Mark opaque if not a packed value - TODO: can this be a class?
|
||||
if (caseExprp->isDouble() || caseExprp->isString()) m_caseOpaque = true;
|
||||
|
||||
// Check each condition expression
|
||||
// Gather pre-default assignments of decoder pattern
|
||||
{
|
||||
// AstVarScope::user1() -> bool: true if written to
|
||||
const VNUser1InUse user1InUse;
|
||||
for (AstNode* prevp = nodep->prevp(); prevp; prevp = prevp->prevp()) {
|
||||
AstNodeAssign* const assp = checkDecoderAssign(prevp);
|
||||
if (!assp) break; // Stop if not a decoder assignment
|
||||
// Stop if multiple assignments
|
||||
const bool multipleAssignments = assp->lhsp()->exists([&](AstVarRef* refp) { //
|
||||
return refp->varScopep()->user1SetOnce();
|
||||
});
|
||||
if (multipleAssignments) break;
|
||||
// Store pre-default assignment
|
||||
LhsRecord& lhsRecord = m_caseLhsRecords[*assp->lhsp()];
|
||||
lhsRecord.lhsp = assp->lhsp();
|
||||
lhsRecord.preDefaultp = assp;
|
||||
}
|
||||
}
|
||||
|
||||
// Check each case item
|
||||
bool canBeDecoder = true;
|
||||
for (AstCaseItem* cip = nodep->itemsp(); cip; cip = VN_AS(cip->nextp(), CaseItem)) {
|
||||
// Check conditions
|
||||
for (AstNode* condp = cip->condsp(); condp; condp = condp->nextp()) {
|
||||
// Count conditions
|
||||
++m_caseNConditions;
|
||||
// Mark opaque if non-constant condition
|
||||
if (!VN_IS(condp, Const)) m_caseOpaque = true;
|
||||
if (!VN_IS(condp, Const)) {
|
||||
m_caseOpaque = true;
|
||||
canBeDecoder = false; // Can't be a decoder if opaque
|
||||
}
|
||||
}
|
||||
// Check if it has a default case
|
||||
if (cip->isDefault()) m_caseHasDefault = true;
|
||||
// Count case items
|
||||
++m_caseNCaseItems;
|
||||
// Check if it fits the decoder pattern, if still possible
|
||||
if (canBeDecoder) canBeDecoder = analyzeDecoderCaseItem(cip);
|
||||
}
|
||||
|
||||
// Nothing else to do if not a packed type, or non-const conditions
|
||||
|
|
@ -393,6 +614,135 @@ class CaseVisitor final : public VNVisitor {
|
|||
|
||||
// If small enough, analyse details
|
||||
if (caseExprp->width() <= CASE_DETAILS_MAX_WIDTH) analyzeCaseDetails(nodep);
|
||||
|
||||
// Check if it actually fits a full decoder pattern
|
||||
if (canBeDecoder) analyzeDecoderPattern(nodep);
|
||||
}
|
||||
|
||||
AstNodeStmt* convertCaseTable(AstCase* nodep) {
// Build the replacement for a decoder pattern case statement: a packed constant table
// holding one entry per value of the case expression, and the assignment(s) that index it.
// Relies on 'm_caseDecoderRecords', 'm_caseDecoderEntryWidth' and 'm_caseTableWidth' as
// computed by 'analyzeDecoderPattern'. Returns the new statement (list); 'nodep' itself is
// not modified here.
|
||||
// Create the table constant
|
||||
FileLine* const flp = nodep->fileline();
|
||||
AstConst* const tablep
|
||||
= new AstConst{flp, AstConst::WidthedValue{}, static_cast<int>(m_caseTableWidth), 0};
|
||||
const uint32_t tableEntries = 1U << nodep->exprp()->width();
|
||||
|
||||
// Populate the table
|
||||
for (const LhsRecord& lhsRecord : m_caseDecoderRecords) {
|
||||
const int lhsWidth = lhsRecord.lhsp->width();
|
||||
const int lhsOffset = lhsRecord.offset;
|
||||
|
||||
// Broadcast the pre-default assignment
|
||||
if (lhsRecord.preDefaultp) {
|
||||
AstConst* const rhsp = VN_AS(lhsRecord.preDefaultp->rhsp(), Const);
|
||||
for (uint32_t index = 0; index < tableEntries; ++index) {
|
||||
const uint32_t tableOffset = index * m_caseDecoderEntryWidth + lhsOffset;
|
||||
tablep->num().opSelInto(rhsp->num(), tableOffset, lhsWidth);
|
||||
}
|
||||
}
|
||||
|
||||
// Populate table based on each case item. In reverse order so earlier items win
|
||||
for (AstCaseItem* cip = VN_AS(nodep->itemsp()->lastp(), CaseItem); cip;
|
||||
cip = VN_AS(cip->prevp(), CaseItem)) {
|
||||
// Find the RHS in this case
|
||||
AstConst* const rhsp = [&]() -> AstConst* {
|
||||
for (AstNode* stmtp = cip->stmtsp(); stmtp; stmtp = stmtp->nextp()) {
|
||||
AstNodeAssign* const ap = VN_AS(stmtp, NodeAssign);
|
||||
if (lhsRecord.lhsp->sameTree(ap->lhsp())) return VN_AS(ap->rhsp(), Const);
|
||||
}
|
||||
// Not assigned in this case, use the pre-assigned default
// (the analysis only accepts an LHS missing from some item if it has a pre-default)
|
||||
return VN_AS(lhsRecord.preDefaultp->rhsp(), Const);
|
||||
}();
|
||||
|
||||
// If default, broadcast it
|
||||
if (cip->isDefault()) {
|
||||
for (uint32_t index = 0; index < tableEntries; ++index) {
|
||||
const uint32_t tableOffset = index * m_caseDecoderEntryWidth + lhsOffset;
|
||||
tablep->num().opSelInto(rhsp->num(), tableOffset, lhsWidth);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Iterate case conditions in reverse order
|
||||
for (AstConst* condp = VN_AS(cip->condsp()->lastp(), Const); condp;
|
||||
condp = VN_AS(condp->prevp(), Const)) {
|
||||
if (neverItem(nodep, condp)) continue; // If item never matches, ignore it
|
||||
const auto& match = matchPattern(nodep, condp);
|
||||
const uint32_t matchMask = match.first.toUInt();
|
||||
const uint32_t matchBits = match.second.toUInt();
|
||||
const uint32_t inverseMask = ~matchMask & ((1U << condp->width()) - 1);
|
||||
// This iterates through all integers that are a subset of the inverse mask,
|
||||
// i.e.: all don't care values masked out
// The loop exits after handling i == 0, so the exact 'matchBits' index is included
|
||||
for (uint32_t i = inverseMask; true; i = (i - 1) & inverseMask) {
|
||||
const uint32_t index = i | matchBits;
|
||||
const uint32_t tableOffset = index * m_caseDecoderEntryWidth + lhsOffset;
|
||||
tablep->num().opSelInto(rhsp->num(), tableOffset, lhsWidth);
|
||||
if (!i) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create the table in the constant pool, unless using an inline table
// Yields nullptr for a tiny table, in which case 'tablep' is kept and used directly below
|
||||
AstVarScope* const tableVscp = [&]() -> AstVarScope* {
|
||||
if (m_caseTableWidth <= CASE_TABLE_TINY_BITS) {
|
||||
++m_stats.caseTableTiny;
|
||||
return nullptr;
|
||||
}
|
||||
++m_stats.caseTableNormal;
|
||||
AstVarScope* vscp = v3Global.rootp()->constPoolp()->findConst(tablep, true);
|
||||
VL_DO_DANGLING(tablep->deleteTree(), tablep); // findConst clones
|
||||
return vscp;
|
||||
}();
|
||||
|
||||
// Create the lookup table reference and index
// The index is the bit offset of the selected entry: entry width * case expression
|
||||
AstNodeExpr* const tableRefp
|
||||
= tableVscp ? static_cast<AstNodeExpr*>(new AstVarRef{flp, tableVscp, VAccess::READ})
|
||||
: static_cast<AstNodeExpr*>(tablep);
|
||||
AstNodeExpr* const caseExprp
|
||||
= new AstExtend{flp, nodep->exprp()->cloneTreePure(false), 32};
|
||||
AstNodeExpr* const scalep
|
||||
= new AstConst{flp, static_cast<uint32_t>(m_caseDecoderEntryWidth)};
|
||||
AstNodeExpr* const tableLsbp = new AstMul{flp, scalep, caseExprp};
|
||||
|
||||
// If there is only one LHS, just use the result
|
||||
if (m_caseDecoderRecords.size() == 1) {
|
||||
const LhsRecord& lhsRecord = m_caseDecoderRecords[0];
|
||||
const int width = lhsRecord.lhsp->width();
|
||||
AstNodeExpr* const rhsp = new AstSel{flp, tableRefp, tableLsbp, width};
|
||||
AstNodeExpr* const lhsp = lhsRecord.lhsp->cloneTreePure(false);
|
||||
// Keep the original assignment kind (blocking vs non-blocking)
if (lhsRecord.nCaseAssigns) {
|
||||
return new AstAssign{flp, lhsp, rhsp};
|
||||
} else if (lhsRecord.nCaseAssignDlys) {
|
||||
return new AstAssignDly{flp, lhsp, rhsp};
|
||||
} else {
|
||||
nodep->v3fatalSrc("Unknown assignment type");
|
||||
}
|
||||
}
|
||||
|
||||
// There are multiple LHSs, store the lookup result in a temporary
// The temporary holds one whole table entry; it is always written with a blocking assign
|
||||
const std::string name = "__VcaseTableOut" + std::to_string(m_nTmps++);
|
||||
AstVarScope* const tempVscp = m_scopep->createTemp(name, m_caseDecoderEntryWidth);
|
||||
AstNodeExpr* const tempWritep = new AstVarRef{flp, tempVscp, VAccess::WRITE};
|
||||
AstNodeExpr* const tableSelp
|
||||
= new AstSel{flp, tableRefp, tableLsbp, static_cast<int>(m_caseDecoderEntryWidth)};
|
||||
AstNodeStmt* const resultp = new AstAssign{flp, tempWritep, tableSelp};
|
||||
|
||||
// For each LHS, select out the result
|
||||
for (const LhsRecord& lhsRecord : m_caseDecoderRecords) {
|
||||
const int width = lhsRecord.lhsp->width();
|
||||
const int lsb = lhsRecord.offset;
|
||||
AstNodeExpr* const tempReadp = new AstVarRef{flp, tempVscp, VAccess::READ};
|
||||
AstNodeExpr* const rhsp = new AstSel{flp, tempReadp, lsb, width};
|
||||
AstNodeExpr* const lhsp = lhsRecord.lhsp->cloneTreePure(false);
|
||||
if (lhsRecord.nCaseAssigns) {
|
||||
resultp->addNext(new AstAssign{flp, lhsp, rhsp});
|
||||
} else if (lhsRecord.nCaseAssignDlys) {
|
||||
resultp->addNext(new AstAssignDly{flp, lhsp, rhsp});
|
||||
} else {
|
||||
nodep->v3fatalSrc("Unknown assignment type");
|
||||
}
|
||||
}
|
||||
return resultp;
|
||||
}
|
||||
|
||||
// TODO: should return AstNodeStmt after #6280
|
||||
|
|
@ -443,7 +793,8 @@ class CaseVisitor final : public VNVisitor {
|
|||
// -> tree of IF(msb, IF(msb-1, 11, 10)
|
||||
// IF(msb-1, 01, 00))
|
||||
// TODO: should return AstNodeStmt after #6280
|
||||
AstNode* convertCaseFast(AstCase* nodep) const {
|
||||
AstNode* convertCaseFast(AstCase* nodep) {
|
||||
++m_stats.caseFast;
|
||||
const int caseWidth = nodep->exprp()->width();
|
||||
AstNode* const ifrootp = convertCaseFastRecurse(nodep->exprp(), caseWidth - 1, 0UL);
|
||||
return ifrootp && ifrootp->backp() ? ifrootp->cloneTree(true) : ifrootp;
|
||||
|
|
@ -455,7 +806,8 @@ class CaseVisitor final : public VNVisitor {
|
|||
// IF((EQ (AND MASK cexpr) (AND MASK icond1)
|
||||
// ,istmts2, istmts3
|
||||
// TODO: should return AstNodeStmt after #6280
|
||||
AstNode* convertCaseGeneric(AstCase* nodep) const {
|
||||
AstNode* convertCaseGeneric(AstCase* nodep) {
|
||||
++m_stats.caseGeneric;
|
||||
// We'll do this in two stages.
|
||||
// First stage, convert the conditions to the appropriate IF AND terms.
|
||||
bool hasDefault = false;
|
||||
|
|
@ -522,7 +874,8 @@ class CaseVisitor final : public VNVisitor {
|
|||
// 'Or' new term with previous terms
|
||||
newCondp = newCondp ? new AstLogOr{flp, newCondp, termp} : termp;
|
||||
}
|
||||
// Replace expression in tree. Needs to be non-null, so add a constant false if needed
|
||||
// Replace expression in tree. Needs to be non-null, so add a constant false if
|
||||
// needed
|
||||
if (!newCondp) newCondp = new AstConst{flp, AstConst::BitFalse{}};
|
||||
itemp->addCondsp(newCondp);
|
||||
}
|
||||
|
|
@ -591,11 +944,31 @@ class CaseVisitor final : public VNVisitor {
|
|||
|
||||
// Convert the given case statement to a representation not using AstCase
|
||||
// TODO: should return AstNodeStmt after #6280
|
||||
AstNode* convertCase(AstCase* nodep, Stats& stats) const {
|
||||
AstNode* convertCase(AstCase* nodep) {
|
||||
// Determine if we should use the lookup table method
|
||||
const bool useTable = [&]() {
|
||||
// Not if disabled
|
||||
if (!v3Global.opt.fCaseTable()) return false;
|
||||
// Not if analysis tells us we can't
|
||||
if (!m_caseTableWidth) return false;
|
||||
// Always if tiny - it is materialized inline, so there is no load to amortize
|
||||
if (m_caseTableWidth <= CASE_TABLE_TINY_BITS) return true;
|
||||
// For a normal (constant-pool) table, weigh the indexed load against the branch
|
||||
// lowering it would replace. That lowering's depth is bounded by the selector
|
||||
// width (a balanced bit tree tests ~one bit per level) and by the number of
|
||||
// distinct values (a generic if/else does ~one compare per value). A few compares
|
||||
// are cheaper than a load that is likely to be a cache miss, so only table once that
|
||||
// depth is exceeded.
|
||||
const size_t branches = std::min<size_t>(nodep->exprp()->width(), m_caseNConditions);
|
||||
if (branches < CASE_TABLE_MIN_BRANCHES) return false;
|
||||
return true;
|
||||
}();
|
||||
if (useTable) return convertCaseTable(nodep);
|
||||
|
||||
// Determine if we should use the fast bitwise branching tree method
|
||||
const bool useFastBitTree = [&]() {
|
||||
// Not if disabled
|
||||
if (!v3Global.opt.fCase()) return false;
|
||||
if (!v3Global.opt.fCaseTree()) return false;
|
||||
// Can't do it without the detailed analysis
|
||||
if (!m_caseDetailsValid) return false;
|
||||
// Can't do it if not exhaustive
|
||||
|
|
@ -608,13 +981,9 @@ class CaseVisitor final : public VNVisitor {
|
|||
// Otherwise use the bit tree
|
||||
return true;
|
||||
}();
|
||||
if (useFastBitTree) {
|
||||
++stats.caseFast;
|
||||
return convertCaseFast(nodep);
|
||||
}
|
||||
if (useFastBitTree) return convertCaseFast(nodep);
|
||||
|
||||
// Convert using the generic if/else tree method
|
||||
++stats.caseGeneric;
|
||||
// If a case statement is exhaustive, presume signals involved aren't forming a latch
|
||||
// TODO: this is broken, but it is as was before
|
||||
if (m_alwaysp && (!m_caseDetailsValid || m_caseDetails.exhaustive)) {
|
||||
|
|
@ -650,14 +1019,20 @@ class CaseVisitor final : public VNVisitor {
|
|||
}
|
||||
|
||||
// Convert the case statement and replace the original
|
||||
if (AstNode* const replacementp = convertCase(nodep, m_stats)) {
|
||||
if (AstNode* const replacementp = convertCase(nodep)) {
|
||||
nodep->replaceWith(replacementp);
|
||||
} else {
|
||||
nodep->unlinkFrBack();
|
||||
}
|
||||
VL_DO_DANGLING(nodep->deleteTree(), nodep);
|
||||
}
|
||||
//--------------------
|
||||
|
||||
void visit(AstScope* nodep) override {
// Track the scope being visited in 'm_scopep'; 'convertCaseTable' creates its temporary
// variables in it
// NOTE(review): presumably VL_RESTORER restores the previous value on exit - confirm
|
||||
VL_RESTORER(m_scopep);
|
||||
m_scopep = nodep;
|
||||
iterateChildren(nodep);
|
||||
}
|
||||
|
||||
void visit(AstAlways* nodep) override {
|
||||
VL_RESTORER(m_alwaysp);
|
||||
m_alwaysp = nodep;
|
||||
|
|
@ -669,12 +1044,16 @@ public:
|
|||
// CONSTRUCTORS
|
||||
// Construct the visitor and immediately iterate over the given netlist
explicit CaseVisitor(AstNetlist* nodep) { iterate(nodep); }
|
||||
// On destruction, report the counters accumulated in m_stats through V3Stats::addStat
~CaseVisitor() override {
|
||||
// Counter for "normal" table conversions (the tests describe these as constant-pool tables)
V3Stats::addStat("Optimizations, Cases table normal", m_stats.caseTableNormal);
|
||||
// Counter for "tiny" table conversions
// NOTE(review): presumably the tables materialized inline in the generated code - confirm
V3Stats::addStat("Optimizations, Cases table tiny", m_stats.caseTableTiny);
|
||||
// Counter for the fast conversion (caseFast), reported under the name "parallelized"
V3Stats::addStat("Optimizations, Cases parallelized", m_stats.caseFast);
|
||||
// Counter for the generic conversion (caseGeneric), reported under the name "complex"
V3Stats::addStat("Optimizations, Cases complex", m_stats.caseGeneric);
|
||||
V3Stats::addStat("Optimizations, Cases proven assertions", m_stats.provenAssertions);
|
||||
}
|
||||
};
|
||||
|
||||
// Out-of-class definition of the static member LhsRecord::s_nextId, initialized to zero
// NOTE(review): presumably the next unique id handed out to an LhsRecord - confirm against the class
size_t CaseVisitor::LhsRecord::s_nextId = 0;
|
||||
|
||||
//######################################################################
|
||||
// Case class functions
|
||||
|
||||
|
|
|
|||
|
|
@ -3281,7 +3281,7 @@ class ConstVisitor final : public VNVisitor {
|
|||
iterateChildren(nodep);
|
||||
UASSERT_OBJ(nodep->varp(), nodep, "Not linked");
|
||||
bool did = false;
|
||||
if (m_doV && nodep->varp()->valuep() && !m_attrp) {
|
||||
if (m_doV && !nodep->varp()->constPoolEntry() && nodep->varp()->valuep() && !m_attrp) {
|
||||
// UINFOTREE(1, valuep, "", "visitvaref");
|
||||
iterateAndNextNull(nodep->varp()->valuep()); // May change nodep->varp()->valuep()
|
||||
AstNode* const valuep = nodep->varp()->valuep();
|
||||
|
|
|
|||
|
|
@ -597,6 +597,7 @@ public:
|
|||
// We may have removed some datatypes, cleanup
|
||||
nodep->typeTablep()->repairCache();
|
||||
VIsCached::clearCacheTree(); // Removing assignments may affect isPure
|
||||
nodep->constPoolp()->reCache();
|
||||
}
|
||||
~DeadVisitor() override {
|
||||
V3Stats::addStatSum("Optimizations, deadified FTasks", m_statFTasksDeadified);
|
||||
|
|
|
|||
|
|
@ -78,9 +78,15 @@ class DataflowOptimize final {
|
|||
if (AstVarScope* const vscp = VN_CAST(nodep, VarScope)) {
|
||||
const AstVar* const varp = vscp->varp();
|
||||
// Force and trace have already been processed
|
||||
const bool hasExtRd = varp->isPrimaryIO() || varp->isSigUserRdPublic();
|
||||
const bool hasExtWr
|
||||
= (varp->isPrimaryIO() && varp->isNonOutput()) || varp->isSigUserRWPublic();
|
||||
const bool hasExtRd = //
|
||||
varp->isPrimaryIO() // Top level port - readable
|
||||
|| varp->isSigUserRdPublic() // Readable by user
|
||||
|| varp->constPoolEntry() // Stored in AstConstPool hashmap, but read only
|
||||
;
|
||||
const bool hasExtWr = //
|
||||
(varp->isPrimaryIO() && varp->isNonOutput()) // Top level port - writable
|
||||
|| varp->isSigUserRWPublic() // Writable by user
|
||||
;
|
||||
if (hasExtRd) DfgVertexVar::setHasExtRdRefs(vscp);
|
||||
if (hasExtWr) DfgVertexVar::setHasExtWrRefs(vscp);
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -1448,7 +1448,12 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
|
|||
|
||||
DECL_OPTION("-facyc-simp", FOnOff, &m_fAcycSimp);
|
||||
DECL_OPTION("-fassemble", FOnOff, &m_fAssemble);
|
||||
DECL_OPTION("-fcase", FOnOff, &m_fCase);
|
||||
DECL_OPTION("-fcase", CbFOnOff, [this](bool flag) {
|
||||
m_fCaseTable = flag;
|
||||
m_fCaseTree = flag;
|
||||
});
|
||||
DECL_OPTION("-fcase-table", FOnOff, &m_fCaseTable);
|
||||
DECL_OPTION("-fcase-tree", FOnOff, &m_fCaseTree);
|
||||
DECL_OPTION("-fcombine", FOnOff, &m_fCombine);
|
||||
DECL_OPTION("-fconst", FOnOff, &m_fConst);
|
||||
DECL_OPTION("-fconst-before-dfg", FOnOff, &m_fConstBeforeDfg);
|
||||
|
|
@ -2351,7 +2356,8 @@ void V3Options::optimize(int level) {
|
|||
const bool flag = level > 0;
|
||||
m_fAcycSimp = flag;
|
||||
m_fAssemble = flag;
|
||||
m_fCase = flag;
|
||||
m_fCaseTable = flag;
|
||||
m_fCaseTree = flag;
|
||||
m_fCombine = flag;
|
||||
m_fConst = flag;
|
||||
m_fConstBitOpTree = flag;
|
||||
|
|
|
|||
|
|
@ -392,7 +392,8 @@ private:
|
|||
// MEMBERS (optimizations)
|
||||
bool m_fAcycSimp; // main switch: -fno-acyc-simp: acyclic pre-optimizations
|
||||
bool m_fAssemble; // main switch: -fno-assemble: assign assemble
|
||||
bool m_fCase; // main switch: -fno-case: case tree conversion
|
||||
bool m_fCaseTable; // main switch: -fno-case-table: case table conversion
|
||||
bool m_fCaseTree; // main switch: -fno-case-tree: case tree conversion
|
||||
bool m_fCombine; // main switch: -fno-combine: common icode packing
|
||||
bool m_fConst; // main switch: -fno-const: constant folding
|
||||
bool m_fConstBeforeDfg = true; // main switch: -fno-const-before-dfg for testing only!
|
||||
|
|
@ -725,7 +726,8 @@ public:
|
|||
// ACCESSORS (optimization options)
|
||||
bool fAcycSimp() const { return m_fAcycSimp; }
|
||||
bool fAssemble() const { return m_fAssemble; }
|
||||
bool fCase() const { return m_fCase; }
|
||||
bool fCaseTable() const { return m_fCaseTable; }
|
||||
bool fCaseTree() const { return m_fCaseTree; }
|
||||
bool fCombine() const { return m_fCombine; }
|
||||
bool fConst() const { return m_fConst; }
|
||||
bool fConstBeforeDfg() const { return m_fConstBeforeDfg; }
|
||||
|
|
|
|||
|
|
@ -16,12 +16,10 @@ test.compile(verilator_flags2=["--stats", "-fno-dfg"])
|
|||
|
||||
test.execute()
|
||||
|
||||
if test.vlt:
|
||||
test.file_grep(test.stats, r'Optimizations, Cases parallelized\s+(\d+)', 11)
|
||||
test.file_grep(test.stats, r'Optimizations, Combined CFuncs\s+(\d+)', 8)
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 10)
|
||||
elif test.vltmt:
|
||||
test.file_grep(test.stats, r'Optimizations, Combined CFuncs\s+(\d+)', 9)
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 10)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 8)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases parallelized\s+(\d+)', 3)
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 2)
|
||||
test.file_grep(test.stats, r'Optimizations, Combined CFuncs\s+(\d+)', 9 if test.vltmt else 8)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@ test.compile(verilator_flags2=["--stats -fno-case"])
|
|||
|
||||
test.execute()
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases parallelized\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2024 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('simulator_st')
|
||||
# Use t/t_case_huge.v as the top-level Verilog source for this test
test.top_filename = 't/t_case_huge.v'
|
||||
|
||||
# Compile with statistics enabled and -fno-case-tree
# (the option that disables converting case statements into bitwise branch trees)
test.compile(verilator_flags2=["--stats -fno-case-tree"])
|
||||
|
||||
test.execute()
|
||||
|
||||
# Expected statistics: 8 "normal" table conversions, no "tiny" tables,
# and no "parallelized" cases
test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 8)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases parallelized\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
|
||||
# Build with --binary and --stats; no case optimization is disabled here
test.compile(verilator_flags2=['--binary', '--stats'])
|
||||
|
||||
test.execute()
|
||||
|
||||
# Expected statistics: 8 "normal" table conversions and no "tiny" tables
# NOTE(review): presumably one per "Accept" case in the matching Verilog file - confirm
test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 8)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
@ -0,0 +1,273 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain.
|
||||
// SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
//
|
||||
// Case statements that become a "normal" (constant-pool) lookup table, followed by
|
||||
// cases that must not be converted to one. Each output is compared against an
|
||||
// equivalent reference computed without a case statement, so the reference itself is
|
||||
// never tabled. Selectors are wide enough, with enough distinct values, that the
|
||||
// branch lowering they replace is deep enough to make a table worthwhile.
|
||||
|
||||
// verilog_format: off
|
||||
`define stop $stop
|
||||
`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got=%0x exp=%0x (%s !== %s)\n", `__FILE__,`__LINE__, (gotv), (expv), `"gotv`", `"expv`"); `stop; end while(0);
|
||||
// verilog_format: on
|
||||
|
||||
module t;
|
||||
logic clk = 1'b0;
|
||||
always #5 clk = ~clk;
|
||||
|
||||
logic [31:0] cyc = 0;
|
||||
|
||||
// Accept A: single output, blocking assignment, all selector values covered.
|
||||
logic [15:0] accept_a_out, accept_a_ref;
|
||||
always_comb
|
||||
case (cyc[3:0])
|
||||
4'd0: accept_a_out = 16'h1111;
|
||||
4'd1: accept_a_out = 16'h2222;
|
||||
4'd2: accept_a_out = 16'h4444;
|
||||
4'd3: accept_a_out = 16'h8888;
|
||||
default: accept_a_out = 16'h0f0f;
|
||||
endcase
|
||||
assign accept_a_ref = (cyc[3:0] == 4'd0) ? 16'h1111
|
||||
: (cyc[3:0] == 4'd1) ? 16'h2222
|
||||
: (cyc[3:0] == 4'd2) ? 16'h4444
|
||||
: (cyc[3:0] == 4'd3) ? 16'h8888 : 16'h0f0f;
|
||||
|
||||
// Accept B: single output, non-blocking assignment, with a default value set before
|
||||
// the case and not all selector values covered.
|
||||
logic [15:0] accept_b_out, accept_b_ref;
|
||||
// verilator lint_off CASEINCOMPLETE
|
||||
always_ff @(posedge clk) begin
|
||||
accept_b_out <= 16'hffff;
|
||||
case (cyc[3:0])
|
||||
4'd0: accept_b_out <= 16'h0001;
|
||||
4'd1: accept_b_out <= 16'h0002;
|
||||
4'd2: accept_b_out <= 16'h0004;
|
||||
4'd3: accept_b_out <= 16'h0008;
|
||||
endcase
|
||||
end
|
||||
// verilator lint_on CASEINCOMPLETE
|
||||
always_ff @(posedge clk)
|
||||
accept_b_ref <= (cyc[3:0] == 4'd0) ? 16'h0001
|
||||
: (cyc[3:0] == 4'd1) ? 16'h0002
|
||||
: (cyc[3:0] == 4'd2) ? 16'h0004
|
||||
: (cyc[3:0] == 4'd3) ? 16'h0008 : 16'hffff;
|
||||
|
||||
// Accept C: three outputs, blocking assignment, with a default branch.
|
||||
logic [11:0] accept_c_out_0, accept_c_ref_0;
|
||||
logic [11:0] accept_c_out_1, accept_c_ref_1;
|
||||
logic [11:0] accept_c_out_2, accept_c_ref_2;
|
||||
always_comb
|
||||
case (cyc[3:0])
|
||||
4'd0: begin accept_c_out_0 = 12'h001; accept_c_out_1 = 12'h010; accept_c_out_2 = 12'h100; end
|
||||
4'd1: begin accept_c_out_0 = 12'h002; accept_c_out_1 = 12'h020; accept_c_out_2 = 12'h200; end
|
||||
4'd2: begin accept_c_out_0 = 12'h004; accept_c_out_1 = 12'h040; accept_c_out_2 = 12'h400; end
|
||||
4'd3: begin accept_c_out_0 = 12'h008; accept_c_out_1 = 12'h080; accept_c_out_2 = 12'h800; end
|
||||
default: begin accept_c_out_0 = 12'h000; accept_c_out_1 = 12'h0ff; accept_c_out_2 = 12'hfff; end
|
||||
endcase
|
||||
assign accept_c_ref_0 = (cyc[3:0] == 4'd0) ? 12'h001 : (cyc[3:0] == 4'd1) ? 12'h002
|
||||
: (cyc[3:0] == 4'd2) ? 12'h004 : (cyc[3:0] == 4'd3) ? 12'h008 : 12'h000;
|
||||
assign accept_c_ref_1 = (cyc[3:0] == 4'd0) ? 12'h010 : (cyc[3:0] == 4'd1) ? 12'h020
|
||||
: (cyc[3:0] == 4'd2) ? 12'h040 : (cyc[3:0] == 4'd3) ? 12'h080 : 12'h0ff;
|
||||
assign accept_c_ref_2 = (cyc[3:0] == 4'd0) ? 12'h100 : (cyc[3:0] == 4'd1) ? 12'h200
|
||||
: (cyc[3:0] == 4'd2) ? 12'h400 : (cyc[3:0] == 4'd3) ? 12'h800 : 12'hfff;
|
||||
|
||||
// Accept D: two outputs, non-blocking assignment, empty default branch, with default
|
||||
// values set before the case.
|
||||
logic [15:0] accept_d_out_0, accept_d_ref_0;
|
||||
logic [15:0] accept_d_out_1, accept_d_ref_1;
|
||||
always_ff @(posedge clk) begin
|
||||
accept_d_out_0 <= 16'h0000;
|
||||
accept_d_out_1 <= 16'hffff;
|
||||
case (cyc[3:0])
|
||||
4'd0: begin accept_d_out_0 <= 16'h0001; accept_d_out_1 <= 16'h0010; end
|
||||
4'd1: begin accept_d_out_0 <= 16'h0002; accept_d_out_1 <= 16'h0020; end
|
||||
4'd2: begin accept_d_out_0 <= 16'h0004; accept_d_out_1 <= 16'h0040; end
|
||||
4'd3: begin accept_d_out_0 <= 16'h0008; accept_d_out_1 <= 16'h0080; end
|
||||
default: begin end
|
||||
endcase
|
||||
end
|
||||
always_ff @(posedge clk) begin
|
||||
accept_d_ref_0 <= (cyc[3:0] == 4'd0) ? 16'h0001 : (cyc[3:0] == 4'd1) ? 16'h0002
|
||||
: (cyc[3:0] == 4'd2) ? 16'h0004 : (cyc[3:0] == 4'd3) ? 16'h0008 : 16'h0000;
|
||||
accept_d_ref_1 <= (cyc[3:0] == 4'd0) ? 16'h0010 : (cyc[3:0] == 4'd1) ? 16'h0020
|
||||
: (cyc[3:0] == 4'd2) ? 16'h0040 : (cyc[3:0] == 4'd3) ? 16'h0080 : 16'hffff;
|
||||
end
|
||||
|
||||
// Accept E: casez with don't-care bits.
|
||||
logic [15:0] accept_e_out, accept_e_ref;
|
||||
always_comb
|
||||
casez (cyc[3:0])
|
||||
4'b00??: accept_e_out = 16'haaaa;
|
||||
4'b01??: accept_e_out = 16'hbbbb;
|
||||
4'b10??: accept_e_out = 16'hcccc;
|
||||
4'b11??: accept_e_out = 16'hdddd;
|
||||
endcase
|
||||
assign accept_e_ref = (cyc[3:2] == 2'd0) ? 16'haaaa : (cyc[3:2] == 2'd1) ? 16'hbbbb
|
||||
: (cyc[3:2] == 2'd2) ? 16'hcccc : 16'hdddd;
|
||||
|
||||
// Accept F: an item that can never match, and an item listing multiple values.
|
||||
logic [15:0] accept_f_out, accept_f_ref;
|
||||
// verilator lint_off CASEWITHX
|
||||
always_comb
|
||||
casez (cyc[3:0])
|
||||
4'bxxx0: accept_f_out = 16'h0000; // X can never match in 2-state
|
||||
4'b0001, 4'b0011, 4'b0101: accept_f_out = 16'h5555; // lists three values
|
||||
default: accept_f_out = 16'h9999;
|
||||
endcase
|
||||
// verilator lint_on CASEWITHX
|
||||
assign accept_f_ref = (cyc[3:0] == 4'd1 || cyc[3:0] == 4'd3 || cyc[3:0] == 4'd5)
|
||||
? 16'h5555 : 16'h9999;
|
||||
|
||||
// Accept G: items assign different subsets of two outputs, with default values (and an
|
||||
// unrelated output) set before the case.
|
||||
logic [15:0] accept_g_out_0, accept_g_ref_0;
|
||||
logic [15:0] accept_g_out_1, accept_g_ref_1;
|
||||
logic [15:0] accept_g_out_2, accept_g_ref_2;
|
||||
always_comb begin
|
||||
accept_g_out_0 = 16'h0000;
|
||||
accept_g_out_1 = 16'hffff;
|
||||
accept_g_out_2 = 16'h3333; // not assigned in the case
|
||||
case (cyc[3:0])
|
||||
4'd0: accept_g_out_0 = 16'h0001;
|
||||
4'd1: accept_g_out_1 = 16'h0002;
|
||||
4'd2: begin accept_g_out_0 = 16'h0004; accept_g_out_1 = 16'h0008; end
|
||||
4'd3: accept_g_out_0 = 16'h0010;
|
||||
default: ;
|
||||
endcase
|
||||
end
|
||||
assign accept_g_ref_0 = (cyc[3:0] == 4'd0) ? 16'h0001 : (cyc[3:0] == 4'd2) ? 16'h0004
|
||||
: (cyc[3:0] == 4'd3) ? 16'h0010 : 16'h0000;
|
||||
assign accept_g_ref_1 = (cyc[3:0] == 4'd1) ? 16'h0002 : (cyc[3:0] == 4'd2) ? 16'h0008 : 16'hffff;
|
||||
assign accept_g_ref_2 = 16'h3333;
|
||||
|
||||
// Accept H: unique0 enum case; the selector may hold an out-of-range value.
|
||||
typedef enum logic [3:0] {NE0, NE1, NE2, NE3, NE4} ne_t;
|
||||
ne_t accept_h_in;
|
||||
assign accept_h_in = ne_t'(cyc[3:0]);
|
||||
logic [15:0] accept_h_out, accept_h_ref;
|
||||
always_comb begin
|
||||
accept_h_out = 16'hffff;
|
||||
unique0 case (accept_h_in)
|
||||
NE0: accept_h_out = 16'h0001;
|
||||
NE1: accept_h_out = 16'h0002;
|
||||
NE2: accept_h_out = 16'h0003;
|
||||
NE3: accept_h_out = 16'h0004;
|
||||
NE4: accept_h_out = 16'h0005;
|
||||
endcase
|
||||
end
|
||||
assign accept_h_ref = (cyc[3:0] == 4'd0) ? 16'h0001 : (cyc[3:0] == 4'd1) ? 16'h0002
|
||||
: (cyc[3:0] == 4'd2) ? 16'h0003 : (cyc[3:0] == 4'd3) ? 16'h0004
|
||||
: (cyc[3:0] == 4'd4) ? 16'h0005 : 16'hffff;
|
||||
|
||||
// The cases below are intentionally NOT converted to a lookup table.
|
||||
|
||||
// Reject A: too few distinct values, so the branch lowering is cheaper than a load.
|
||||
logic [15:0] reject_a_out, reject_a_ref;
|
||||
always_comb
|
||||
case (cyc[3:0])
|
||||
4'd0: reject_a_out = 16'h0001;
|
||||
4'd1: reject_a_out = 16'h0002;
|
||||
default: reject_a_out = 16'h00ff;
|
||||
endcase
|
||||
assign reject_a_ref = (cyc[3:0] == 4'd0) ? 16'h0001 : (cyc[3:0] == 4'd1) ? 16'h0002 : 16'h00ff;
|
||||
|
||||
// Reject B: a one-bit selector, too shallow to be worth a load.
|
||||
logic [19:0] reject_b_out, reject_b_ref;
|
||||
always_comb
|
||||
case (cyc[0])
|
||||
1'b0: reject_b_out = 20'h00001;
|
||||
1'b1: reject_b_out = 20'h00002;
|
||||
default: reject_b_out = 20'h00000;
|
||||
endcase
|
||||
assign reject_b_ref = cyc[0] ? 20'h00002 : 20'h00001;
|
||||
|
||||
// Reject C: a 12-bit selector, too wide to table.
|
||||
logic [15:0] reject_c_out, reject_c_ref;
|
||||
always_comb
|
||||
case (cyc[11:0])
|
||||
12'd0: reject_c_out = 16'h0001;
|
||||
12'd1: reject_c_out = 16'h0002;
|
||||
12'd2: reject_c_out = 16'h0004;
|
||||
default: reject_c_out = 16'h0000;
|
||||
endcase
|
||||
assign reject_c_ref = (cyc[11:0] == 12'd0) ? 16'h0001
|
||||
: (cyc[11:0] == 12'd1) ? 16'h0002
|
||||
: (cyc[11:0] == 12'd2) ? 16'h0004 : 16'h0000;
|
||||
|
||||
// Reject D: a 17-bit selector, too wide to table.
|
||||
logic [16:0] reject_d_in;
|
||||
assign reject_d_in = cyc[16:0];
|
||||
logic [15:0] reject_d_out, reject_d_ref;
|
||||
// verilator lint_off CASEINCOMPLETE
|
||||
always_comb begin
|
||||
reject_d_out = 16'hbeef;
|
||||
case (reject_d_in)
|
||||
17'd0: reject_d_out = 16'h0001;
|
||||
17'd1: reject_d_out = 16'h0002;
|
||||
17'd2: reject_d_out = 16'h0004;
|
||||
endcase
|
||||
end
|
||||
// verilator lint_on CASEINCOMPLETE
|
||||
assign reject_d_ref = (reject_d_in == 17'd0) ? 16'h0001
|
||||
: (reject_d_in == 17'd1) ? 16'h0002
|
||||
: (reject_d_in == 17'd2) ? 16'h0004 : 16'hbeef;
|
||||
|
||||
// Reject E: a whole output and a sub-range of it assigned in different items.
|
||||
logic [7:0] reject_e_out, reject_e_ref;
|
||||
always_comb begin
|
||||
reject_e_out = 8'h00;
|
||||
reject_e_out[3:0] = 4'h0;
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_e_out = 8'haa; // assigns the whole output
|
||||
2'b01: reject_e_out[3:0] = 4'h5; // assigns a sub-range of the same output
|
||||
default: ;
|
||||
endcase
|
||||
end
|
||||
assign reject_e_ref = (cyc[1:0] == 2'd0) ? 8'haa : (cyc[1:0] == 2'd1) ? 8'h05 : 8'h00;
|
||||
|
||||
// Reject F: a sub-range's default value is overwritten by a later whole-output default
|
||||
// before the case, so the sub-range's pre-case value is set elsewhere.
|
||||
logic [31:0] reject_f_out, reject_f_ref;
|
||||
always_comb begin
|
||||
reject_f_out[15:0] = 16'h0005; // farther default for the sub-range
|
||||
reject_f_out = 32'h0; // closer whole-output default overwrites the sub-range to 0
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_f_out[15:0] = 16'habcd; // only the sub-range is assigned in the case
|
||||
default: ;
|
||||
endcase
|
||||
end
|
||||
assign reject_f_ref = (cyc[1:0] == 2'd0) ? 32'h0000abcd : 32'h00000000;
|
||||
|
||||
// Test driver/checker
|
||||
always @(posedge clk) begin
|
||||
`checkh(accept_a_out, accept_a_ref);
|
||||
`checkh(accept_b_out, accept_b_ref);
|
||||
`checkh(accept_c_out_0, accept_c_ref_0);
|
||||
`checkh(accept_c_out_1, accept_c_ref_1);
|
||||
`checkh(accept_c_out_2, accept_c_ref_2);
|
||||
`checkh(accept_d_out_0, accept_d_ref_0);
|
||||
`checkh(accept_d_out_1, accept_d_ref_1);
|
||||
`checkh(accept_e_out, accept_e_ref);
|
||||
`checkh(accept_f_out, accept_f_ref);
|
||||
`checkh(accept_g_out_0, accept_g_ref_0);
|
||||
`checkh(accept_g_out_1, accept_g_ref_1);
|
||||
`checkh(accept_g_out_2, accept_g_ref_2);
|
||||
`checkh(accept_h_out, accept_h_ref);
|
||||
`checkh(reject_a_out, reject_a_ref);
|
||||
`checkh(reject_b_out, reject_b_ref);
|
||||
`checkh(reject_c_out, reject_c_ref);
|
||||
`checkh(reject_d_out, reject_d_ref);
|
||||
`checkh(reject_e_out, reject_e_ref);
|
||||
`checkh(reject_f_out, reject_f_ref);
|
||||
|
||||
cyc <= cyc + 32'd1;
|
||||
if (cyc == 32'd32) begin
|
||||
$write("*-* All Finished *-*\n");
|
||||
$finish;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
|
||||
# Use t/t_case_table_normal.v as the top-level Verilog source for this test
test.top_filename = "t/t_case_table_normal.v"
|
||||
|
||||
# Compile with -fno-case-table
# (the option that disables converting case statements into table lookups)
test.compile(verilator_flags2=['--binary', '--stats', '-fno-case-table'])
|
||||
|
||||
test.execute()
|
||||
|
||||
# Expected statistics: no table conversions of either kind
test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
|
||||
# Build with --binary and --stats; no case optimization is disabled here
test.compile(verilator_flags2=['--binary', '--stats'])
|
||||
|
||||
test.execute()
|
||||
|
||||
# Expected statistics: 11 "tiny" table conversions and 1 "normal" table conversion
# NOTE(review): presumably the "Accept" cases of the matching Verilog file, with the one
# wide-output case counted as "normal" - confirm
test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 11)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 1)
|
||||
|
||||
test.passes()
|
||||
|
|
@ -0,0 +1,369 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain.
|
||||
// SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
//
|
||||
// Case statements that become a "tiny" lookup table, followed by cases that must
|
||||
// not be converted to one. Each output is compared against an equivalent reference
|
||||
// computed without a case statement, so the reference itself is never tabled.
|
||||
|
||||
// verilog_format: off
|
||||
`define stop $stop
|
||||
`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got=%0x exp=%0x (%s !== %s)\n", `__FILE__,`__LINE__, (gotv), (expv), `"gotv`", `"expv`"); `stop; end while(0);
|
||||
`define checkr(gotv,expv) do if ((gotv) != (expv)) begin $write("%%Error: %s:%0d: got=%f exp=%f\n", `__FILE__,`__LINE__, (gotv), (expv)); `stop; end while(0);
|
||||
`define checks(gotv,expv) do if ((gotv) != (expv)) begin $write("%%Error: %s:%0d: got='%s' exp='%s'\n", `__FILE__,`__LINE__, (gotv), (expv)); `stop; end while(0);
|
||||
// verilog_format: on
|
||||
|
||||
module t;
|
||||
logic clk = 1'b0;
|
||||
always #5 clk = ~clk;
|
||||
|
||||
logic [31:0] cyc = 0;
|
||||
|
||||
// Accept A: single output, blocking assignment, all selector values covered.
|
||||
wire [2:0] accept_a_in = cyc[2:0];
|
||||
logic [3:0] accept_a_out, accept_a_ref;
|
||||
always_comb
|
||||
case (accept_a_in)
|
||||
3'd0: accept_a_out = 4'd3;
|
||||
3'd1: accept_a_out = 4'd4;
|
||||
3'd2: accept_a_out = 4'd5;
|
||||
3'd3: accept_a_out = 4'd6;
|
||||
3'd4: accept_a_out = 4'd7;
|
||||
3'd5: accept_a_out = 4'd8;
|
||||
3'd6: accept_a_out = 4'd9;
|
||||
3'd7: accept_a_out = 4'd10;
|
||||
endcase
|
||||
assign accept_a_ref = 4'd3 + {1'b0, accept_a_in};
|
||||
|
||||
// Accept B: single output, non-blocking assignment, with a default value set before
|
||||
// the case and not all selector values covered.
|
||||
logic [3:0] accept_b_out, accept_b_ref;
|
||||
// verilator lint_off CASEINCOMPLETE
|
||||
always_ff @(posedge clk) begin
|
||||
accept_b_out <= 4'hf;
|
||||
case (cyc[1:0])
|
||||
2'b00: accept_b_out <= 4'h1;
|
||||
2'b01: accept_b_out <= 4'h2;
|
||||
endcase
|
||||
end
|
||||
// verilator lint_on CASEINCOMPLETE
|
||||
always_ff @(posedge clk)
|
||||
accept_b_ref <= (cyc[1:0] == 2'b00) ? 4'h1 : (cyc[1:0] == 2'b01) ? 4'h2 : 4'hf;
|
||||
|
||||
// Accept C: two outputs of different widths, blocking assignment, with a default branch.
|
||||
logic [2:0] accept_c_out_0, accept_c_ref_0;
|
||||
logic [3:0] accept_c_out_1, accept_c_ref_1;
|
||||
always_comb
|
||||
case (cyc[1:0])
|
||||
2'b00: begin accept_c_out_0 = 3'd1; accept_c_out_1 = 4'd6; end
|
||||
2'b01: begin accept_c_out_0 = 3'd2; accept_c_out_1 = 4'd5; end
|
||||
default: begin accept_c_out_0 = 3'd0; accept_c_out_1 = 4'd7; end
|
||||
endcase
|
||||
assign accept_c_ref_0 = (cyc[1:0] == 2'b00) ? 3'd1 : (cyc[1:0] == 2'b01) ? 3'd2 : 3'd0;
|
||||
assign accept_c_ref_1 = (cyc[1:0] == 2'b00) ? 4'd6 : (cyc[1:0] == 2'b01) ? 4'd5 : 4'd7;
|
||||
|
||||
// Accept D: two outputs, non-blocking assignment, empty default branch, with default
|
||||
// values set before the case.
|
||||
logic [2:0] accept_d_out_0, accept_d_ref_0;
|
||||
logic [2:0] accept_d_out_1, accept_d_ref_1;
|
||||
always_ff @(posedge clk) begin
|
||||
accept_d_out_0 <= 3'd0;
|
||||
accept_d_out_1 <= 3'd7;
|
||||
case (cyc[1:0])
|
||||
2'b00: begin accept_d_out_0 <= 3'd1; accept_d_out_1 <= 3'd6; end
|
||||
2'b01: begin accept_d_out_0 <= 3'd2; accept_d_out_1 <= 3'd5; end
|
||||
default: begin end
|
||||
endcase
|
||||
end
|
||||
always_ff @(posedge clk) begin
|
||||
accept_d_ref_0 <= (cyc[1:0] == 2'b00) ? 3'd1 : (cyc[1:0] == 2'b01) ? 3'd2 : 3'd0;
|
||||
accept_d_ref_1 <= (cyc[1:0] == 2'b00) ? 3'd6 : (cyc[1:0] == 2'b01) ? 3'd5 : 3'd7;
|
||||
end
|
||||
|
||||
// Accept E: casez with a don't-care bit.
|
||||
logic [3:0] accept_e_out, accept_e_ref;
|
||||
always_comb
|
||||
casez (cyc[1:0])
|
||||
2'b1?: accept_e_out = 4'ha;
|
||||
2'b0?: accept_e_out = 4'hb;
|
||||
endcase
|
||||
assign accept_e_ref = cyc[1] ? 4'ha : 4'hb;
|
||||
|
||||
// Accept F: an item that can never match, and an item listing multiple values.
|
||||
logic [3:0] accept_f_out, accept_f_ref;
|
||||
// verilator lint_off CASEWITHX
|
||||
always_comb
|
||||
casez (cyc[1:0])
|
||||
2'bx0: accept_f_out = 4'h0; // X can never match in 2-state
|
||||
2'b01, 2'b11: accept_f_out = 4'h5; // lists two values
|
||||
default: accept_f_out = 4'h9;
|
||||
endcase
|
||||
// verilator lint_on CASEWITHX
|
||||
assign accept_f_ref = (cyc[1:0] == 2'b01 || cyc[1:0] == 2'b11) ? 4'h5 : 4'h9;
|
||||
|
||||
// Accept G: items assign different subsets of two outputs, with default values (and an
|
||||
// unrelated output) set before the case.
|
||||
logic [3:0] accept_g_out_0, accept_g_ref_0;
|
||||
logic [3:0] accept_g_out_1, accept_g_ref_1;
|
||||
logic [3:0] accept_g_out_2, accept_g_ref_2;
|
||||
// verilator lint_off CASEINCOMPLETE
|
||||
always_comb begin
|
||||
accept_g_out_0 = 4'h0;
|
||||
accept_g_out_1 = 4'hf;
|
||||
accept_g_out_2 = 4'h3; // not assigned in the case
|
||||
case (cyc[1:0])
|
||||
2'b00: accept_g_out_0 = 4'h1;
|
||||
2'b01: accept_g_out_1 = 4'h2;
|
||||
endcase
|
||||
end
|
||||
// verilator lint_on CASEINCOMPLETE
|
||||
assign accept_g_ref_0 = (cyc[1:0] == 2'b00) ? 4'h1 : 4'h0;
|
||||
assign accept_g_ref_1 = (cyc[1:0] == 2'b01) ? 4'h2 : 4'hf;
|
||||
assign accept_g_ref_2 = 4'h3;
|
||||
|
||||
// Accept H: single output, non-blocking assignment, all selector values covered.
|
||||
logic [3:0] accept_h_out, accept_h_ref;
|
||||
always_ff @(posedge clk)
|
||||
case (cyc[1:0])
|
||||
2'b00: accept_h_out <= 4'h1;
|
||||
2'b01: accept_h_out <= 4'h2;
|
||||
2'b10: accept_h_out <= 4'h4;
|
||||
2'b11: accept_h_out <= 4'h8;
|
||||
endcase
|
||||
always_ff @(posedge clk)
|
||||
accept_h_ref <= 4'h1 << cyc[1:0];
|
||||
|
||||
// Accept I: unique0 enum case; the selector may hold an out-of-range value.
|
||||
typedef enum logic [1:0] {E0, E1, E2} e_t;
|
||||
e_t accept_i_in;
|
||||
assign accept_i_in = e_t'(cyc[1:0]);
|
||||
logic [3:0] accept_i_out, accept_i_ref;
|
||||
always_comb begin
|
||||
accept_i_out = 4'hf;
|
||||
unique0 case (accept_i_in)
|
||||
E0: accept_i_out = 4'h1;
|
||||
E1: accept_i_out = 4'h2;
|
||||
E2: accept_i_out = 4'h3;
|
||||
endcase
|
||||
end
|
||||
assign accept_i_ref = (cyc[1:0] == 2'd0) ? 4'h1
|
||||
: (cyc[1:0] == 2'd1) ? 4'h2
|
||||
: (cyc[1:0] == 2'd2) ? 4'h3 : 4'hf;
|
||||
|
||||
// Accept J: wide output, materialized as a normal (not tiny) lookup table.
|
||||
logic [8:0] accept_j_out, accept_j_ref;
|
||||
always_comb
|
||||
case (cyc[3:0])
|
||||
4'd0: accept_j_out = 9'h001;
|
||||
4'd1: accept_j_out = 9'h002;
|
||||
4'd2: accept_j_out = 9'h004;
|
||||
4'd3: accept_j_out = 9'h008;
|
||||
default: accept_j_out = 9'h010;
|
||||
endcase
|
||||
assign accept_j_ref = (cyc[3:0] < 4'd4) ? (9'h1 << cyc[3:0]) : 9'h010;
|
||||
|
||||
// Accept K: a non-constant assignment precedes the case.
|
||||
logic [3:0] accept_k_out_0, accept_k_ref_0;
|
||||
logic [3:0] accept_k_out_1, accept_k_ref_1;
|
||||
always_comb begin
|
||||
accept_k_out_1 = cyc[3:0] ^ 4'ha; // non-constant value
|
||||
case (cyc[1:0])
|
||||
2'b00: accept_k_out_0 = 4'h1;
|
||||
2'b01: accept_k_out_0 = 4'h2;
|
||||
2'b10: accept_k_out_0 = 4'h4;
|
||||
2'b11: accept_k_out_0 = 4'h8;
|
||||
endcase
|
||||
end
|
||||
assign accept_k_ref_0 = 4'h1 << cyc[1:0];
|
||||
assign accept_k_ref_1 = cyc[3:0] ^ 4'ha;
|
||||
|
||||
// Accept L: the same output is given a default value twice before the case.
|
||||
logic [3:0] accept_l_out, accept_l_ref;
|
||||
// verilator lint_off CASEINCOMPLETE
|
||||
always_comb begin
|
||||
accept_l_out = 4'h1;
|
||||
accept_l_out = 4'h6; // assigned a second time before the case
|
||||
case (cyc[1:0])
|
||||
2'b00: accept_l_out = 4'h2;
|
||||
2'b01: accept_l_out = 4'h3;
|
||||
endcase
|
||||
end
|
||||
// verilator lint_on CASEINCOMPLETE
|
||||
assign accept_l_ref = (cyc[1:0] == 2'd0) ? 4'h2 : (cyc[1:0] == 2'd1) ? 4'h3 : 4'h6;
|
||||
|
||||
// The cases below are intentionally NOT converted to a lookup table.
|
||||
|
||||
// Reject A: an item whose body is not a simple assignment.
|
||||
logic [3:0] reject_a_out, reject_a_ref;
|
||||
always_comb begin
|
||||
reject_a_out = 4'h0;
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_a_out = 4'h1;
|
||||
2'b01: if (cyc[0]) reject_a_out = 4'h2; // not a simple assignment
|
||||
default: reject_a_out = 4'h3;
|
||||
endcase
|
||||
end
|
||||
assign reject_a_ref = (cyc[1:0] == 2'd0) ? 4'h1 : (cyc[1:0] == 2'd1) ? 4'h2 : 4'h3;
|
||||
|
||||
// Reject B: an item assigns through a variable bit-select (the index is read).
|
||||
logic [3:0] reject_b_out, reject_b_ref;
|
||||
always_comb begin
|
||||
reject_b_out = 4'h0;
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_b_out[cyc[1:0]] = 1'b1;
|
||||
default: reject_b_out = 4'h5;
|
||||
endcase
|
||||
end
|
||||
assign reject_b_ref = (cyc[1:0] == 2'd0) ? 4'h1 : 4'h5;
|
||||
|
||||
// Reject C: an item assigns the same output twice.
|
||||
logic [3:0] reject_c_out, reject_c_ref;
|
||||
always_comb begin
|
||||
reject_c_out = 4'h0;
|
||||
case (cyc[1:0])
|
||||
2'b00: begin reject_c_out = 4'h1; reject_c_out = 4'h2; end
|
||||
default: reject_c_out = 4'h3;
|
||||
endcase
|
||||
end
|
||||
assign reject_c_ref = (cyc[1:0] == 2'd0) ? 4'h2 : 4'h3;
|
||||
|
||||
// Reject D: a non-constant case-item value.
|
||||
logic [1:0] reject_d_in;
|
||||
assign reject_d_in = cyc[1:0];
|
||||
logic [3:0] reject_d_out, reject_d_ref;
|
||||
always_comb begin
|
||||
reject_d_out = 4'h0;
|
||||
case (cyc[1:0])
|
||||
reject_d_in: reject_d_out = 4'h7; // non-constant item value
|
||||
default: reject_d_out = 4'h9;
|
||||
endcase
|
||||
end
|
||||
assign reject_d_ref = 4'h7; // reject_d_in always equals the case expression
|
||||
|
||||
// Reject E: all items are empty.
// The eight items cover every value of cyc[2:0] but none assigns anything, so the output
// keeps the pre-case value 4'h7 on every cycle.
|
||||
logic [3:0] reject_e_out, reject_e_ref;
|
||||
always_comb begin
|
||||
// The only assignment to reject_e_out in this block
reject_e_out = 4'h7;
|
||||
case (cyc[2:0])
|
||||
3'd0: ;
|
||||
3'd1: ;
|
||||
3'd2: ;
|
||||
3'd3: ;
|
||||
3'd4: ;
|
||||
3'd5: ;
|
||||
3'd6: ;
|
||||
3'd7: ;
|
||||
endcase
|
||||
end
|
||||
// Reference value: the constant pre-case value
assign reject_e_ref = 4'h7;
|
||||
|
||||
// Reject F: an item uses a delayed (intra-assignment) assignment.
// Only the 2'b00 item carries the '#1' intra-assignment delay; the default item is a plain
// non-blocking assignment.
|
||||
logic [3:0] reject_f_out, reject_f_ref;
|
||||
always_ff @(posedge clk)
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_f_out <= #1 4'h1; // delayed assignment
|
||||
default: reject_f_out <= 4'h2;
|
||||
endcase
|
||||
// Reference: the same two assignments (including the '#1' delay), selected with if/else
// instead of a case statement
always_ff @(posedge clk)
|
||||
if (cyc[1:0] == 2'b00) reject_f_ref <= #1 4'h1;
|
||||
else reject_f_ref <= 4'h2;
|
||||
|
||||
// Reject G: an output assigned with both blocking and non-blocking assignments. The three
|
||||
// variants exercise the distinct ways the assignment kinds conflict. The deliberate
|
||||
// mixing warnings are waived.
// Variants 1 and 2 list only two of the four selector values and have no default item
// (presumably the reason CASEINCOMPLETE is waived as well); their pre-case assignment
// supplies the value for the remaining selector values.
|
||||
// verilator lint_off BLKANDNBLK
|
||||
// verilator lint_off COMBDLY
|
||||
// verilator lint_off CASEINCOMPLETE
|
||||
// Variant 0: the items mix blocking and non-blocking assignments to the same output
// (the 2'b01 item is non-blocking, the other two are blocking).
|
||||
logic [3:0] reject_g_out_0, reject_g_ref_0;
|
||||
always_comb
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_g_out_0 = 4'h1;
|
||||
2'b01: reject_g_out_0 <= 4'h2;
|
||||
default: reject_g_out_0 = 4'h3;
|
||||
endcase
|
||||
// Reference value, written without a case statement
assign reject_g_ref_0 = (cyc[1:0] == 2'b00) ? 4'h1 : (cyc[1:0] == 2'b01) ? 4'h2 : 4'h3;
|
||||
// Variant 1: blocking items, but the pre-case default is a non-blocking assignment.
|
||||
logic [3:0] reject_g_out_1, reject_g_ref_1;
|
||||
always_comb begin
|
||||
reject_g_out_1 <= 4'h0;
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_g_out_1 = 4'h1;
|
||||
2'b01: reject_g_out_1 = 4'h2;
|
||||
endcase
|
||||
end
|
||||
// Reference value: 4'h0 (the pre-case value) for the selector values with no item
assign reject_g_ref_1 = (cyc[1:0] == 2'b00) ? 4'h1 : (cyc[1:0] == 2'b01) ? 4'h2 : 4'h0;
|
||||
// Variant 2: non-blocking items, but the pre-case default is a blocking assignment.
|
||||
logic [3:0] reject_g_out_2, reject_g_ref_2;
|
||||
always_comb begin
|
||||
reject_g_out_2 = 4'h0;
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_g_out_2 <= 4'h1;
|
||||
2'b01: reject_g_out_2 <= 4'h2;
|
||||
endcase
|
||||
end
|
||||
// Reference value: 4'h0 (the pre-case value) for the selector values with no item
assign reject_g_ref_2 = (cyc[1:0] == 2'b00) ? 4'h1 : (cyc[1:0] == 2'b01) ? 4'h2 : 4'h0;
|
||||
// verilator lint_on CASEINCOMPLETE
|
||||
// verilator lint_on COMBDLY
|
||||
// verilator lint_on BLKANDNBLK
|
||||
|
||||
// Reject H: items assign a real (non-packed) output.
// Every item assigns a real literal; the structure is otherwise a plain constant decoder.
|
||||
real reject_h_out, reject_h_ref;
|
||||
always_comb
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_h_out = 1.5;
|
||||
2'b01: reject_h_out = 2.5;
|
||||
default: reject_h_out = 9.0;
|
||||
endcase
|
||||
// Reference value, selected with conditional operators instead of a case statement
always_comb reject_h_ref = (cyc[1:0] == 2'b00) ? 1.5 : (cyc[1:0] == 2'b01) ? 2.5 : 9.0;
|
||||
|
||||
// Reject I: items assign a string (non-packed) output.
// Every item assigns a string literal; the structure is otherwise a plain constant decoder.
|
||||
string reject_i_out, reject_i_ref;
|
||||
always_comb
|
||||
case (cyc[1:0])
|
||||
2'b00: reject_i_out = "zero";
|
||||
2'b01: reject_i_out = "one";
|
||||
default: reject_i_out = "other";
|
||||
endcase
|
||||
// Reference value, selected with conditional operators instead of a case statement
always_comb reject_i_ref = (cyc[1:0] == 2'b00) ? "zero" : (cyc[1:0] == 2'b01) ? "one" : "other";
|
||||
|
||||
// Test driver/checker
// On every rising clock edge each case output is compared with its reference, then the
// cycle counter advances. The run ends on the edge where cyc (before the increment) is 32.
// NOTE(review): checkh/checkr/checks are macros defined outside this section; presumably
// they compare hex, real and string values respectively and stop on mismatch - confirm.
|
||||
always @(posedge clk) begin
|
||||
// Outputs of the "accept" cases declared earlier in the module
`checkh(accept_a_out, accept_a_ref);
|
||||
`checkh(accept_b_out, accept_b_ref);
|
||||
`checkh(accept_c_out_0, accept_c_ref_0);
|
||||
`checkh(accept_c_out_1, accept_c_ref_1);
|
||||
`checkh(accept_d_out_0, accept_d_ref_0);
|
||||
`checkh(accept_d_out_1, accept_d_ref_1);
|
||||
`checkh(accept_e_out, accept_e_ref);
|
||||
`checkh(accept_f_out, accept_f_ref);
|
||||
`checkh(accept_g_out_0, accept_g_ref_0);
|
||||
`checkh(accept_g_out_1, accept_g_ref_1);
|
||||
`checkh(accept_g_out_2, accept_g_ref_2);
|
||||
`checkh(accept_h_out, accept_h_ref);
|
||||
`checkh(accept_i_out, accept_i_ref);
|
||||
`checkh(accept_j_out, accept_j_ref);
|
||||
`checkh(accept_k_out_0, accept_k_ref_0);
|
||||
`checkh(accept_k_out_1, accept_k_ref_1);
|
||||
`checkh(accept_l_out, accept_l_ref);
|
||||
// Outputs of the "reject" cases (the ones that must not become lookup tables)
`checkh(reject_a_out, reject_a_ref);
|
||||
`checkh(reject_b_out, reject_b_ref);
|
||||
`checkh(reject_c_out, reject_c_ref);
|
||||
`checkh(reject_d_out, reject_d_ref);
|
||||
`checkh(reject_e_out, reject_e_ref);
|
||||
`checkh(reject_f_out, reject_f_ref);
|
||||
`checkh(reject_g_out_0, reject_g_ref_0);
|
||||
`checkh(reject_g_out_1, reject_g_ref_1);
|
||||
`checkh(reject_g_out_2, reject_g_ref_2);
|
||||
// The real and string outputs use their own check macros
`checkr(reject_h_out, reject_h_ref);
|
||||
`checks(reject_i_out, reject_i_ref);
|
||||
|
||||
// Non-blocking increment: the comparison below still sees the pre-increment value
cyc <= cyc + 32'd1;
|
||||
if (cyc == 32'd32) begin
|
||||
$write("*-* All Finished *-*\n");
|
||||
$finish;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
|
||||
# Reuse the t_case_table_tiny.v design instead of a source file named after this driver
test.top_filename = "t/t_case_table_tiny.v"
|
||||
|
||||
# Build with the case-to-table conversion switched off
test.compile(verilator_flags2=['--binary', '--stats', '-fno-case-table'])
|
||||
|
||||
# Run the design's own checker
test.execute()
|
||||
|
||||
# With -fno-case-table both table statistics are expected to report 0 conversions
# (the third argument is presumably the expected value of the captured count - confirm)
test.file_grep(test.stats, r'Optimizations, Cases table tiny\s+(\d+)', 0)
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 0)
|
||||
|
||||
test.passes()
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of either the GNU Lesser General Public License Version 3
|
||||
# or the Perl Artistic License Version 2.0.
|
||||
# SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
|
||||
test.compile(verilator_flags2=['--binary', '--stats'])
|
||||
|
||||
test.file_grep(test.stats, r'Optimizations, Cases table normal\s+(\d+)', 1)
|
||||
test.file_grep(test.stats, r'Optimizations, DFG, Peephole, remove var\s+(\d+)', 2)
|
||||
test.file_grep_not(test.stats, r'ConstPool, Constants emitted') # Removed by V3Dead later
|
||||
|
||||
test.passes()
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain.
|
||||
// SPDX-FileCopyrightText: 2026 Wilson Snyder
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
// verilog_format: off
|
||||
`define stop $stop
|
||||
`define checkh(gotv,expv) do if ((gotv) !== (expv)) begin $write("%%Error: %s:%0d: got=%0x exp=%0x (%s !== %s)\n", `__FILE__,`__LINE__, (gotv), (expv), `"gotv`", `"expv`"); `stop; end while(0);
|
||||
// verilog_format: on
|
||||
|
||||
// Self-clocked test module: a 16-bit decoder case whose result is never actually observed,
// because it is only selected by a condition that is constant 0.
module t;
|
||||
// Free-running clock, toggled every 5 time units
logic clk = 1'b0;
|
||||
always #5 clk = ~clk;
|
||||
|
||||
// Cycle counter; its low four bits select the case item below
logic [31:0] cyc = 0;
|
||||
|
||||
// Converted to case table in const pool, but proven unused by Dfg
|
||||
logic [15:0] out;
|
||||
always_comb begin
|
||||
case (cyc[3:0])
|
||||
4'd0: out = 16'h1111;
|
||||
4'd1: out = 16'h2222;
|
||||
4'd2: out = 16'h4444;
|
||||
4'd3: out = 16'h8888;
|
||||
default: out = 16'h0f0f;
|
||||
endcase
|
||||
end
|
||||
|
||||
// Complicated way to write constant 0 that only Dfg can decipher
// (every bit is cyc[0] & ~cyc[0], which is 0 for either value of cyc[0])
|
||||
wire [63:0] convoluted_zero = (({64{cyc[0]}} & ~{64{cyc[0]}}));
|
||||
|
||||
// The reduction-AND of an all-zero vector is 0, so 'zero' always takes the 16'd0 arm and
// 'out' is never selected
wire logic [15:0] zero = &convoluted_zero ? out : 16'd0;
|
||||
|
||||
// Test driver/checker
// Checks 'zero' on every rising clock edge and finishes on the edge where cyc is 32
|
||||
always @(posedge clk) begin
|
||||
`checkh(zero, 16'd0);
|
||||
cyc <= cyc + 32'd1;
|
||||
if (cyc == 32'd32) begin
|
||||
$write("*-* All Finished *-*\n");
|
||||
$finish;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('simulator')
|
||||
|
||||
test.compile(verilator_flags2=["--stats"])
|
||||
test.compile(verilator_flags2=["--stats", "-fno-case-table"])
|
||||
|
||||
if test.vlt_all:
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('simulator')
|
||||
|
||||
test.compile(verilator_flags2=["--stats"])
|
||||
test.compile(verilator_flags2=["--stats", "-fno-case-table"])
|
||||
|
||||
if test.vlt_all:
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('simulator')
|
||||
|
||||
test.compile(verilator_flags2=["--stats"])
|
||||
test.compile(verilator_flags2=["--stats", "-fno-case-table"])
|
||||
|
||||
if test.vlt_all:
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('simulator')
|
||||
|
||||
test.compile(verilator_flags2=["--stats"])
|
||||
test.compile(verilator_flags2=["--stats", "-fno-case-table"])
|
||||
|
||||
if test.vlt_all:
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 2)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('simulator')
|
||||
|
||||
test.compile(verilator_flags2=["--stats"])
|
||||
test.compile(verilator_flags2=["--stats", "-fno-case-table"])
|
||||
|
||||
if test.vlt_all:
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('simulator')
|
||||
|
||||
test.compile(verilator_flags2=["--stats"])
|
||||
test.compile(verilator_flags2=["--stats", "-fno-case-table"])
|
||||
|
||||
if test.vlt_all:
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import vltest_bootstrap
|
|||
|
||||
test.scenarios('simulator')
|
||||
|
||||
test.compile(verilator_flags2=["--stats"])
|
||||
test.compile(verilator_flags2=["--stats", "-fno-case-table"])
|
||||
|
||||
if test.vlt_all:
|
||||
test.file_grep(test.stats, r'Optimizations, Tables created\s+(\d+)', 1)
|
||||
|
|
|
|||
Loading…
Reference in New Issue