From 277611bcdd4d9420c82fa0c363aebdba47f57aee Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 16 Jun 2025 23:14:24 +0100 Subject: [PATCH] Add DFG binToOneHot pass to generate one-hot decoders (#6096) Somewhat commonly, there is code out there that compares an expression (or variable) against many different constants, e.g. a one-hot decoder: ```systemverilog assign oneHot = {x == 3, x == 2, x == 1, x == 0}; ``` If the width of the expression is sufficiently large, this can blow up a GCC pass and take an egregious amount of memory and time to compile. This adds a new DFG pass that generates a cheap one-hot decoder, computed as: ```systemverilog wire [$bits(x)-1:0] idx = x; reg tab [1<<$bits(x)] = '{default: 0}; reg [$bits(x)-1:0] pre = '0; always_comb begin tab[pre] = 0; tab[idx] = 1; pre = idx; // This assignment marked to avoid a false UNOPTFLAT end ``` We then replace the comparisons `x == CONST` with `tab[CONST]`. This is generally performance neutral, but avoids the compile time and memory blowup with GCC (128GB+ -> 1GB in one example). We do not apply this if the comparisons seem to be part of a `COMPARE ? val : COND` conditional tree, which C++ compilers can turn into jump tables. This enables all XiangShan configurations from RTLMeter to now build with GCC, so this patch enables those in the nightly runs. 
--- .github/workflows/rtlmeter.yml | 6 +- include/verilated_types.h | 5 + src/V3AstNodeOther.h | 2 +- src/V3AstNodes.cpp | 1 + src/V3Dfg.h | 5 + src/V3DfgPasses.cpp | 244 ++++++++++++++ src/V3DfgPasses.h | 13 + src/V3DfgVertices.h | 2 + test_regress/t/t_dfg_bin_to_one_hot.py | 21 ++ test_regress/t/t_dfg_bin_to_one_hot.v | 421 +++++++++++++++++++++++++ 10 files changed, 716 insertions(+), 4 deletions(-) create mode 100755 test_regress/t/t_dfg_bin_to_one_hot.py create mode 100644 test_regress/t/t_dfg_bin_to_one_hot.v diff --git a/.github/workflows/rtlmeter.yml b/.github/workflows/rtlmeter.yml index 0472470c9..bb972dfab 100644 --- a/.github/workflows/rtlmeter.yml +++ b/.github/workflows/rtlmeter.yml @@ -72,10 +72,10 @@ jobs: - "VeeR-EL2:hiperf*" - "Vortex:mini:*" - "Vortex:sane:*" - # - "XiangShan:default-chisel3:* !*:linux" - # - "XiangShan:default-chisel6:* !*:linux" + - "XiangShan:default-chisel3:* !*:linux" + - "XiangShan:default-chisel6:* !*:linux" - "XiangShan:mini-chisel3:* !*:linux" - # - "XiangShan:mini-chisel6:* !*:linux" + - "XiangShan:mini-chisel6:* !*:linux" - "XuanTie-E902:*" - "XuanTie-E906:*" - "XuanTie-C906:*" diff --git a/include/verilated_types.h b/include/verilated_types.h index 70e131423..91f9e8cba 100644 --- a/include/verilated_types.h +++ b/include/verilated_types.h @@ -1332,6 +1332,11 @@ public: const WData* data() const { return &m_storage[0]; } constexpr std::size_t size() const { return N_Depth; } + + void fill(const T_Value& value) { + std::fill(std::begin(m_storage), std::end(m_storage), value); + } + // To fit C++14 template int find_length(int dimension, std::false_type) const { diff --git a/src/V3AstNodeOther.h b/src/V3AstNodeOther.h index 5aa5b6a0f..af08c34e6 100644 --- a/src/V3AstNodeOther.h +++ b/src/V3AstNodeOther.h @@ -1916,7 +1916,7 @@ class AstVar final : public AstNode { bool m_isWrittenByDpi : 1; // This variable can be written by a DPI Export bool m_isWrittenBySuspendable : 1; // This variable can be written by a suspendable 
process bool m_ignorePostWrite : 1; // Ignore writes in 'Post' blocks during ordering - bool m_ignoreSchedWrite : 1; // Ignore writes in scheduling (for coverage increments) + bool m_ignoreSchedWrite : 1; // Ignore writes in scheduling (for special optimizations) void init() { m_ansi = false; diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 8981bfc0c..2e6e8b792 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -3004,6 +3004,7 @@ void AstCMethodHard::setPurity() { {"evaluate", false}, {"evaluation", false}, {"exists", true}, + {"fill", false}, {"find", true}, {"find_first", true}, {"find_first_index", true}, diff --git a/src/V3Dfg.h b/src/V3Dfg.h index efb610aa3..a9b495a4e 100644 --- a/src/V3Dfg.h +++ b/src/V3Dfg.h @@ -284,6 +284,11 @@ public: // or nullptr if no such variable exists in the graph. This is O(fanout). DfgVarPacked* getResultVar() VL_MT_DISABLED; + // If the node has a single sink, return it, otherwise return nullptr + DfgVertex* singleSink() const { + return m_sinksp && !m_sinksp->m_nextp ? 
m_sinksp->m_sinkp : nullptr; + } + // Unlink from container (graph or builder), then delete this vertex void unlinkDelete(DfgGraph& dfg) VL_MT_DISABLED; diff --git a/src/V3DfgPasses.cpp b/src/V3DfgPasses.cpp index 5b6ffb81a..6fd90bb86 100644 --- a/src/V3DfgPasses.cpp +++ b/src/V3DfgPasses.cpp @@ -25,6 +25,11 @@ VL_DEFINE_DEBUG_FUNCTIONS; +V3DfgBinToOneHotContext::~V3DfgBinToOneHotContext() { + V3Stats::addStat("Optimizations, DFG " + m_label + " BinToOneHot, decoders created", + m_decodersCreated); +} + V3DfgCseContext::~V3DfgCseContext() { V3Stats::addStat("Optimizations, DFG " + m_label + " CSE, expressions eliminated", m_eliminated); @@ -226,6 +231,244 @@ void V3DfgPasses::removeUnused(DfgGraph& dfg) { } } +void V3DfgPasses::binToOneHot(DfgGraph& dfg, V3DfgBinToOneHotContext& ctx) { + const auto userDataInUse = dfg.userDataInUse(); + + // Structure to keep track of comparison details + struct Term final { + DfgVertex* m_vtxp; // Vertex to replace + bool m_inv; // '!=', instead of '==' + Term() = default; + Term(DfgVertex* vtxp, bool inv) + : m_vtxp{vtxp} + , m_inv{inv} {} + }; + + // Map from 'value beign compared' -> 'terms', stored in DfgVertex::user() + using Val2Terms = std::map>; + // Allocator for Val2Terms, so it's cleaned up on return + std::deque val2TermsAllocator; + // List of vertices that are used as sources + std::vector srcps; + + // Only consider input variables from a reasonable range: + // - not too big to avoid huge tables, you are doomed anyway at that point.. 
+ // - not too small, as it's probably not worth it + constexpr uint32_t WIDTH_MIN = 7; + constexpr uint32_t WIDTH_MAX = 20; + const auto widthOk = [](const DfgVertex* vtxp) { + const uint32_t width = vtxp->width(); + return WIDTH_MIN <= width && width <= WIDTH_MAX; + }; + + // Do not convert terms that look like they are in a Cond tree + // the C++ compiler can generate jump tables for these + const std::function useOk + = [&](const DfgVertex* vtxp, bool inv) -> bool { + // Go past a single 'Not' sink, which is common + if (DfgVertex* const sinkp = vtxp->singleSink()) { + if (sinkp->is()) return useOk(sinkp, !inv); + } + return !vtxp->findSink([vtxp, inv](const DfgCond& sink) { + if (sink.condp() != vtxp) return false; + return inv ? sink.thenp()->is() : sink.elsep()->is(); + }); + }; + + // Look at all comparison nodes and build the 'Val2Terms' map for each source vertex + uint32_t nTerms = 0; + for (DfgVertex& vtx : dfg.opVertices()) { + DfgVertex* srcp = nullptr; + uint32_t val = 0; + bool inv = false; + if (DfgEq* const eqp = vtx.cast()) { + DfgConst* const constp = eqp->lhsp()->cast(); + if (!constp || !widthOk(constp) || !useOk(eqp, false)) continue; + srcp = eqp->rhsp(); + val = constp->toU32(); + inv = false; + } else if (DfgNeq* const neqp = vtx.cast()) { + DfgConst* const constp = neqp->lhsp()->cast(); + if (!constp || !widthOk(constp) || !useOk(neqp, true)) continue; + srcp = neqp->rhsp(); + val = constp->toU32(); + inv = true; + } else if (DfgRedAnd* const redAndp = vtx.cast()) { + srcp = redAndp->srcp(); + if (!widthOk(srcp) || !useOk(redAndp, false)) continue; + val = (1U << srcp->width()) - 1; + inv = false; + } else if (DfgRedOr* const redOrp = vtx.cast()) { + srcp = redOrp->srcp(); + if (!widthOk(srcp) || !useOk(redOrp, true)) continue; + val = 0; + inv = true; + } else { + // Not a comparison-like vertex + continue; + } + // Grab the Val2Terms entry + Val2Terms*& val2Termspr = srcp->user(); + if (!val2Termspr) { + // Remeber and allocate on first 
encounter + srcps.emplace_back(srcp); + val2TermsAllocator.emplace_back(); + val2Termspr = &val2TermsAllocator.back(); + } + // Record term + (*val2Termspr)[val].emplace_back(&vtx, inv); + ++nTerms; + } + + // Somewhat arbitrarily, only apply if more than 64 unique comparisons are required + constexpr uint32_t TERM_LIMIT = 65; + // This should hold, otherwise we do redundant work gathering terms that will never be used + static_assert((1U << WIDTH_MIN) >= TERM_LIMIT, "TERM_LIMIT too big relative to 2**WIDTH_MIN"); + + // Fast path exit if we surely don't need to convet anything + if (nTerms < TERM_LIMIT) return; + + // Sequence numbers for name generation + size_t nTables = 0; + + // Create decoders for each srcp + for (DfgVertex* const srcp : srcps) { + const Val2Terms& val2Terms = *srcp->getUser(); + + // If not enough terms in this vertex, ignore + if (val2Terms.size() < TERM_LIMIT) continue; + + // Width of the decoded binary value + const uint32_t width = srcp->width(); + // Number of bits in the input operand + const uint32_t nBits = 1U << width; + + // Construct the decoder by converting many "const == vtx" by: + // - Adding a single decoder block, where 'tab' is zero initialized: + // always_comb begin + // tab[pre] = 0; + // tab[vtx] = 1; + // pre = vtx; + // end + // We mark 'pre' so the write is ignored during scheduling, so this + // won't cause a combinational cycle. + // Note that albeit this looks like partial udpates to 'tab', the + // actual result is that only one value in 'tab' is ever one, while + // all the others are always zero. 
+ // - and replace the comparisons with 'tab[const]' + + FileLine* const flp = srcp->fileline(); + + // Required data types + AstNodeDType* const idxDTypep = srcp->dtypep(); + AstNodeDType* const bitDTypep = DfgVertex::dtypeForWidth(1); + AstUnpackArrayDType* const tabDTypep = new AstUnpackArrayDType{ + flp, bitDTypep, new AstRange{flp, static_cast(nBits - 1), 0}}; + v3Global.rootp()->typeTablep()->addTypesp(tabDTypep); + + // The index variable + DfgVarPacked* const idxVtxp = [&]() { + // If there is an existing result variable, use that, otherwise create a new variable + DfgVarPacked* varp = srcp->getResultVar(); + if (!varp) { + const std::string name = dfg.makeUniqueName("BinToOneHot_Idx", nTables); + varp = dfg.makeNewVar(flp, name, idxDTypep)->as(); + varp->varp()->isInternal(true); + } + varp->setHasModRefs(); + return varp; + }(); + // The previous index variable - we don't need a vertex for this + AstVar* const preVarp = [&]() { + const std::string name = dfg.makeUniqueName("BinToOneHot_Pre", nTables); + AstVar* const varp = new AstVar{flp, VVarType::MODULETEMP, name, idxDTypep}; + dfg.modulep()->addStmtsp(varp); + varp->isInternal(true); + varp->noReset(true); + varp->setIgnoreSchedWrite(); + return varp; + }(); + // The table variable + DfgVarArray* const tabVtxp = [&]() { + const std::string name = dfg.makeUniqueName("BinToOneHot_Tab", nTables); + DfgVarArray* const varp = dfg.makeNewVar(flp, name, tabDTypep)->as(); + varp->varp()->isInternal(true); + varp->varp()->noReset(true); + varp->setHasModRefs(); + return varp; + }(); + + ++nTables; + ++ctx.m_decodersCreated; + + // Initialize 'tab' and 'pre' variables statically + AstInitialStatic* const initp = new AstInitialStatic{flp, nullptr}; + dfg.modulep()->addStmtsp(initp); + { // pre = 0 + initp->addStmtsp(new AstAssign{ + flp, // + new AstVarRef{flp, preVarp, VAccess::WRITE}, // + new AstConst{flp, AstConst::WidthedValue{}, static_cast(width), 0}}); + } + { // tab.fill(0) + AstCMethodHard* const callp 
= new AstCMethodHard{ + flp, new AstVarRef{flp, tabVtxp->varp(), VAccess::WRITE}, "fill"}; + callp->addPinsp(new AstConst{flp, AstConst::BitFalse{}}); + callp->dtypeSetVoid(); + initp->addStmtsp(callp->makeStmt()); + } + + // Build the decoder logic + AstAlways* const logicp = new AstAlways{flp, VAlwaysKwd::ALWAYS_COMB, nullptr, nullptr}; + dfg.modulep()->addStmtsp(logicp); + { // tab[pre] = 0; + logicp->addStmtsp(new AstAssign{ + flp, // + new AstArraySel{flp, new AstVarRef{flp, tabVtxp->varp(), VAccess::WRITE}, + new AstVarRef{flp, preVarp, VAccess::READ}}, // + new AstConst{flp, AstConst::BitFalse{}}}); + } + { // tab[idx] = 1 + logicp->addStmtsp(new AstAssign{ + flp, // + new AstArraySel{flp, new AstVarRef{flp, tabVtxp->varp(), VAccess::WRITE}, + new AstVarRef{flp, idxVtxp->varp(), VAccess::READ}}, // + new AstConst{flp, AstConst::BitTrue{}}}); + } + { // pre = idx + logicp->addStmtsp(new AstAssign{flp, // + new AstVarRef{flp, preVarp, VAccess::WRITE}, // + new AstVarRef{flp, idxVtxp->varp(), VAccess::READ}}); + } + + // Replace terms with ArraySels + for (const auto& pair : val2Terms) { + const uint32_t val = pair.first; + const std::vector& terms = pair.second; + // Create the ArraySel + FileLine* const flp = terms.front().m_vtxp->fileline(); + DfgArraySel* const aselp = new DfgArraySel{dfg, flp, bitDTypep}; + aselp->fromp(tabVtxp); + aselp->bitp(new DfgConst{dfg, flp, width, val}); + // The inverted value, if needed + DfgNot* notp = nullptr; + // Repalce the terms + for (const Term& term : terms) { + if (term.m_inv) { + if (!notp) { + notp = new DfgNot{dfg, flp, bitDTypep}; + notp->srcp(aselp); + } + term.m_vtxp->replaceWith(notp); + } else { + term.m_vtxp->replaceWith(aselp); + } + VL_DO_DANGLING(term.m_vtxp->unlinkDelete(dfg), term.m_vtxp); + } + } + } +} + void V3DfgPasses::eliminateVars(DfgGraph& dfg, V3DfgEliminateVarsContext& ctx) { const auto userDataInUse = dfg.userDataInUse(); @@ -355,6 +598,7 @@ void V3DfgPasses::optimize(DfgGraph& dfg, 
V3DfgOptimizationContext& ctx) { apply(3, "input ", [&]() {}); apply(4, "inlineVars ", [&]() { inlineVars(dfg); }); apply(4, "cse0 ", [&]() { cse(dfg, ctx.m_cseContext0); }); + apply(4, "binToOneHot ", [&]() { binToOneHot(dfg, ctx.m_binToOneHotContext); }); if (v3Global.opt.fDfgPeephole()) { apply(4, "peephole ", [&]() { peephole(dfg, ctx.m_peepholeContext); }); // We just did CSE above, so without peephole there is no need to run it again these diff --git a/src/V3DfgPasses.h b/src/V3DfgPasses.h index ef8984786..efb33a505 100644 --- a/src/V3DfgPasses.h +++ b/src/V3DfgPasses.h @@ -30,6 +30,16 @@ class DfgGraph; // Various context objects hold data that need to persist across invocations // of a DFG pass. +class V3DfgBinToOneHotContext final { + const std::string m_label; // Label to apply to stats + +public: + VDouble0 m_decodersCreated; // Number of bianry to one-hot decoders created + explicit V3DfgBinToOneHotContext(const std::string& label) + : m_label{label} {} + ~V3DfgBinToOneHotContext() VL_MT_DISABLED; +}; + class V3DfgCseContext final { const std::string m_label; // Label to apply to stats @@ -82,6 +92,7 @@ public: VDouble0 m_nonRepWidth; // Equations non-representable due to width mismatch VDouble0 m_resultEquations; // Number of result combinational equations + V3DfgBinToOneHotContext m_binToOneHotContext{m_label}; V3DfgCseContext m_cseContext0{m_label + " 1st"}; V3DfgCseContext m_cseContext1{m_label + " 2nd"}; V3DfgPeepholeContext m_peepholeContext{m_label}; @@ -117,6 +128,8 @@ AstModule* dfgToAst(DfgGraph&, V3DfgOptimizationContext&) VL_MT_DISABLED; // Intermediate/internal operations //=========================================================================== +// Construct binary to oneHot decoders +void binToOneHot(DfgGraph&, V3DfgBinToOneHotContext&) VL_MT_DISABLED; // Common subexpression elimination void cse(DfgGraph&, V3DfgCseContext&) VL_MT_DISABLED; // Inline fully driven variables diff --git a/src/V3DfgVertices.h b/src/V3DfgVertices.h index 
206342384..d4375d6e7 100644 --- a/src/V3DfgVertices.h +++ b/src/V3DfgVertices.h @@ -110,6 +110,8 @@ public: return static_cast(num().toUInt()); } + uint32_t toU32() const { return static_cast(num().toUInt()); } + bool isZero() const { return num().isEqZero(); } bool isOnes() const { return num().isEqAllOnes(width()); } diff --git a/test_regress/t/t_dfg_bin_to_one_hot.py b/test_regress/t/t_dfg_bin_to_one_hot.py new file mode 100755 index 000000000..1612ac838 --- /dev/null +++ b/test_regress/t/t_dfg_bin_to_one_hot.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2025 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('vlt') + +test.compile(verilator_flags2=["--stats"]) + +test.execute() + +test.file_grep(test.stats, r'Optimizations, DFG pre inline BinToOneHot, decoders created\s+(\d+)', + 2) + +test.passes() diff --git a/test_regress/t/t_dfg_bin_to_one_hot.v b/test_regress/t/t_dfg_bin_to_one_hot.v new file mode 100644 index 000000000..165a23f3c --- /dev/null +++ b/test_regress/t/t_dfg_bin_to_one_hot.v @@ -0,0 +1,421 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2025 by Wilson Snyder. 
+// SPDX-License-Identifier: CC0-1.0 + +`define stop $stop +`define check(got ,exp) do if ((got) !== (exp)) begin $write("%%Error: %s:%0d: cyc=%0d got='h%x exp='h%x\n", `__FILE__,`__LINE__, cyc, (got), (exp)); `stop; end while(0) + +module t (/*AUTOARG*/ + // Inputs + clk + ); + input clk; + + reg [31:0] cyc = 0; + reg [6:0] cntA = 0; + reg [6:0] cntB = 0; + reg [6:0] cntC = 0; + + always @ (posedge clk) begin + cyc <= cyc + 1; + if (cyc[0]) cntA <= cntA + 7'd1; + if (cntA[0]) cntB <= cntB + 7'd1; + if (cntB[0]) cntC <= cntC + 7'd1; + + if (cyc == 99) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end + + // Should create decoder + wire [127:0] cntAOneHot = { + cntA == 7'd127, + cntA == 7'd126, + cntA == 7'd125, + cntA == 7'd124, + cntA == 7'd123, + cntA == 7'd122, + cntA == 7'd121, + cntA == 7'd120, + cntA == 7'd119, + cntA == 7'd118, + cntA == 7'd117, + cntA == 7'd116, + cntA == 7'd115, + cntA == 7'd114, + cntA == 7'd113, + cntA == 7'd112, + cntA == 7'd111, + cntA == 7'd110, + cntA == 7'd109, + cntA == 7'd108, + cntA == 7'd107, + cntA == 7'd106, + cntA == 7'd105, + cntA == 7'd104, + cntA == 7'd103, + cntA == 7'd102, + cntA == 7'd101, + cntA == 7'd100, + cntA == 7'd99, + cntA == 7'd98, + cntA == 7'd97, + cntA == 7'd96, + cntA == 7'd95, + cntA == 7'd94, + cntA == 7'd93, + cntA == 7'd92, + cntA == 7'd91, + cntA == 7'd90, + cntA == 7'd89, + cntA == 7'd88, + cntA == 7'd87, + cntA == 7'd86, + cntA == 7'd85, + cntA == 7'd84, + cntA == 7'd83, + cntA == 7'd82, + cntA == 7'd81, + cntA == 7'd80, + cntA == 7'd79, + cntA == 7'd78, + cntA == 7'd77, + cntA == 7'd76, + cntA == 7'd75, + cntA == 7'd74, + cntA == 7'd73, + cntA == 7'd72, + cntA == 7'd71, + cntA == 7'd70, + cntA == 7'd69, + cntA == 7'd68, + cntA == 7'd67, + cntA == 7'd66, + cntA == 7'd65, + cntA == 7'd64, + cntA == 7'd63, + cntA == 7'd62, + cntA == 7'd61, + cntA == 7'd60, + cntA == 7'd59, + cntA == 7'd58, + cntA == 7'd57, + cntA == 7'd56, + cntA == 7'd55, + cntA == 7'd54, + cntA == 7'd53, + cntA == 
7'd52, + cntA == 7'd51, + cntA == 7'd50, + cntA == 7'd49, + cntA == 7'd48, + cntA == 7'd47, + cntA == 7'd46, + cntA == 7'd45, + cntA == 7'd44, + cntA == 7'd43, + cntA == 7'd42, + cntA == 7'd41, + cntA == 7'd40, + cntA == 7'd39, + cntA == 7'd38, + cntA == 7'd37, + cntA == 7'd36, + cntA == 7'd35, + cntA == 7'd34, + cntA == 7'd33, + cntA == 7'd32, + cntA == 7'd31, + cntA == 7'd30, + cntA == 7'd29, + cntA == 7'd28, + cntA == 7'd27, + cntA == 7'd26, + cntA == 7'd25, + cntA == 7'd24, + cntA == 7'd23, + cntA == 7'd22, + cntA == 7'd21, + cntA == 7'd20, + cntA == 7'd19, + cntA == 7'd18, + cntA == 7'd17, + cntA == 7'd16, + cntA == 7'd15, + cntA == 7'd14, + cntA == 7'd13, + cntA == 7'd12, + cntA == 7'd11, + cntA == 7'd10, + cntA == 7'd9, + cntA == 7'd8, + cntA == 7'd7, + cntA == 7'd6, + cntA == 7'd5, + cntA == 7'd4, + cntA == 7'd3, + cntA == 7'd2, + cntA == 7'd1, + cntA == 7'd0 + }; + + // Should create decoder + wire stupidWayToWriteConstOne = 1'b0 + + (cntB == 7'd127) + + (cntB == 7'd126) + + (cntB == 7'd125) + + (cntB == 7'd124) + + (cntB == 7'd123) + + (cntB == 7'd122) + + (cntB == 7'd121) + + (cntB == 7'd120) + + (cntB == 7'd119) + + (cntB == 7'd118) + + (cntB == 7'd117) + + (cntB == 7'd116) + + (cntB == 7'd115) + + (cntB == 7'd114) + + (cntB == 7'd113) + + (cntB == 7'd112) + + (cntB == 7'd111) + + (cntB == 7'd110) + + (cntB == 7'd109) + + (cntB == 7'd108) + + (cntB == 7'd107) + + (cntB == 7'd106) + + (cntB == 7'd105) + + (cntB == 7'd104) + + (cntB == 7'd103) + + (cntB == 7'd102) + + (cntB == 7'd101) + + (cntB == 7'd100) + + (cntB == 7'd99) + + (cntB == 7'd98) + + (cntB == 7'd97) + + (cntB == 7'd96) + + (cntB == 7'd95) + + (cntB == 7'd94) + + (cntB == 7'd93) + + (cntB == 7'd92) + + (cntB == 7'd91) + + (cntB == 7'd90) + + (cntB == 7'd89) + + (cntB == 7'd88) + + (cntB == 7'd87) + + (cntB == 7'd86) + + (cntB == 7'd85) + + (cntB == 7'd84) + + (cntB == 7'd83) + + (cntB == 7'd82) + + (cntB == 7'd81) + + (cntB == 7'd80) + + (cntB == 7'd79) + + (cntB == 7'd78) + + (cntB == 
7'd77) + + (cntB == 7'd76) + + (cntB == 7'd75) + + (cntB == 7'd74) + + (cntB == 7'd73) + + (cntB == 7'd72) + + (cntB == 7'd71) + + (cntB == 7'd70) + + (cntB == 7'd69) + + (cntB == 7'd68) + + (cntB == 7'd67) + + (cntB == 7'd66) + + (cntB == 7'd65) + + (cntB == 7'd64) + + (cntB == 7'd63) + + (cntB == 7'd62) + + (cntB == 7'd61) + + (cntB == 7'd60) + + (cntB == 7'd59) + + (cntB == 7'd58) + + (cntB == 7'd57) + + (cntB == 7'd56) + + (cntB == 7'd55) + + (cntB == 7'd54) + + (cntB == 7'd53) + + (cntB == 7'd52) + + (cntB == 7'd51) + + (cntB == 7'd50) + + (cntB == 7'd49) + + (cntB == 7'd48) + + (cntB == 7'd47) + + (cntB == 7'd46) + + (cntB == 7'd45) + + (cntB == 7'd44) + + (cntB == 7'd43) + + (cntB == 7'd42) + + (cntB == 7'd41) + + (cntB == 7'd40) + + (cntB == 7'd39) + + (cntB == 7'd38) + + (cntB == 7'd37) + + (cntB == 7'd36) + + (cntB == 7'd35) + + (cntB == 7'd34) + + (cntB == 7'd33) + + (cntB == 7'd32) + + (cntB == 7'd31) + + (cntB == 7'd30) + + (cntB == 7'd29) + + (cntB == 7'd28) + + (cntB == 7'd27) + + (cntB == 7'd26) + + (cntB == 7'd25) + + (cntB == 7'd24) + + (cntB == 7'd23) + + (cntB == 7'd22) + + (cntB == 7'd21) + + (cntB == 7'd20) + + (cntB == 7'd19) + + (cntB == 7'd18) + + (cntB <= 7'd17); + + // Should not create decoder + wire [6:0] twiceCntC = + cntC == 7'd127 ? (7'd127 * 7'd2) : + cntC == 7'd126 ? (7'd126 * 7'd2) : + cntC == 7'd125 ? (7'd125 * 7'd2) : + cntC == 7'd124 ? (7'd124 * 7'd2) : + cntC == 7'd123 ? (7'd123 * 7'd2) : + cntC == 7'd122 ? (7'd122 * 7'd2) : + cntC == 7'd121 ? (7'd121 * 7'd2) : + cntC == 7'd120 ? (7'd120 * 7'd2) : + cntC == 7'd119 ? (7'd119 * 7'd2) : + cntC == 7'd118 ? (7'd118 * 7'd2) : + cntC == 7'd117 ? (7'd117 * 7'd2) : + cntC == 7'd116 ? (7'd116 * 7'd2) : + cntC == 7'd115 ? (7'd115 * 7'd2) : + cntC == 7'd114 ? (7'd114 * 7'd2) : + cntC == 7'd113 ? (7'd113 * 7'd2) : + cntC == 7'd112 ? (7'd112 * 7'd2) : + cntC == 7'd111 ? (7'd111 * 7'd2) : + cntC == 7'd110 ? (7'd110 * 7'd2) : + cntC == 7'd109 ? (7'd109 * 7'd2) : + cntC == 7'd108 ? 
(7'd108 * 7'd2) : + cntC == 7'd107 ? (7'd107 * 7'd2) : + cntC == 7'd106 ? (7'd106 * 7'd2) : + cntC == 7'd105 ? (7'd105 * 7'd2) : + cntC == 7'd104 ? (7'd104 * 7'd2) : + cntC == 7'd103 ? (7'd103 * 7'd2) : + cntC == 7'd102 ? (7'd102 * 7'd2) : + cntC == 7'd101 ? (7'd101 * 7'd2) : + cntC == 7'd100 ? (7'd100 * 7'd2) : + cntC == 7'd99 ? (7'd99 * 7'd2) : + cntC == 7'd98 ? (7'd98 * 7'd2) : + cntC == 7'd97 ? (7'd97 * 7'd2) : + cntC == 7'd96 ? (7'd96 * 7'd2) : + cntC == 7'd95 ? (7'd95 * 7'd2) : + cntC == 7'd94 ? (7'd94 * 7'd2) : + cntC == 7'd93 ? (7'd93 * 7'd2) : + cntC == 7'd92 ? (7'd92 * 7'd2) : + cntC == 7'd91 ? (7'd91 * 7'd2) : + cntC == 7'd90 ? (7'd90 * 7'd2) : + cntC == 7'd89 ? (7'd89 * 7'd2) : + cntC == 7'd88 ? (7'd88 * 7'd2) : + cntC == 7'd87 ? (7'd87 * 7'd2) : + cntC == 7'd86 ? (7'd86 * 7'd2) : + cntC == 7'd85 ? (7'd85 * 7'd2) : + cntC == 7'd84 ? (7'd84 * 7'd2) : + cntC == 7'd83 ? (7'd83 * 7'd2) : + cntC == 7'd82 ? (7'd82 * 7'd2) : + cntC == 7'd81 ? (7'd81 * 7'd2) : + cntC == 7'd80 ? (7'd80 * 7'd2) : + cntC == 7'd79 ? (7'd79 * 7'd2) : + cntC == 7'd78 ? (7'd78 * 7'd2) : + cntC == 7'd77 ? (7'd77 * 7'd2) : + cntC == 7'd76 ? (7'd76 * 7'd2) : + cntC == 7'd75 ? (7'd75 * 7'd2) : + cntC == 7'd74 ? (7'd74 * 7'd2) : + cntC == 7'd73 ? (7'd73 * 7'd2) : + cntC == 7'd72 ? (7'd72 * 7'd2) : + cntC == 7'd71 ? (7'd71 * 7'd2) : + cntC == 7'd70 ? (7'd70 * 7'd2) : + cntC == 7'd69 ? (7'd69 * 7'd2) : + cntC == 7'd68 ? (7'd68 * 7'd2) : + cntC == 7'd67 ? (7'd67 * 7'd2) : + cntC == 7'd66 ? (7'd66 * 7'd2) : + cntC == 7'd65 ? (7'd65 * 7'd2) : + cntC == 7'd64 ? (7'd64 * 7'd2) : + cntC == 7'd63 ? (7'd63 * 7'd2) : + cntC == 7'd62 ? (7'd62 * 7'd2) : + cntC == 7'd61 ? (7'd61 * 7'd2) : + cntC == 7'd60 ? (7'd60 * 7'd2) : + cntC == 7'd59 ? (7'd59 * 7'd2) : + cntC == 7'd58 ? (7'd58 * 7'd2) : + cntC == 7'd57 ? (7'd57 * 7'd2) : + cntC == 7'd56 ? (7'd56 * 7'd2) : + cntC == 7'd55 ? (7'd55 * 7'd2) : + cntC == 7'd54 ? (7'd54 * 7'd2) : + cntC == 7'd53 ? (7'd53 * 7'd2) : + cntC == 7'd52 ? 
(7'd52 * 7'd2) : + cntC == 7'd51 ? (7'd51 * 7'd2) : + cntC == 7'd50 ? (7'd50 * 7'd2) : + cntC == 7'd49 ? (7'd49 * 7'd2) : + cntC == 7'd48 ? (7'd48 * 7'd2) : + cntC == 7'd47 ? (7'd47 * 7'd2) : + cntC == 7'd46 ? (7'd46 * 7'd2) : + cntC == 7'd45 ? (7'd45 * 7'd2) : + cntC == 7'd44 ? (7'd44 * 7'd2) : + cntC == 7'd43 ? (7'd43 * 7'd2) : + cntC == 7'd42 ? (7'd42 * 7'd2) : + cntC == 7'd41 ? (7'd41 * 7'd2) : + cntC == 7'd40 ? (7'd40 * 7'd2) : + cntC == 7'd39 ? (7'd39 * 7'd2) : + cntC == 7'd38 ? (7'd38 * 7'd2) : + cntC == 7'd37 ? (7'd37 * 7'd2) : + cntC == 7'd36 ? (7'd36 * 7'd2) : + cntC == 7'd35 ? (7'd35 * 7'd2) : + cntC == 7'd34 ? (7'd34 * 7'd2) : + cntC == 7'd33 ? (7'd33 * 7'd2) : + cntC == 7'd32 ? (7'd32 * 7'd2) : + cntC == 7'd31 ? (7'd31 * 7'd2) : + cntC == 7'd30 ? (7'd30 * 7'd2) : + cntC == 7'd29 ? (7'd29 * 7'd2) : + cntC == 7'd28 ? (7'd28 * 7'd2) : + cntC == 7'd27 ? (7'd27 * 7'd2) : + cntC == 7'd26 ? (7'd26 * 7'd2) : + cntC == 7'd25 ? (7'd25 * 7'd2) : + cntC == 7'd24 ? (7'd24 * 7'd2) : + cntC == 7'd23 ? (7'd23 * 7'd2) : + cntC == 7'd22 ? (7'd22 * 7'd2) : + cntC == 7'd21 ? (7'd21 * 7'd2) : + cntC == 7'd20 ? (7'd20 * 7'd2) : + cntC == 7'd19 ? (7'd19 * 7'd2) : + cntC == 7'd18 ? (7'd18 * 7'd2) : + cntC == 7'd17 ? (7'd17 * 7'd2) : + cntC == 7'd16 ? (7'd16 * 7'd2) : + cntC == 7'd15 ? (7'd15 * 7'd2) : + cntC == 7'd14 ? (7'd14 * 7'd2) : + cntC == 7'd13 ? (7'd13 * 7'd2) : + cntC == 7'd12 ? (7'd12 * 7'd2) : + cntC == 7'd11 ? (7'd11 * 7'd2) : + cntC == 7'd10 ? (7'd10 * 7'd2) : + cntC == 7'd9 ? (7'd9 * 7'd2) : + cntC == 7'd8 ? (7'd8 * 7'd2) : + cntC == 7'd7 ? (7'd7 * 7'd2) : + cntC == 7'd6 ? (7'd6 * 7'd2) : + cntC == 7'd5 ? (7'd5 * 7'd2) : + cntC == 7'd4 ? (7'd4 * 7'd2) : + cntC == 7'd3 ? (7'd3 * 7'd2) : + cntC == 7'd2 ? (7'd2 * 7'd2) : + cntC == 7'd1 ? 
(7'd1 * 7'd2) : 7'd0; + + always @(posedge clk) begin + `check(cntAOneHot[cntA], 1'b1); + for (int i = 0; i < $bits(cntAOneHot); i = i + 1) begin + if (i == int'(cntA)) continue; + `check(cntAOneHot[i], 1'b0); + end + + `check(stupidWayToWriteConstOne, 1'b1); + + `check(twiceCntC, cntC * 7'd2); + end + +endmodule