diff --git a/Changes b/Changes index 3688142fc..b75e1143d 100644 --- a/Changes +++ b/Changes @@ -5,6 +5,8 @@ indicates the contributor was also the author of the fix; Thanks! * Verilator 3.7*** +**** Fix $clog2 calculation error with powers-of-2, bug81. [Patricio Kaplan] + **** Fix error with tasks that have output first, bug78. [Andrea Foletto] **** Fix "cloning" error with -y/--top-module, bug76. [Dimitris Nalbantis] diff --git a/include/verilated.h b/include/verilated.h index d403fc67c..308afb88e 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -536,38 +536,6 @@ static inline IData VL_REDXOR_W(int words, WDataInP lwp) { return VL_REDXOR_32(r); } -static inline IData VL_CLOG2_I(IData lhs) { - // Perhaps can do better using fls GCC4 builtins - int n=1; - IData chk; - if (!lhs) return 0; - chk = lhs >> VL_UL(16); if (chk) { n += 16; lhs = chk; } - chk = lhs >> VL_UL(8); if (chk) { n += 8; lhs = chk; } - chk = lhs >> VL_UL(4); if (chk) { n += 4; lhs = chk; } - chk = lhs >> VL_UL(2); if (chk) { n += 2; lhs = chk; } - chk = lhs >> VL_UL(1); if (chk) { n += 1; lhs = chk; } - return n; -} -static inline IData VL_CLOG2_Q(QData lhs) { - // Perhaps can do better using fls GCC4 builtins - int n=1; - QData chk; - if (!lhs) return 0; - chk = lhs >> VL_ULL(32); if (chk) { n += 32; lhs = chk; } - chk = lhs >> VL_ULL(16); if (chk) { n += 16; lhs = chk; } - chk = lhs >> VL_ULL(8); if (chk) { n += 8; lhs = chk; } - chk = lhs >> VL_ULL(4); if (chk) { n += 4; lhs = chk; } - chk = lhs >> VL_ULL(2); if (chk) { n += 2; lhs = chk; } - chk = lhs >> VL_ULL(1); if (chk) { n += 1; lhs = chk; } - return n; -} -static inline IData VL_CLOG2_W(int words, WDataInP lwp) { - for (int i=words-1; i>=0; i--) { - if (lwp[i]) return VL_CLOG2_I(lwp[i])+i*VL_WORDSIZE; - } - return 0; -} - // EMIT_RULE: VL_COUNTONES_II: oclean = false; lhs clean static inline IData VL_COUNTONES_I(IData lhs) { // This is faster than __builtin_popcountl @@ -622,6 +590,36 @@ static inline IData VL_ONEHOT0_W(int words, WDataInP lwp) { return 1; } +static inline IData VL_CLOG2_I(IData lhs) { + // There are faster algorithms, or fls GCC4 builtins, but rarely used + if (!lhs) return 0; + lhs--; + int shifts=0; + for (; lhs!=0; shifts++) lhs = lhs >> 1; + return shifts; +} +static inline IData VL_CLOG2_Q(QData lhs) { + if (!lhs) return 0; + lhs--; + int shifts=0; + for (; lhs!=0; shifts++) lhs = lhs >> VL_ULL(1); + return shifts; +} +static inline IData VL_CLOG2_W(int words, WDataInP lwp) { + IData adjust = (VL_COUNTONES_W(words,lwp)==1) ? 0 : 1; + for (int i=words-1; i>=0; i--) { + if (lwp[i]) { + for (int bit=31; bit>=0; bit--) { + if (VL_UNLIKELY(VL_BITISSET_I(lwp[i],bit))) { + return i*VL_WORDSIZE + bit + adjust; + } + } + // Can't get here - one bit must be set + } + } + return 0; +} + //=================================================================== // SIMPLE LOGICAL OPERATORS diff --git a/src/V3Number.cpp b/src/V3Number.cpp index 41557792b..8f304302a 100644 --- a/src/V3Number.cpp +++ b/src/V3Number.cpp @@ -675,10 +675,11 @@ V3Number& V3Number::opOneHot0 (const V3Number& lhs) { } V3Number& V3Number::opCLog2 (const V3Number& lhs) { if (lhs.isFourState()) return setAllBitsX(); - int bit; - for (bit=lhs.width()-1; bit>=0; bit--) { + // IE if 4, this algorithm didn't pre-subtract 1, so we need to post-correct now + int adjust = (lhs.countOnes()==1) ? 0 : 1; + for (int bit=lhs.width()-1; bit>=0; bit--) { if (lhs.bitIs1(bit)) { - setLong(bit+1); + setLong(bit+adjust); return *this; } } diff --git a/test_regress/t/t_func_plog.v b/test_regress/t/t_func_plog.v index 0f1e883b2..8e3387deb 100644 --- a/test_regress/t/t_func_plog.v +++ b/test_regress/t/t_func_plog.v @@ -70,7 +70,7 @@ module Test #(parameter SAMPLE_WIDTH = 5 ) ( `ifdef verilator // UNSUPPORTED - output reg [$clog2(SAMPLE_WIDTH-1)-1:0] pos, + output reg [$clog2(SAMPLE_WIDTH)-1:0] pos, `else output reg [log2(SAMPLE_WIDTH-1)-1:0] pos, `endif diff --git a/test_regress/t/t_math_clog2.v b/test_regress/t/t_math_clog2.v index 2f65856ac..3fad0bc9b 100644 --- a/test_regress/t/t_math_clog2.v +++ b/test_regress/t/t_math_clog2.v @@ -19,13 +19,19 @@ module t (/*AUTOARG*/ reg [63:0] crc; reg [63:0] sum; - wire [31:0] out = `CLOG2(crc[31:0]); + // Need temp wires as function has different width rules than $clog2 + wire [127:0] pows = 128'h1<0; clog2_emulate=clog2_emulate+1) + if (arg!=0) arg = arg - 1; + for (clog2_emulate=0; arg!=0; clog2_emulate=clog2_emulate+1) arg = (arg >> 1); end endfunction