From 47b5157f01e22f83ebee137334e41d0b64daf4f4 Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Mon, 26 Oct 2009 20:12:09 -0400 Subject: [PATCH] Support division and modulus of > 64 bit vectors. --- Changes | 4 +- bin/verilator | 6 -- include/verilated.cpp | 111 ++++++++++++++++++++++++++ include/verilated.h | 83 ++++++++++++++++--- src/V3AstNodes.h | 8 +- src/V3Number.cpp | 144 ++++++++++++++++++++++++++++++--- src/V3Number.h | 2 + test_regress/t/t_math_divw.pl | 18 +++++ test_regress/t/t_math_divw.v | 145 ++++++++++++++++++++++++++++++++++ test_regress/t/t_math_vgen.v | 9 +++ test_verilated/Makefile_obj | 4 + test_verilated/vgen.pl | 86 +++++++++++++------- 12 files changed, 564 insertions(+), 56 deletions(-) create mode 100755 test_regress/t/t_math_divw.pl create mode 100644 test_regress/t/t_math_divw.v diff --git a/Changes b/Changes index 6a1e3cb3a..036829a19 100644 --- a/Changes +++ b/Changes @@ -7,7 +7,9 @@ indicates the contributor was also the author of the fix; Thanks! ** Support little endian bit vectors ("reg [0:2] x;"). -**** Fix writing to out-of-bounds arrays writing element 0. +** Support division and modulus of > 64 bit vectors. [Gary Thomas] + +*** Fix writing to out-of-bounds arrays writing element 0. **** Fix core dump with SystemVerilog var declarations under unnamed begins. diff --git a/bin/verilator b/bin/verilator index 2fcf56127..02e51bf8b 100755 --- a/bin/verilator +++ b/bin/verilator @@ -1647,12 +1647,6 @@ Bit ranges must be numbered with the MSB being numbered greater or the same as the LSB. Little-bit-endian busses [0:15] are not supported as they aren't easily made compatible with C++. -=head2 32-Bit Divide - -The division and modulus operators are limited to 32 bits. This can be -easily fixed if someone contributes the appropriate wide-integer math -functions. - =head2 Gate Primitives The 2-state gate primitives (and, buf, nand, nor, not, or, xnor, xor) are diff --git a/include/verilated.cpp b/include/verilated.cpp index 742896c37..8338f4ddb 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -136,6 +136,117 @@ WDataOutP VL_ZERO_RESET_W(int obits, WDataOutP outwp) { return outwp; } +//=========================================================================== +// Slow math + +WDataOutP _vl_moddiv_w(int lbits, WDataOutP owp, WDataInP lwp, WDataInP rwp, bool is_modulus) { + // See Knuth Algorithm D. Computes u/v = q.r + // This isn't massively tuned, as wide division is rare + // for debug see V3Number version + // Requires clean input + int words = VL_WORDS_I(lbits); + for (int i=0; i= 0; j--) { + vluint64_t unw64 = ((k<> 32 won't mask the value + for (int i = vw-1; i>0; i--) { + vn[i] = (rwp[i] << s) | (shift_mask & (rwp[i-1] >> (32-s))); + } + vn[0] = rwp[0] << s; + + // Copy and shift dividend by same amount; may set new upper word + if (s) un[uw] = lwp[uw-1] >> (32-s); + else un[uw] = 0; + for (int i=uw-1; i>0; i--) { + un[i] = (lwp[i] << s) | (shift_mask & (lwp[i-1] >> (32-s))); + } + un[0] = lwp[0] << s; + + // Main loop + for (int j = uw - vw; j >= 0; j--) { + // Estimate + vluint64_t unw64 = ((vluint64_t)(un[j+vw])<= VL_ULL(0x100000000) + || ((qhat*vn[vw-2]) > ((rhat<> VL_ULL(32)) - (t >> VL_ULL(32)); + } + t = un[j+vw] - k; + un[j+vw] = t; + owp[j] = qhat; // Save quotient digit + + if (t < 0) { + // Over subtracted; correct by adding back + owp[j]--; + k = 0; + for (int i=0; i> VL_ULL(32); + } + un[j+vw] = un[j+vw] + k; + } + } + + if (is_modulus) { // modulus + // Need to reverse normalization on copy to output + for (int i=0; i> s) | (shift_mask & (un[i+1] << (32-s))); + } + for (int i=vw; i>VL_WORDSIZE); } #define VL_SET_WI(owp,data) { owp[0]=(data); owp[1]=0; } #define VL_SET_QW(lwp) ( ((QData)(lwp[0])) | ((QData)(lwp[1])<<((QData)(VL_WORDSIZE)) )) +#define _VL_SET_QII(ld,rd) ( ((QData)(ld)<=0; i--) { - if (lwp[i]) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken for (int bit=31; bit>=0; bit--) { if (VL_UNLIKELY(VL_BITISSET_I(lwp[i],bit))) { return i*VL_WORDSIZE + bit + adjust; @@ -621,6 +630,21 @@ static inline IData VL_CLOG2_W(int words, WDataInP lwp) { return 0; } +static inline IData VL_MOSTSETBITP1_W(int words, WDataInP lwp) { + // MSB set bit plus one; similar to FLS. 0=value is zero + for (int i=words-1; i>=0; i--) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken + for (int bit=31; bit>=0; bit--) { + if (VL_UNLIKELY(VL_BITISSET_I(lwp[i],bit))) { + return i*VL_WORDSIZE + bit + 1; + } + } + // Can't get here - one bit must be set + } + } + return 0; +} + //=================================================================== // SIMPLE LOGICAL OPERATORS @@ -759,10 +783,12 @@ static inline int _VL_CMPS_W(int lbits, WDataInP lwp, WDataInP rwp) { // EMIT_RULE: VL_MUL: oclean=dirty; lclean==clean; rclean==clean; // EMIT_RULE: VL_DIV: oclean=dirty; lclean==clean; rclean==clean; // EMIT_RULE: VL_MODDIV: oclean=dirty; lclean==clean; rclean==clean; -#define VL_DIV_I(lhs,rhs) (((rhs)==0)?0:(lhs)/(rhs)) -#define VL_DIV_Q(lhs,rhs) (((rhs)==0)?0:(lhs)/(rhs)) -#define VL_MODDIV_I(lhs,rhs) (((rhs)==0)?0:(lhs)%(rhs)) -#define VL_MODDIV_Q(lhs,rhs) (((rhs)==0)?0:(lhs)%(rhs)) +#define VL_DIV_III(lbits,lhs,rhs) (((rhs)==0)?0:(lhs)/(rhs)) +#define VL_DIV_QQQ(lbits,lhs,rhs) (((rhs)==0)?0:(lhs)/(rhs)) +#define VL_DIV_WWW(lbits,owp,lwp,rwp) (_vl_moddiv_w(lbits,owp,lwp,rwp,0)) +#define VL_MODDIV_III(lbits,lhs,rhs) (((rhs)==0)?0:(lhs)%(rhs)) +#define VL_MODDIV_QQQ(lbits,lhs,rhs) (((rhs)==0)?0:(lhs)%(rhs)) +#define VL_MODDIV_WWW(lbits,owp,lwp,rwp) (_vl_moddiv_w(lbits,owp,lwp,rwp,1)) static inline WDataOutP VL_ADD_W(int words, WDataOutP owp,WDataInP lwp,WDataInP rwp){ QData carry = 0; @@ -866,31 +892,70 @@ static inline WDataOutP VL_MULS_WWW(int,int lbits,int, WDataOutP owp,WDataInP lw return(owp); } -static inline IData VL_DIVS_III(int,int lbits,int, IData lhs,IData rhs) { +static inline IData VL_DIVS_III(int lbits, IData lhs,IData rhs) { if (rhs==0) return 0; vlsint32_t lhs_signed = VL_EXTENDS_II(32, lbits, lhs); vlsint32_t rhs_signed = VL_EXTENDS_II(32, lbits, rhs); return lhs_signed / rhs_signed; } -static inline QData VL_DIVS_QQQ(int,int lbits,int, QData lhs,QData rhs) { +static inline QData VL_DIVS_QQQ(int lbits, QData lhs,QData rhs) { if (rhs==0) return 0; vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); return lhs_signed / rhs_signed; } -static inline IData VL_MODDIVS_III(int,int lbits,int, IData lhs,IData rhs) { +static inline IData VL_MODDIVS_III(int lbits, IData lhs,IData rhs) { if (rhs==0) return 0; vlsint32_t lhs_signed = VL_EXTENDS_II(32, lbits, lhs); vlsint32_t rhs_signed = VL_EXTENDS_II(32, lbits, rhs); return lhs_signed % rhs_signed; } -static inline QData VL_MODDIVS_QQQ(int,int lbits,int, QData lhs,QData rhs) { +static inline QData VL_MODDIVS_QQQ(int lbits, QData lhs,QData rhs) { if (rhs==0) return 0; vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); return lhs_signed % rhs_signed; } +static inline WDataOutP VL_DIVS_WWW(int lbits, WDataOutP owp,WDataInP lwp,WDataInP rwp) { + int words = VL_WORDS_I(lbits); + IData lsign = VL_SIGN_I(lbits,lwp[words-1]); + IData rsign = VL_SIGN_I(lbits,rwp[words-1]); + IData lwstore[VL_MULS_MAX_WORDS]; // Fixed size, as MSVC++ doesn't allow [words] here + IData rwstore[VL_MULS_MAX_WORDS]; + WDataInP ltup = lwp; + WDataInP rtup = rwp; + if (lsign) { ltup = _VL_CLEAN_INPLACE_W(lbits, VL_UNARYMIN_W(VL_WORDS_I(lbits), lwstore, lwp)); } + if (rsign) { rtup = _VL_CLEAN_INPLACE_W(lbits, VL_UNARYMIN_W(VL_WORDS_I(lbits), rwstore, rwp)); } + if ((lsign && !rsign) || (!lsign && rsign)) { + IData qNoSign[VL_MULS_MAX_WORDS]; + VL_DIV_WWW(lbits,qNoSign,ltup,rtup); + _VL_CLEAN_INPLACE_W(lbits, VL_UNARYMIN_W(VL_WORDS_I(lbits), owp, qNoSign)); + return owp; + } else { + return VL_DIV_WWW(lbits,owp,ltup,rtup); + } +} +static inline WDataOutP VL_MODDIVS_WWW(int lbits, WDataOutP owp,WDataInP lwp,WDataInP rwp) { + int words = VL_WORDS_I(lbits); + IData lsign = VL_SIGN_I(lbits,lwp[words-1]); + IData rsign = VL_SIGN_I(lbits,rwp[words-1]); + IData lwstore[VL_MULS_MAX_WORDS]; // Fixed size, as MSVC++ doesn't allow [words] here + IData rwstore[VL_MULS_MAX_WORDS]; + WDataInP ltup = lwp; + WDataInP rtup = rwp; + if (lsign) { ltup = _VL_CLEAN_INPLACE_W(lbits, VL_UNARYMIN_W(VL_WORDS_I(lbits), lwstore, lwp)); } + if (rsign) { rtup = _VL_CLEAN_INPLACE_W(lbits, VL_UNARYMIN_W(VL_WORDS_I(lbits), rwstore, rwp)); } + if (lsign) { // Only dividend sign matters for modulus + IData qNoSign[VL_MULS_MAX_WORDS]; + VL_MODDIV_WWW(lbits,qNoSign,ltup,rtup); + _VL_CLEAN_INPLACE_W(lbits, VL_UNARYMIN_W(VL_WORDS_I(lbits), owp, qNoSign)); + return owp; + } else { + return VL_MODDIV_WWW(lbits,owp,ltup,rtup); + } +} + static inline IData VL_POW_III(int, int, int rbits, IData lhs, IData rhs) { if (lhs==0) return 0; IData power = lhs; diff --git a/src/V3AstNodes.h b/src/V3AstNodes.h index 6c8e62622..8c2b002f4 100644 --- a/src/V3AstNodes.h +++ b/src/V3AstNodes.h @@ -2382,7 +2382,7 @@ struct AstDiv : public AstNodeBiop { ASTNODE_NODE_FUNCS(Div, DIV) virtual void numberOperate(V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opDiv(lhs,rhs); } virtual string emitVerilog() { return "%k(%l %k/ %r)"; } - virtual string emitC() { return "VL_DIV_%lq(%lW, %P, %li, %ri)"; } + virtual string emitC() { return "VL_DIV_%nq%lq%rq(%lw, %P, %li, %ri)"; } virtual bool cleanOut() {return false;} virtual bool cleanLhs() {return true;} virtual bool cleanRhs() {return true;} virtual bool sizeMattersLhs() {return true;} virtual bool sizeMattersRhs() {return true;} @@ -2394,7 +2394,7 @@ struct AstDivS : public AstNodeBiop { ASTNODE_NODE_FUNCS(DivS, DIVS) virtual void numberOperate(V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opDivS(lhs,rhs); } virtual string emitVerilog() { return "%k(%l %k/ %r)"; } - virtual string emitC() { return "VL_DIVS_%nq%lq%rq(%nw,%lw,%rw, %P, %li, %ri)"; } + virtual string emitC() { return "VL_DIVS_%nq%lq%rq(%lw, %P, %li, %ri)"; } virtual bool cleanOut() {return false;} virtual bool cleanLhs() {return true;} virtual bool cleanRhs() {return true;} virtual bool sizeMattersLhs() {return true;} virtual bool sizeMattersRhs() {return true;} @@ -2407,7 +2407,7 @@ struct AstModDiv : public AstNodeBiop { ASTNODE_NODE_FUNCS(ModDiv, MODDIV) virtual void numberOperate(V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opModDiv(lhs,rhs); } virtual string emitVerilog() { return "%k(%l %k%% %r)"; } - virtual string emitC() { return "VL_MODDIV_%lq(%lW, %P, %li, %ri)"; } + virtual string emitC() { return "VL_MODDIV_%nq%lq%rq(%lw, %P, %li, %ri)"; } virtual bool cleanOut() {return false;} virtual bool cleanLhs() {return true;} virtual bool cleanRhs() {return true;} virtual bool sizeMattersLhs() {return true;} virtual bool sizeMattersRhs() {return true;} @@ -2419,7 +2419,7 @@ struct AstModDivS : public AstNodeBiop { ASTNODE_NODE_FUNCS(ModDivS, MODDIVS) virtual void numberOperate(V3Number& out, const V3Number& lhs, const V3Number& rhs) { out.opModDivS(lhs,rhs); } virtual string emitVerilog() { return "%k(%l %k%% %r)"; } - virtual string emitC() { return "VL_MODDIVS_%nq%lq%rq(%nw,%lw,%rw, %P, %li, %ri)"; } + virtual string emitC() { return "VL_MODDIVS_%nq%lq%rq(%lw, %P, %li, %ri)"; } virtual bool cleanOut() {return false;} virtual bool cleanLhs() {return true;} virtual bool cleanRhs() {return true;} virtual bool sizeMattersLhs() {return true;} virtual bool sizeMattersRhs() {return true;} diff --git a/src/V3Number.cpp b/src/V3Number.cpp index b2cbedc97..3a49a4897 100644 --- a/src/V3Number.cpp +++ b/src/V3Number.cpp @@ -245,7 +245,7 @@ V3Number::V3Number (FileLine* fileline, const char* sourcep) { case 'd': setBit(obit++,1); setBit(obit++,0); setBit(obit++,1); setBit(obit++,1); break; case 'e': setBit(obit++,0); setBit(obit++,1); setBit(obit++,1); setBit(obit++,1); break; case 'f': setBit(obit++,1); setBit(obit++,1); setBit(obit++,1); setBit(obit++,1); break; - case 'z': case '?': + case 'z': case '?': setBit(obit++,'z'); setBit(obit++,'z'); setBit(obit++,'z'); setBit(obit++,'z'); break; case 'x': setBit(obit++,'x'); setBit(obit++,'x'); setBit(obit++,'x'); setBit(obit++,'x'); break; @@ -1087,37 +1087,47 @@ V3Number& V3Number::opMulS (const V3Number& lhs, const V3Number& rhs) { return *this; } V3Number& V3Number::opDiv (const V3Number& lhs, const V3Number& rhs) { + UINFO(9, "opdiv "<64) m_fileline->v3fatalSrc("Unsupported: Large / math not implemented yet: "<<*this); - if (rhs.width()>64) m_fileline->v3fatalSrc("Unsupported: Large / math not implemented yet: "<<*this); - setQuad(lhs.toUQuad() / rhs.toUQuad()); - return *this; + if (lhs.width()<=64) { + setQuad(lhs.toUQuad() / rhs.toUQuad()); + return *this; + } else { + // Wide division + return opModDivGuts(lhs,rhs,false); + } } V3Number& V3Number::opDivS (const V3Number& lhs, const V3Number& rhs) { // Signed divide + //UINFO(9, ">>divs-start "<divs-mid "<64) m_fileline->v3fatalSrc("Unsupported: Large % math not implemented yet: "<<*this); - setQuad(lhs.toUQuad() % rhs.toUQuad()); - return *this; + if (lhs.width()<=64) { + setQuad(lhs.toUQuad() % rhs.toUQuad()); + return *this; + } else { + // Wide modulus + return opModDivGuts(lhs,rhs,true); + } } V3Number& V3Number::opModDivS (const V3Number& lhs, const V3Number& rhs) { // Signed moddiv @@ -1133,6 +1143,122 @@ V3Number& V3Number::opModDivS (const V3Number& lhs, const V3Number& rhs) { } return *this; } +V3Number& V3Number::opModDivGuts(const V3Number& lhs, const V3Number& rhs, bool is_modulus) { + // See Knuth Algorithm D. Computes u/v = q.r + // This isn't massively tuned, as wide division is rare + setZero(); + // Find MSB and check for zero. + int words = lhs.words(); + int umsbp1 = lhs.mostSetBitP1(); // dividend + int vmsbp1 = rhs.mostSetBitP1(); // divisor + if (VL_UNLIKELY(vmsbp1==0) // rwp==0 so division by zero. Return 0. + || VL_UNLIKELY(umsbp1==0)) { // 0/x so short circuit and return 0 + UINFO(9, " opmoddiv-zero "<= 0; j--) { + vluint64_t unw64 = ((k<> 32 won't mask the value + for (int i = vw-1; i>0; i--) { + vn[i] = (rhs.m_value[i] << s) | (shift_mask & (rhs.m_value[i-1] >> (32-s))); + } + vn[0] = rhs.m_value[0] << s; + + // Copy and shift dividend by same amount; may set new upper word + if (s) un[uw] = lhs.m_value[uw-1] >> (32-s); + else un[uw] = 0; + for (int i=uw-1; i>0; i--) { + un[i] = (lhs.m_value[i] << s) | (shift_mask & (lhs.m_value[i-1] >> (32-s))); + } + un[0] = lhs.m_value[0] << s; + + //printf(" un="); for(int i=5; i>=0; i--) printf(" %08x",un[i]); printf("\n"); + //printf(" vn="); for(int i=5; i>=0; i--) printf(" %08x",vn[i]); printf("\n"); + //printf(" mv="); for(int i=5; i>=0; i--) printf(" %08x",m_value[i]); printf("\n"); + + // Main loop + for (int j = uw - vw; j >= 0; j--) { + // Estimate + vluint64_t unw64 = ((vluint64_t)(un[j+vw])<= VL_ULL(0x100000000) + || ((qhat*vn[vw-2]) > ((rhat<> VL_ULL(32)) - (t >> VL_ULL(32)); + } + t = un[j+vw] - k; + un[j+vw] = t; + this->m_value[j] = qhat; // Save quotient digit + + if (t < 0) { + // Over subtracted; correct by adding back + this->m_value[j]--; + k = 0; + for (int i=0; i> VL_ULL(32); + } + un[j+vw] = un[j+vw] + k; + } + } + + //printf(" un="); for(int i=5; i>=0; i--) printf(" %08x",un[i]); printf("\n"); + //printf(" vn="); for(int i=5; i>=0; i--) printf(" %08x",vn[i]); printf("\n"); + //printf(" mv="); for(int i=5; i>=0; i--) printf(" %08x",m_value[i]); printf("\n"); + + if (is_modulus) { // modulus + // Need to reverse normalization on copy to output + for (int i=0; i> s) | (shift_mask & (un[i+1] << (32-s))); + } + for (int i=vw; i1, + ); + +ok(1); +1; diff --git a/test_regress/t/t_math_divw.v b/test_regress/t/t_math_divw.v new file mode 100644 index 000000000..f8c5d899b --- /dev/null +++ b/test_regress/t/t_math_divw.v @@ -0,0 +1,145 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed into the Public Domain, for any use, +// without warranty, 2004 by Wilson Snyder. + +module t (/*AUTOARG*/ + // Inputs + clk + ); + + input clk; + + // verilator lint_off WIDTH + + //============================================================ + + reg bad; + initial begin + bad=0; + c96(96'h0_0000_0000_0000_0000, 96'h8_8888_8888_8888_8888, 96'h0_0000_0000_0000_0000, 96'h0); + c96(96'h8_8888_8888_8888_8888, 96'h0_0000_0000_0000_0000, 96'h0_0000_0000_0000_0000, 96'h0); + c96(96'h8_8888_8888_8888_8888, 96'h0_0000_0000_0000_0002, 96'h4_4444_4444_4444_4444, 96'h0); + c96(96'h8_8888_8888_8888_8888, 96'h0_2000_0000_0000_0000, 96'h0_0000_0000_0000_0044, 96'h0_0888_8888_8888_8888); + c96(96'h8_8888_8888_8888_8888, 96'h8_8888_8888_8888_8888, 96'h0_0000_0000_0000_0001, 96'h0); + c96(96'h8_8888_8888_8888_8888, 96'h8_8888_8888_8888_8889, 96'h0_0000_0000_0000_0000, 96'h8_8888_8888_8888_8888); + c96(96'h1_0000_0000_8eba_434a, 96'h0_0000_0000_0000_0001, 96'h1_0000_0000_8eba_434a, 96'h0); + + c96(96'h0003, 96'h0002, 96'h0001, 96'h0001); + c96(96'h0003, 96'h0003, 96'h0001, 96'h0000); + c96(96'h0003, 96'h0004, 96'h0000, 96'h0003); + c96(96'h0000, 96'hffff, 96'h0000, 96'h0000); + c96(96'hffff, 96'h0001, 96'hffff, 96'h0000); + c96(96'hffff, 96'hffff, 96'h0001, 96'h0000); + c96(96'hffff, 96'h0003, 96'h5555, 96'h0000); + c96(96'hffff_ffff, 96'h0001, 96'hffff_ffff, 96'h0000); + c96(96'hffff_ffff, 96'hffff, 96'h0001_0001, 96'h0000); + c96(96'hfffe_ffff, 96'hffff, 96'h0000_ffff, 96'hfffe); + c96(96'h1234_5678, 96'h9abc, 96'h0000_1e1e, 96'h2c70); + c96(96'h0000_0000, 96'h0001_0000, 96'h0000, 96'h0000_0000); + c96(96'h0007_0000, 96'h0003_0000, 96'h0002, 96'h0001_0000); + c96(96'h0007_0005, 96'h0003_0000, 96'h0002, 96'h0001_0005); + c96(96'h0006_0000, 96'h0002_0000, 96'h0003, 96'h0000_0000); + c96(96'h8000_0001, 96'h4000_7000, 96'h0001, 96'h3fff_9001); + c96(96'hbcde_789a, 96'hbcde_789a, 96'h0001, 96'h0000_0000); + c96(96'hbcde_789b, 96'hbcde_789a, 96'h0001, 96'h0000_0001); + c96(96'hbcde_7899, 96'hbcde_789a, 96'h0000, 96'hbcde_7899); + c96(96'hffff_ffff, 96'hffff_ffff, 96'h0001, 96'h0000_0000); + c96(96'hffff_ffff, 96'h0001_0000, 96'hffff, 96'h0000_ffff); + c96(96'h0123_4567_89ab, 96'h0001_0000, 96'h0123_4567, 96'h0000_89ab); + c96(96'h8000_fffe_0000, 96'h8000_ffff, 96'h0000_ffff, 96'h7fff_ffff); + c96(96'h8000_0000_0003, 96'h2000_0000_0001, 96'h0003, 96'h2000_0000_0000); + + c96(96'hffff_ffff_0000_0000, 96'h0001_0000_0000, 96'hffff_ffff, 96'h0000_0000_0000); + c96(96'hffff_ffff_0000_0000, 96'hffff_0000_0000, 96'h0001_0001, 96'h0000_0000_0000); + c96(96'hfffe_ffff_0000_0000, 96'hffff_0000_0000, 96'h0000_ffff, 96'hfffe_0000_0000); + c96(96'h1234_5678_0000_0000, 96'h9abc_0000_0000, 96'h0000_1e1e, 96'h2c70_0000_0000); + + c96(96'h0000_0000_0000_0000, 96'h0001_0000_0000_0000, 96'h0000, 96'h0000_0000_0000_0000); + c96(96'h0007_0000_0000_0000, 96'h0003_0000_0000_0000, 96'h0002, 96'h0001_0000_0000_0000); + c96(96'h0007_0005_0000_0000, 96'h0003_0000_0000_0000, 96'h0002, 96'h0001_0005_0000_0000); + c96(96'h0006_0000_0000_0000, 96'h0002_0000_0000_0000, 96'h0003, 96'h0000_0000_0000_0000); + c96(96'h8000_0001_0000_0000, 96'h4000_7000_0000_0000, 96'h0001, 96'h3fff_9001_0000_0000); + c96(96'hbcde_789a_0000_0000, 96'hbcde_789a_0000_0000, 96'h0001, 96'h0000_0000_0000_0000); + c96(96'hbcde_789b_0000_0000, 96'hbcde_789a_0000_0000, 96'h0001, 96'h0000_0001_0000_0000); + c96(96'hbcde_7899_0000_0000, 96'hbcde_789a_0000_0000, 96'h0000, 96'hbcde_7899_0000_0000); + c96(96'hffff_ffff_0000_0000, 96'hffff_ffff_0000_0000, 96'h0001, 96'h0000_0000_0000_0000); + c96(96'hffff_ffff_0000_0000, 96'h0001_0000_0000_0000, 96'hffff, 96'h0000_ffff_0000_0000); + c96(96'h7fff_8000_0000_0000, 96'h8000_0000_0001, 96'h0000_fffe, 96'h7fff_ffff_0002); + c96(96'h8000_0000_fffe_0000, 96'h8000_0000_ffff, 96'h0000_ffff, 96'h7fff_ffff_ffff); + c96(96'h0008_8888_8888_8888_8888, 96'h0002_0000_0000_0000, 96'h0004_4444, 96'h0000_8888_8888_8888); + + if (bad) $stop; + $write("*-* All Finished *-*\n"); + $finish; + end + + task c96; + input [95:0] u; + input [95:0] v; + input [95:0] expq; + input [95:0] expr; + c96u( u, v, expq, expr); + c96s( u, v, expq, expr); + c96s(-u, v,-expq,-expr); + c96s( u,-v,-expq, expr); + c96s(-u,-v, expq,-expr); + endtask + + task c96u; + input [95:0] u; + input [95:0] v; + input [95:0] expq; + input [95:0] expr; + reg [95:0] gotq; + reg [95:0] gotr; + gotq = u/v; + gotr = u%v; + if (gotq != expq && v!=0) begin + bad = 1; + end + if (gotr != expr && v!=0) begin + bad = 1; + end + if (bad +`ifdef TEST_VERBOSE + || 1 +`endif + ) begin + $write(" %x /u %x = got %x exp %x %% got %x exp %x", u,v,gotq,expq,gotr,expr); + // Test for v=0 to prevent Xs causing grief + if (gotq != expq && v!=0) $write(" BADQ"); + if (gotr != expr && v!=0) $write(" BADR"); + $write("\n"); + end + endtask + + task c96s; + input signed [95:0] u; + input signed [95:0] v; + input signed [95:0] expq; + input signed [95:0] expr; + reg signed [95:0] gotq; + reg signed [95:0] gotr; + gotq = u/v; + gotr = u%v; + if (gotq != expq && v!=0) begin + bad = 1; + end + if (gotr != expr && v!=0) begin + bad = 1; + end + if (bad +`ifdef TEST_VERBOSE + || 1 +`endif + ) begin + $write(" %x /s %x = got %x exp %x %% got %x exp %x", u,v,gotq,expq,gotr,expr); + // Test for v=0 to prevent Xs causing grief + if (gotq != expq && v!=0) $write(" BADQ"); + if (gotr != expr && v!=0) $write(" BADR"); + $write("\n"); + end + endtask + +endmodule diff --git a/test_regress/t/t_math_vgen.v b/test_regress/t/t_math_vgen.v index d5eb09c1c..fe4f43c98 100644 --- a/test_regress/t/t_math_vgen.v +++ b/test_regress/t/t_math_vgen.v @@ -273,6 +273,15 @@ module t (/*AUTOARG*/ //============================================================ + reg signed [105: 0] W0032 /*verilator public*/; //=106'h3ff0000000100000000bd597bb1 + always @(check) begin : Block237 + W0032 = 106'sh3ff0000000100000000bd597bb1; + if ((106'sh1ca0000000000000000b96b8dc2 / 106'sh3ff0000000100000000bd597bb1) != 106'sh3fffffffffffffffffffffffe36) if (check) $stop; + if ((106'sh1ca0000000000000000b96b8dc2 / W0032) != 106'sh3fffffffffffffffffffffffe36) if (check) $stop; + end + + //============================================================ + always @ (posedge clk) begin if (cyc!=0) begin cyc <= cyc + 1; diff --git a/test_verilated/Makefile_obj b/test_verilated/Makefile_obj index 86803d8ae..3422b7ddc 100644 --- a/test_verilated/Makefile_obj +++ b/test_verilated/Makefile_obj @@ -25,6 +25,10 @@ CPPFLAGS += -DWAVES=1 CPPFLAGS += -DVL_DEBUG=1 CPPFLAGS += $(CPPFLAGS_ADD) +# Random code often does / 0. Unfortunately VL_DIV_I(0,0) will warn +# without this flag, even though there's a conditional to prevent the divide. +CPPFLAGS += -Wno-div-by-zero + ####################################################################### # Linking final exe -- presumes have a sim_main.cpp diff --git a/test_verilated/vgen.pl b/test_verilated/vgen.pl index 757b37458..f82eb0472 100755 --- a/test_verilated/vgen.pl +++ b/test_verilated/vgen.pl @@ -49,7 +49,7 @@ our $Raise_Weight_Max = 50; 'VBITSELP'=> {weight=>1&&10, width=>0, signed=>0,sc=>0, terminal=>0, v=>'%i[%2+:%3]', }, 'VBITSELM'=> {weight=>1&&10, width=>0, signed=>0,sc=>0, terminal=>0, v=>'%i[%2-:%3]', }, # Unary - 'VEXTEND'=> {weight=>1&&3, width=>-2, signed=>0,sc=>0, terminal=>0, v=>'{%xw\'h0,%1}', }, + 'VEXTEND'=> {weight=>1&&3, width=>-2, signed=>0,sc=>0, terminal=>0, v=>'{%xd\'h0,%1}', }, 'VLOGNOT'=> {weight=>1&&1, width=>1, signed=>0, sc=>0, terminal=>0, v=>'(! %1)', }, 'VREDAND'=> {weight=>1&&1, width=>1, signed=>0, sc=>0, terminal=>0, v=>'(& %1)', }, 'VREDOR'=> {weight=>1&&1, width=>1, signed=>0, sc=>0, terminal=>0, v=>'(| %1)', }, @@ -82,8 +82,8 @@ our $Raise_Weight_Max = 50; 'VADD'=> {weight=>1&&10, width=>0, sc=>1, terminal=>0, v=>'(%1 + %2)', }, 'VSUB'=> {weight=>1&&10, width=>0, sc=>1, terminal=>0, v=>'(%1 - %2)', }, 'VMUL'=> {weight=>1&&15,width=>0, sc=>1, terminal=>0, v=>'(%1 * %2)', }, # High % as rarely applyable - #'VDIV'=> {weight=>2&&0, width=>-32, sc=>1, terminal=>0, v=>'(%1 / %2)', }, # FIX - #'VMODDIV'=> {weight=>2&&0, width=>-32, sc=>1, terminal=>0, v=>'(%1 %% %2)', }, # FIX + 'VDIV'=> {weight=>1&&8, width=>0, sc=>1, terminal=>0, v=>'((%2)==%xw\'h0 ? %xw\'%xsh0:(%1 / %2))', }, + 'VMODDIV'=> {weight=>1&&8, width=>0, sc=>1, terminal=>0, v=>'((%2)==%xw\'h0 ? %xw\'%xsh0:(%1 %% %2))', }, #'VPOW'=> {weight=>2&&0,width=>-64, sc=>0, terminal=>0, v=>'(%1 ** %2)', }, 'VSHIFTL'=> {weight=>1&&8, width=>0, signed=>0, sc=>0, terminal=>0, v=>'(%1 << %2)', }, 'VSHIFTLS'=> {weight=>1&&8, width=>0, signed=>1, sc=>0, terminal=>0, v=>'(%1 <<< %2)', }, @@ -167,8 +167,8 @@ my %ops2 = 'VADD'=> {pl=>'VADD (%tr,%1v,%2v);', rnd=>'%1r=gen_leaf(width=>%tw,signed=>%tg); %2r=gen_leaf(width=>%tw,signed=>%tg);', trunc=>1,}, 'VSUB'=> {pl=>'VSUB (%tr,%1v,%2v);', rnd=>'%1r=gen_leaf(width=>%tw,signed=>%tg); %2r=gen_leaf(width=>%tw,signed=>%tg);', trunc=>1,}, 'VMUL'=> {pl=>'VMUL (%tr,%1v,%2v);', rnd=>'%1r=gen_leaf(width=>%tw,signed=>%tg); %2r=gen_leaf(width=>%tw,signed=>%tg);', trunc=>1,}, # Multiply generates larger width, so need truncate for safety - #'VDIV'=> {pl=>'VDIV (%tr,%1v,%2v);', rnd=>'%1r=gen_leaf(width=>%tw,signed=>%tg); %2r=gen_leaf(width=>%tw,signed=>%tg);'}, - #'VMODDIV'=> {pl=>'VMODDIV(%tr,%1v,%2v);', rnd=>'%1r=gen_leaf(width=>%tw,signed=>%tg); %2r=gen_leaf(width=>%tw,signed=>%tg);'}, + 'VDIV'=> {pl=>'VDIV (%tr,%1r,%2r,0);', rnd=>'%1r=gen_leaf(width=>%tw,signed=>%tg); %2r=gen_leaf(width=>%tw,signed=>%tg);'}, + 'VMODDIV'=> {pl=>'VDIV (%tr,%1r,%2r,1);', rnd=>'%1r=gen_leaf(width=>%tw,signed=>%tg); %2r=gen_leaf(width=>%tw,signed=>%tg);'}, #'VPOW'=> {pl=>'VPOW (%tr,%1r,%2r);', rnd=>'%1r=gen_leaf(width=>min(%tw,6),signed=>%tg); %2r=gen_leaf(width=>min(%tw,8),signed=>%tg);', trunc=>1,}, # Generates larger width, so need truncate for safety 'VSHIFTL'=> {pl=>'VSHIFTL(%tr,%1v,%2v);', rnd=>'%1r=gen_leaf(width=>%tw,signed=>%tg); %2r=gen_leaf(width=>log2(%tw)+1,signed=>%tg);'}, 'VSHIFTLS'=> {pl=>'VSHIFTL(%tr,%1v,%2v);', rnd=>'%1r=gen_leaf(width=>%tw,signed=>%tg); %2r=gen_leaf(width=>log2(%tw)+1,signed=>%tg);'}, @@ -227,6 +227,7 @@ if ($opt_seed==0) { } srand($opt_seed); init(); +selftest(); gentest(); write_output_sc("vgen.cpp") if $Opt_Sc; write_output_v("vgen.v") if !$Opt_Sc; @@ -293,7 +294,10 @@ sub _rnd_op_ok { my $paramref = shift; return (($opref->{width} == 0 || $opref->{width} == $paramref->{width} + # Note -2 means >, while -32 means {width}==-31 && $paramref->{width}<=31) # -31... must be <31 bits || ($opref->{width}==-32 && $paramref->{width}<=32) # -32... must be <32 bits + || ($opref->{width}==-63 && $paramref->{width}<=63) # -63... must be <63 bits || ($opref->{width}==-64 && $paramref->{width}<=64) # -64... must be <64 bits || ($opref->{width}==-2 && $paramref->{width}>=2) # -2... must be >2 bits ) @@ -719,7 +723,8 @@ sub gen_leaf { $treeref->{val_size} = $treeref->{val}->Size; #Debugging $treeref->{val_text} = $treeref->{val}->to_Hex; #Debugging - ($treeref->{val}->Size == $treeref->{width}) or die "%Error: Size mismatch,"; + ($treeref->{val}->Size == $treeref->{width}) + or die "%Error: Size mismatch ",$treeref->{val}->Size,"!=",$treeref->{width},"\n",Dumper($treeref); return $treeref; } @@ -733,18 +738,20 @@ sub gen_v { $fmt =~ s/%3/%s/g; $fmt =~ s/%v/%s/g; $fmt =~ s/%i/%s/g; - $fmt =~ s/%xw/%s/g; + $fmt =~ s/%x[wds]/%s/g; my $argl = $opref->{v}; my @args; - while ($argl =~ s/(%xw|%.)//) { + while ($argl =~ s/(%x.|%.)//) { my $arg = $1; push @args, '$treeref->{op1}{text}' if $arg =~ /%1/; push @args, '$treeref->{op2}{text}' if $arg =~ /%2/; push @args, '$treeref->{op3}{text}' if $arg =~ /%3/; push @args, '$treeref->val_to_text' if $arg =~ /%v/; push @args, '$treeref->{id}' if $arg =~ /%i/; - push @args, '$treeref->{width}-$treeref->{op1}{width}' if $arg =~ /%xw/; + push @args, '$treeref->{signed}?"s":""' if $arg =~ /%xs/; + push @args, '$treeref->{width}' if $arg =~ /%xw/; + push @args, '$treeref->{width}-$treeref->{op1}{width}' if $arg =~ /%xd/; } my $func = ("sub { " @@ -848,6 +855,21 @@ sub decl_text { ####################################################################### # Math Functions +sub selftest { + my $o = {}; + VDIV($o, {val=>Bit::Vector->new_Dec(8,0xff)}, {val=>Bit::Vector->new_Dec(8,0x13)}, 0); + ($o->{val}->Word_Read(0) == 0x0d) or die; + VDIV($o, {val=>Bit::Vector->new_Dec(8,0xff)}, {val=>Bit::Vector->new_Dec(8,0x13)}, 1); + ($o->{val}->Word_Read(0) == 0x08) or die; + VDIV($o, {val=>Bit::Vector->new_Dec(8,0xff), signed=>1}, {val=>Bit::Vector->new_Dec(8,0x13), signed=>1}, 0); + ($o->{val}->Word_Read(0) == 0x00) or die; + VDIV($o, {val=>Bit::Vector->new_Dec(8,0xff), signed=>1}, {val=>Bit::Vector->new_Dec(8,0x13), signed=>1}, 1); + ($o->{val}->Word_Read(0) == 0xff) or die; + VDIV($o, {val=>Bit::Vector->new_Dec(8,0xff), signed=>1}, {val=>Bit::Vector->new_Dec(8,0xdb), signed=>1}, 1); + ($o->{val}->Word_Read(0) == 0xff) or die; + VDIV($o, {val=>Bit::Vector->new_Dec(8,0x72), signed=>1}, {val=>Bit::Vector->new_Dec(8,0xdb), signed=>1}, 1); + ($o->{val}->Word_Read(0) == 0x3) or die; +} sub val_leaf { return {width=>32, signed=>0, val=>Bit::Vector->new_Dec(32,$_[0]), text=>$_[0],}; } sub makebool { return (Bit::Vector->new_Dec(1,$_[0])); } @@ -936,25 +958,35 @@ sub VRESIZE { } sub VADD { $_[0]{val}=my $o=newsized($_[1]); $o->add($_[1],$_[2],0); } sub VSUB { $_[0]{val}=my $o=newsized($_[1]); $o->subtract($_[1],$_[2],0); } -sub VMUL { # Multiply is signed, so need an additional sign bit - my $a=$_[1]->Clone; $a->Resize($_[1]->Size + 1); - my $b=$_[2]->Clone; $b->Resize($_[1]->Size + 1); - my $mo=Bit::Vector->new($_[1]->Size + $_[2]->Size + 1); - $mo->Multiply($a,$b); - my $o=newsized($_[1]); $o->Interval_Copy($mo,0,0,$_[1]->Size); - $_[0]{val}=$o; - } -sub VDIV { my $a=$_[1]->Clone; my $b=$_[2]->Clone; # Else it will take them as signed #s - $a->Resize($a->Size + 1); $b->Resize($b->Size + 1); - print ("//DIVpp ",$_[1]->to_Hex,' ',$_[2]->to_Hex,' ',$_[1]->Size,'.',$_[2]->Size," \n"); - print ("//DIVpp ",$a->to_Hex,' ',$b->to_Hex,' ',$a->Size,'.',$b->Size," \n"); - my $o=newsized($a); my $rem=newsized($a); - if (!$_[2]->is_empty) { $o->Divide($a,$b,$rem); } # No division by zero - #push @Lines, ("//DIV ",$_[1]{val}->to_Hex,' ',$_[2]->to_Hex,' ',$o->to_Hex,'.',$rem->to_Hex," \n"); - $_[0]{val}=$o; } +sub VMUL { + # Multiply is signed, so need an additional sign bit + my $a=$_[1]->Clone; $a->Resize($a->Size + 1); + my $b=$_[2]->Clone; $b->Resize($b->Size + 1); + my $mo=Bit::Vector->new($_[1]->Size + $_[2]->Size + 1); + $mo->Multiply($a,$b); + my $o=newsized($_[1]); $o->Interval_Copy($mo,0,0,$_[1]->Size); + $_[0]{val}=$o; +} +sub VDIV { + my $is_mod = $_[3]; + if ($_[2]{val}->is_empty) { # Avoid divide by zero + $_[0]{val}=newsized($_[1]{val}); + return; + } + my $a=$_[1]{val}->Clone; if (!$_[1]->{signed}) { $a->Resize($a->Size + 1); } + my $b=$_[2]{val}->Clone; if (!$_[2]->{signed}) { $b->Resize($b->Size + 1); } + #print ("//DIVpp ",$_[1]->to_Hex,' ',$_[2]->to_Hex,' ',$_[1]->Size,'.',$_[2]->Size," \n"); + #print ("//DIVpp ",$a->to_Hex,' ',$b->to_Hex,' ',$a->Size,'.',$b->Size," \n"); + my $quo=newsized($a); my $rem=newsized($a); + $quo->Divide($a,$b,$rem); # No division by zero - handled by if above + my $o=newsized($_[1]{val}); + $o->Interval_Copy($is_mod ? $rem : $quo,0,0,$_[1]{val}->Size); + #print "//DIV",($_[1]->{signed}?"S":" "),' w',$a->Size,' ',$_[1]{val}->to_Hex,' ',$_[2]{val}->to_Hex,' =',$quo->to_Hex,'.',$rem->to_Hex," \n"; + $_[0]{val}=$o; +} sub VPOW { # Power is a signed operation - my $a=$_[1]{val}->Clone; if (!$_[1]->{Signed}) { $a->Resize($_[1]{val}->Size + 1); } - my $b=$_[2]{val}->Clone; if (!$_[2]->{Signed}) { $b->Resize($_[2]{val}->Size + 1); } + my $a=$_[1]{val}->Clone; if (!$_[1]->{signed}) { $a->Resize($_[1]{val}->Size + 1); } + my $b=$_[2]{val}->Clone; if (!$_[2]->{signed}) { $b->Resize($_[2]{val}->Size + 1); } print "VVpow = ",$_[1]{val}->to_Hex," ** ",$_[2]{val}->to_Hex,"\n"; my $mo=Bit::Vector->new($_[1]{val}->Size + 1); $mo->Power($a,$b);