Improve vvp handling of excessively large shift distances.

2019-11-16 12:11:49 +00:00 · 2019-11-16 12:11:49 +00:00 · 351a4e5f5e
parent 0a4cae2644
commit 351a4e5f5e
5 changed files with 43 additions and 22 deletions
--- a/vvp/arith.cc
+++ b/vvp/arith.cc
@ -955,13 +955,14 @@ void vvp_shiftl::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit,

      vvp_vector4_t out (op_a_.size());

+      bool overflow_flag;
      unsigned long shift;
-      if (! vector4_to_value(op_b_, shift)) {
+      if (! vector4_to_value(op_b_, overflow_flag, shift)) {
 	    ptr.ptr()->send_vec4(x_val_, 0);
 	    return;
      }

-      if (shift > out.size())
+      if (overflow_flag || shift > out.size())
 	    shift = out.size();

      for (unsigned idx = 0 ;  idx < shift ;  idx += 1)
@ -989,13 +990,14 @@ void vvp_shiftr::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit,

      vvp_vector4_t out (op_a_.size());

+      bool overflow_flag;
      unsigned long shift;
-      if (! vector4_to_value(op_b_, shift)) {
+      if (! vector4_to_value(op_b_, overflow_flag, shift)) {
 	    ptr.ptr()->send_vec4(x_val_, 0);
 	    return;
      }

-      if (shift > out.size())
+      if (overflow_flag || shift > out.size())
 	    shift = out.size();

      for (unsigned idx = shift ;  idx < out.size() ;  idx += 1)
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@ -621,12 +621,13 @@ or z, then the index register gets the value 0. The %ix/vec4/s
 instruction is the same, except that it assumes the source vector is
 sign extended to fit the index register.

-The instruction also writes into bit 4 a 1 if any of the bits of the
+The instruction also writes a 1 into flag 4 if any of the bits of the
 input vector are x or z. This is a flag that the 0 value written into
 the index register is really the result of calculating from unknown
-bits.
+bits. It writes an X into flag 4 if the vec4 value overflows the index
+register.

-	4: unknown value
+	4: unknown value or overflow
 	5: (reserved)
 	6: (reserved)

@ -634,8 +635,8 @@ bits.
 * %ix/getv/s <idx>, <functor-label>

 These instructions are like the %ix/vec4 instructions, except that they
-read directly from a functor label instead of from thread bits. They
-set bit 4 just like %ix/get.
+read directly from a functor label instead of from thread bits. They set
+flag 4 just like %ix/get (overflow is not currently checked by %ix/getv/s).

 * %ix/load <idx>, <low>, <high>

--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@ -3311,8 +3311,9 @@ bool of_IX_GETV(vthread_t thr, vvp_code_t cp)

      vvp_vector4_t vec;
      sig->vec4_value(vec);
+      bool overflow_flag;
      uint64_t val;
-      bool known_flag = vector4_to_value(vec, val);
+      bool known_flag = vector4_to_value(vec, overflow_flag, val);

      if (known_flag)
 	    thr->words[index].w_uint = val;
@ -3320,7 +3321,7 @@ bool of_IX_GETV(vthread_t thr, vvp_code_t cp)
 	    thr->words[index].w_uint = 0;

 	/* Set bit 4 as a flag if the input is unknown. */
-      thr->flags[4] = known_flag ? BIT4_0 : BIT4_1;
+      thr->flags[4] = known_flag ? (overflow_flag ? BIT4_X : BIT4_0) : BIT4_1;

      return true;
 }
@ -3376,12 +3377,19 @@ static uint64_t vec4_to_index(vthread_t thr, bool signed_flag)
      thr->flags[4] = BIT4_0;

      assert(sizeof(bits[0]) <= sizeof(v));
-	//assert(val_size <= 8*sizeof(v));

      v = 0;
      for (unsigned idx = 0 ; idx < val_size ; idx += 8*sizeof(bits[0])) {
 	    uint64_t tmp = bits[idx/8/sizeof(bits[0])];
-	    v |= tmp << idx;
+	    if (idx < 8*sizeof(v)) {
+		  v |= tmp << idx;
+	    } else {
+		  bool overflow = signed_flag && (v >> 63) ? ~tmp != 0 : tmp != 0;
+		  if (overflow) {
+			thr->flags[4] = BIT4_X;
+			break;
+		  }
+	    }
      }

 	// Set the high bits that are not necessarily filled in by the
@ -5411,7 +5419,7 @@ bool of_SHIFTL(vthread_t thr, vvp_code_t cp)
 	      // The result is 'bx if the shift amount is undefined
 	    val = vvp_vector4_t(wid, BIT4_X);

-      } else if (shift >= wid) {
+      } else if (thr->flags[4] == BIT4_X || shift >= wid) {
 	      // Shift is so big that all value is shifted out. Write
 	      // a constant 0 result.
 	    val = vvp_vector4_t(wid, BIT4_0);
@ -5443,7 +5451,7 @@ bool of_SHIFTR(vthread_t thr, vvp_code_t cp)
      if (thr->flags[4] == BIT4_1) {
 	    val = vvp_vector4_t(wid, BIT4_X);

-      } else if (shift > wid) {
+      } else if (thr->flags[4] == BIT4_X || shift > wid) {
 	    val = vvp_vector4_t(wid, BIT4_0);

      } else if (shift > 0) {
@ -5473,7 +5481,7 @@ bool of_SHIFTR_S(vthread_t thr, vvp_code_t cp)
      if (thr->flags[4] == BIT4_1) {
 	    val = vvp_vector4_t(wid, BIT4_X);

-      } else if (shift > wid) {
+      } else if (thr->flags[4] == BIT4_X || shift > wid) {
 	    val = vvp_vector4_t(wid, sign_bit);

      } else if (shift > 0) {
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@ -2042,20 +2042,21 @@ template bool vector4_to_value(const vvp_vector4_t&vec, uint32_t&val,
 template bool vector4_to_value(const vvp_vector4_t&vec, uint64_t&val,
 			       bool is_signed, bool is_arithmetic);

-template <class T> bool vector4_to_value(const vvp_vector4_t&vec, T&val)
+template <class T> bool vector4_to_value(const vvp_vector4_t&vec,
+                                         bool&overflow_flag, T&val)
 {
      T res = 0;
      T msk = 1;

+      overflow_flag = false;
      unsigned size = vec.size();
      for (unsigned idx = 0 ;  idx < size ;  idx += 1) {
 	    switch (vec.value(idx)) {
 		case BIT4_0:
 		  break;
 		case BIT4_1:
-		    // On overflow, return the maximum value of type T
 		  if (msk == 0)
-			res = ~msk;
+			overflow_flag = true;
 		  else
 			res |= msk;
 		  break;
@ -2070,9 +2071,11 @@ template <class T> bool vector4_to_value(const vvp_vector4_t&vec, T&val)
      return true;
 }

-template bool vector4_to_value(const vvp_vector4_t&vec, unsigned long&val);
+template bool vector4_to_value(const vvp_vector4_t&vec, bool&overflow_flag,
+                               unsigned long&val);
 #ifndef UL_AND_TIME64_SAME
-template bool vector4_to_value(const vvp_vector4_t&vec, vvp_time64_t&val);
+template bool vector4_to_value(const vvp_vector4_t&vec, bool&overflow_flag,
+                               vvp_time64_t&val);
 #endif

 bool vector4_to_value(const vvp_vector4_t&vec, double&val, bool signed_flag)
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@ -558,7 +558,14 @@ template <class T> extern bool vector4_to_value(const vvp_vector4_t&a, T&val,
 						bool is_signed,
 						bool is_arithmetic =true);

-template <class T> extern bool vector4_to_value(const vvp_vector4_t&a, T&val);
+template <class T> extern bool vector4_to_value(const vvp_vector4_t&a,
+                                                bool&overflow_flag, T&val);
+
+template <class T> inline bool vector4_to_value(const vvp_vector4_t&a, T&val)
+{	// Two-argument overload: forwards to the three-argument form.
+      bool overflow_flag;	// written by the callee, never read here
+      return vector4_to_value(a, overflow_flag, val);
+}

 extern bool vector4_to_value(const vvp_vector4_t&a, double&val, bool is_signed);