From 13cad6f268f92cd4c86c1ab06edb843fad2aa280 Mon Sep 17 00:00:00 2001
From: Martin Whitaker <icarus@martin-whitaker.me.uk>
Date: Sat, 19 Dec 2009 18:00:08 +0000
Subject: [PATCH] Make vvp thread word storage consistently 64 bits.

The vvp thread word storage had previously been changed to always store
64-bit values, but some instructions still only operate on native long
values. This patch ensures all instructions that modify thread words
support 64-bit values.
---
 vvp/README.txt  |   5 ++-
 vvp/codes.h     |   2 +-
 vvp/compile.cc  |   2 +-
 vvp/config.h.in |  33 ++++++++++++--
 vvp/main.cc     |  16 -------
 vvp/opcodes.txt |   4 +-
 vvp/vthread.cc  | 112 +++++++++++++++++++++++-------------------------
 vvp/vvp_net.cc  |  35 +++++++++++++++
 vvp/vvp_net.h   |   1 +
 9 files changed, 125 insertions(+), 85 deletions(-)

diff --git a/vvp/README.txt b/vvp/README.txt
index 617eec689..7deb3b04c 100644
--- a/vvp/README.txt
+++ b/vvp/README.txt
@@ -761,8 +761,9 @@ make vectors of 0, 1, x or z values, so these can be used to
 manufacture complex values elsewhere.
 
 The word memory is a region of tagged words. The value in each word
-may be native long or real. These words have a distinct address space
-from the bits.
+may be either a 64-bit unsigned integer (uint64_t), a 64-bit signed
+integer (int64_t), or a 64-bit floating point number (double). These
+words have a distinct address space from the bits.
 
 * Threads and scopes
 
diff --git a/vvp/codes.h b/vvp/codes.h
index 0c5110b46..3e721d51a 100644
--- a/vvp/codes.h
+++ b/vvp/codes.h
@@ -99,7 +99,7 @@ extern bool of_INV(vthread_t thr, vvp_code_t code);
 extern bool of_IX_ADD(vthread_t thr, vvp_code_t code);
 extern bool of_IX_GET(vthread_t thr, vvp_code_t code);
 extern bool of_IX_GETV(vthread_t thr, vvp_code_t code);
-extern bool of_IX_GETVS(vthread_t thr, vvp_code_t code);
+extern bool of_IX_GETV_S(vthread_t thr, vvp_code_t code);
 extern bool of_IX_GET_S(vthread_t thr, vvp_code_t code);
 extern bool of_IX_LOAD(vthread_t thr, vvp_code_t code);
 extern bool of_IX_MUL(vthread_t thr, vvp_code_t code);
diff --git a/vvp/compile.cc b/vvp/compile.cc
index 0557d533a..71515761f 100644
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@@ -144,7 +144,7 @@ const static struct opcode_table_s opcode_table[] = {
       { "%ix/get", of_IX_GET, 3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%ix/get/s",of_IX_GET_S,3,{OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%ix/getv",of_IX_GETV,2,  {OA_BIT1,     OA_FUNC_PTR, OA_NONE} },
-      { "%ix/getv/s",of_IX_GETVS,2, {OA_BIT1,   OA_FUNC_PTR, OA_NONE} },
+      { "%ix/getv/s",of_IX_GETV_S,2, {OA_BIT1,   OA_FUNC_PTR, OA_NONE} },
       { "%ix/load",of_IX_LOAD,3,  {OA_NUMBER,   OA_BIT1,     OA_BIT2} },
       { "%ix/mul", of_IX_MUL, 3,  {OA_NUMBER,   OA_BIT1,     OA_BIT2} },
       { "%ix/sub", of_IX_SUB, 3,  {OA_NUMBER,   OA_BIT1,     OA_BIT2} },
diff --git a/vvp/config.h.in b/vvp/config.h.in
index 50b4c5450..2a6885b41 100644
--- a/vvp/config.h.in
+++ b/vvp/config.h.in
@@ -135,13 +135,38 @@ typedef unsigned long vvp_time64_t;
 # undef LINUX
 
 #if !defined(HAVE_LROUND)
-#if defined(__cplusplus)
-extern "C" long lround(double x);
+/*
+ * If the system doesn't provide the lround function, then we provide
+ * it ourselves here. It is simply the nearest integer, rounded away
+ * from zero.
+ */
+inline long int lround(double x)
+{
+      if (x >= 0.0)
+	    return (long)floor(x+0.5);
+      else
+	    return (long)ceil(x-0.5);
+}
+/*
+ * We also need an equivalent function with a 64-bit return value.
+ */
+inline int64_t i64round(double x)
+{
+      if (x >= 0.0)
+	    return (int64_t)floor(x+0.5);
+      else
+	    return (int64_t)ceil(x-0.5);
+}
+#else /* HAVE_LROUND */
+
+#if ((SIZEOF_UNSIGNED_LONG < 8) && (SIZEOF_UNSIGNED_LONG_LONG >= 8))
+# define i64round llround
 #else
-extern long lround(double x);
-#endif
+# define i64round lround
 #endif
 
+#endif /* HAVE_LROUND */
+
 #if !defined(HAVE_NAN)
 # define nan(x) (NAN)
 #endif
diff --git a/vvp/main.cc b/vvp/main.cc
index 7f6fbc700..bb447731f 100644
--- a/vvp/main.cc
+++ b/vvp/main.cc
@@ -52,22 +52,6 @@ extern "C" int optind;
 extern "C" const char*optarg;
 #endif
 
-#if !defined(HAVE_LROUND)
-/*
- * If the system doesn't provide the lround function, then we provide
- * it ourselves here. It is simply the nearest integer, rounded away
- * from zero.
- */
-# include  <math.h>
-extern "C" long int lround(double x)
-{
-      if (x >= 0.0)
-	    return (long)floor(x+0.5);
-      else
-	    return (long)ceil(x-0.5);
-}
-#endif
-
 bool verbose_flag = false;
 bool version_flag = false;
 static int vvp_return_value = 0;
diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt
index 54e49c152..a89c0d91a 100644
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@@ -440,8 +440,8 @@ set bit 4 just like %ix/get.
 * %ix/load <idx>, <low>, <high>
 
 This instruction loads an immediate value into the addressed index
-register. The index register holds signed 64 bit numeric values, so
-<low> and <high> are used to separate the value in two 32 bit chunks.
+register. The index register holds 64 bit numeric values, so <low>
+and <high> are used to separate the value in two 32 bit chunks.
 The idx value selects the index register. This is different from
 %ix/get, which loads the index register from a value in the thread bit
 vector. The values are unsigned decimal values and are combined as
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index 41904e1b3..55362c6cb 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -833,7 +833,7 @@ bool of_ASSIGN_AR(vthread_t thr, vvp_code_t cp)
 bool of_ASSIGN_ARD(vthread_t thr, vvp_code_t cp)
 {
       long adr = thr->words[3].w_int;
-      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_int;
+      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
       double value = thr->words[cp->bit_idx[1]].w_real;
 
       if (adr >= 0) {
@@ -916,7 +916,7 @@ bool of_ASSIGN_AVD(vthread_t thr, vvp_code_t cp)
       unsigned wid = thr->words[0].w_int;
       long off = thr->words[1].w_int;
       long adr = thr->words[3].w_int;
-      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_int;
+      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
       unsigned bit = cp->bit_idx[1];
 
       if (adr < 0) return true;
@@ -1014,7 +1014,7 @@ bool of_ASSIGN_V0D(vthread_t thr, vvp_code_t cp)
       unsigned wid = thr->words[0].w_int;
       assert(wid > 0);
 
-      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_int;
+      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
       unsigned bit = cp->bit_idx[1];
 
       vvp_net_ptr_t ptr (cp->net, 0);
@@ -1101,7 +1101,7 @@ bool of_ASSIGN_V0X1D(vthread_t thr, vvp_code_t cp)
 {
       unsigned wid = thr->words[0].w_int;
       long off = thr->words[1].w_int;
-      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_int;
+      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
       unsigned bit = cp->bit_idx[1];
 
       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
@@ -1207,7 +1207,7 @@ bool of_ASSIGN_WR(vthread_t thr, vvp_code_t cp)
 
 bool of_ASSIGN_WRD(vthread_t thr, vvp_code_t cp)
 {
-      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_int;
+      vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
       unsigned index = cp->bit_idx[1];
       s_vpi_time del;
 
@@ -1777,14 +1777,14 @@ bool of_CMPZ(vthread_t thr, vvp_code_t cp)
 bool of_CVT_IR(vthread_t thr, vvp_code_t cp)
 {
       double r = thr->words[cp->bit_idx[1]].w_real;
-      thr->words[cp->bit_idx[0]].w_int = lround(r);
+      thr->words[cp->bit_idx[0]].w_int = i64round(r);
 
       return true;
 }
 
 bool of_CVT_RI(vthread_t thr, vvp_code_t cp)
 {
-      long r = thr->words[cp->bit_idx[1]].w_int;
+      int64_t r = thr->words[cp->bit_idx[1]].w_int;
       thr->words[cp->bit_idx[0]].w_real = (double)(r);
 
       return true;
@@ -1888,10 +1888,10 @@ bool of_DELAY(vthread_t thr, vvp_code_t cp)
 
 bool of_DELAYX(vthread_t thr, vvp_code_t cp)
 {
-      unsigned long delay;
+      vvp_time64_t delay;
 
       assert(cp->number < 4);
-      delay = thr->words[cp->number].w_int;
+      delay = thr->words[cp->number].w_uint;
       schedule_vthread(thr, delay);
       return false;
 }
@@ -2624,39 +2624,15 @@ bool of_IX_LOAD(vthread_t thr, vvp_code_t cp)
  * bits of the vector are x or z, then set the value to 0,
  * set bit[4] to 1, and give up.
  */
-bool of_IX_GET(vthread_t thr, vvp_code_t cp)
+
+static uint64_t vector_to_index(vthread_t thr, unsigned base,
+                                unsigned width, bool signed_flag)
 {
-      unsigned index = cp->bit_idx[0];
-      unsigned base  = cp->bit_idx[1];
-      unsigned width = cp->number;
-
-      unsigned long*array = vector_to_array(thr, base, width);
-      if (array == 0) {
-	      /* If there are unknowns in the vector bits, then give
-		 up immediately. Set the value to 0, and set thread
-		 bit 4 to 1 to flag the error. */
-	    thr->words[index].w_int = 0;
-	    thr_put_bit(thr, 4, BIT4_1);
-	    return true;
-      }
-
-      thr->words[index].w_int = array[0];
-      thr_put_bit(thr, 4, BIT4_0);
-      delete[]array;
-      return true;
-}
-
-bool of_IX_GET_S(vthread_t thr, vvp_code_t cp)
-{
-      unsigned index = cp->bit_idx[0];
-      unsigned base  = cp->bit_idx[1];
-      unsigned width = cp->number;
-
       uint64_t v = 0;
       bool unknown_flag = false;
 
       vvp_bit4_t vv = BIT4_0;
-      for (unsigned i = 0 ;  i<width ;  i += 1) {
+      for (unsigned i = 0 ;  i < width ;  i += 1) {
 	    vv = thr_get_bit(thr, base);
 	    if (bit4_is_xz(vv)) {
 		  v = 0UL;
@@ -2670,19 +2646,37 @@ bool of_IX_GET_S(vthread_t thr, vvp_code_t cp)
 		  base += 1;
       }
 
-	/* Sign-extend to fill the integer value. */
-      if (!unknown_flag) {
+	/* Extend to fill the integer value. */
+      if (signed_flag && !unknown_flag) {
 	    uint64_t pad = vv;
 	    for (unsigned i = width ; i < 8*sizeof(v) ;  i += 1) {
 		  v |= pad << i;
 	    }
       }
 
-      thr->words[index].w_int = v;
-
 	/* Set bit 4 as a flag if the input is unknown. */
-      thr_put_bit(thr, 4, unknown_flag? BIT4_1 : BIT4_0);
+      thr_put_bit(thr, 4, unknown_flag ? BIT4_1 : BIT4_0);
 
+      return v;
+}
+
+bool of_IX_GET(vthread_t thr, vvp_code_t cp)
+{
+      unsigned index = cp->bit_idx[0];
+      unsigned base  = cp->bit_idx[1];
+      unsigned width = cp->number;
+
+      thr->words[index].w_uint = vector_to_index(thr, base, width, false);
+      return true;
+}
+
+bool of_IX_GET_S(vthread_t thr, vvp_code_t cp)
+{
+      unsigned index = cp->bit_idx[0];
+      unsigned base  = cp->bit_idx[1];
+      unsigned width = cp->number;
+
+      thr->words[index].w_int = vector_to_index(thr, base, width, true);
       return true;
 }
 
@@ -2701,27 +2695,28 @@ bool of_IX_GETV(vthread_t thr, vvp_code_t cp)
 
       vvp_vector4_t vec;
       sig->vec4_value(vec);
-      unsigned long val;
+      uint64_t val;
       bool known_flag = vector4_to_value(vec, val);
 
       if (known_flag)
-	    thr->words[index].w_int = val;
+	    thr->words[index].w_uint = val;
       else
-	    thr->words[index].w_int = 0;
+	    thr->words[index].w_uint = 0;
 
 	/* Set bit 4 as a flag if the input is unknown. */
-      thr_put_bit(thr, 4, known_flag? BIT4_0 : BIT4_1);
+      thr_put_bit(thr, 4, known_flag ? BIT4_0 : BIT4_1);
 
       return true;
 }
 
-bool of_IX_GETVS(vthread_t thr, vvp_code_t cp)
+bool of_IX_GETV_S(vthread_t thr, vvp_code_t cp)
 {
       unsigned index = cp->bit_idx[0];
       vvp_net_t*net = cp->net;
 
       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*>(net->fil);
       if (sig == 0) {
+	    assert(net->fil);
 	    cerr << "%%ix/getv/s error: Net arg not a vector signal? "
 		 << "fun=" << typeid(*net->fil).name()
 		 << ", fil=" << (net->fil? typeid(*net->fil).name() : "<>")
@@ -2731,7 +2726,7 @@ bool of_IX_GETVS(vthread_t thr, vvp_code_t cp)
 
       vvp_vector4_t vec;
       sig->vec4_value(vec);
-      long val;
+      int64_t val;
       bool known_flag = vector4_to_value(vec, val, true, true);
 
       if (known_flag)
@@ -2740,7 +2735,7 @@ bool of_IX_GETVS(vthread_t thr, vvp_code_t cp)
 	    thr->words[index].w_int = 0;
 
 	/* Set bit 4 as a flag if the input is unknown. */
-      thr_put_bit(thr, 4, known_flag? BIT4_0 : BIT4_1);
+      thr_put_bit(thr, 4, known_flag ? BIT4_0 : BIT4_1);
 
       return true;
 }
@@ -2922,6 +2917,11 @@ bool of_LOAD_AV(vthread_t thr, vvp_code_t cp)
 /*
  * %load/vp0, %load/vp0/s, %load/avp0 and %load/avp0/s share this function.
 */
+#if (SIZEOF_UNSIGNED_LONG >= 8)
+# define CPU_WORD_STRIDE CPU_WORD_BITS - 1  // avoid a warning
+#else
+# define CPU_WORD_STRIDE CPU_WORD_BITS
+#endif
 static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value)
 {
       unsigned bit = cp->bit_idx[0];
@@ -2941,16 +2941,10 @@ static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&si
       unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;
       unsigned long carry = 0;
       unsigned long imm = addend;
-      if (addend >= 0) {
-	    for (unsigned idx = 0 ; idx < words ; idx += 1) {
-		  val[idx] = add_with_carry(val[idx], imm, carry);
-		  imm = 0UL;
-	    }
-      } else {
-	    for (unsigned idx = 0 ; idx < words ; idx += 1) {
-		  val[idx] = add_with_carry(val[idx], imm, carry);
-		  imm = -1UL;
-	    }
+      for (unsigned idx = 0 ; idx < words ; idx += 1) {
+            val[idx] = add_with_carry(val[idx], imm, carry);
+            addend >>= CPU_WORD_STRIDE;
+            imm = addend;
       }
 
 	/* Copy the vector bits into the bits4 vector. Do the copy
diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc
index 1284ce108..b9d1a1198 100644
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@@ -1510,6 +1510,41 @@ bool vector4_to_value(const vvp_vector4_t&vec, unsigned long&val)
 }
 
 #ifndef UL_AND_TIME64_SAME
+bool vector4_to_value(const vvp_vector4_t&vec, int64_t&val,
+		      bool is_signed, bool is_arithmetic)
+{
+      long res = 0;
+      long msk = 1;
+      bool rc_flag = true;
+
+      unsigned size = vec.size();
+      if (size > 8*sizeof(val)) size = 8*sizeof(val);
+      for (unsigned idx = 0 ;  idx < size ;  idx += 1) {
+	    switch (vec.value(idx)) {
+		case BIT4_0:
+		  break;
+		case BIT4_1:
+		  res |= msk;
+		  break;
+		default:
+		  if (is_arithmetic)
+			return false;
+		  else
+			rc_flag = false;
+	    }
+
+	    msk <<= 1L;
+      }
+
+      if (is_signed && vec.value(vec.size()-1) == BIT4_1) {
+	    if (vec.size() < 8*sizeof(val))
+		  res |= (-1L) << vec.size();
+      }
+
+      val = res;
+      return rc_flag;
+}
+
 bool vector4_to_value(const vvp_vector4_t&vec, vvp_time64_t&val)
 {
       vvp_time64_t res = 0;
diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h
index b5384393a..b264a0a26 100644
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@@ -475,6 +475,7 @@ template <class T> extern T coerce_to_width(const T&that, unsigned width);
 extern bool vector4_to_value(const vvp_vector4_t&a, long&val, bool is_signed, bool is_arithmetic =true);
 extern bool vector4_to_value(const vvp_vector4_t&a, unsigned long&val);
 #ifndef UL_AND_TIME64_SAME
+extern bool vector4_to_value(const vvp_vector4_t&a, int64_t&val, bool is_signed, bool is_arithmetic =true);
 extern bool vector4_to_value(const vvp_vector4_t&a, vvp_time64_t&val);
 #endif
 extern bool vector4_to_value(const vvp_vector4_t&a, double&val, bool is_signed);