From 345c9cf21cc7a7049e9babbed468df17c8fc2a19 Mon Sep 17 00:00:00 2001
From: Martin Whitaker <icarus@martin-whitaker.me.uk>
Date: Wed, 19 Feb 2014 20:11:57 +0000
Subject: [PATCH] Implement correct behaviour for signed vector power
 operations in vvp.

Signed vector power operations were being implemented using the
double-precision pow() function. This gave inaccurate results when the
operands or result were not exactly representable as a 64-bit
floating-point number.
---
 vvp/arith.cc   | 54 +++++++++++++++++++------------------
 vvp/vthread.cc | 73 ++++++++++++++++++++++++++------------------------
 2 files changed, 66 insertions(+), 61 deletions(-)

diff --git a/vvp/arith.cc b/vvp/arith.cc
index c7ee15e85..7b23e3bd6 100644
--- a/vvp/arith.cc
+++ b/vvp/arith.cc
@@ -453,35 +453,37 @@ void vvp_arith_pow::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit,
 {
       dispatch_operand_(ptr, bit);
 
-      vvp_vector4_t res4;
-      if (signed_flag_) {
-	    if (op_a_.has_xz() || op_b_.has_xz()) {
-		  ptr.ptr()->send_vec4(x_val_, 0);
-		  return;
-	    }
+      vvp_vector2_t a2 (op_a_, true);
+      vvp_vector2_t b2 (op_b_, true);
 
-	    double ad, bd, resd;
-	    vector4_to_value(op_a_, ad, true);
-	    vector4_to_value(op_b_, bd, true);
-	      /* 2**-1 and -2**-1 are defined to be zero. */
-	    if ((bd == -1) && (fabs(ad) == 2.0)) resd = 0.0;
-	    else resd = pow(ad, bd);
-
-	    res4 = vvp_vector4_t(wid_, resd);
-      } else {
-	    vvp_vector2_t a2 (op_a_, true);
-	    vvp_vector2_t b2 (op_b_, true);
-
-	    if (a2.is_NaN() || b2.is_NaN()) {
-		  ptr.ptr()->send_vec4(x_val_, 0);
-		  return;
-	    }
-
-	    vvp_vector2_t result = pow(a2, b2);
-	    res4 = vector2_to_vector4(result, wid_);
+        // If we have an X or Z in the arguments, return X.
+      if (a2.is_NaN() || b2.is_NaN()) {
+	    ptr.ptr()->send_vec4(x_val_, 0);
+	    return;
       }
 
-      ptr.ptr()->send_vec4(res4, 0);
+	// Is the exponent negative? If so, table 5-6 in IEEE1364-2005
+	// defines what value is returned.
+      if (signed_flag_ && b2.value(b2.size()-1)) {
+	    int a_val;
+	    double r_val = 0.0;
+	    if (vector2_to_value(a2, a_val, true)) {
+		  if (a_val == 0) {
+			ptr.ptr()->send_vec4(x_val_, 0);
+			return;
+		  }
+		  if (a_val == 1) {
+			r_val = 1.0;
+		  }
+		  if (a_val == -1) {
+			r_val = b2.value(0) ? -1.0 : 1.0;
+		  }
+	    }
+	    ptr.ptr()->send_vec4(vvp_vector4_t(wid_, r_val), 0);
+	    return;
+      }
+
+      ptr.ptr()->send_vec4(vector2_to_vector4(pow(a2, b2), wid_), 0);
 }
 
 
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index 73d9b41f5..5b5171b9b 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -4540,24 +4540,50 @@ bool of_POP_STR(vthread_t thr, vvp_code_t cp)
       return true;
 }
 
-bool of_POW(vthread_t thr, vvp_code_t cp)
+static bool of_POW_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
 {
       assert(cp->bit_idx[0] >= 4);
 
       unsigned idx = cp->bit_idx[0];
       unsigned idy = cp->bit_idx[1];
       unsigned wid = cp->number;
-      vvp_vector2_t xv2 = vvp_vector2_t(vthread_bits_to_vector(thr, idx, wid), true);
-      vvp_vector2_t yv2 = vvp_vector2_t(vthread_bits_to_vector(thr, idy, wid), true);
+      vvp_vector2_t a2 = vvp_vector2_t(vthread_bits_to_vector(thr, idx, wid), true);
+      vvp_vector2_t b2 = vvp_vector2_t(vthread_bits_to_vector(thr, idy, wid), true);
 
-        /* If we have an X or Z in the arguments return X. */
-      if (xv2.is_NaN() || yv2.is_NaN()) {
+        // If we have an X or Z in the arguments, return X.
+      if (a2.is_NaN() || b2.is_NaN()) {
 	    for (unsigned jdx = 0 ;  jdx < wid ;  jdx += 1)
 		  thr_put_bit(thr, cp->bit_idx[0]+jdx, BIT4_X);
 	    return true;
       }
 
-      vvp_vector2_t result = pow(xv2, yv2);
+	// Is the exponent negative? If so, table 5-6 in IEEE1364-2005
+	// defines what value is returned.
+      if (signed_flag && b2.value(b2.size()-1)) {
+	    int a_val;
+	    vvp_bit4_t pad = BIT4_0, lsb = BIT4_0;
+	    if (vector2_to_value(a2, a_val, true)) {
+		  if (a_val == 0) {
+			pad = BIT4_X; lsb = BIT4_X;
+		  }
+		  if (a_val == 1) {
+			pad = BIT4_0; lsb = BIT4_1;
+		  }
+		  if (a_val == -1) {
+			if (b2.value(0)) {
+			      pad = BIT4_1; lsb = BIT4_1;
+			} else {
+			      pad = BIT4_0; lsb = BIT4_1;
+			}
+		  }
+	    }
+	    thr_put_bit(thr, cp->bit_idx[0], lsb);
+	    for (unsigned jdx = 1 ;  jdx < wid ;  jdx += 1)
+		  thr_put_bit(thr, cp->bit_idx[0]+jdx, pad);
+	    return true;
+      }
+
+      vvp_vector2_t result = pow(a2, b2);
 
         /* Copy only what we need of the result. */
       for (unsigned jdx = 0;  jdx < wid;  jdx += 1)
@@ -4567,37 +4593,14 @@ bool of_POW(vthread_t thr, vvp_code_t cp)
       return true;
 }
 
+bool of_POW(vthread_t thr, vvp_code_t cp)
+{
+      return of_POW_base(thr, cp, false);
+}
+
 bool of_POW_S(vthread_t thr, vvp_code_t cp)
 {
-      assert(cp->bit_idx[0] >= 4);
-
-      unsigned idx = cp->bit_idx[0];
-      unsigned idy = cp->bit_idx[1];
-      unsigned wid = cp->number;
-      vvp_vector4_t xv = vthread_bits_to_vector(thr, idx, wid);
-      vvp_vector4_t yv = vthread_bits_to_vector(thr, idy, wid);
-
-        /* If we have an X or Z in the arguments return X. */
-      if (xv.has_xz() || yv.has_xz()) {
-	    for (unsigned jdx = 0 ;  jdx < wid ;  jdx += 1)
-		  thr_put_bit(thr, cp->bit_idx[0]+jdx, BIT4_X);
-	    return true;
-      }
-
-        /* Calculate the result using the double pow() function. */
-      double xd, yd, resd;
-      vector4_to_value(xv, xd, true);
-      vector4_to_value(yv, yd, true);
-	/* 2**-1 and -2**-1 are defined to be zero. */
-      if ((yd == -1.0) && (fabs(xd) == 2.0)) resd = 0.0;
-      else resd = pow(xd, yd);
-      vvp_vector4_t res = vvp_vector4_t(wid, resd);
-
-        /* Copy the result. */
-      for (unsigned jdx = 0;  jdx < wid;  jdx += 1)
-	    thr_put_bit(thr, cp->bit_idx[0]+jdx, res.value(jdx));
-
-      return true;
+      return of_POW_base(thr, cp, true);
 }
 
 bool of_POW_WR(vthread_t thr, vvp_code_t)