From ca517b5519ea046d398c2da59174c7f386d07610 Mon Sep 17 00:00:00 2001
From: Stephen Williams <steve@icarus.com>
Date: Tue, 6 May 2008 22:19:59 -0700
Subject: [PATCH] Handle corner cases of abs(), min() and max()

The abs() function needs to be able to turn -0.0 into 0.0. This proved
to be too clunky (and perhaps impossible) to do with tests and jumps,
so add an %abs/wr opcode to do it using fabs().

The min/max functions need to take special care with the handling
of NaN operands. These matter, so generate the extra code to handle
them.
---
 tgt-vvp/eval_real.c | 44 ++++++++++++++++++++++++--------------------
 vvp/codes.h         |  1 +
 vvp/compile.cc      |  1 +
 vvp/opcodes.txt     |  4 ++++
 vvp/vthread.cc      |  8 ++++++++
 5 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/tgt-vvp/eval_real.c b/tgt-vvp/eval_real.c
index cb3fda4f6..96405d6dc 100644
--- a/tgt-vvp/eval_real.c
+++ b/tgt-vvp/eval_real.c
@@ -115,18 +115,36 @@ static int draw_binary_real(ivl_expr_t exp)
 
 	  case 'm': { // min(l,r)
 		int lab_out = local_count++;
+		int lab_r = local_count++;
+		  /* If r is NaN, then go out and accept l as result. */
+		fprintf(vvp_out, "  %%cmp/wr %d, %d; Is NaN?\n", r, r);
+		fprintf(vvp_out, "  %%jmp/0xz T_%d.%d, 4;\n", thread_count, lab_out);
+		  /* If l is NaN, then go out and accept r as result. */
+		fprintf(vvp_out, "  %%cmp/wr %d, %d; Is NaN?\n", l, l);
+		fprintf(vvp_out, "  %%jmp/0xz T_%d.%d, 4;\n", thread_count, lab_r);
+		  /* If l <= r then go out. */
 		fprintf(vvp_out, "   %%cmp/wr %d, %d;\n", r, l);
 		fprintf(vvp_out, "   %%jmp/0xz T_%d.%d, 5;\n", thread_count, lab_out);
-		fprintf(vvp_out, "   %%mov/wr %d, %d;\n", l, r);
+		  /* At this point we know we want r as the result. */
+		fprintf(vvp_out, "T_%d.%d %%mov/wr %d, %d;\n", thread_count, lab_r, l, r);
 		fprintf(vvp_out, "T_%d.%d ;\n", thread_count, lab_out);
 		break;
 	  }
 
 	  case 'M': { // max(l,r)
 		int lab_out = local_count++;
-		fprintf(vvp_out, "   %%cmp/wr %d, %d;\n", l, r);
-		fprintf(vvp_out, "   %%jmp/0xz T_%d.%d, 5;\n", thread_count, lab_out);
-		fprintf(vvp_out, "   %%mov/wr %d, %d;\n", l, r);
+		int lab_r = local_count++;
+		  /* If r is NaN, then go out and accept l as result. */
+		fprintf(vvp_out, "  %%cmp/wr %d, %d; Is NaN?\n", r, r);
+		fprintf(vvp_out, "  %%jmp/0xz T_%d.%d, 4;\n", thread_count, lab_out);
+		  /* If l is NaN, then go out and accept r as result. */
+		fprintf(vvp_out, "  %%cmp/wr %d, %d; Is NaN?\n", l, l);
+		fprintf(vvp_out, "  %%jmp/0xz T_%d.%d, 4;\n", thread_count, lab_r);
+		  /* if l >= r then go out. */
+		fprintf(vvp_out, "  %%cmp/wr %d, %d;\n", l, r);
+		fprintf(vvp_out, "  %%jmp/0xz T_%d.%d, 5;\n", thread_count, lab_out);
+
+		fprintf(vvp_out, "T_%d.%d %%mov/wr %d, %d;\n", thread_count, lab_r, l, r);
 		fprintf(vvp_out, "T_%d.%d ;\n", thread_count, lab_out);
 		break;
 	  }
@@ -439,22 +457,8 @@ static int draw_unary_real(ivl_expr_t exp)
       }
 
       if (ivl_expr_opcode(exp) == 'm') { /* abs(sube) */
-	    unsigned lab_positive = local_count++;
-	    unsigned lab_out = local_count++;
-	    int res = allocate_word();
-	    fprintf(vvp_out, "   %%loadi/wr %d, 0, 0; load 0.0 -- %d = abs(%d)\n",
-		    res, res, sub);
-	    fprintf(vvp_out, "   %%cmp/wr %d, %d;\n", sub, res);
-	    fprintf(vvp_out, "   %%jmp/0xz T_%d.%d, 5;\n",
-		    thread_count, lab_positive);
-	    fprintf(vvp_out, "   %%sub/wr %d, %d;\n", res, sub);
-	    fprintf(vvp_out, "   %%jmp T_%d.%d;\n", thread_count, lab_out);
-	    fprintf(vvp_out, "T_%d.%d %%mov/wr %d, %d;\n",
-		    thread_count, lab_positive, res, sub);
-	    fprintf(vvp_out, "T_%d.%d ;\n", thread_count, lab_out);
-
-	    clr_word(sub);
-	    return res;
+	    fprintf(vvp_out, "    %%abs/wr %d, %d;\n", sub, sub);
+	    return sub;
       }
 
       fprintf(vvp_out, "; XXXX unary (%c)\n", ivl_expr_opcode(exp));
diff --git a/vvp/codes.h b/vvp/codes.h
index 5a74ed374..cb033f16f 100644
--- a/vvp/codes.h
+++ b/vvp/codes.h
@@ -32,6 +32,7 @@ typedef bool (*vvp_code_fun)(vthread_t thr, vvp_code_t code);
  * implementation lives in the vthread.cc file so that they have
  * access to the thread context.
  */
+extern bool of_ABS_WR(vthread_t thr, vvp_code_t code);
 extern bool of_ADD(vthread_t thr, vvp_code_t code);
 extern bool of_ADD_WR(vthread_t thr, vvp_code_t code);
 extern bool of_ADDI(vthread_t thr, vvp_code_t code);
diff --git a/vvp/compile.cc b/vvp/compile.cc
index a8f2f7069..5d6e819a8 100644
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@@ -82,6 +82,7 @@ struct opcode_table_s {
 };
 
 const static struct opcode_table_s opcode_table[] = {
+      { "%abs/wr", of_ABS_WR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
       { "%add",    of_ADD,    3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%add/wr", of_ADD_WR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
       { "%addi",   of_ADDI,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt
index 2c8f320c6..84941815c 100644
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@@ -19,6 +19,10 @@ the left operand, the right operand, and the base. The left operand is
 replaced with the result, which is the same width as the left and
 right operands.
 
+* %abs/wr <bit-o>, <bit-i>
+
+This instruction calculates the absolute value of a real value. It uses
+the fabs() function in the run-time to do the work.
 
 * %add <bit-l>, <bit-r>, <wid>
 
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index 66fb23dd6..4267a5c02 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -401,6 +401,14 @@ void vthread_schedule_list(vthread_t thr)
       schedule_vthread(thr, 0);
 }
 
+bool of_ABS_WR(vthread_t thr, vvp_code_t cp) /* handler for the %abs/wr <bit-o>, <bit-i> opcode */
+{
+      unsigned dst = cp->bit_idx[0]; /* first operand: index of the thread word that receives the result */
+      unsigned src = cp->bit_idx[1]; /* second operand: index of the thread word that is read */
+
+      thr->words[dst].w_real = fabs(thr->words[src].w_real); /* fabs() clears the sign, so -0.0 becomes 0.0 */
+      return true; /* NOTE(review): presumably signals the interpreter to continue this thread -- confirm */
+}
 
 bool of_AND(vthread_t thr, vvp_code_t cp)
 {