From 5d750b7779555aa4e643e0d9c0a498d075f0fff9 Mon Sep 17 00:00:00 2001
From: Stephen Williams <steve@icarus.com>
Date: Wed, 3 Oct 2007 20:58:40 -0700
Subject: [PATCH] Optimize runtime using immediate compare

Implement compare-immediate instructions and generate code to use
these new instructions to improve runtime performance.

Signed-off-by: Stephen Williams <steve@icarus.com>
---
 tgt-vvp/eval_expr.c | 89 +++++++++++++++++++++++++++++++++------------
 vvp/codes.h         |  2 +
 vvp/compile.cc      |  2 +
 vvp/opcodes.txt     |  6 +++
 vvp/vthread.cc      | 63 ++++++++++++++++++++++++++++++++
 5 files changed, 139 insertions(+), 23 deletions(-)

diff --git a/tgt-vvp/eval_expr.c b/tgt-vvp/eval_expr.c
index 46447f1ac..4627ecf77 100644
--- a/tgt-vvp/eval_expr.c
+++ b/tgt-vvp/eval_expr.c
@@ -36,6 +36,9 @@ int number_is_unknown(ivl_expr_t ex)
       const char*bits;
       unsigned idx;
 
+      if (ivl_expr_type(ex) == IVL_EX_ULONG)
+	    return 0;
+
       assert(ivl_expr_type(ex) == IVL_EX_NUMBER);
 
       bits = ivl_expr_bits(ex);
@@ -66,6 +69,10 @@ int number_is_immediate(ivl_expr_t ex, unsigned lim_wid)
 	    if (bits[idx] != '0')
 		  return 0;
 
+	/* Negative numbers are not "immediate". */
+      if (ivl_expr_signed(ex) && bits[ivl_expr_width(ex)-1]=='1')
+	    return 0;
+
       return 1;
 }
 
@@ -290,13 +297,7 @@ static struct vector_info draw_binary_expr_eq(ivl_expr_t exp,
 	    return draw_binary_expr_eq_real(exp);
       }
 
-      if ((ivl_expr_type(re) == IVL_EX_ULONG)
-	  && (0 == (ivl_expr_uvalue(re) & ~0xffff)))
-	    return draw_eq_immediate(exp, ewid, le, re, stuff_ok_flag);
-
-      if ((ivl_expr_type(re) == IVL_EX_NUMBER)
-	  && (! number_is_unknown(re))
-	  && number_is_immediate(re, 16))
+      if (number_is_immediate(re,16) && !number_is_unknown(re))
 	    return draw_eq_immediate(exp, ewid, le, re, stuff_ok_flag);
 
       assert(ivl_expr_value(le) == IVL_VT_LOGIC
@@ -683,42 +684,78 @@ static struct vector_info draw_binary_expr_le(ivl_expr_t exp,
       assert(ivl_expr_value(re) == IVL_VT_LOGIC
 	     || ivl_expr_value(re) == IVL_VT_BOOL);
 
-      lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
-      rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+      lv.wid = 0;
+      rv.wid = 0;
 
       switch (ivl_expr_opcode(exp)) {
 	  case 'G':
-	    assert(lv.wid == rv.wid);
-	    fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
-		    rv.base, lv.base, lv.wid);
+	    rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+	    if (number_is_immediate(le,16) && !number_is_unknown(le)) {
+		  unsigned imm = get_number_immediate(le);
+		  assert(imm >= 0);
+		  fprintf(vvp_out, "   %%cmpi/%c %u, %u, %u;\n", s_flag,
+			  rv.base, imm, rv.wid);
+	    } else {
+		  lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
+		  assert(lv.wid == rv.wid);
+		  fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
+			  rv.base, lv.base, lv.wid);
+	    }
 	    fprintf(vvp_out, "    %%or 5, 4, 1;\n");
 	    break;
 
 	  case 'L':
-	    assert(lv.wid == rv.wid);
-	    fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
-		    lv.base, rv.base, lv.wid);
+	    lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
+	    if (number_is_immediate(re,16) && !number_is_unknown(re)) {
+		  unsigned imm = get_number_immediate(re);
+		  assert(imm >= 0);
+		  fprintf(vvp_out, "   %%cmpi/%c %u, %u, %u;\n", s_flag,
+			  lv.base, imm, lv.wid);
+	    } else {
+		  rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+		  assert(lv.wid == rv.wid);
+		  fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
+			  lv.base, rv.base, lv.wid);
+	    }
 	    fprintf(vvp_out, "    %%or 5, 4, 1;\n");
 	    break;
 
 	  case '<':
-	    assert(lv.wid == rv.wid);
-	    fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
-		    lv.base, rv.base, lv.wid);
+	    lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
+	    if (number_is_immediate(re,16) && !number_is_unknown(re)) {
+		  unsigned imm = get_number_immediate(re);
+		  assert(imm >= 0);
+		  fprintf(vvp_out, "   %%cmpi/%c %u, %u, %u;\n", s_flag,
+			  lv.base, imm, lv.wid);
+	    } else {
+		  rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+		  assert(lv.wid == rv.wid);
+		  fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
+			  lv.base, rv.base, lv.wid);
+	    }
 	    break;
 
 	  case '>':
-	    assert(lv.wid == rv.wid);
-	    fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
-		    rv.base, lv.base, lv.wid);
+	    rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+	    if (number_is_immediate(le,16) && !number_is_unknown(le)) {
+		  unsigned imm = get_number_immediate(le);
+		  assert(imm >= 0);
+		  fprintf(vvp_out, "   %%cmpi/%c %u, %u, %u;\n", s_flag,
+			  rv.base, imm, rv.wid);
+	    } else {
+		  lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
+		  assert(lv.wid == rv.wid);
+		  fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
+			  rv.base, lv.base, lv.wid);
+	    }
 	    break;
 
 	  default:
 	    assert(0);
       }
 
-      clr_vector(lv);
-      clr_vector(rv);
+      if (lv.wid > 0) clr_vector(lv);
+      if (rv.wid > 0) clr_vector(rv);
 
       if ((stuff_ok_flag&STUFF_OK_47) && (wid == 1)) {
 	    lv.base = 5;
@@ -1365,6 +1402,12 @@ static struct vector_info draw_number_expr(ivl_expr_t exp, unsigned wid)
 	   load the constant bit values. */
       res.base = allocate_vector(wid);
 
+      if ((!number_is_unknown(exp)) && number_is_immediate(exp, 16)) {
+	    int val = get_number_immediate(exp);
+	    fprintf(vvp_out, "   %%movi %u, %d, %u;\n", res.base, val, wid);
+	    return res;
+      }
+
       idx = 0;
       while (idx < nwid) {
 	    unsigned cnt;
diff --git a/vvp/codes.h b/vvp/codes.h
index 7243202dc..c70093494 100644
--- a/vvp/codes.h
+++ b/vvp/codes.h
@@ -54,6 +54,7 @@ extern bool of_BLEND(vthread_t thr, vvp_code_t code);
 extern bool of_BREAKPOINT(vthread_t thr, vvp_code_t code);
 extern bool of_CASSIGN_LINK(vthread_t thr, vvp_code_t code);
 extern bool of_CASSIGN_V(vthread_t thr, vvp_code_t code);
+extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPIU(vthread_t thr, vvp_code_t code);
 extern bool of_CMPS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPU(vthread_t thr, vvp_code_t code);
@@ -103,6 +104,7 @@ extern bool of_MOD_S(vthread_t thr, vvp_code_t code);
 extern bool of_MOD_WR(vthread_t thr, vvp_code_t code);
 extern bool of_MOV(vthread_t thr, vvp_code_t code);
 extern bool of_MOV_WR(vthread_t thr, vvp_code_t code);
+extern bool of_MOVI(vthread_t thr, vvp_code_t code);
 extern bool of_MUL(vthread_t thr, vvp_code_t code);
 extern bool of_MUL_WR(vthread_t thr, vvp_code_t code);
 extern bool of_MULI(vthread_t thr, vvp_code_t code);
diff --git a/vvp/compile.cc b/vvp/compile.cc
index 9a4dcb9fa..505c41d37 100644
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@@ -124,6 +124,7 @@ const static struct opcode_table_s opcode_table[] = {
       { "%cmp/wu", of_CMPWU,  2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
       { "%cmp/x",  of_CMPX,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%cmp/z",  of_CMPZ,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
+      { "%cmpi/s", of_CMPIS,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%cmpi/u", of_CMPIU,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%cvt/ir", of_CVT_IR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
       { "%cvt/ri", of_CVT_RI, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
@@ -164,6 +165,7 @@ const static struct opcode_table_s opcode_table[] = {
       { "%mod/wr", of_MOD_WR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
       { "%mov",    of_MOV,    3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%mov/wr", of_MOV_WR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
+      { "%movi",   of_MOVI,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%mul",    of_MUL,    3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
       { "%mul/wr", of_MUL_WR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
       { "%muli",   of_MULI,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt
index bb2d5d91d..74a64f21b 100644
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@@ -189,6 +189,12 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
 compare. In either case, if either operand contains x or z, then lt
 bit gets the x value.
 
+* %cmpi/s <bit-l>, <immr>, <wid>
+* %cmpi/u <bit-l>, <immr>, <wid>
+
+These instructions are similar to the %cmp instructions above, except
+that the right hand operand is an immediate value. The immediate is a
+non-negative number that the vector is compared with.
 
 * %cmp/wr <bit-l>, <bit-r>
 
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index a7fddbc43..1e8707684 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -891,6 +891,55 @@ bool of_CMPS(vthread_t thr, vvp_code_t cp)
       return true;
 }
 
+bool of_CMPIS(vthread_t thr, vvp_code_t cp)  /* %cmpi/s: signed compare of a thread vector with an immediate */
+{
+      vvp_bit4_t eq  = BIT4_1;  /* result written to thread bit 4 */
+      vvp_bit4_t eeq = BIT4_1;  /* result written to thread bit 6 */
+      vvp_bit4_t lt  = BIT4_0;  /* result written to thread bit 5 */
+
+      unsigned idx1 = cp->bit_idx[0];  /* address of the left vector's low bit */
+      unsigned imm  = cp->bit_idx[1];  /* immediate right operand, consumed LSB first */
+
+      const unsigned end1 = (idx1 < 4)? idx1 : idx1 + cp->number - 1;  /* address of the left operand's top bit */
+      thr_check_addr(thr, end1);
+      const vvp_bit4_t sig1 = thr_get_bit(thr, end1);  /* sign bit of the left operand */
+
+      for (unsigned idx = 0 ;  idx < cp->number ;  idx += 1) {  /* walk from LSB to MSB */
+	    vvp_bit4_t lv = thr_get_bit(thr, idx1);
+	    vvp_bit4_t rv = (imm & 1)? BIT4_1 : BIT4_0;  /* rv is never x/z */
+	    imm >>= 1;
+
+	    if (lv > rv) {  /* a higher differing bit overrides what lower bits decided */
+		  lt = BIT4_0;
+		  eeq = BIT4_0;
+	    } else if (lv < rv) {
+		  lt = BIT4_1;
+		  eeq = BIT4_0;
+	    }
+	    if (eq != BIT4_X) {  /* once eq is X it stays X */
+		  if ((lv == BIT4_0) && (rv != BIT4_0))
+			eq = BIT4_0;
+		  if ((lv == BIT4_1) && (rv != BIT4_1))
+			eq = BIT4_0;
+		  if (bit4_is_xz(lv) || bit4_is_xz(rv))
+			eq = BIT4_X;
+	    }
+
+	    if (idx1 >= 4) idx1 += 1;  /* addresses below 4 are not advanced; presumably constant bits - confirm */
+      }
+
+      if (eq == BIT4_X)  /* any x/z bit in the vector makes lt unknown as well */
+	    lt = BIT4_X;
+      else if (sig1 == BIT4_1)  /* top bit set: treated as less than the immediate (assumes imm is non-negative) */
+	    lt = BIT4_1;
+
+      thr_put_bit(thr, 4, eq);
+      thr_put_bit(thr, 5, lt);
+      thr_put_bit(thr, 6, eeq);
+
+      return true;
+}
+
 bool of_CMPIU(vthread_t thr, vvp_code_t cp)
 {
       vvp_bit4_t eq  = BIT4_1;
@@ -2531,6 +2580,20 @@ bool of_MOV_WR(vthread_t thr, vvp_code_t cp)
       return true;
 }
 
+bool of_MOVI(vthread_t thr, vvp_code_t cp)  /* %movi: store an immediate value into a thread vector */
+{
+      unsigned dst = cp->bit_idx[0];  /* address of the destination's low bit */
+      unsigned val = cp->bit_idx[1];  /* immediate value, written LSB first */
+      unsigned wid = cp->number;  /* number of bits to write */
+
+      thr_check_addr(thr, dst+wid);  /* NOTE(review): highest bit written is dst+wid-1; confirm the intended bound */
+
+      for (unsigned idx = 0 ;  idx < wid ;  idx += 1, val >>= 1)  /* bits beyond the width of val are written as 0 */
+	    thr->bits4.set_bit(dst+idx, (val&1)? BIT4_1 : BIT4_0);
+
+      return true;
+}
+
 bool of_MUL(vthread_t thr, vvp_code_t cp)
 {
       assert(cp->bit_idx[0] >= 4);