Optimize runtime using immediate compare

Implement compare-immediate instructions and generate code to use these new instructions to improve runtime performance. Signed-off-by: Stephen Williams <steve@icarus.com>
2007-10-03 20:58:40 -07:00 · 2007-10-03 20:58:40 -07:00 · 5d750b7779
parent d587499276
commit 5d750b7779
5 changed files with 139 additions and 23 deletions
--- a/tgt-vvp/eval_expr.c
+++ b/tgt-vvp/eval_expr.c
@ -36,6 +36,9 @@ int number_is_unknown(ivl_expr_t ex)
      const char*bits;
      unsigned idx;

+      if (ivl_expr_type(ex) == IVL_EX_ULONG)
+	    return 0;
+
      assert(ivl_expr_type(ex) == IVL_EX_NUMBER);

      bits = ivl_expr_bits(ex);
@ -66,6 +69,10 @@ int number_is_immediate(ivl_expr_t ex, unsigned lim_wid)
 	    if (bits[idx] != '0')
 		  return 0;

+	/* Negative numbers are not "immediate". */
+      if (ivl_expr_signed(ex) && bits[ivl_expr_width(ex)-1]=='1')
+	    return 0;
+
      return 1;
 }

@ -290,13 +297,7 @@ static struct vector_info draw_binary_expr_eq(ivl_expr_t exp,
 	    return draw_binary_expr_eq_real(exp);
      }

-      if ((ivl_expr_type(re) == IVL_EX_ULONG)
-	  && (0 == (ivl_expr_uvalue(re) & ~0xffff)))
-	    return draw_eq_immediate(exp, ewid, le, re, stuff_ok_flag);
-
-      if ((ivl_expr_type(re) == IVL_EX_NUMBER)
-	  && (! number_is_unknown(re))
-	  && number_is_immediate(re, 16))
+      if (number_is_immediate(re,16) && !number_is_unknown(re))
 	    return draw_eq_immediate(exp, ewid, le, re, stuff_ok_flag);

      assert(ivl_expr_value(le) == IVL_VT_LOGIC
@ -683,42 +684,78 @@ static struct vector_info draw_binary_expr_le(ivl_expr_t exp,
      assert(ivl_expr_value(re) == IVL_VT_LOGIC
 	     || ivl_expr_value(re) == IVL_VT_BOOL);

-      lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
-      rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+      lv.wid = 0;
+      rv.wid = 0;

      switch (ivl_expr_opcode(exp)) {
 	  case 'G':
-	    assert(lv.wid == rv.wid);
-	    fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
-		    rv.base, lv.base, lv.wid);
+	    rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+	    if (number_is_immediate(le,16) && !number_is_unknown(le)) {
+		  unsigned imm = get_number_immediate(le);
+		  assert(imm >= 0);
+		  fprintf(vvp_out, "   %%cmpi/%c %u, %u, %u;\n", s_flag,
+			  rv.base, imm, rv.wid);
+	    } else {
+		  lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
+		  assert(lv.wid == rv.wid);
+		  fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
+			  rv.base, lv.base, lv.wid);
+	    }
 	    fprintf(vvp_out, "    %%or 5, 4, 1;\n");
 	    break;

 	  case 'L':
-	    assert(lv.wid == rv.wid);
-	    fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
-		    lv.base, rv.base, lv.wid);
+	    lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
+	    if (number_is_immediate(re,16) && !number_is_unknown(re)) {
+		  unsigned imm = get_number_immediate(re);
+		  assert(imm >= 0);
+		  fprintf(vvp_out, "   %%cmpi/%c %u, %u, %u;\n", s_flag,
+			  lv.base, imm, lv.wid);
+	    } else {
+		  rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+		  assert(lv.wid == rv.wid);
+		  fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
+			  lv.base, rv.base, lv.wid);
+	    }
 	    fprintf(vvp_out, "    %%or 5, 4, 1;\n");
 	    break;

 	  case '<':
-	    assert(lv.wid == rv.wid);
-	    fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
-		    lv.base, rv.base, lv.wid);
+	    lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
+	    if (number_is_immediate(re,16) && !number_is_unknown(re)) {
+		  unsigned imm = get_number_immediate(re);
+		  assert(imm >= 0);
+		  fprintf(vvp_out, "   %%cmpi/%c %u, %u, %u;\n", s_flag,
+			  lv.base, imm, lv.wid);
+	    } else {
+		  rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+		  assert(lv.wid == rv.wid);
+		  fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
+			  lv.base, rv.base, lv.wid);
+	    }
 	    break;

 	  case '>':
-	    assert(lv.wid == rv.wid);
-	    fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
-		    rv.base, lv.base, lv.wid);
+	    rv = draw_eval_expr_wid(re, owid, STUFF_OK_XZ);
+	    if (number_is_immediate(le,16) && !number_is_unknown(le)) {
+		  unsigned imm = get_number_immediate(le);
+		  assert(imm >= 0);
+		  fprintf(vvp_out, "   %%cmpi/%c %u, %u, %u;\n", s_flag,
+			  rv.base, imm, rv.wid);
+	    } else {
+		  lv = draw_eval_expr_wid(le, owid, STUFF_OK_XZ);
+		  assert(lv.wid == rv.wid);
+		  fprintf(vvp_out, "    %%cmp/%c %u, %u, %u;\n", s_flag,
+			  rv.base, lv.base, lv.wid);
+	    }
 	    break;

 	  default:
 	    assert(0);
      }

-      clr_vector(lv);
-      clr_vector(rv);
+      if (lv.wid > 0) clr_vector(lv);
+      if (rv.wid > 0) clr_vector(rv);

      if ((stuff_ok_flag&STUFF_OK_47) && (wid == 1)) {
 	    lv.base = 5;
@ -1365,6 +1402,12 @@ static struct vector_info draw_number_expr(ivl_expr_t exp, unsigned wid)
 	   load the constant bit values. */
      res.base = allocate_vector(wid);

+      if ((!number_is_unknown(exp)) && number_is_immediate(exp, 16)) {
+	    int val = get_number_immediate(exp);
+	    fprintf(vvp_out, "   %%movi %u, %d, %u;\n", res.base, val, wid);
+	    return res;
+      }
+
      idx = 0;
      while (idx < nwid) {
 	    unsigned cnt;
--- a/vvp/codes.h
+++ b/vvp/codes.h
@ -54,6 +54,7 @@ extern bool of_BLEND(vthread_t thr, vvp_code_t code);
 extern bool of_BREAKPOINT(vthread_t thr, vvp_code_t code);
 extern bool of_CASSIGN_LINK(vthread_t thr, vvp_code_t code);
 extern bool of_CASSIGN_V(vthread_t thr, vvp_code_t code);
+extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPIU(vthread_t thr, vvp_code_t code);
 extern bool of_CMPS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPU(vthread_t thr, vvp_code_t code);
@ -103,6 +104,7 @@ extern bool of_MOD_S(vthread_t thr, vvp_code_t code);
 extern bool of_MOD_WR(vthread_t thr, vvp_code_t code);
 extern bool of_MOV(vthread_t thr, vvp_code_t code);
 extern bool of_MOV_WR(vthread_t thr, vvp_code_t code);
+extern bool of_MOVI(vthread_t thr, vvp_code_t code);
 extern bool of_MUL(vthread_t thr, vvp_code_t code);
 extern bool of_MUL_WR(vthread_t thr, vvp_code_t code);
 extern bool of_MULI(vthread_t thr, vvp_code_t code);
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@ -124,6 +124,7 @@ const static struct opcode_table_s opcode_table[] = {
      { "%cmp/wu", of_CMPWU,  2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
      { "%cmp/x",  of_CMPX,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%cmp/z",  of_CMPZ,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
+      { "%cmpi/s", of_CMPIS,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%cmpi/u", of_CMPIU,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%cvt/ir", of_CVT_IR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
      { "%cvt/ri", of_CVT_RI, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
@ -164,6 +165,7 @@ const static struct opcode_table_s opcode_table[] = {
      { "%mod/wr", of_MOD_WR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
      { "%mov",    of_MOV,    3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%mov/wr", of_MOV_WR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
+      { "%movi",   of_MOVI,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%mul",    of_MUL,    3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%mul/wr", of_MUL_WR, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
      { "%muli",   of_MULI,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@ -189,6 +189,12 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
 compare. In either case, if either operand contains x or z, then lt
 bit gets the x value.

+* %cmpi/s <bit-l>, <immr>, <wid>
+* %cmpi/u <bit-l>, <immr>, <wid>
+
+These instructions are similar to the %cmp instructions above, except
+that the right hand operand is an immediate value: a non-negative
+number that the vector is compared against.

 * %cmp/wr <bit-l>, <bit-r>

--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@ -891,6 +891,55 @@ bool of_CMPS(vthread_t thr, vvp_code_t cp)
      return true;
 }

+bool of_CMPIS(vthread_t thr, vvp_code_t cp) /* Handler for %cmpi/s: compare a thread vector against an immediate, treating the vector's top bit as a sign. Always returns true. */
+{
+      vvp_bit4_t eq  = BIT4_1; /* Result stored in thread bit 4. */
+      vvp_bit4_t eeq = BIT4_1; /* Result stored in thread bit 6. */
+      vvp_bit4_t lt  = BIT4_0; /* Result stored in thread bit 5. */

+      unsigned idx1 = cp->bit_idx[0]; /* Address of the vector's first bit. */
+      unsigned imm  = cp->bit_idx[1]; /* Immediate operand; consumed one bit per iteration, low bit first. */

+      const unsigned end1 = (idx1 < 4)? idx1 : idx1 + cp->number - 1; /* Address of the last bit read; addresses below 4 are never advanced, so the same bit is reused. */
+      thr_check_addr(thr, end1);
+      const vvp_bit4_t sig1 = thr_get_bit(thr, end1); /* Top bit of the vector, used as its sign after the loop. */

+      for (unsigned idx = 0 ;  idx < cp->number ;  idx += 1) { /* One pass per bit, starting at the vector's first address. */
+	    vvp_bit4_t lv = thr_get_bit(thr, idx1);
+	    vvp_bit4_t rv = (imm & 1)? BIT4_1 : BIT4_0; /* Immediate bits are only ever 0 or 1. */
+	    imm >>= 1;

+	    if (lv > rv) { /* lt is overwritten on every differing bit, so the last differing bit visited decides it. */
+		  lt = BIT4_0;
+		  eeq = BIT4_0;
+	    } else if (lv < rv) {
+		  lt = BIT4_1;
+		  eeq = BIT4_0;
+	    }
+	    if (eq != BIT4_X) { /* Once eq has become X it is never changed again. */
+		  if ((lv == BIT4_0) && (rv != BIT4_0))
+			eq = BIT4_0;
+		  if ((lv == BIT4_1) && (rv != BIT4_1))
+			eq = BIT4_0;
+		  if (bit4_is_xz(lv) || bit4_is_xz(rv)) /* Any x/z bit makes eq X, even after an earlier 0/1 mismatch. */
+			eq = BIT4_X;
+	    }

+	    if (idx1 >= 4) idx1 += 1; /* Addresses below 4 are not stepped. */
+      }

+      if (eq == BIT4_X) /* An x/z bit was seen, so lt is reported as X too. */
+	    lt = BIT4_X;
+      else if (sig1 == BIT4_1) /* Top bit set: lt is forced to 1 regardless of the bitwise result. NOTE(review): this relies on the immediate never being negative. */
+	    lt = BIT4_1;

+      thr_put_bit(thr, 4, eq);
+      thr_put_bit(thr, 5, lt);
+      thr_put_bit(thr, 6, eeq);

+      return true;
+}
+
 bool of_CMPIU(vthread_t thr, vvp_code_t cp)
 {
      vvp_bit4_t eq  = BIT4_1;
@ -2531,6 +2580,20 @@ bool of_MOV_WR(vthread_t thr, vvp_code_t cp)
      return true;
 }

+bool of_MOVI(vthread_t thr, vvp_code_t cp) /* Handler for %movi: write the low bits of an immediate value into a thread vector. Always returns true. */
+{
+      unsigned dst = cp->bit_idx[0]; /* Address of the first destination bit. */
+      unsigned val = cp->bit_idx[1]; /* Immediate value to store. */
+      unsigned wid = cp->number; /* Number of bits to write. */

+      thr_check_addr(thr, dst+wid); /* NOTE(review): the last bit written is dst+wid-1, so this checks one address past it; of_CMPIS checks the last bit itself. Confirm this is intended. */

+      for (unsigned idx = 0 ;  idx < wid ;  idx += 1, val >>= 1) /* Bit idx of the immediate goes to address dst+idx; once val is shifted out, the remaining bits are written as 0. */
+	    thr->bits4.set_bit(dst+idx, (val&1)? BIT4_1 : BIT4_0);

+      return true;
+}
+
 bool of_MUL(vthread_t thr, vvp_code_t cp)
 {
      assert(cp->bit_idx[0] >= 4);