Implement %cmp/ne and %cmpi/ne

These pull in the inversion of the output flags so that they more efficiently implement != and !==, without %flag_inv instructions.
2014-12-03 11:06:11 -08:00 · 2014-12-03 11:06:11 -08:00 · 85c7b07a9b
parent 0c5ed2b60f
commit 85c7b07a9b
5 changed files with 61 additions and 7 deletions
--- a/tgt-vvp/eval_condit.c
+++ b/tgt-vvp/eval_condit.c
@ -83,26 +83,31 @@ static int draw_condition_binary_compare(ivl_expr_t expr)
      draw_eval_vec4(le);
      resize_vec4_wid(le, use_wid);

+      char use_opcode = ivl_expr_opcode(expr);
+
+
      if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) {
 	      /* Special case: If the right operand can be handled as
 		 an immediate operand, then use that instead. */
-	    draw_immediate_vec4(re, "%cmpi/e");
+	    if (use_opcode=='n' || use_opcode=='N')
+		  draw_immediate_vec4(re, "%cmpi/ne");
+	    else
+		  draw_immediate_vec4(re, "%cmpi/e");
      } else {
 	    draw_eval_vec4(re);
 	    resize_vec4_wid(re, use_wid);
-	    fprintf(vvp_out, "    %%cmp/e;\n");
+	    if (use_opcode=='n' || use_opcode=='N')
+		  fprintf(vvp_out, "    %%cmp/ne;\n");
+	    else
+		  fprintf(vvp_out, "    %%cmp/e;\n");
      }

      switch (ivl_expr_opcode(expr)) {
 	  case 'n': /* != */
-	    fprintf(vvp_out, "    %%flag_inv 4;\n");
-	    ; /* fall through.. */
 	  case 'e': /* == */
 	    return 4;
 	    break;
 	  case 'N': /* !== */
-	    fprintf(vvp_out, "    %%flag_inv 6;\n");
-	    ; /* fall through.. */
 	  case 'E': /* === */
 	    return 6;
 	  default:
--- a/vvp/codes.h
+++ b/vvp/codes.h
@ -63,6 +63,8 @@ extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
 extern bool of_CAST2(vthread_t thr, vvp_code_t code);
 extern bool of_CMPE(vthread_t thr, vvp_code_t code);
 extern bool of_CMPIE(vthread_t thr, vvp_code_t code);
+extern bool of_CMPINE(vthread_t thr, vvp_code_t code);
+extern bool of_CMPNE(vthread_t thr, vvp_code_t code);
 extern bool of_CMPS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@ -113,6 +113,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%cassign/wr",  of_CASSIGN_WR,  1,{OA_FUNC_PTR,OA_NONE,     OA_NONE} },
      { "%cast2",  of_CAST2,  0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/e",  of_CMPE,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
+      { "%cmp/ne", of_CMPNE,  0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/s",  of_CMPS,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/str",of_CMPSTR, 0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/u",  of_CMPU,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
@ -122,6 +123,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%cmp/x",  of_CMPX,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/z",  of_CMPZ,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmpi/e", of_CMPIE,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
+      { "%cmpi/ne",of_CMPINE, 3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%cmpi/s", of_CMPIS,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%cmpi/u", of_CMPIU,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%concat/str",  of_CONCAT_STR,  0,{OA_NONE,  OA_NONE,  OA_NONE} },
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@ -253,9 +253,11 @@ vector2 (binary) value, and push the result.
 * %cmp/s
 * %cmp/u
 * %cmp/e
+* %cmp/ne
 * %cmpi/s <vala>, <valb>, <wid>
 * %cmpi/u <vala>, <valb>, <wid>
 * %cmpi/e <vala>, <valb>, <wid>
+* %cmpi/ne <vala>, <valb>, <wid>

 These instructions perform a generic comparison of two vectors of
 equal size. Two values are pulled from the top of the stack, and not
@ -284,9 +286,14 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
 compare. In either case, if either operand contains x or z, then lt
 bit gets the x value.

-Thje %cmp/e and %cmpi/e variants are the same, but they do not bother
+The %cmp/e and %cmpi/e variants are the same, but they do not bother
 to calculate the lt flag. These are faster if the lt flag is not needed.

+The %cmp/ne and %cmpi/ne variants are the same as the %cmp/e and
+%cmpi/e variants, but the 4 and 6 flags are inverted in order to
+eliminate the need for a %flag_inv instruction to implement != and !==
+operations.
+
 * %cmp/wr

 Compare real values for equality and less-then. This opcode pops to
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@ -1560,6 +1560,23 @@ bool of_CMPE(vthread_t thr, vvp_code_t)
      return true;
 }

+bool of_CMPNE(vthread_t thr, vvp_code_t) // %cmp/ne handler: runs the %cmp/e comparison, then inverts flags 4 and 6
+{
+	// We are going to pop these and push nothing in their
+	// place, but for now it is more efficient to use a constant
+	// reference. When we finish, pop the stack without copies.
+      const vvp_vector4_t&rval = thr->peek_vec4(0); // right operand is read from stack index 0
+      const vvp_vector4_t&lval = thr->peek_vec4(1); // left operand is read from stack index 1
+
+      do_CMPE(thr, lval, rval); // shared with of_CMPE; presumably sets the eq/eeq flags -- defined outside this hunk, confirm
+
+      thr->flags[4] =  ~thr->flags[4]; // invert flag 4, the flag the code generator tests for != (replaces a separate %flag_inv 4)
+      thr->flags[6] =  ~thr->flags[6]; // invert flag 6, the flag the code generator tests for !== (replaces a separate %flag_inv 6)
+
+      thr->pop_vec4(2); // discard both operands; the references above must not be used past this point
+      return true;
+}
+
 /*
 * %cmpi/e <vala>, <valb>, <wid>
 *
@ -1583,6 +1600,27 @@ bool of_CMPIE(vthread_t thr, vvp_code_t cp)
      return true;
 }

+bool of_CMPINE(vthread_t thr, vvp_code_t cp) // %cmpi/ne <vala>, <valb>, <wid> handler: immediate-operand form of %cmp/ne
+{
+      unsigned wid = cp->number; // <wid> operand; used as the width of the immediate vector built below
+
+      vvp_vector4_t&lval = thr->peek_vec4(); // left operand is peeked in place on the stack, not copied
+
+	// I expect that most of the bits of an immediate value are
+	// going to be zero, so start the result vector with all zero
+	// bits. Then we only need to replace the bits that are different.
+      vvp_vector4_t rval (wid, BIT4_0);
+      get_immediate_rval (cp, rval); // helper defined outside this hunk; presumably fills rval from the <vala>/<valb> operands -- confirm
+
+      do_CMPE(thr, lval, rval); // same comparison helper that of_CMPNE uses
+
+      thr->flags[4] =  ~thr->flags[4]; // invert flag 4, the flag the code generator tests for != (replaces a separate %flag_inv 4)
+      thr->flags[6] =  ~thr->flags[6]; // invert flag 6, the flag the code generator tests for !== (replaces a separate %flag_inv 6)
+
+      thr->pop_vec4(1); // only the left operand was on the stack; the immediate was a local
+      return true;
+}
+


 static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)