Add the %cmp/e instructions, and put them to use.

When testing for == and ===, there is no need to also calculate <, so it makes sense to have a special instruction for these cases.
2014-11-21 16:45:27 -08:00 · 2014-11-21 16:45:27 -08:00 · 663c79d4af
parent bea03db25d
commit 663c79d4af
6 changed files with 102 additions and 6 deletions
--- a/tgt-vvp/eval_condit.c
+++ b/tgt-vvp/eval_condit.c
@ -86,11 +86,11 @@ static int draw_condition_binary_compare(ivl_expr_t expr)
      if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) {
 	      /* Special case: If the right operand can be handled as
 		 an immediate operand, then use that instead. */
-	    draw_immediate_vec4(re, "%cmpi/u");
+	    draw_immediate_vec4(re, "%cmpi/e");
      } else {
 	    draw_eval_vec4(re);
 	    resize_vec4_wid(re, use_wid);
-	    fprintf(vvp_out, "    %%cmp/u;\n");
+	    fprintf(vvp_out, "    %%cmp/e;\n");
      }

      switch (ivl_expr_opcode(expr)) {
--- a/tgt-vvp/eval_vec4.c
+++ b/tgt-vvp/eval_vec4.c
@ -375,20 +375,20 @@ static void draw_binary_vec4_compare(ivl_expr_t expr)

      switch (ivl_expr_opcode(expr)) {
 	  case 'e': /* == */
-	    fprintf(vvp_out, "    %%cmp/u;\n");
+	    fprintf(vvp_out, "    %%cmp/e;\n");
 	    fprintf(vvp_out, "    %%flag_get/vec4 4;\n");
 	    break;
 	  case 'n': /* != */
-	    fprintf(vvp_out, "    %%cmp/u;\n");
+	    fprintf(vvp_out, "    %%cmp/e;\n");
 	    fprintf(vvp_out, "    %%flag_get/vec4 4;\n");
 	    fprintf(vvp_out, "    %%inv;\n");
 	    break;
 	  case 'E': /* === */
-	    fprintf(vvp_out, "    %%cmp/u;\n");
+	    fprintf(vvp_out, "    %%cmp/e;\n");
 	    fprintf(vvp_out, "    %%flag_get/vec4 6;\n");
 	    break;
 	  case 'N': /* !== */
-	    fprintf(vvp_out, "    %%cmp/u;\n");
+	    fprintf(vvp_out, "    %%cmp/e;\n");
 	    fprintf(vvp_out, "    %%flag_get/vec4 6;\n");
 	    fprintf(vvp_out, "    %%inv;\n");
 	    break;
--- a/vvp/codes.h
+++ b/vvp/codes.h
@ -61,6 +61,8 @@ extern bool of_CASSIGN_VEC4(vthread_t thr, vvp_code_t code);
 extern bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t code);
 extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
 extern bool of_CAST2(vthread_t thr, vvp_code_t code);
+extern bool of_CMPE(vthread_t thr, vvp_code_t code);
+extern bool of_CMPIE(vthread_t thr, vvp_code_t code);
 extern bool of_CMPS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@ -112,6 +112,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1,     OA_NONE} },
      { "%cassign/wr",  of_CASSIGN_WR,  1,{OA_FUNC_PTR,OA_NONE,     OA_NONE} },
      { "%cast2",  of_CAST2,  0,  {OA_NONE,     OA_NONE,     OA_NONE} },
+      { "%cmp/e",  of_CMPE,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/s",  of_CMPS,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/str",of_CMPSTR, 0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/u",  of_CMPU,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
@ -120,6 +121,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%cmp/wu", of_CMPWU,  2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
      { "%cmp/x",  of_CMPX,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/z",  of_CMPZ,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
+      { "%cmpi/e", of_CMPIE,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%cmpi/s", of_CMPIS,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%cmpi/u", of_CMPIU,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%concat/str",  of_CONCAT_STR,  0,{OA_NONE,  OA_NONE,  OA_NONE} },
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@ -252,8 +252,10 @@ vector2 (binary) value, and push the result.

 * %cmp/s
 * %cmp/u
+* %cmp/e
 * %cmpi/s <vala>, <valb>, <wid>
 * %cmpi/u <vala>, <valb>, <wid>
+* %cmpi/e <vala>, <valb>, <wid>

 These instructions perform a generic comparison of two vectors of
 equal size. Two values are pulled from the top of the stack, and not
@ -282,6 +284,9 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
 compare. In either case, if either operand contains x or z, then lt
 bit gets the x value.

+The %cmp/e and %cmpi/e variants are the same, but they do not bother
+to calculate the lt flag. These are faster if the lt flag is not needed.
+
 * %cmp/wr

 Compare real values for equality and less-then. This opcode pops to
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@ -1477,6 +1477,93 @@ bool of_CAST2(vthread_t thr, vvp_code_t)
      return true;
 }

+static void do_CMPE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
+{
+      assert(rval.size() == lval.size());
+	// A bit-by-bit scan is only needed when X/Z bits are present.
+      if (lval.has_xz() || rval.has_xz()) {
+	      // eq is the == result (may end up X); eeq is the === result.
+	    unsigned wid = lval.size();
+	    vvp_bit4_t eq  = BIT4_1;
+	    vvp_bit4_t eeq = BIT4_1;
+
+	    for (unsigned idx = 0 ; idx < wid ; idx += 1) {
+		  vvp_bit4_t lv = lval.value(idx);
+		  vvp_bit4_t rv = rval.value(idx);
+		    // Any bitwise mismatch, X/Z included, defeats ===.
+		  if (lv != rv)
+			eeq = BIT4_0;
+		    // X/Z makes == unknown; a definite 0-vs-1 mismatch makes it false.
+		  if (eq==BIT4_1 && (bit4_is_xz(lv) || bit4_is_xz(rv)))
+			eq = BIT4_X;
+		  if ((lv == BIT4_0) && (rv==BIT4_1))
+			eq = BIT4_0;
+		  if ((lv == BIT4_1) && (rv==BIT4_0))
+			eq = BIT4_0;
+		    // Safe to stop: the bit that forced eq to 0 also cleared eeq.
+		  if (eq == BIT4_0)
+			break;
+	    }
+
+	    thr->flags[4] = eq;
+	    thr->flags[6] = eeq;
+
+      } else {
+	      // If there are no XZ bits anywhere, then the results of
+	      // == match the === test.
+	    thr->flags[4] = thr->flags[6] = (lval.eeq(rval)? BIT4_1 : BIT4_0);
+      }
+}
+
+/*
+ *  %cmp/e
+ *
+ * Pop two vec4 operands from the stack and push nothing back. Only
+ * the equality results are written to flag bits (no lt flag is set):
+ *
+ *	4: eq  (equal, ==)
+ *
+ *	6: eeq (case equal, ===)
+ */
+bool of_CMPE(vthread_t thr, vvp_code_t)
+{
+	// Peek rather than pop so the operands can be compared through
+	// const references without copying. Index 0 is the right operand
+	// and index 1 the left; both are discarded after the compare.
+      const vvp_vector4_t&rval = thr->peek_vec4(0);
+      const vvp_vector4_t&lval = thr->peek_vec4(1);
+
+      do_CMPE(thr, lval, rval);
+
+      thr->pop_vec4(2);
+      return true;
+}
+
+/*
+ * %cmpi/e <vala>, <valb>, <wid>
+ *
+ * Pop one operand (lval); build the other (rval) from the immediate.
+ */
+bool of_CMPIE(vthread_t thr, vvp_code_t cp)
+{
+      unsigned wid = cp->number;
+
+      vvp_vector4_t&lval = thr->peek_vec4();
+
+	// I expect that most of the bits of an immediate value are
+	// going to be zero, so start the result vector with all zero
+	// bits. Then we only need to replace the bits that are different.
+      vvp_vector4_t rval (wid, BIT4_0);
+      get_immediate_rval (cp, rval);
+	// Writes flags 4 (eq) and 6 (eeq) via do_CMPE.
+      do_CMPE(thr, lval, rval);
+
+      thr->pop_vec4(1);
+      return true;
+}
+
+
+
 static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
 {
      vvp_bit4_t eq  = BIT4_1;