vec4 versions of a bunch of unary operators.

2014-01-05 14:12:27 -08:00 · 2014-01-05 14:12:27 -08:00 · e5eb754150
parent 063c6d6065
commit e5eb754150
4 changed files with 163 additions and 106 deletions
--- a/tgt-vvp/eval_vec4.c
+++ b/tgt-vvp/eval_vec4.c
@ -609,6 +609,21 @@ static void draw_unary_vec4(ivl_expr_t expr, int stuff_ok_flag)
      ivl_expr_t sub = ivl_expr_oper1(expr);

      switch (ivl_expr_opcode(expr)) {
+	  case '&':
+	    draw_eval_vec4(sub, stuff_ok_flag);
+	    fprintf(vvp_out, "    %%and/r;\n");
+	    break;
+
+	  case '|':
+	    draw_eval_vec4(sub, stuff_ok_flag);
+	    fprintf(vvp_out, "    %%or/r;\n");
+	    break;
+
+	  case '^':
+	    draw_eval_vec4(sub, stuff_ok_flag);
+	    fprintf(vvp_out, "    %%xor/r;\n");
+	    break;
+
 	  case '~':
 	    draw_eval_vec4(sub, stuff_ok_flag);
 	    fprintf(vvp_out, "    %%inv;\n");
@ -619,8 +634,72 @@ static void draw_unary_vec4(ivl_expr_t expr, int stuff_ok_flag)
 	    fprintf(vvp_out, "    %%nor/r;\n");
 	    break;

+	  case '-':
+	    draw_eval_vec4(sub, stuff_ok_flag);
+	    fprintf(vvp_out, "    %%inv;\n");
+	    fprintf(vvp_out, "    %%pushi/vec4 1, 0, %u;\n", ivl_expr_width(sub));
+	    fprintf(vvp_out, "    %%add;\n");
+	    break;
+
+	  case 'A': /* nand (~&) */
+	    draw_eval_vec4(sub, stuff_ok_flag);
+	    fprintf(vvp_out, "    %%nand/r;\n");
+	    break;
+
+	  case 'N': /* nor (~|) */
+	    draw_eval_vec4(sub, stuff_ok_flag);
+	    fprintf(vvp_out, "    %%nor/r;\n");
+	    break;
+
+	  case 'X': /* xnor (~^) */
+	    draw_eval_vec4(sub, stuff_ok_flag);
+	    fprintf(vvp_out, "    %%xnor/r;\n");
+	    break;
+
+	  case 'm': /* abs(m) */
+	      /* Evaluate the operand onto the vec4 stack. When the
+		 operand is unsigned nothing more is emitted: the value
+		 is used as it is. */
+	    draw_eval_vec4(sub, stuff_ok_flag);
+	    if (! ivl_expr_signed(sub))
+		  break;
+
+	      /* Test if (m) < 0: compare a copy of the operand with a
+		 zero of the same width, then branch to the local label
+		 (skipping the negation) on the outcome in flag 5. */
+	    fprintf(vvp_out, "    %%dup/vec4;\n");
+	    fprintf(vvp_out, "    %%pushi/vec4 0, 0, %u;\n", ivl_expr_width(sub));
+	    fprintf(vvp_out, "    %%cmp/s;\n");
+	    fprintf(vvp_out, "    %%jmp/0xz T_%u.%u, 5;\n", thread_count, local_count);
+	      /* If so, calculate -(m) as (~m) + 1 */
+	    fprintf(vvp_out, "    %%inv;\n");
+	    fprintf(vvp_out, "    %%pushi/vec4 1, 0, %u;\n", ivl_expr_width(sub));
+	    fprintf(vvp_out, "    %%add;\n");
+	    fprintf(vvp_out, "T_%u.%u ;\n", thread_count, local_count);
+	      /* This label index is now used up. Advance the counter so
+		 that the next local label emitted for this thread does
+		 not get the same T_<thread>.<index> name. */
+	    local_count += 1;
+	    break;
+
+	  case 'v': /* Cast real to vec4 */
+	    assert(ivl_expr_value(sub) == IVL_VT_REAL);
+	    draw_eval_real(sub);
+	    fprintf(vvp_out, "    %%cvt/vr %u;\n", ivl_expr_width(expr));
+	    break;
+
+	  case '2': /* Cast expression to bool */
+	      /* How the operand reaches the vec4 stack depends on its
+		 value type. */
+	    switch (ivl_expr_value(sub)) {
+		case IVL_VT_LOGIC:
+		    /* 4-state operand: evaluate it, then let %cast2
+		       turn it into a 2-state value. */
+		  draw_eval_vec4(sub, STUFF_OK_XZ);
+		  fprintf(vvp_out, "    %%cast2;\n");
+		  break;
+		case IVL_VT_BOOL:
+		    /* Already 2-state: no conversion instruction. */
+		  draw_eval_vec4(sub, 0);
+		  break;
+		case IVL_VT_REAL:
+		    /* %cvt/vr takes the result width as its one
+		       operand, just as in the 'v' cast; emitting it
+		       without the width leaves the instruction
+		       incomplete. */
+		  draw_eval_real(sub);
+		  fprintf(vvp_out, "    %%cvt/vr %u;\n", ivl_expr_width(expr));
+		  break;
+		default:
+		  assert(0);
+		  break;
+	    }
+	    break;
+
 	  default:
-	    fprintf(stderr, "XXXX Unary operator %c no implemented\n", ivl_expr_opcode(expr));
+	    fprintf(stderr, "XXXX Unary operator %c not implemented\n", ivl_expr_opcode(expr));
 	    break;
      }
 }
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@ -90,7 +90,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%addi",   of_ADDI,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%alloc",  of_ALLOC,  1,  {OA_VPI_PTR,  OA_NONE,     OA_NONE} },
      { "%and",    of_AND,    0,  {OA_NONE,     OA_NONE,     OA_NONE} },
-      { "%and/r",  of_ANDR,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
+      { "%and/r",  of_ANDR,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%andi",   of_ANDI,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%assign/ar",of_ASSIGN_AR,2,{OA_ARR_PTR,OA_BIT1,     OA_NONE} },
      { "%assign/ar/d",of_ASSIGN_ARD,2,{OA_ARR_PTR,OA_BIT1,  OA_NONE} },
@ -117,7 +117,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%cassign/vec4",    of_CASSIGN_VEC4,    1,{OA_FUNC_PTR,OA_NONE,     OA_NONE} },
      { "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1,     OA_NONE} },
      { "%cassign/wr",  of_CASSIGN_WR,  1,{OA_FUNC_PTR,OA_NONE,     OA_NONE} },
-      { "%cast2",  of_CAST2,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
+      { "%cast2",  of_CAST2,  0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/s",  of_CMPS,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/str",of_CMPSTR, 0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%cmp/u",  of_CMPU,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
@ -137,7 +137,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%cvt/rv/s", of_CVT_RV_S,2, {OA_BIT1,    OA_BIT2,    OA_NONE} },
      { "%cvt/sr", of_CVT_SR, 1,  {OA_BIT1,     OA_NONE,     OA_NONE} },
      { "%cvt/ur", of_CVT_UR, 1,  {OA_BIT1,     OA_NONE,     OA_NONE} },
-      { "%cvt/vr", of_CVT_VR, 2,  {OA_BIT1,     OA_NUMBER,   OA_NONE} },
+      { "%cvt/vr", of_CVT_VR, 1,  {OA_NUMBER,   OA_NONE,     OA_NONE} },
      { "%deassign",of_DEASSIGN,3,{OA_FUNC_PTR, OA_BIT1,     OA_BIT2} },
      { "%deassign/wr",of_DEASSIGN_WR,1,{OA_FUNC_PTR, OA_NONE,     OA_NONE} },
      { "%debug/thr",  of_DEBUG_THR,  0,{OA_NONE,     OA_NONE,     OA_NONE} },
@ -210,7 +210,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%mul/wr", of_MUL_WR, 0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%muli",   of_MULI,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%nand",   of_NAND,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
-      { "%nand/r", of_NANDR,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
+      { "%nand/r", of_NANDR,  0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%new/cobj",  of_NEW_COBJ,  1, {OA_VPI_PTR,OA_NONE,  OA_NONE} },
      { "%new/darray",of_NEW_DARRAY,2, {OA_BIT1,   OA_STRING,OA_NONE} },
      { "%noop",   of_NOOP,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
@ -218,7 +218,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%nor/r",  of_NORR,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%null",   of_NULL,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%or",     of_OR,     0,  {OA_NONE,     OA_NONE,     OA_NONE} },
-      { "%or/r",   of_ORR,    3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
+      { "%or/r",   of_ORR,    0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%pad/s",  of_PAD_S,  1,  {OA_NUMBER,   OA_NONE,     OA_NONE} },
      { "%pad/u",  of_PAD_U,  1,  {OA_NUMBER,   OA_NONE,     OA_NONE} },
      { "%part/s", of_PART_S, 1,  {OA_NUMBER,   OA_NONE,     OA_NONE} },
@ -276,9 +276,9 @@ static const struct opcode_table_s opcode_table[] = {
      { "%wait",   of_WAIT,   1,  {OA_FUNC_PTR, OA_NONE,     OA_NONE} },
      { "%wait/fork",of_WAIT_FORK,0,{OA_NONE,   OA_NONE,     OA_NONE} },
      { "%xnor",   of_XNOR,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
-      { "%xnor/r", of_XNORR,  3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
+      { "%xnor/r", of_XNORR,  0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%xor",    of_XOR,    0,  {OA_NONE,     OA_NONE,     OA_NONE} },
-      { "%xor/r",  of_XORR,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
+      { "%xor/r",  of_XORR,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { 0, of_NOOP, 0, {OA_NONE, OA_NONE, OA_NONE} }
 };

--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@ -95,6 +95,11 @@ bits. AND means the following:
 The input vectors must be the same width, and the output vector will
 be the width of the input.

+* %and/r
+
+Pop the top value from the vec4 stack, compute the reduction AND (&)
+of its bits, and push the single-bit result back onto the vec4 stack.
+
 * %assign/ar <array-label>, <delay>
 * %assign/ar/d <array-label>, <delayx>
 * %assign/ar/e <array-label>
@ -414,11 +419,12 @@ value stack. Precision may be lost in the conversion.
 The %cvt/rv/s instruction is the same as %cvt/rv, but treats the thread
 vector as a signed value.

-* %cvt/vr <bit-l>, <wid>
+* %cvt/vr <wid>

-The %cvt/vr opcode converts a real word from the stack to a thread vector
-starting at <bit-l> and with the width <wid>. Non-integer precision is
-lost in the conversion, and the real value is popped from the stack.
+The %cvt/vr opcode converts a real word from the stack to a vec4 that
+is <wid> wide. Non-integer precision is lost in the conversion, and
+the real value is popped from the stack. The result is pushed to the
+vec4 stack.

 * %deassign <var-label>, <base>, <width>

--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@ -1742,36 +1742,25 @@ bool of_CASSIGN_WR(vthread_t thr, vvp_code_t cp)
      return true;
 }

-
-bool of_CAST2(vthread_t thr, vvp_code_t cp)
+/*
+ * %cast2
+ *
+ * Pop a value from the thread's vec4 stack, rewrite every bit that is
+ * not BIT4_1 to BIT4_0, and push the result (same width) back onto
+ * the vec4 stack. The instruction reads no operands.
+ */
+bool of_CAST2(vthread_t thr, vvp_code_t)
 {
-#if 0
-      unsigned dst = cp->bit_idx[0];
-      unsigned src = cp->bit_idx[1];
-      unsigned wid = cp->number;
+      vvp_vector4_t val = thr->pop_vec4();
+      unsigned wid = val.size();

-      thr_check_addr(thr, dst+wid-1);
-      thr_check_addr(thr, src+wid-1);
-
-      vvp_vector4_t res;
-      switch (src) {
-	  case 0:
-	  case 2:
-	  case 3:
-	    res = vvp_vector4_t(wid, BIT4_0);
-	    break;
-	  case 1:
-	    res = vvp_vector4_t(wid, BIT4_1);
-	    break;
-	  default:
-	    res = vector2_to_vector4(vvp_vector2_t(vthread_bits_to_vector(thr, src, wid)), wid);
-	    break;
+      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
+	    switch (val.value(idx)) {
+		case BIT4_1:
+		  val.set_bit(idx, BIT4_1);
+		  break;
+		default:
+		  val.set_bit(idx, BIT4_0);
+		  break;
+	    }
      }
-
-      thr->bits4.set_vec(dst, res);
-#else
-      fprintf(stderr, "XXXX NOT IMPLEMENTED: %%cast2 ...\n");
-#endif
+      thr->push_vec4(val);
      return true;
 }

@ -2272,22 +2261,15 @@ bool of_CVT_UR(vthread_t thr, vvp_code_t cp)
 }

 /*
- * %cvt/vr <bit> <wid>
+ * %cvt/vr <wid>
+ *
+ * Pop a real value from the thread's real stack, build a vec4 of
+ * <wid> bits from it (<wid> is read from cp->number), and push that
+ * vector onto the vec4 stack.
 */
 bool of_CVT_VR(vthread_t thr, vvp_code_t cp)
 {
-#if 0
      double r = thr->pop_real();
-      unsigned base = cp->bit_idx[0];
      unsigned wid = cp->number;
-      vvp_vector4_t tmp(wid, r);

-	/* Make sure there is enough space for the new vector. */
-      thr_check_addr(thr, base+wid-1);
-      thr->bits4.set_vec(base, tmp);
-#else
-      fprintf(stderr, "XXXX NOT IMPLEMENTED: %%cvt/vr ...\n");
-#endif
+      vvp_vector4_t tmp(wid, r);
+      thr->push_vec4(tmp);
      return true;
 }

@ -4709,45 +4691,43 @@ bool of_NULL(vthread_t thr, vvp_code_t)
      return true;
 }

-
-bool of_ANDR(vthread_t thr, vvp_code_t cp)
+/*
+ * %and/r
+ *
+ * Pop one value from the vec4 stack and push its 1-bit reduction AND.
+ * The result is BIT4_0 as soon as any bit is BIT4_0, BIT4_X when no
+ * bit is BIT4_0 but at least one bit is not BIT4_1, and BIT4_1
+ * otherwise (which includes a zero-width input).
+ */
+bool of_ANDR(vthread_t thr, vvp_code_t)
 {
-#if 0
-      assert(cp->bit_idx[0] >= 4);
+      vvp_vector4_t val = thr->pop_vec4();

      vvp_bit4_t lb = BIT4_1;
-      unsigned idx2 = cp->bit_idx[1];

-      for (unsigned idx = 0 ;  idx < cp->number ;  idx += 1) {
-
-	    vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
+      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
+	    vvp_bit4_t rb = val.value(idx);
 	    if (rb == BIT4_0) {
 		  lb = BIT4_0;
 		  break;
 	    }

 	    if (rb != BIT4_1)
 		  lb = BIT4_X;
      }

-      thr_put_bit(thr, cp->bit_idx[0], lb);
-#else
-      fprintf(stderr, "XXXX NOT IMPLEMENTED: %%and/r ...\n");
-#endif
+      vvp_vector4_t res (1, lb);
+      thr->push_vec4(res);
+
      return true;
 }

-bool of_NANDR(vthread_t thr, vvp_code_t cp)
+/*
+ * %nand/r
+ */
+bool of_NANDR(vthread_t thr, vvp_code_t)
 {
-#if 0
-      assert(cp->bit_idx[0] >= 4);
+      vvp_vector4_t val = thr->pop_vec4();

      vvp_bit4_t lb = BIT4_0;
-      unsigned idx2 = cp->bit_idx[1];
+      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {

-      for (unsigned idx = 0 ;  idx < cp->number ;  idx += 1) {
-
-	    vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
+	    vvp_bit4_t rb = val.value(idx);
 	    if (rb == BIT4_0) {
 		  lb = BIT4_1;
 		  break;
@ -4757,24 +4737,22 @@ bool of_NANDR(vthread_t thr, vvp_code_t cp)
 		  lb = BIT4_X;
      }

-      thr_put_bit(thr, cp->bit_idx[0], lb);
-#else
-      fprintf(stderr, "XXXX NOT IMPLEMENTED: %%nand/r ...\n");
-#endif
+      vvp_vector4_t res (1, lb);
+      thr->push_vec4(res);
+
      return true;
 }

-bool of_ORR(vthread_t thr, vvp_code_t cp)
+/*
+ * %or/r
+ */
+bool of_ORR(vthread_t thr, vvp_code_t)
 {
-#if 0
-      assert(cp->bit_idx[0] >= 4);
+      vvp_vector4_t val = thr->pop_vec4();

      vvp_bit4_t lb = BIT4_0;
-      unsigned idx2 = cp->bit_idx[1];
-
-      for (unsigned idx = 0 ;  idx < cp->number ;  idx += 1) {
-
-	    vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
+      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
+	    vvp_bit4_t rb = val.value(idx);
 	    if (rb == BIT4_1) {
 		  lb = BIT4_1;
 		  break;
@ -4784,24 +4762,22 @@ bool of_ORR(vthread_t thr, vvp_code_t cp)
 		  lb = BIT4_X;
      }

-      thr_put_bit(thr, cp->bit_idx[0], lb);
-#else
-      fprintf(stderr, "XXXX NOT IMPLEMENTED: %%orr ...\n");
-#endif
+      vvp_vector4_t res (1, lb);
+      thr->push_vec4(res);
      return true;
 }

-bool of_XORR(vthread_t thr, vvp_code_t cp)
+/*
+ * %xor/r
+ */
+bool of_XORR(vthread_t thr, vvp_code_t)
 {
-#if 0
-      assert(cp->bit_idx[0] >= 4);
+      vvp_vector4_t val = thr->pop_vec4();

      vvp_bit4_t lb = BIT4_0;
-      unsigned idx2 = cp->bit_idx[1];
+      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {

-      for (unsigned idx = 0 ;  idx < cp->number ;  idx += 1) {
-
-	    vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
+	    vvp_bit4_t rb = val.value(idx);
 	    if (rb == BIT4_1)
 		  lb = ~lb;
 	    else if (rb != BIT4_0) {
@ -4810,24 +4786,22 @@ bool of_XORR(vthread_t thr, vvp_code_t cp)
 	    }
      }

-      thr_put_bit(thr, cp->bit_idx[0], lb);
-#else
-      fprintf(stderr, "XXXX NOT IMPLEMENTED: %%xorr ...\n");
-#endif
+      vvp_vector4_t res (1, lb);
+      thr->push_vec4(res);
      return true;
 }

+/*
+ * %xnor/r
+ */
 bool of_XNORR(vthread_t thr, vvp_code_t cp)
 {
-#if 0
-      assert(cp->bit_idx[0] >= 4);
+      vvp_vector4_t val = thr->pop_vec4();

      vvp_bit4_t lb = BIT4_1;
-      unsigned idx2 = cp->bit_idx[1];
+      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {

-      for (unsigned idx = 0 ;  idx < cp->number ;  idx += 1) {
-
-	    vvp_bit4_t rb = thr_get_bit(thr, idx2+idx);
+	    vvp_bit4_t rb = val.value(idx);
 	    if (rb == BIT4_1)
 		  lb = ~lb;
 	    else if (rb != BIT4_0) {
@ -4836,10 +4810,8 @@ bool of_XNORR(vthread_t thr, vvp_code_t cp)
 	    }
      }

-      thr_put_bit(thr, cp->bit_idx[0], lb);
-#else
-      fprintf(stderr, "XXXX NOT IMPLEMENTED: %%xnorr...\n");
-#endif
+      vvp_vector4_t res (1, lb);
+      thr->push_vec4(res);
      return true;
 }