Implement and put to use the %muli instruction.

2014-12-02 12:46:17 -08:00 · 2014-12-02 12:46:17 -08:00 · 58fb80aec4
parent 38f277d81b
commit 58fb80aec4
5 changed files with 43 additions and 7 deletions
--- a/tgt-vvp/eval_vec4.c
+++ b/tgt-vvp/eval_vec4.c
@ -138,6 +138,9 @@ static void draw_binary_vec4_arith(ivl_expr_t expr)
 		case '+':
 		  draw_immediate_vec4(re, "%addi");
 		  return;
+		case '*':
+		  draw_immediate_vec4(re, "%muli");
+		  return;
 		default:
 		  break;
 	    }
--- a/vvp/codes.h
+++ b/vvp/codes.h
@ -151,6 +151,7 @@ extern bool of_MOD_S(vthread_t thr, vvp_code_t code);
 extern bool of_MOD_WR(vthread_t thr, vvp_code_t code);
 extern bool of_MOV_WU(vthread_t thr, vvp_code_t code);
 extern bool of_MUL(vthread_t thr, vvp_code_t code);
+extern bool of_MULI(vthread_t thr, vvp_code_t code);
 extern bool of_MUL_WR(vthread_t thr, vvp_code_t code);
 extern bool of_NAND(vthread_t thr, vvp_code_t code);
 extern bool of_NANDR(vthread_t thr, vvp_code_t code);
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@ -200,6 +200,7 @@ static const struct opcode_table_s opcode_table[] = {
      { "%mov/wu", of_MOV_WU, 2,  {OA_BIT1,     OA_BIT2,     OA_NONE} },
      { "%mul",    of_MUL,    0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%mul/wr", of_MUL_WR, 0,  {OA_NONE,     OA_NONE,     OA_NONE} },
+      { "%muli",   of_MULI,   3,  {OA_BIT1,     OA_BIT2,     OA_NUMBER} },
      { "%nand",   of_NAND,   0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%nand/r", of_NANDR,  0,  {OA_NONE,     OA_NONE,     OA_NONE} },
      { "%new/cobj",  of_NEW_COBJ,  1, {OA_VPI_PTR,OA_NONE,  OA_NONE} },
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@ -754,6 +754,7 @@ This opcode is the real-valued modulus of the two real values.
 * %mov/wu <dst>, <src>

 * %mul
+* %muli <vala>, <valb>, <wid>

 This instruction multiplies the left vector by the right vector, the
 vectors are popped from the vec4 stack and have the same width. If
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@ -3676,7 +3676,7 @@ static void do_verylong_mod(vvp_vector4_t&vala, const vvp_vector4_t&valb,

      vvp_vector4_t tmp (len, BIT4_X);
      carry = out_is_neg? 1 : 0;
-      for (unsigned idx = 0 ;  idx < len ;  idx += 1) {
+      for (int idx = 0 ;  idx < len ;  idx += 1) {
 	    unsigned ob = z[idx];
 	    if (out_is_neg) {
 		  ob = (1-ob) + carry;
@ -4004,13 +4004,8 @@ bool of_MOV_WU(vthread_t thr, vvp_code_t cp)
      return true;
 }

-/*
- * %mul
- */
-bool of_MUL(vthread_t thr, vvp_code_t)
+static bool do_MUL(vvp_vector4_t&vala, const vvp_vector4_t&valb)
 {
-      vvp_vector4_t valb = thr->pop_vec4();
-      vvp_vector4_t&vala = thr->peek_vec4();
      assert(vala.size() == valb.size());
      unsigned wid = vala.size();

@ -4065,6 +4060,41 @@ bool of_MUL(vthread_t thr, vvp_code_t)
      return true;
 }

+/*
+ * %mul
+ */
+bool of_MUL(vthread_t thr, vvp_code_t)
+{
+      vvp_vector4_t r = thr->pop_vec4();
+	// Rather than pop l, use it directly from the stack. When we
+	// assign to 'l', that will edit the top of the stack, which
+	// replaces a pop and a pull.
+      vvp_vector4_t&l = thr->peek_vec4();
+
+      return do_MUL(l, r);
+}
+
+/*
+ * %muli <vala>, <valb>, <wid>
+ *
+ * Pop one operand, get the other operand from the arguments, and push
+ * the result.
+ */
+bool of_MULI(vthread_t thr, vvp_code_t cp)
+{
+      unsigned wid = cp->number;
+
+      vvp_vector4_t&l = thr->peek_vec4();
+
+	// I expect that most of the bits of an immediate value are
+	// going to be zero, so start the result vector with all zero
+	// bits. Then we only need to replace the bits that are different.
+      vvp_vector4_t r (wid, BIT4_0);
+      get_immediate_rval (cp, r);
+
+      return do_MUL(l, r);
+}
+
 bool of_MUL_WR(vthread_t thr, vvp_code_t)
 {
      double r = thr->pop_real();