Load_add_immediate to work with signed expressions

The %load/vp0 instruction adds a signed value to the signal value being loaded, but it does not allow for a signed source vector. Add the %load/vp0/s instruction, which sign-extends the loaded vector to the result width, and add the code generator support needed to use it.
2008-06-13 20:23:40 -07:00 · 2008-06-13 20:23:40 -07:00 · 6f0d8e8dda
parent 62d7c081dc
commit 6f0d8e8dda
5 changed files with 70 additions and 37 deletions
--- a/tgt-vvp/eval_expr.c
+++ b/tgt-vvp/eval_expr.c
@ -28,7 +28,7 @@
 static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
 				int ok_flags);
 static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
-			     int add_index, unsigned long immediate);
+			     int add_index, long immediate);

 int number_is_unknown(ivl_expr_t ex)
 {
@ -1158,12 +1158,11 @@ static struct vector_info draw_binary_expr_lrs(ivl_expr_t exp, unsigned wid)

 static struct vector_info draw_load_add_immediate(ivl_expr_t le,
 						  ivl_expr_t re,
-						  unsigned wid)
+						  unsigned wid,
+						  int signed_flag)
 {
      struct vector_info lv;
-      unsigned long imm;
-
-      imm = get_number_immediate(re);
+      long imm = get_number_immediate(re);
      lv.base = allocate_vector(wid);
      lv.wid = wid;
      if (lv.base == 0) {
@ -1176,7 +1175,7 @@ static struct vector_info draw_load_add_immediate(ivl_expr_t le,

 	/* Load the signal value with a %load that adds the index
 	   register to the value being loaded. */
-      draw_signal_dest(le, lv, 0, imm);
+      draw_signal_dest(le, lv, signed_flag, imm);

      return lv;
 }
@ -1319,25 +1318,27 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid)

      const char*sign_string = ivl_expr_signed(le) && ivl_expr_signed(re)? "/s" : "";

+      int signed_flag = ivl_expr_signed(exp)? 1 : 0;
+
      if ((ivl_expr_opcode(exp) == '+')
 	  && (ivl_expr_type(le) == IVL_EX_SIGNAL)
 	  && (ivl_expr_type(re) == IVL_EX_ULONG))
-	    return draw_load_add_immediate(le, re, wid);
+	    return draw_load_add_immediate(le, re, wid, signed_flag);

      if ((ivl_expr_opcode(exp) == '+')
 	  && (ivl_expr_type(le) == IVL_EX_SIGNAL)
 	  && (ivl_expr_type(re) == IVL_EX_NUMBER))
-	    return draw_load_add_immediate(le, re, wid);
+	    return draw_load_add_immediate(le, re, wid, signed_flag);

      if ((ivl_expr_opcode(exp) == '+')
 	  && (ivl_expr_type(re) == IVL_EX_SIGNAL)
 	  && (ivl_expr_type(le) == IVL_EX_ULONG))
-	    return draw_load_add_immediate(re, le, wid);
+	    return draw_load_add_immediate(re, le, wid, signed_flag);

      if ((ivl_expr_opcode(exp) == '+')
 	  && (ivl_expr_type(re) == IVL_EX_SIGNAL)
 	  && (ivl_expr_type(le) == IVL_EX_NUMBER))
-	    return draw_load_add_immediate(re, le, wid);
+	    return draw_load_add_immediate(re, le, wid, signed_flag);

      if ((ivl_expr_opcode(exp) == '+')
 	  && (ivl_expr_type(re) == IVL_EX_ULONG))
@ -1963,11 +1964,13 @@ void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned swid)
 * offsetting the read from the lsi (least significant index) of the
 * signal.
 *
- * If the add_index is >=0, then generate a %load/vp0 to add the
- * word0 value to the loaded value before storing it into the destination.
+ * If the add_index is 0, then generate a %load/vp0 to add the
+ * word0 value to the loaded value before storing it into the
+ * destination. If the add_index is 1, then generate a %load/vp0/s to
+ * do a signed load.
 */
 static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
-			     int add_index, unsigned long immediate)
+			     int add_index, long immediate)
 {
      unsigned swid = ivl_expr_width(exp);
      ivl_signal_t sig = ivl_expr_signal(exp);
@ -2009,13 +2012,17 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,

      } else if (add_index >= 0) {

-	    assert(add_index == 0);
+	    const char*sign_flag = add_index==1? "/s" : "";

 	      /* If this is a REG (a variable) then I can do a vector read. */
-	    fprintf(vvp_out, "    %%ix/load 0, %lu;\n", immediate);
-	    fprintf(vvp_out, "    %%ix/load 2, %u;\n", res.wid);
-	    fprintf(vvp_out, "    %%load/vp0 %u, v%p_%u, %u;\n",
-		    res.base, sig, word, swid);
+	    if (immediate >= 0) {
+		  fprintf(vvp_out, "    %%ix/load 0, %lu;\n", immediate);
+	    } else {
+		  fprintf(vvp_out, "   %%ix/load 0, 0; immediate=%ld\n", immediate);
+		  fprintf(vvp_out, "   %%ix/sub 0, %ld;\n", -immediate);
+	    }
+	    fprintf(vvp_out, "    %%load/vp0%s %u, v%p_%u, %u;\n", sign_flag,
+		    res.base, sig,word, res.wid);
 	    swid = res.wid;

      } else {
--- a/vvp/codes.h
+++ b/vvp/codes.h
@ -100,6 +100,7 @@ extern bool of_LOAD_AVP0(vthread_t thr, vvp_code_t code);
 extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
 extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code);
 extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
+extern bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t code);
 extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code);
 extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code);
 extern bool of_LOADI_WR(vthread_t thr, vvp_code_t code);
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@ -143,6 +143,7 @@ const static struct opcode_table_s opcode_table[] = {
      { "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR,  OA_BIT2} },
      { "%load/v", of_LOAD_VEC,3, {OA_BIT1,     OA_FUNC_PTR, OA_BIT2} },
      { "%load/vp0",of_LOAD_VP0,3,{OA_BIT1,     OA_FUNC_PTR, OA_BIT2} },
+      { "%load/vp0/s",of_LOAD_VP0_S,3,{OA_BIT1,     OA_FUNC_PTR, OA_BIT2} },
      { "%load/wr",of_LOAD_WR,2,  {OA_BIT1,     OA_VPI_PTR,  OA_BIT2} },
      { "%load/x1p",of_LOAD_X1P,3,{OA_BIT1,     OA_FUNC_PTR, OA_BIT2} },
      { "%loadi/wr",of_LOADI_WR,3,{OA_BIT1,     OA_NUMBER,   OA_BIT2} },
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@ -437,18 +437,21 @@ the specified thread register bit. The functor-label can refer to a
 from the least significant up to <wid> bits, is loaded starting at
 thread bit <bit>. It is OK for the width to not match the vector
 width at the functor. If the <wid> is less than the width at the
-functor, then the most significant bits are dropped.
+functor, then the most significant bits are dropped. If the <wid> is
+more than the width at the functor, the value is padded with X bits.

 * %load/vp0 <bit>, <functor-label>, <wid>
+* %load/vp0/s <bit>, <functor-label>, <wid>

-This instruction is the same as %load/v above, except that it also
-adds the integer value is index register 0 into the loaded value. The
-addition is a Verilog-style add, which means that if any of the input
-bits are X or Z, the entire result is turned into a vector of X bits.
+This instruction is similar to %load/v above, except that it also
+adds the signed integer value in index register 0 into the loaded
+value. The addition is a Verilog-style add, which means that if any of
+the input bits are X or Z, the entire result is turned into a vector
+of X bits.

-Index register 2 contains the result width. The addition of the loaded
-value and the index are done at this width to avoid the problem of a
-small vector with a large immediate offset indexing an array.
+The <wid> is, like the %load/v, the result width. But unlike the
+%load/v, the vector is padded with 0s (%load/vp0) or sign extended
+(%load/vp0/s) to the desired width.

 * %load/wr <bit>, <vpi-label>

--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@ -2467,9 +2467,6 @@ bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t cp)
 */
 static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
 {
-      assert(cp->bit_idx[0] >= 4);
-      assert(cp->bit_idx[1] > 0);
-
      vvp_net_t*net = cp->net;

 	/* For the %load to work, the functor must actually be a
@ -2501,6 +2498,8 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
 	   directly to skip the excess calls to thr_check_addr. */
      thr->bits4.set_vec(bit, sig_value);

+	/* If the source is shorter than the desired width, then pad
+	   with BIT4_X values. */
      for (unsigned idx = sig_value.size() ; idx < wid ; idx += 1)
 	    thr->bits4.set_bit(bit+idx, BIT4_X);

@ -2511,16 +2510,12 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
 * This is like of_LOAD_VEC, but includes an add of an integer value from
 * index 0. The <wid> is the expected result width not the vector width.
 */
-bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
+
+static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value)
 {
      unsigned bit = cp->bit_idx[0];
+      unsigned wid = cp->bit_idx[1];
      int64_t addend = thr->words[0].w_int;
-      unsigned wid = thr->words[2].w_int;
-
-        /* We need a vector this wide to make the math work correctly.
-         * Copy the base bits into the vector, but keep the width. */
-      vvp_vector4_t sig_value(wid, BIT4_0);
-      sig_value.copy_bits(load_base(thr, cp));

 	/* Check the address once, before we scan the vector. */
      thr_check_addr(thr, bit+wid-1);
@ -2529,7 +2524,7 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
      if (val == 0) {
 	    vvp_vector4_t tmp(wid, BIT4_X);
 	    thr->bits4.set_vec(bit, tmp);
-	    return true;
+	    return;
      }

      unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;
@ -2551,7 +2546,33 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
 	   directly to skip the excess calls to thr_check_addr. */
      thr->bits4.setarray(bit, wid, val);
      delete[]val;
+}

+bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
+{
+      unsigned wid = cp->bit_idx[1];
+
+        /* We need a vector this wide to make the math work correctly.
+         * Copy the base bits into the vector, but keep the width. */
+      vvp_vector4_t sig_value(wid, BIT4_0);
+      sig_value.copy_bits(load_base(thr, cp));
+
+      load_vp0_common(thr, cp, sig_value);
+      return true;
+}
+
+bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp)
+{
+      unsigned wid = cp->bit_idx[1];
+
+      vvp_vector4_t tmp (load_base(thr, cp));
+
+        /* We need a vector this wide to make the math work correctly.
+         * Copy the base bits into the vector, but keep the width. */
+      vvp_vector4_t sig_value(wid, tmp.value(tmp.size()-1));
+      sig_value.copy_bits(tmp);
+
+      load_vp0_common(thr, cp, sig_value);
      return true;
 }