Minor optimization of partial load/v

When loading a part select from the least significant bits, it is OK to use the %load/v instruction to strip the high bits off. This allows the zero-based part select to work in one step, without loading excess bits.
2007-12-09 17:28:49 -08:00 · 2007-12-09 17:28:49 -08:00 · 8d9998c44e
parent bee4772ac8
commit 8d9998c44e
2 changed files with 9 additions and 30 deletions
--- a/tgt-vvp/eval_expr.c
+++ b/tgt-vvp/eval_expr.c
@ -1861,42 +1861,20 @@ static struct vector_info draw_select_signal(ivl_expr_t sube,
 	    use_word = get_number_immediate(ix);
      }

-	/* Try the special case that the base is 0 and the width
-	   exactly matches the signal. Just load the signal in one
-	   instruction. */
+	/* Try the special case that the part is at the beginning of
+	   the signal (or the entire width). Just load the early bits
+	   in one go. */
      if (number_is_immediate(bit_idx, 32)
 	  && get_number_immediate(bit_idx) == 0
-	  && ivl_expr_width(sube) == wid) {
+	  && (ivl_expr_width(sube) >= wid)) {
+
 	    res.base = allocate_vector(wid);
 	    res.wid = wid;
-	    fprintf(vvp_out, "   %%load/v %u, v%p_%u, %u;\n",
-		    res.base, sig, use_word, ivl_expr_width(sube));
-
-	    return res;
-      }
-
-	/* Try the special case that the part is at the beginning and
-	   nearly the width of the signal. In this case, just load the
-	   entire signal in one go, then simply drop the excess bits. */
-      if (number_is_immediate(bit_idx, 32)
-	  && get_number_immediate(bit_idx) == 0
-	  && (ivl_expr_width(sube) > wid)
-	  && (ivl_expr_width(sube) < (wid+wid/10))) {
-
-	    res.base = allocate_vector(ivl_expr_width(sube));
-	    res.wid = ivl_expr_width(sube);
 	    fprintf(vvp_out, "   %%load/v %u, v%p_%u, %u; Only need %u of %u bits\n",
-		    res.base, sig, use_word, ivl_expr_width(sube), wid, res.wid);
+		    res.base, sig, use_word, wid, wid, ivl_expr_width(sube));
 
 	    save_signal_lookaside(res.base, sig, use_word, res.wid);

-	    {
-		  struct vector_info tmp;
-		  tmp.base = res.base + wid;
-		  tmp.wid = res.wid - wid;
-		  clr_vector(tmp);
-		  res.wid = wid;
-	    }
 	    return res;
      }

--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@ -418,8 +418,9 @@ This instruction loads a vector value from the given functor node into
 the specified thread register bit. The functor-label can refer to a
 .net, a .var or a .functor with a vector output. The entire vector,
 from the least significant up to <wid> bits, is loaded starting at
-thread bit <bit>. It is an error for the width to not match the vector
-width at the functor.
+thread bit <bit>. It is OK for the width to not match the vector
+width at the functor. If the <wid> is less than the width at the
+functor, then the most significant bits are dropped.

 * %load/vp0 <bit>, <functor-label>, <wid>