More efficient way to set arithmetic results into vector4.

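The vvp_vector4_t often receives the results of vector arithmetic. Add an optimized method, setarray(), that writes an array of unsigned long words into the vector using word-wide masks and shifts instead of setting one bit at a time. The method takes advantage of the fact that arithmetic results have no X/Z bits.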
2008-04-23 13:50:05 -07:00 · 2008-04-23 13:50:05 -07:00 · b775d178d2
parent 10ea9904f1
commit b775d178d2
3 changed files with 76 additions and 13 deletions
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@ -453,10 +453,7 @@ bool of_ADD(vthread_t thr, vvp_code_t cp)
 	/* We know from the vector_to_array that the address is valid
 	   in the thr->bits4 vector, so just set the bits directly. */

-      for (unsigned idx = 0 ;  idx < cp->number ;  idx += 1) {
-	    unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS);
-	    thr->bits4.set_bit(cp->bit_idx[0]+idx, (bit&1) ? BIT4_1 : BIT4_0);
-      }
+      thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);

      delete[]lva;
      delete[]lvb;
@ -525,11 +522,10 @@ bool of_ADDI(vthread_t thr, vvp_code_t cp)
 	    lva[idx] = sum;
      }

-      thr_check_addr(thr, bit_addr + bit_width - 1);
-      for (unsigned idx = 0 ;  idx < bit_width ;  idx += 1) {
-	    unsigned long bit = lva[idx/CPU_WORD_BITS] >> (idx%CPU_WORD_BITS);
-	    thr->bits4.set_bit(bit_addr+idx, (bit&1UL) ? BIT4_1:BIT4_0);
-      }
+	/* We know from the vector_to_array that the address is valid
+	   in the thr->bits4 vector, so just set the bits directly. */
+
+      thr->bits4.setarray(bit_addr, bit_width, lva);

      delete[]lva;
      delete[]lvb;
@ -3761,10 +3757,10 @@ bool of_SUBI(vthread_t thr, vvp_code_t cp)
 	    lva[idx] = sum;
      }

-      for (unsigned idx = 0 ;  idx < cp->number ;  idx += 1) {
-	    unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS);
-	    thr_put_bit(thr, cp->bit_idx[0]+idx, (bit&1) ? BIT4_1 : BIT4_0);
-      }
+	/* We know from the vector_to_array that the address is valid
+	   in the thr->bits4 vector, so just set the bits directly. */
+
+      thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);

      delete[]lva;
      delete[]lvb;
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@ -414,6 +414,72 @@ unsigned long* vvp_vector4_t::subarray(unsigned adr, unsigned wid) const
      return 0;
 }

+void vvp_vector4_t::setarray(unsigned adr, unsigned wid, const unsigned long*val)
+{	/* Overwrite bits [adr, adr+wid) of this vector with the bits
+	 * packed in val[], taken in order starting at bit 0 of val[0].
+	 * Over that range the abits receive the source bits and the
+	 * bbits are cleared; bits outside the range are left untouched.
+	 * (Presumably cleared bbits mean "not X/Z", matching the commit
+	 * note that arithmetic results have no X/Z bits -- confirm
+	 * against the class definition.)
+	 *
+	 * adr+wid must not exceed size_ (asserted). The multi-word path
+	 * consumes val[] one word per BIT2_PER_WORD bits of wid; the
+	 * single-word path reads only val[0].
+	 *
+	 * NOTE(review): in the single-word path, wid == 0 together with
+	 * adr == size_ == BITS_PER_WORD passes the assert but shifts by
+	 * a full word width -- confirm callers never pass wid == 0.
+	 */
+      assert(adr+wid <= size_);
+
+      const unsigned BIT2_PER_WORD = 8*sizeof(unsigned long);  // bits in one word of val[]
+
+      if (size_ <= BITS_PER_WORD) {
+	      // We know here that both the source and the target are
+	      // within a single word. Write the bits into the
+	      // abits_val_ directly.
+
+	    assert(BIT2_PER_WORD <= BITS_PER_WORD);  // NOTE(review): only val[0] is read below; confirm this is the intended direction
+	    unsigned long lmask = (1UL << adr) - 1UL;  // bits below adr (kept)
+	    unsigned long hmask = ((adr+wid) < BITS_PER_WORD)
+		  ? -1UL << (adr+wid)  // bits at and above adr+wid (kept)
+		  : 0;  // range reaches the top bit: nothing above, and no full-width shift
+	    unsigned long mask = ~(hmask | lmask);  // exactly the bits being written
+
+	    abits_val_ &= ~mask;  // clear the target range in both words
+	    bbits_val_ &= ~mask;
+
+	    abits_val_ |= mask & (val[0] << adr);  // place source bit 0 at bit adr
+
+      } else {
+	      // The general case, there are multiple words of
+	      // destination, and possibly multiple words of source
+	      // data. Shift and mask as we go.
+	    unsigned off = adr % BITS_PER_WORD;  // bit position inside the current target word
+	    unsigned ptr = adr / BITS_PER_WORD;  // index of the current target word
+	    unsigned val_off = 0;  // bit position inside the current source word
+	    unsigned val_ptr = 0;  // index of the current source word
+	    while (wid > 0) {
+		  unsigned trans = wid;  // bits moved this pass, capped by...
+		  if (trans > (BIT2_PER_WORD-val_off))
+			trans = BIT2_PER_WORD-val_off;  // ...what is left in the source word
+		  if (trans > (BITS_PER_WORD-off))
+			trans = BITS_PER_WORD-off;  // ...and what is left in the target word
+
+		  unsigned long lmask = (1UL << off) - 1UL;  // bits below off (kept)
+		  unsigned long hmask = ((off+trans) < BITS_PER_WORD)
+			? -1UL << (off+trans)  // bits at and above off+trans (kept)
+			: 0;  // chunk reaches the top bit of the target word
+		  unsigned long mask = ~(hmask | lmask);  // the trans bits written this pass
+
+		  abits_ptr_[ptr] &= ~mask;  // clear the chunk in both arrays
+		  bbits_ptr_[ptr] &= ~mask;
+		  if (val_off >= off)  // line source bit val_off up with target bit off
+			abits_ptr_[ptr] |= mask & (val[val_ptr] >> (val_off-off));
+		  else
+			abits_ptr_[ptr] |= mask & (val[val_ptr] << (off-val_off));
+
+		  wid -= trans;
+		  val_off += trans;
+		  if (val_off == BIT2_PER_WORD) {  // source word used up: step to the next one
+			val_off = 0;
+			val_ptr += 1;
+		  }
+		  off += trans;
+		  if (off == BITS_PER_WORD) {  // target word filled: step to the next one
+			off = 0;
+			ptr += 1;
+		  }
+	    }
+      }
+}
+
 /*
 * Set the bits of that vector, which must be a subset of this vector,
 * into the addressed part of this vector. Use bit masking and word
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@ -130,6 +130,7 @@ class vvp_vector4_t {
 	// array of longs, or a nil pointer if an XZ bit was detected
 	// in the array.
      unsigned long*subarray(unsigned idx, unsigned size) const;
+      void setarray(unsigned idx, unsigned size, const unsigned long*val);

      void set_bit(unsigned idx, vvp_bit4_t val);
      void set_vec(unsigned idx, const vvp_vector4_t&that);