More efficient way to set arithmetic results into vector4.

The vvp_vector4_t often receives the results of vector arithmetic.
Add an optimized method for setting that data into the vector. Take
into account that arithmetic results have no X/Z bits, etc.
This commit is contained in:
Stephen Williams 2008-04-23 13:50:05 -07:00
parent 10ea9904f1
commit b775d178d2
3 changed files with 76 additions and 13 deletions

View File

@ -453,10 +453,7 @@ bool of_ADD(vthread_t thr, vvp_code_t cp)
/* We know from the vector_to_array that the address is valid
in the thr->bits4 vector, so just set the bits directly. */
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS);
thr->bits4.set_bit(cp->bit_idx[0]+idx, (bit&1) ? BIT4_1 : BIT4_0);
}
thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);
delete[]lva;
delete[]lvb;
@ -525,11 +522,10 @@ bool of_ADDI(vthread_t thr, vvp_code_t cp)
lva[idx] = sum;
}
thr_check_addr(thr, bit_addr + bit_width - 1);
for (unsigned idx = 0 ; idx < bit_width ; idx += 1) {
unsigned long bit = lva[idx/CPU_WORD_BITS] >> (idx%CPU_WORD_BITS);
thr->bits4.set_bit(bit_addr+idx, (bit&1UL) ? BIT4_1:BIT4_0);
}
/* We know from the vector_to_array that the address is valid
in the thr->bits4 vector, so just set the bits directly. */
thr->bits4.setarray(bit_addr, bit_width, lva);
delete[]lva;
delete[]lvb;
@ -3761,10 +3757,10 @@ bool of_SUBI(vthread_t thr, vvp_code_t cp)
lva[idx] = sum;
}
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS);
thr_put_bit(thr, cp->bit_idx[0]+idx, (bit&1) ? BIT4_1 : BIT4_0);
}
/* We know from the vector_to_array that the address is valid
in the thr->bits4 vector, so just set the bits directly. */
thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);
delete[]lva;
delete[]lvb;

View File

@ -414,6 +414,72 @@ unsigned long* vvp_vector4_t::subarray(unsigned adr, unsigned wid) const
return 0;
}
/*
 * Copy wid bits out of the packed word array val (bit 0 of val[0] is
 * the first source bit) into this vector, starting at bit adr. For
 * every bit written, the matching bbits bit is cleared and the abits
 * bit is loaded from val; bits outside [adr, adr+wid) are untouched.
 * NOTE(review): clearing bbits presumably marks the bit as a plain
 * 0/1 rather than X/Z -- confirm against the class definition.
 *
 * The caller must guarantee adr+wid <= size_ (asserted), and val must
 * hold at least wid bits. A zero wid is a no-op and val is not read.
 */
void vvp_vector4_t::setarray(unsigned adr, unsigned wid, const unsigned long*val)
{
    assert(adr+wid <= size_);

    // Nothing to write. Leave before val[0] is read and before any
    // shift is formed: with wid == 0 the assertion above still allows
    // adr == size_, which would make the shifts below undefined if
    // size_ equals the width of unsigned long.
    if (wid == 0)
        return;

    // Number of bits carried by each word of the source array.
    const unsigned BIT2_PER_WORD = 8*sizeof(unsigned long);

    if (size_ <= BITS_PER_WORD) {
        // We know here that both the source and the target are
        // within a single word. Write the bits into the
        // abits_val_ directly.
        assert(BIT2_PER_WORD <= BITS_PER_WORD);

        // lmask covers the bits below adr, hmask the bits at and
        // above adr+wid; mask is what remains, i.e. the target bits.
        unsigned long lmask = (1UL << adr) - 1UL;
        unsigned long hmask = ((adr+wid) < BITS_PER_WORD)
            ? -1UL << (adr+wid)
            : 0;
        unsigned long mask = ~(hmask | lmask);

        abits_val_ &= ~mask;
        bbits_val_ &= ~mask;

        abits_val_ |= mask & (val[0] << adr);

    } else {
        // The general case, there are multiple words of
        // destination, and possibly multiple words of source
        // data. Shift and mask as we go.
        unsigned off = adr % BITS_PER_WORD;   // bit offset in destination word
        unsigned ptr = adr / BITS_PER_WORD;   // destination word index
        unsigned val_off = 0;                 // bit offset in source word
        unsigned val_ptr = 0;                 // source word index

        while (wid > 0) {
            // Move as many bits as fit without crossing a word
            // boundary in either the source or the destination.
            unsigned trans = wid;
            if (trans > (BIT2_PER_WORD-val_off))
                trans = BIT2_PER_WORD-val_off;
            if (trans > (BITS_PER_WORD-off))
                trans = BITS_PER_WORD-off;

            // Same mask construction as the single-word case, but
            // for the [off, off+trans) slice of this word.
            unsigned long lmask = (1UL << off) - 1UL;
            unsigned long hmask = ((off+trans) < BITS_PER_WORD)
                ? -1UL << (off+trans)
                : 0;
            unsigned long mask = ~(hmask | lmask);

            abits_ptr_[ptr] &= ~mask;
            bbits_ptr_[ptr] &= ~mask;

            // Align source bit val_off with destination bit off.
            if (val_off >= off)
                abits_ptr_[ptr] |= mask & (val[val_ptr] >> (val_off-off));
            else
                abits_ptr_[ptr] |= mask & (val[val_ptr] << (off-val_off));

            wid -= trans;

            // Advance the source cursor, stepping to the next word
            // when the current one is used up.
            val_off += trans;
            if (val_off == BIT2_PER_WORD) {
                val_off = 0;
                val_ptr += 1;
            }

            // Advance the destination cursor the same way.
            off += trans;
            if (off == BITS_PER_WORD) {
                off = 0;
                ptr += 1;
            }
        }
    }
}
/*
* Set the bits of that vector, which must be a subset of this vector,
* into the addressed part of this vector. Use bit masking and word

View File

@ -130,6 +130,7 @@ class vvp_vector4_t {
// array of longs, or a nil pointer if an XZ bit was detected
// in the array.
unsigned long*subarray(unsigned idx, unsigned size) const;
void setarray(unsigned idx, unsigned size, const unsigned long*val);
void set_bit(unsigned idx, vvp_bit4_t val);
void set_vec(unsigned idx, const vvp_vector4_t&that);