More efficient way to set arithmetic results into vector4.
The vvp_vector4_t often receives the results of vector arithmetic. Add an optimized method for setting that data into the vector. Take into account that arithmetic results have no X/Z bits, etc.
This commit is contained in:
parent
10ea9904f1
commit
b775d178d2
|
|
@ -453,10 +453,7 @@ bool of_ADD(vthread_t thr, vvp_code_t cp)
|
|||
/* We know from the vector_to_array that the address is valid
|
||||
in the thr->bits4 vector, so just do the set bit. */
|
||||
|
||||
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
||||
unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS);
|
||||
thr->bits4.set_bit(cp->bit_idx[0]+idx, (bit&1) ? BIT4_1 : BIT4_0);
|
||||
}
|
||||
thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);
|
||||
|
||||
delete[]lva;
|
||||
delete[]lvb;
|
||||
|
|
@ -525,11 +522,10 @@ bool of_ADDI(vthread_t thr, vvp_code_t cp)
|
|||
lva[idx] = sum;
|
||||
}
|
||||
|
||||
thr_check_addr(thr, bit_addr + bit_width - 1);
|
||||
for (unsigned idx = 0 ; idx < bit_width ; idx += 1) {
|
||||
unsigned long bit = lva[idx/CPU_WORD_BITS] >> (idx%CPU_WORD_BITS);
|
||||
thr->bits4.set_bit(bit_addr+idx, (bit&1UL) ? BIT4_1:BIT4_0);
|
||||
}
|
||||
/* We know from the vector_to_array that the address is valid
|
||||
in the thr->bits4 vector, so just do the set bit. */
|
||||
|
||||
thr->bits4.setarray(bit_addr, bit_width, lva);
|
||||
|
||||
delete[]lva;
|
||||
delete[]lvb;
|
||||
|
|
@ -3761,10 +3757,10 @@ bool of_SUBI(vthread_t thr, vvp_code_t cp)
|
|||
lva[idx] = sum;
|
||||
}
|
||||
|
||||
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
|
||||
unsigned bit = lva[idx/CPU_WORD_BITS] >> (idx % CPU_WORD_BITS);
|
||||
thr_put_bit(thr, cp->bit_idx[0]+idx, (bit&1) ? BIT4_1 : BIT4_0);
|
||||
}
|
||||
/* We know from the vector_to_array that the address is valid
|
||||
in the thr->bits4 vector, so just do the set bit. */
|
||||
|
||||
thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);
|
||||
|
||||
delete[]lva;
|
||||
delete[]lvb;
|
||||
|
|
|
|||
|
|
@ -414,6 +414,72 @@ unsigned long* vvp_vector4_t::subarray(unsigned adr, unsigned wid) const
|
|||
return 0;
|
||||
}
|
||||
|
||||
void vvp_vector4_t::setarray(unsigned adr, unsigned wid, const unsigned long*val)
|
||||
{
|
||||
assert(adr+wid <= size_);
|
||||
|
||||
const unsigned BIT2_PER_WORD = 8*sizeof(unsigned long);
|
||||
|
||||
if (size_ <= BITS_PER_WORD) {
|
||||
// We know here that both the source and the target are
|
||||
// within a single word. Write the bits into the
|
||||
// abits_val_ directly.
|
||||
|
||||
assert(BIT2_PER_WORD <= BITS_PER_WORD);
|
||||
unsigned long lmask = (1UL << adr) - 1UL;
|
||||
unsigned long hmask = ((adr+wid) < BITS_PER_WORD)
|
||||
? -1UL << (adr+wid)
|
||||
: 0;
|
||||
unsigned long mask = ~(hmask | lmask);
|
||||
|
||||
abits_val_ &= ~mask;
|
||||
bbits_val_ &= ~mask;
|
||||
|
||||
abits_val_ |= mask & (val[0] << adr);
|
||||
|
||||
} else {
|
||||
// The general case, there are multiple words of
|
||||
// destination, and possibly multiple words of source
|
||||
// data. Shift and mask as we go.
|
||||
unsigned off = adr % BITS_PER_WORD;
|
||||
unsigned ptr = adr / BITS_PER_WORD;
|
||||
unsigned val_off = 0;
|
||||
unsigned val_ptr = 0;
|
||||
while (wid > 0) {
|
||||
unsigned trans = wid;
|
||||
if (trans > (BIT2_PER_WORD-val_off))
|
||||
trans = BIT2_PER_WORD-val_off;
|
||||
if (trans > (BITS_PER_WORD-off))
|
||||
trans = BITS_PER_WORD-off;
|
||||
|
||||
unsigned long lmask = (1UL << off) - 1UL;
|
||||
unsigned long hmask = ((off+trans) < BITS_PER_WORD)
|
||||
? -1UL << (off+trans)
|
||||
: 0;
|
||||
unsigned long mask = ~(hmask | lmask);
|
||||
|
||||
abits_ptr_[ptr] &= ~mask;
|
||||
bbits_ptr_[ptr] &= ~mask;
|
||||
if (val_off >= off)
|
||||
abits_ptr_[ptr] |= mask & (val[val_ptr] >> (val_off-off));
|
||||
else
|
||||
abits_ptr_[ptr] |= mask & (val[val_ptr] << (off-val_off));
|
||||
|
||||
wid -= trans;
|
||||
val_off += trans;
|
||||
if (val_off == BIT2_PER_WORD) {
|
||||
val_off = 0;
|
||||
val_ptr += 1;
|
||||
}
|
||||
off += trans;
|
||||
if (off == BITS_PER_WORD) {
|
||||
off = 0;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the bits of that vector, which must be a subset of this vector,
|
||||
* into the addressed part of this vector. Use bit masking and word
|
||||
|
|
|
|||
|
|
@ -130,6 +130,7 @@ class vvp_vector4_t {
|
|||
// array of longs, or a nil pointer if an XZ bit was detected
|
||||
// in the array.
|
||||
unsigned long*subarray(unsigned idx, unsigned size) const;
|
||||
void setarray(unsigned idx, unsigned size, const unsigned long*val);
|
||||
|
||||
void set_bit(unsigned idx, vvp_bit4_t val);
|
||||
void set_vec(unsigned idx, const vvp_vector4_t&that);
|
||||
|
|
|
|||
Loading…
Reference in New Issue