Optimize the SUB instruction

The %sub instruction didn't have the efficient implementation that
the %add instructions used. Update subtraction to use the array
method, so that it gets the same performance benefits.
This commit is contained in:
Stephen Williams 2008-04-23 14:03:52 -07:00
parent b775d178d2
commit 7b81eb3494
1 changed files with 20 additions and 16 deletions

View File

@ -3685,20 +3685,24 @@ bool of_SUB(vthread_t thr, vvp_code_t cp)
unsigned carry;
carry = 1;
for (unsigned idx = 0 ; idx < cp->number ; idx += 1) {
unsigned long tmp;
unsigned sum = carry;
tmp = lva[idx/CPU_WORD_BITS];
sum += 1 & (tmp >> (idx%CPU_WORD_BITS));
tmp = lvb[idx/CPU_WORD_BITS];
sum += 1 & ~(tmp >> (idx%CPU_WORD_BITS));
carry = sum / 2;
thr_put_bit(thr, cp->bit_idx[0]+idx, (sum&1) ? BIT4_1 : BIT4_0);
for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < cp->number ; idx += 1) {
unsigned long tmp = ~lvb[idx] + carry;
unsigned long sum = tmp + lva[idx];
carry = 0;
if (tmp < ~lvb[idx])
carry = 1;
if (sum < tmp)
carry = 1;
if (sum < lva[idx])
carry = 1;
lva[idx] = sum;
}
/* We know from the vector_to_array that the address is valid
in the thr->bits4 vector, so just do the set bit. */
thr->bits4.setarray(cp->bit_idx[0], cp->number, lva);
delete[]lva;
delete[]lvb;
@ -3708,8 +3712,8 @@ bool of_SUB(vthread_t thr, vvp_code_t cp)
delete[]lva;
delete[]lvb;
for (unsigned idx = 0 ; idx < cp->number ; idx += 1)
thr_put_bit(thr, cp->bit_idx[0]+idx, BIT4_X);
vvp_vector4_t tmp(cp->number, BIT4_X);
thr->bits4.set_vec(cp->bit_idx[0], tmp);
return true;
}
@ -3770,8 +3774,8 @@ bool of_SUBI(vthread_t thr, vvp_code_t cp)
x_out:
delete[]lva;
for (unsigned idx = 0 ; idx < cp->number ; idx += 1)
thr_put_bit(thr, cp->bit_idx[0]+idx, BIT4_X);
vvp_vector4_t tmp(cp->number, BIT4_X);
thr->bits4.set_vec(cp->bit_idx[0], tmp);
return true;
}