Optimize the %sub instruction by integrating it with the vvp_vector4_t class

This commit is contained in:
Stephen Williams 2014-12-04 17:01:16 -08:00
parent 0f740289e9
commit b1d2393789
3 changed files with 65 additions and 34 deletions

View File

@ -5461,41 +5461,9 @@ bool of_STORE_VEC4A(vthread_t thr, vvp_code_t cp)
/*
 * %sub
 *
 * Subtract the top vec4 stack value from the one beneath it and leave
 * the difference on the stack in place of the two operands.
 *
 * The arithmetic is delegated to vvp_vector4_t::sub(), which asserts
 * that both operands have the same width and writes the X pattern to
 * every bit of the result when either operand has a bit set in its
 * b-bits plane (presumably an X or Z bit). Doing the work in place
 * avoids the subarray()/setarray() copies and the heap allocations
 * that an array-based implementation needs.
 *
 * Always returns true.
 */
bool of_SUB(vthread_t thr, vvp_code_t)
{
      // Right-hand operand: taken off the stack by value.
      vvp_vector4_t r = thr->pop_vec4();

      // Left-hand operand: accessed by reference where it sits on the
      // stack, so the result lands there without a separate push.
      vvp_vector4_t&l = thr->peek_vec4();
      l.sub(r);

      return true;
}

View File

@ -1480,6 +1480,66 @@ void vvp_vector4_t::add(const vvp_vector4_t&that)
}
}
/*
 * Subtract "that" from this vector in place.
 *
 * Both vectors must have the same size (asserted). If either operand
 * has any bit set in its b-bits plane within the vector's width, the
 * subtraction is abandoned and the whole vector is overwritten with
 * the X pattern. Otherwise the a-bits receive the difference,
 * truncated to the vector's width.
 *
 * Three storage layouts are handled separately: vectors narrower than
 * a word and vectors of exactly one word keep their bits in
 * abits_val_/bbits_val_; wider vectors use the abits_ptr_/bbits_ptr_
 * word arrays.
 */
void vvp_vector4_t::sub(const vvp_vector4_t&that)
{
      assert(size_ == that.size_);

      // Narrower than one word: only the low size_ bits are live.
      if (size_ < BITS_PER_WORD) {
            const unsigned long live = ~(-1UL << size_);
            const bool unknown = ((bbits_val_ | that.bbits_val_) & live) != 0;
            if (unknown) {
                  // Force both planes to ones across the live bits.
                  abits_val_ |= live;
                  bbits_val_ |= live;
            } else {
                  abits_val_ = (abits_val_ - that.abits_val_) & live;
            }
            return;
      }

      // Exactly one word: native subtraction wraps at the right width.
      if (size_ == BITS_PER_WORD) {
            if ((bbits_val_ | that.bbits_val_) == 0) {
                  abits_val_ -= that.abits_val_;
            } else {
                  abits_val_ = WORD_X_ABITS;
                  bbits_val_ = WORD_X_BBITS;
            }
            return;
      }

      // Multi-word: compute this + ~that + 1 one word at a time, with
      // the carry chained through add_with_carry().
      const int full_words = size_ / BITS_PER_WORD;
      const unsigned tail_bits = size_ % BITS_PER_WORD;
      const unsigned long tail_mask = ~(-1UL << tail_bits);

      unsigned long carry = 1;
      bool unknown = false;

      for (int w = 0 ; w < full_words ; w += 1) {
            if (bbits_ptr_[w] | that.bbits_ptr_[w]) {
                  unknown = true;
                  break;
            }
            abits_ptr_[w] = add_with_carry(abits_ptr_[w], ~that.abits_ptr_[w], carry);
      }

      // Trailing partial word, if any: only its low tail_bits count.
      if (!unknown && tail_bits != 0) {
            if ((bbits_ptr_[full_words] | that.bbits_ptr_[full_words]) & tail_mask) {
                  unknown = true;
            } else {
                  abits_ptr_[full_words] = add_with_carry(abits_ptr_[full_words],
                                                          ~that.abits_ptr_[full_words],
                                                          carry);
                  abits_ptr_[full_words] &= tail_mask;
            }
      }

      if (!unknown)
            return;

      // An unknown bit was found: overwrite everything, including any
      // words already updated above, with the X pattern.
      for (int w = 0 ; w < full_words ; w += 1) {
            abits_ptr_[w] = WORD_X_ABITS;
            bbits_ptr_[w] = WORD_X_BBITS;
      }
      if (tail_bits != 0) {
            abits_ptr_[full_words] = WORD_X_ABITS & tail_mask;
            bbits_ptr_[full_words] = WORD_X_BBITS & tail_mask;
      }
}
void vvp_vector4_t::mov(unsigned dst, unsigned src, unsigned cnt)
{
assert(dst+cnt <= size_);

View File

@ -296,6 +296,9 @@ class vvp_vector4_t {
// Add that to this in the Verilog way.
void add(const vvp_vector4_t&that);
// Subtract that from this in the Verilog way. The difference
// replaces the value of this vector. Both vectors must be the
// same size (asserted). If either operand has a bit set in its
// b-bits plane (presumably an X or Z bit), every bit of the
// result is set to the X pattern.
void sub(const vvp_vector4_t&that);
// Multiply this by that in the Verilog way.
void mul(const vvp_vector4_t&that);