Optimize the %sub instruction by integrating it with the vvp_vector4_t class
This commit is contained in:
parent
0f740289e9
commit
b1d2393789
|
|
@ -5461,41 +5461,9 @@ bool of_STORE_VEC4A(vthread_t thr, vvp_code_t cp)
|
|||
// Handler for the %sub instruction: takes the right operand r with
// thr->pop_vec4(), subtracts it from the left operand l, and always
// returns true.
// NOTE(review): this listing declares `l` twice -- once by value from
// pop_vec4() (followed by a word-array subtract via subarray/setarray),
// and once by reference from peek_vec4() (followed by l.sub(r)). It
// looks like the removed and added sides of a diff shown together;
// presumably only the peek_vec4()/l.sub(r) form is live after the
// change -- confirm against the repository before relying on this text.
bool of_SUB(vthread_t thr, vvp_code_t)
|
||||
{
|
||||
vvp_vector4_t r = thr->pop_vec4();
|
||||
vvp_vector4_t l = thr->pop_vec4();
|
||||
|
||||
unsigned wid = l.size();
|
||||
assert(wid == r.size());
|
||||
|
||||
unsigned long*lva = l.subarray(0,wid);
|
||||
unsigned long*lvb = r.subarray(0,wid);
|
||||
// A null array from either operand diverts to x_out, which pushes an
// all-BIT4_X vector of the same width instead of a difference.
if (lva == 0 || lvb == 0)
|
||||
goto x_out;
|
||||
|
||||
|
||||
unsigned long carry;
|
||||
// l - r is computed word by word as l + ~r + 1; the initial carry of 1
// supplies the "+ 1".
carry = 1;
|
||||
for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < wid ; idx += 1)
|
||||
lva[idx] = add_with_carry(lva[idx], ~lvb[idx], carry);
|
||||
|
||||
|
||||
/* We know from the vector_to_array that the address is valid
|
||||
in the thr->bitr4 vector, so just do the set bit. */
|
||||
|
||||
l.setarray(0,wid,lva);
|
||||
thr->push_vec4(l);
|
||||
|
||||
delete[]lva;
|
||||
delete[]lvb;
|
||||
|
||||
return true;
|
||||
|
||||
x_out:
|
||||
delete[]lva;
|
||||
delete[]lvb;
|
||||
|
||||
vvp_vector4_t tmp(wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
// In this form the left operand is referenced through peek_vec4() and
// updated by l.sub(r), with no push afterwards.
vvp_vector4_t&l = thr->peek_vec4();
|
||||
|
||||
l.sub(r);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1480,6 +1480,66 @@ void vvp_vector4_t::add(const vvp_vector4_t&that)
|
|||
}
|
||||
}
|
||||
|
||||
void vvp_vector4_t::sub(const vvp_vector4_t&that)
|
||||
{
|
||||
assert(size_ == that.size_);
|
||||
|
||||
if (size_ < BITS_PER_WORD) {
|
||||
unsigned long mask = ~(-1UL << size_);
|
||||
if ((bbits_val_|that.bbits_val_) & mask) {
|
||||
abits_val_ |= mask;
|
||||
bbits_val_ |= mask;
|
||||
return;
|
||||
}
|
||||
|
||||
abits_val_ -= that.abits_val_;
|
||||
abits_val_ &= mask;
|
||||
return;
|
||||
}
|
||||
|
||||
if (size_ == BITS_PER_WORD) {
|
||||
if (bbits_val_ | that.bbits_val_) {
|
||||
abits_val_ = WORD_X_ABITS;
|
||||
bbits_val_ = WORD_X_BBITS;
|
||||
} else {
|
||||
abits_val_ -= that.abits_val_;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int cnt = size_ / BITS_PER_WORD;
|
||||
unsigned long carry = 1;
|
||||
for (int idx = 0 ; idx < cnt ; idx += 1) {
|
||||
if (bbits_ptr_[idx] | that.bbits_ptr_[idx])
|
||||
goto x_out;
|
||||
|
||||
abits_ptr_[idx] = add_with_carry(abits_ptr_[idx], ~that.abits_ptr_[idx], carry);
|
||||
}
|
||||
|
||||
if (unsigned tail = size_ % BITS_PER_WORD) {
|
||||
unsigned long mask = ~( -1UL << tail );
|
||||
if ((bbits_ptr_[cnt] | that.bbits_ptr_[cnt])&mask)
|
||||
goto x_out;
|
||||
|
||||
abits_ptr_[cnt] = add_with_carry(abits_ptr_[cnt], ~that.abits_ptr_[cnt], carry);
|
||||
abits_ptr_[cnt] &= mask;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
x_out:
|
||||
for (int idx = 0 ; idx < cnt ; idx += 1) {
|
||||
abits_ptr_[idx] = WORD_X_ABITS;
|
||||
bbits_ptr_[idx] = WORD_X_BBITS;
|
||||
}
|
||||
if (unsigned tail = size_%BITS_PER_WORD) {
|
||||
unsigned long mask = ~( -1UL << tail );
|
||||
abits_ptr_[cnt] = WORD_X_ABITS&mask;
|
||||
bbits_ptr_[cnt] = WORD_X_BBITS&mask;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vvp_vector4_t::mov(unsigned dst, unsigned src, unsigned cnt)
|
||||
{
|
||||
assert(dst+cnt <= size_);
|
||||
|
|
|
|||
|
|
@ -296,6 +296,9 @@ class vvp_vector4_t {
|
|||
// Add that to this in the Verilog way.
|
||||
void add(const vvp_vector4_t&that);
|
||||
|
||||
// Subtract that from this in the Verilog way. Both vectors must be
// the same size; the difference replaces this vector. If either
// operand has a b-bit set within the vector width, the whole result
// is filled with the X pattern instead.
|
||||
void sub(const vvp_vector4_t&that);
|
||||
|
||||
// Multiply this by that in the Verilog way.
|
||||
void mul(const vvp_vector4_t&that);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue