Merge pull request #1341 from larsclausen/vvp-vector-ops-speed-up

vvp: Use word wide bitwise logical ops
This commit is contained in:
Cary R. 2026-05-06 20:37:49 -07:00 committed by GitHub
commit 33a6d58258
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 44 additions and 68 deletions

View File

@ -94,21 +94,11 @@ void vvp_fun_and::run_run()
vvp_vector4_t result (input_[0]);
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
vvp_bit4_t bitbit = result.value(idx);
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) {
if (input_[pdx].size() < idx) {
bitbit = BIT4_X;
break;
}
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
result &= input_[pdx];
bitbit = bitbit & input_[pdx].value(idx);
}
if (invert_)
bitbit = ~bitbit;
result.set_bit(idx, bitbit);
}
if (invert_)
result.invert();
ptr->send_vec4(result, 0);
}
@ -535,21 +525,11 @@ void vvp_fun_or::run_run()
vvp_vector4_t result (input_[0]);
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
vvp_bit4_t bitbit = result.value(idx);
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) {
if (input_[pdx].size() < idx) {
bitbit = BIT4_X;
break;
}
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
result |= input_[pdx];
bitbit = bitbit | input_[pdx].value(idx);
}
if (invert_)
bitbit = ~bitbit;
result.set_bit(idx, bitbit);
}
if (invert_)
result.invert();
ptr->send_vec4(result, 0);
}
@ -571,21 +551,11 @@ void vvp_fun_xor::run_run()
vvp_vector4_t result (input_[0]);
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
vvp_bit4_t bitbit = result.value(idx);
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) {
if (input_[pdx].size() < idx) {
bitbit = BIT4_X;
break;
}
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
result ^= input_[pdx];
bitbit = bitbit ^ input_[pdx].value(idx);
}
if (invert_)
bitbit = ~bitbit;
result.set_bit(idx, bitbit);
}
if (invert_)
result.invert();
ptr->send_vec4(result, 0);
}

View File

@ -4491,13 +4491,9 @@ bool of_NAND(vthread_t thr, vvp_code_t)
vvp_vector4_t valr = thr->pop_vec4();
vvp_vector4_t&vall = thr->peek_vec4();
assert(vall.size() == valr.size());
unsigned wid = vall.size();
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
vvp_bit4_t lb = vall.value(idx);
vvp_bit4_t rb = valr.value(idx);
vall.set_bit(idx, ~(lb&rb));
}
vall &= valr;
vall.invert();
return true;
}
@ -4755,13 +4751,9 @@ bool of_NOR(vthread_t thr, vvp_code_t)
vvp_vector4_t valr = thr->pop_vec4();
vvp_vector4_t&vall = thr->peek_vec4();
assert(vall.size() == valr.size());
unsigned wid = vall.size();
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
vvp_bit4_t lb = vall.value(idx);
vvp_bit4_t rb = valr.value(idx);
vall.set_bit(idx, ~(lb|rb));
}
vall |= valr;
vall.invert();
return true;
}
@ -6564,14 +6556,9 @@ bool of_XNOR(vthread_t thr, vvp_code_t)
vvp_vector4_t valr = thr->pop_vec4();
vvp_vector4_t&vall = thr->peek_vec4();
assert(vall.size() == valr.size());
unsigned wid = vall.size();
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
vvp_bit4_t lb = vall.value(idx);
vvp_bit4_t rb = valr.value(idx);
vall.set_bit(idx, ~(lb ^ rb));
}
vall ^= valr;
vall.invert();
return true;
}
@ -6584,14 +6571,8 @@ bool of_XOR(vthread_t thr, vvp_code_t)
vvp_vector4_t valr = thr->pop_vec4();
vvp_vector4_t&vall = thr->peek_vec4();
assert(vall.size() == valr.size());
unsigned wid = vall.size();
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
vvp_bit4_t lb = vall.value(idx);
vvp_bit4_t rb = valr.value(idx);
vall.set_bit(idx, lb ^ rb);
}
vall ^= valr;
return true;
}

View File

@ -1964,6 +1964,30 @@ vvp_vector4_t& vvp_vector4_t::operator |= (const vvp_vector4_t&that)
return *this;
}
/*
 * In-place bitwise XOR of this vector with "that", processed a whole
 * machine word at a time. The operand is read with the same word
 * layout as this vector (inline words when size_ fits in a single
 * word, word arrays otherwise); no size check is made here, so the
 * caller is expected to pass a vector of matching width.
 */
vvp_vector4_t& vvp_vector4_t::operator ^= (const vvp_vector4_t&that)
{
      // Each bit is stored as a (b,a) pair. Writing the pair as "ba",
      // the result table for XOR is:
      //
      //        00 01 11 10
      //     00 00 01 11 11
      //     01 01 00 11 11
      //     11 11 11 11 11
      //     10 11 11 11 11
      //
      // i.e. whenever either operand has its b bit set the result is
      // 11, otherwise the a bits are simply XORed together.
      // (Presumably 00/01 are logic 0/1 and 11/10 are X/Z -- confirm
      // against the vvp_bit4_t encoding.)
      if (size_ > BITS_PER_WORD) {
	    // Wide vector: the bits live in heap-style word arrays.
	    const unsigned word_count = (size_ + BITS_PER_WORD - 1) / BITS_PER_WORD;
	    for (unsigned wdx = 0 ; wdx < word_count ; wdx += 1) {
		  const unsigned long unknown = bbits_ptr_[wdx] | that.bbits_ptr_[wdx];
		  const unsigned long toggled = abits_ptr_[wdx] ^ that.abits_ptr_[wdx];
		  bbits_ptr_[wdx] = unknown;
		  // Force the a bit on wherever b is set so the pair reads 11.
		  abits_ptr_[wdx] = toggled | unknown;
	    }
	    return *this;
      }

      // Narrow vector: everything fits in the inline word pair.
      const unsigned long unknown = bbits_val_ | that.bbits_val_;
      const unsigned long toggled = abits_val_ ^ that.abits_val_;
      bbits_val_ = unknown;
      abits_val_ = toggled | unknown;
      return *this;
}
/*
* Add an integer to the vvp_vector4_t in place, bit by bit so that
* there is no size limitations.

View File

@ -323,6 +323,7 @@ class vvp_vector4_t {
void invert();
vvp_vector4_t& operator &= (const vvp_vector4_t&that);
vvp_vector4_t& operator |= (const vvp_vector4_t&that);
vvp_vector4_t& operator ^= (const vvp_vector4_t&that);
vvp_vector4_t& operator += (int64_t);
private: