Merge pull request #1341 from larsclausen/vvp-vector-ops-speed-up
vvp: Use word wide bitwise logical ops
This commit is contained in:
commit
33a6d58258
54
vvp/logic.cc
54
vvp/logic.cc
|
|
@ -94,21 +94,11 @@ void vvp_fun_and::run_run()
|
|||
|
||||
vvp_vector4_t result (input_[0]);
|
||||
|
||||
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
|
||||
vvp_bit4_t bitbit = result.value(idx);
|
||||
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) {
|
||||
if (input_[pdx].size() < idx) {
|
||||
bitbit = BIT4_X;
|
||||
break;
|
||||
}
|
||||
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
|
||||
result &= input_[pdx];
|
||||
|
||||
bitbit = bitbit & input_[pdx].value(idx);
|
||||
}
|
||||
|
||||
if (invert_)
|
||||
bitbit = ~bitbit;
|
||||
result.set_bit(idx, bitbit);
|
||||
}
|
||||
if (invert_)
|
||||
result.invert();
|
||||
|
||||
ptr->send_vec4(result, 0);
|
||||
}
|
||||
|
|
@ -535,21 +525,11 @@ void vvp_fun_or::run_run()
|
|||
|
||||
vvp_vector4_t result (input_[0]);
|
||||
|
||||
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
|
||||
vvp_bit4_t bitbit = result.value(idx);
|
||||
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) {
|
||||
if (input_[pdx].size() < idx) {
|
||||
bitbit = BIT4_X;
|
||||
break;
|
||||
}
|
||||
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
|
||||
result |= input_[pdx];
|
||||
|
||||
bitbit = bitbit | input_[pdx].value(idx);
|
||||
}
|
||||
|
||||
if (invert_)
|
||||
bitbit = ~bitbit;
|
||||
result.set_bit(idx, bitbit);
|
||||
}
|
||||
if (invert_)
|
||||
result.invert();
|
||||
|
||||
ptr->send_vec4(result, 0);
|
||||
}
|
||||
|
|
@ -571,21 +551,11 @@ void vvp_fun_xor::run_run()
|
|||
|
||||
vvp_vector4_t result (input_[0]);
|
||||
|
||||
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
|
||||
vvp_bit4_t bitbit = result.value(idx);
|
||||
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) {
|
||||
if (input_[pdx].size() < idx) {
|
||||
bitbit = BIT4_X;
|
||||
break;
|
||||
}
|
||||
for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
|
||||
result ^= input_[pdx];
|
||||
|
||||
bitbit = bitbit ^ input_[pdx].value(idx);
|
||||
}
|
||||
|
||||
if (invert_)
|
||||
bitbit = ~bitbit;
|
||||
result.set_bit(idx, bitbit);
|
||||
}
|
||||
if (invert_)
|
||||
result.invert();
|
||||
|
||||
ptr->send_vec4(result, 0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4491,13 +4491,9 @@ bool of_NAND(vthread_t thr, vvp_code_t)
|
|||
vvp_vector4_t valr = thr->pop_vec4();
|
||||
vvp_vector4_t&vall = thr->peek_vec4();
|
||||
assert(vall.size() == valr.size());
|
||||
unsigned wid = vall.size();
|
||||
|
||||
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
|
||||
vvp_bit4_t lb = vall.value(idx);
|
||||
vvp_bit4_t rb = valr.value(idx);
|
||||
vall.set_bit(idx, ~(lb&rb));
|
||||
}
|
||||
vall &= valr;
|
||||
vall.invert();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -4755,13 +4751,9 @@ bool of_NOR(vthread_t thr, vvp_code_t)
|
|||
vvp_vector4_t valr = thr->pop_vec4();
|
||||
vvp_vector4_t&vall = thr->peek_vec4();
|
||||
assert(vall.size() == valr.size());
|
||||
unsigned wid = vall.size();
|
||||
|
||||
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
|
||||
vvp_bit4_t lb = vall.value(idx);
|
||||
vvp_bit4_t rb = valr.value(idx);
|
||||
vall.set_bit(idx, ~(lb|rb));
|
||||
}
|
||||
vall |= valr;
|
||||
vall.invert();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -6564,14 +6556,9 @@ bool of_XNOR(vthread_t thr, vvp_code_t)
|
|||
vvp_vector4_t valr = thr->pop_vec4();
|
||||
vvp_vector4_t&vall = thr->peek_vec4();
|
||||
assert(vall.size() == valr.size());
|
||||
unsigned wid = vall.size();
|
||||
|
||||
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
|
||||
|
||||
vvp_bit4_t lb = vall.value(idx);
|
||||
vvp_bit4_t rb = valr.value(idx);
|
||||
vall.set_bit(idx, ~(lb ^ rb));
|
||||
}
|
||||
vall ^= valr;
|
||||
vall.invert();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -6584,14 +6571,8 @@ bool of_XOR(vthread_t thr, vvp_code_t)
|
|||
vvp_vector4_t valr = thr->pop_vec4();
|
||||
vvp_vector4_t&vall = thr->peek_vec4();
|
||||
assert(vall.size() == valr.size());
|
||||
unsigned wid = vall.size();
|
||||
|
||||
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
|
||||
|
||||
vvp_bit4_t lb = vall.value(idx);
|
||||
vvp_bit4_t rb = valr.value(idx);
|
||||
vall.set_bit(idx, lb ^ rb);
|
||||
}
|
||||
vall ^= valr;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1964,6 +1964,30 @@ vvp_vector4_t& vvp_vector4_t::operator |= (const vvp_vector4_t&that)
|
|||
return *this;
|
||||
}
|
||||
|
||||
/*
 * In-place bitwise XOR of this vector with "that", processed a whole
 * machine word at a time rather than bit by bit. No size check is done
 * here; "that" is read over the same number of words as this vector.
 */
vvp_vector4_t& vvp_vector4_t::operator ^= (const vvp_vector4_t&that)
{
      // Per-bit truth table. Each entry is written as the b-bit
      // followed by the a-bit:
      //     00 01 11 10
      //  00 00 01 11 11
      //  01 01 00 11 11
      //  11 11 11 11 11
      //  10 11 11 11 11
      // That is: if either operand has its b-bit set the result is 11,
      // otherwise the result is the plain XOR of the a-bits.

      if (size_ > BITS_PER_WORD) {
            // Wide vector: the bits live in the word arrays.
            const unsigned word_count = (size_ + BITS_PER_WORD - 1) / BITS_PER_WORD;
            for (unsigned wdx = 0 ; wdx < word_count ; wdx += 1) {
                  const unsigned long any_b = bbits_ptr_[wdx] | that.bbits_ptr_[wdx];
                  // Force the a-bit on wherever the merged b-bit is set.
                  abits_ptr_[wdx] = (abits_ptr_[wdx] ^ that.abits_ptr_[wdx]) | any_b;
                  bbits_ptr_[wdx] = any_b;
            }
            return *this;
      }

      // Narrow vector: everything fits in the single inline word pair.
      const unsigned long any_b = bbits_val_ | that.bbits_val_;
      abits_val_ = (abits_val_ ^ that.abits_val_) | any_b;
      bbits_val_ = any_b;

      return *this;
}
|
||||
|
||||
/*
|
||||
* Add an integer to the vvp_vector4_t in place, bit by bit so that
|
||||
* there are no size limitations.
|
||||
|
|
|
|||
|
|
@ -323,6 +323,7 @@ class vvp_vector4_t {
|
|||
void invert();
|
||||
vvp_vector4_t& operator &= (const vvp_vector4_t&that);
|
||||
vvp_vector4_t& operator |= (const vvp_vector4_t&that);
|
||||
vvp_vector4_t& operator ^= (const vvp_vector4_t&that);
|
||||
vvp_vector4_t& operator += (int64_t);
|
||||
|
||||
private:
|
||||
|
|
|
|||
Loading…
Reference in New Issue