From 41c34232094859e978998e9951a5c79c3275cd73 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 7 Jan 2024 18:45:03 -0800 Subject: [PATCH 1/3] vvp: Implement `vvp_vector4_t` xor operator `vvp_vector4_t` has word-wide in-place operators for AND and OR, but not for XOR. Add `operator ^=` using the same internal word representation. Signed-off-by: Lars-Peter Clausen --- vvp/vvp_net.cc | 24 ++++++++++++++++++++++++ vvp/vvp_net.h | 1 + 2 files changed, 25 insertions(+) diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc index ee1e123a1..23f9de4ef 100644 --- a/vvp/vvp_net.cc +++ b/vvp/vvp_net.cc @@ -1964,6 +1964,30 @@ vvp_vector4_t& vvp_vector4_t::operator |= (const vvp_vector4_t&that) return *this; } +vvp_vector4_t& vvp_vector4_t::operator ^= (const vvp_vector4_t&that) +{ + // The truth table is: + // 00 01 11 10 + // 00 00 01 11 11 + // 01 01 00 11 11 + // 11 11 11 11 11 + // 10 11 11 11 11 + if (size_ <= BITS_PER_WORD) { + unsigned long bval = bbits_val_ | that.bbits_val_; + bbits_val_ = bval; + abits_val_ = (abits_val_ ^ that.abits_val_) | bval; + } else { + unsigned words = (size_ + BITS_PER_WORD - 1) / BITS_PER_WORD; + for (unsigned idx = 0; idx < words ; idx += 1) { + unsigned long bval = bbits_ptr_[idx] | that.bbits_ptr_[idx]; + bbits_ptr_[idx] = bval; + abits_ptr_[idx] = (abits_ptr_[idx] ^ that.abits_ptr_[idx]) | bval; + } + } + + return *this; +} + /* * Add an integer to the vvp_vector4_t in place, bit by bit so that * there is no size limitations. 
diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h index 5d0e77e2f..197406bb7 100644 --- a/vvp/vvp_net.h +++ b/vvp/vvp_net.h @@ -323,6 +323,7 @@ class vvp_vector4_t { void invert(); vvp_vector4_t& operator &= (const vvp_vector4_t&that); vvp_vector4_t& operator |= (const vvp_vector4_t&that); + vvp_vector4_t& operator ^= (const vvp_vector4_t&that); vvp_vector4_t& operator += (int64_t); private: From cf53479ba23af5419c7ce3db4ec5ec7f80dddc90 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 7 Jan 2024 18:45:41 -0800 Subject: [PATCH 2/3] vvp: vthread: Use word-wide vector operations The vthread binary logic opcodes update vectors bit by bit. Use the in-place `vvp_vector4_t` operators instead. This reuses the word-wide implementation and avoids per-bit `value()` and `set_bit()` calls. Signed-off-by: Lars-Peter Clausen --- vvp/vthread.cc | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/vvp/vthread.cc b/vvp/vthread.cc index de7578b00..1ef7aaf76 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -4491,13 +4491,9 @@ bool of_NAND(vthread_t thr, vvp_code_t) vvp_vector4_t valr = thr->pop_vec4(); vvp_vector4_t&vall = thr->peek_vec4(); assert(vall.size() == valr.size()); - unsigned wid = vall.size(); - for (unsigned idx = 0 ; idx < wid ; idx += 1) { - vvp_bit4_t lb = vall.value(idx); - vvp_bit4_t rb = valr.value(idx); - vall.set_bit(idx, ~(lb&rb)); - } + vall &= valr; + vall.invert(); return true; } @@ -4755,13 +4751,9 @@ bool of_NOR(vthread_t thr, vvp_code_t) vvp_vector4_t valr = thr->pop_vec4(); vvp_vector4_t&vall = thr->peek_vec4(); assert(vall.size() == valr.size()); - unsigned wid = vall.size(); - for (unsigned idx = 0 ; idx < wid ; idx += 1) { - vvp_bit4_t lb = vall.value(idx); - vvp_bit4_t rb = valr.value(idx); - vall.set_bit(idx, ~(lb|rb)); - } + vall |= valr; + vall.invert(); return true; } @@ -6564,14 +6556,9 @@ bool of_XNOR(vthread_t thr, vvp_code_t) vvp_vector4_t valr = thr->pop_vec4(); vvp_vector4_t&vall = 
thr->peek_vec4(); assert(vall.size() == valr.size()); - unsigned wid = vall.size(); - for (unsigned idx = 0 ; idx < wid ; idx += 1) { - - vvp_bit4_t lb = vall.value(idx); - vvp_bit4_t rb = valr.value(idx); - vall.set_bit(idx, ~(lb ^ rb)); - } + vall ^= valr; + vall.invert(); return true; } @@ -6584,14 +6571,8 @@ bool of_XOR(vthread_t thr, vvp_code_t) vvp_vector4_t valr = thr->pop_vec4(); vvp_vector4_t&vall = thr->peek_vec4(); assert(vall.size() == valr.size()); - unsigned wid = vall.size(); - for (unsigned idx = 0 ; idx < wid ; idx += 1) { - - vvp_bit4_t lb = vall.value(idx); - vvp_bit4_t rb = valr.value(idx); - vall.set_bit(idx, lb ^ rb); - } + vall ^= valr; return true; } From 0f454ff548f4815a79a6746c6093409b035610b5 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 7 Jan 2024 20:20:08 -0800 Subject: [PATCH 3/3] vvp: Use word-wide vector operations for logic functors The logic functors combine their input vectors bit by bit. Use the in-place `vvp_vector4_t` operators for the vector operation and invert the result once for the inverted functors. This also removes the per-bit `input_[pdx].size() < idx` check of the old loop, which set the bit to `BIT4_X` instead of combining the remaining inputs. 
Signed-off-by: Lars-Peter Clausen --- vvp/logic.cc | 54 ++++++++++++---------------------------------------- 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/vvp/logic.cc b/vvp/logic.cc index 458cc6cb6..9b9a83e41 100644 --- a/vvp/logic.cc +++ b/vvp/logic.cc @@ -94,21 +94,11 @@ void vvp_fun_and::run_run() vvp_vector4_t result (input_[0]); - for (unsigned idx = 0 ; idx < result.size() ; idx += 1) { - vvp_bit4_t bitbit = result.value(idx); - for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) { - if (input_[pdx].size() < idx) { - bitbit = BIT4_X; - break; - } + for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) + result &= input_[pdx]; - bitbit = bitbit & input_[pdx].value(idx); - } - - if (invert_) - bitbit = ~bitbit; - result.set_bit(idx, bitbit); - } + if (invert_) + result.invert(); ptr->send_vec4(result, 0); } @@ -535,21 +525,11 @@ void vvp_fun_or::run_run() vvp_vector4_t result (input_[0]); - for (unsigned idx = 0 ; idx < result.size() ; idx += 1) { - vvp_bit4_t bitbit = result.value(idx); - for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) { - if (input_[pdx].size() < idx) { - bitbit = BIT4_X; - break; - } + for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) + result |= input_[pdx]; - bitbit = bitbit | input_[pdx].value(idx); - } - - if (invert_) - bitbit = ~bitbit; - result.set_bit(idx, bitbit); - } + if (invert_) + result.invert(); ptr->send_vec4(result, 0); } @@ -571,21 +551,11 @@ void vvp_fun_xor::run_run() vvp_vector4_t result (input_[0]); - for (unsigned idx = 0 ; idx < result.size() ; idx += 1) { - vvp_bit4_t bitbit = result.value(idx); - for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) { - if (input_[pdx].size() < idx) { - bitbit = BIT4_X; - break; - } + for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1) + result ^= input_[pdx]; - bitbit = bitbit ^ input_[pdx].value(idx); - } - - if (invert_) - bitbit = ~bitbit; - result.set_bit(idx, bitbit); - } + if (invert_) + result.invert(); ptr->send_vec4(result, 0); }