From 41c34232094859e978998e9951a5c79c3275cd73 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Sun, 7 Jan 2024 18:45:03 -0800
Subject: [PATCH 1/3] vvp: Implement `vvp_vector4_t` xor operator

`vvp_vector4_t` has word-wide in-place operators for AND and OR, but not
for XOR.

Add `operator ^=` using the same internal word representation.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
---
 vvp/vvp_net.cc | 24 ++++++++++++++++++++++++
 vvp/vvp_net.h  |  1 +
 2 files changed, 25 insertions(+)

diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc
index ee1e123a1..23f9de4ef 100644
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@@ -1964,6 +1964,30 @@ vvp_vector4_t& vvp_vector4_t::operator |= (const vvp_vector4_t&that)
       return *this;
 }
 
+vvp_vector4_t& vvp_vector4_t::operator ^= (const vvp_vector4_t&that)
+{
+	// Word wide xor. Each entry below is a <bbit><abit> pair; a set
+	// bbit on either operand forces the result to 11:
+	//     00 01 11 10
+	//  00 00 01 11 11
+	//  01 01 00 11 11
+	//  11 11 11 11 11
+	//  10 11 11 11 11
+      if (size_ > BITS_PER_WORD) {
+	    const unsigned nwords = (size_ + BITS_PER_WORD - 1) / BITS_PER_WORD;
+	    for (unsigned wdx = 0 ; wdx != nwords ; ++wdx) {
+		  const unsigned long unk = bbits_ptr_[wdx] | that.bbits_ptr_[wdx];
+		  abits_ptr_[wdx] = (abits_ptr_[wdx] ^ that.abits_ptr_[wdx]) | unk;
+		  bbits_ptr_[wdx] = unk;
+	    }
+      } else {
+	    const unsigned long unk = bbits_val_ | that.bbits_val_;
+	    abits_val_ = (abits_val_ ^ that.abits_val_) | unk;
+	    bbits_val_ = unk;
+      }
+      return *this;
+}
+
 /*
 * Add an integer to the vvp_vector4_t in place, bit by bit so that
 * there is no size limitations.
diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h
index 5d0e77e2f..197406bb7 100644
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@@ -323,6 +323,7 @@ class vvp_vector4_t {
       void invert();
       vvp_vector4_t& operator &= (const vvp_vector4_t&that);
       vvp_vector4_t& operator |= (const vvp_vector4_t&that);
+      vvp_vector4_t& operator ^= (const vvp_vector4_t&that);
       vvp_vector4_t& operator += (int64_t);
 
     private:

From cf53479ba23af5419c7ce3db4ec5ec7f80dddc90 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Sun, 7 Jan 2024 18:45:41 -0800
Subject: [PATCH 2/3] vvp: vthread: Use word wide vector operations

The vthread NAND, NOR, XNOR and XOR opcodes update their vectors bit by bit.

Use the in-place `vvp_vector4_t` operators instead. This reuses the word
wide implementation and avoids per-bit `value()` and `set_bit()` calls.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
---
 vvp/vthread.cc | 33 +++++++--------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index de7578b00..1ef7aaf76 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -4491,13 +4491,9 @@ bool of_NAND(vthread_t thr, vvp_code_t)
       vvp_vector4_t valr = thr->pop_vec4();
       vvp_vector4_t&vall = thr->peek_vec4();
       assert(vall.size() == valr.size());
-      unsigned wid = vall.size();
 
-      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
-	    vvp_bit4_t lb = vall.value(idx);
-	    vvp_bit4_t rb = valr.value(idx);
-	    vall.set_bit(idx, ~(lb&rb));
-      }
+      vall &= valr;
+      vall.invert();
 
       return true;
 }
@@ -4755,13 +4751,9 @@ bool of_NOR(vthread_t thr, vvp_code_t)
       vvp_vector4_t valr = thr->pop_vec4();
       vvp_vector4_t&vall = thr->peek_vec4();
       assert(vall.size() == valr.size());
-      unsigned wid = vall.size();
 
-      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
-	    vvp_bit4_t lb = vall.value(idx);
-	    vvp_bit4_t rb = valr.value(idx);
-	    vall.set_bit(idx, ~(lb|rb));
-      }
+      vall |= valr;
+      vall.invert();
 
       return true;
 }
@@ -6564,14 +6556,9 @@ bool of_XNOR(vthread_t thr, vvp_code_t)
       vvp_vector4_t valr = thr->pop_vec4();
       vvp_vector4_t&vall = thr->peek_vec4();
       assert(vall.size() == valr.size());
-      unsigned wid = vall.size();
 
-      for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-
-	    vvp_bit4_t lb = vall.value(idx);
-	    vvp_bit4_t rb = valr.value(idx);
-	    vall.set_bit(idx, ~(lb ^ rb));
-      }
+      vall ^= valr;
+      vall.invert();
 
       return true;
 }
@@ -6584,14 +6571,8 @@ bool of_XOR(vthread_t thr, vvp_code_t)
       vvp_vector4_t valr = thr->pop_vec4();
       vvp_vector4_t&vall = thr->peek_vec4();
       assert(vall.size() == valr.size());
-      unsigned wid = vall.size();
 
-      for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-
-	    vvp_bit4_t lb = vall.value(idx);
-	    vvp_bit4_t rb = valr.value(idx);
-	    vall.set_bit(idx, lb ^ rb);
-      }
+      vall ^= valr;
 
       return true;
 }

From 0f454ff548f4815a79a6746c6093409b035610b5 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Sun, 7 Jan 2024 20:20:08 -0800
Subject: [PATCH 3/3] vvp: Use word wide vector operations for logic functors

The logic functors combine their input vectors bit by bit.

Use the in-place `vvp_vector4_t` operators for the vector operation and
invert the result once for the inverted functors.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
---
 vvp/logic.cc | 54 ++++++++++++----------------------------------------
 1 file changed, 12 insertions(+), 42 deletions(-)

diff --git a/vvp/logic.cc b/vvp/logic.cc
index 458cc6cb6..9b9a83e41 100644
--- a/vvp/logic.cc
+++ b/vvp/logic.cc
@@ -94,21 +94,11 @@ void vvp_fun_and::run_run()
 
       vvp_vector4_t result (input_[0]);
 
-      for (unsigned idx = 0 ;  idx < result.size() ;  idx += 1) {
-	    vvp_bit4_t bitbit = result.value(idx);
-	    for (unsigned pdx = 1 ;  pdx < 4 ;  pdx += 1) {
-		  if (input_[pdx].size() < idx) {
-			bitbit = BIT4_X;
-			break;
-		  }
+      for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
+	    result &= input_[pdx];
 
-		  bitbit = bitbit & input_[pdx].value(idx);
-	    }
-
-	    if (invert_)
-		  bitbit = ~bitbit;
-	    result.set_bit(idx, bitbit);
-      }
+      if (invert_)
+	    result.invert();
 
       ptr->send_vec4(result, 0);
 }
@@ -535,21 +525,11 @@ void vvp_fun_or::run_run()
 
       vvp_vector4_t result (input_[0]);
 
-      for (unsigned idx = 0 ;  idx < result.size() ;  idx += 1) {
-	    vvp_bit4_t bitbit = result.value(idx);
-	    for (unsigned pdx = 1 ;  pdx < 4 ;  pdx += 1) {
-		  if (input_[pdx].size() < idx) {
-			bitbit = BIT4_X;
-			break;
-		  }
+      for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
+	    result |= input_[pdx];
 
-		  bitbit = bitbit | input_[pdx].value(idx);
-	    }
-
-	    if (invert_)
-		  bitbit = ~bitbit;
-	    result.set_bit(idx, bitbit);
-      }
+      if (invert_)
+	    result.invert();
 
       ptr->send_vec4(result, 0);
 }
@@ -571,21 +551,11 @@ void vvp_fun_xor::run_run()
 
       vvp_vector4_t result (input_[0]);
 
-      for (unsigned idx = 0 ;  idx < result.size() ;  idx += 1) {
-	    vvp_bit4_t bitbit = result.value(idx);
-	    for (unsigned pdx = 1 ;  pdx < 4 ;  pdx += 1) {
-		  if (input_[pdx].size() < idx) {
-			bitbit = BIT4_X;
-			break;
-		  }
+      for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
+	    result ^= input_[pdx];
 
-		  bitbit = bitbit ^ input_[pdx].value(idx);
-	    }
-
-	    if (invert_)
-		  bitbit = ~bitbit;
-	    result.set_bit(idx, bitbit);
-      }
+      if (invert_)
+	    result.invert();
 
       ptr->send_vec4(result, 0);
 }