Merge pull request #1341 from larsclausen/vvp-vector-ops-speed-up

vvp: Use word wide bitwise logical ops
2026-05-06 20:37:49 -07:00 · 2026-05-06 20:37:49 -07:00 · 33a6d58258
parent e9cffe506b 0f454ff548
commit 33a6d58258
4 changed files with 44 additions and 68 deletions
--- a/vvp/logic.cc
+++ b/vvp/logic.cc
@@ -94,21 +94,11 @@ void vvp_fun_and::run_run()

      vvp_vector4_t result (input_[0]);

-      for (unsigned idx = 0 ;  idx < result.size() ;  idx += 1) {
-	    vvp_bit4_t bitbit = result.value(idx);
-	    for (unsigned pdx = 1 ;  pdx < 4 ;  pdx += 1) {
-		  if (input_[pdx].size() < idx) {
-			bitbit = BIT4_X;
-			break;
-		  }
+      for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
+	    result &= input_[pdx];

-		  bitbit = bitbit & input_[pdx].value(idx);
-	    }
-
-	    if (invert_)
-		  bitbit = ~bitbit;
-	    result.set_bit(idx, bitbit);
-      }
+      if (invert_)
+	    result.invert();

      ptr->send_vec4(result, 0);
 }
@@ -535,21 +525,11 @@ void vvp_fun_or::run_run()

      vvp_vector4_t result (input_[0]);

-      for (unsigned idx = 0 ;  idx < result.size() ;  idx += 1) {
-	    vvp_bit4_t bitbit = result.value(idx);
-	    for (unsigned pdx = 1 ;  pdx < 4 ;  pdx += 1) {
-		  if (input_[pdx].size() < idx) {
-			bitbit = BIT4_X;
-			break;
-		  }
+      for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
+	    result |= input_[pdx];

-		  bitbit = bitbit | input_[pdx].value(idx);
-	    }
-
-	    if (invert_)
-		  bitbit = ~bitbit;
-	    result.set_bit(idx, bitbit);
-      }
+      if (invert_)
+	    result.invert();

      ptr->send_vec4(result, 0);
 }
@@ -571,21 +551,11 @@ void vvp_fun_xor::run_run()

      vvp_vector4_t result (input_[0]);

-      for (unsigned idx = 0 ;  idx < result.size() ;  idx += 1) {
-	    vvp_bit4_t bitbit = result.value(idx);
-	    for (unsigned pdx = 1 ;  pdx < 4 ;  pdx += 1) {
-		  if (input_[pdx].size() < idx) {
-			bitbit = BIT4_X;
-			break;
-		  }
+      for (unsigned pdx = 1 ; pdx < 4 ; pdx += 1)
+	    result ^= input_[pdx];

-		  bitbit = bitbit ^ input_[pdx].value(idx);
-	    }
-
-	    if (invert_)
-		  bitbit = ~bitbit;
-	    result.set_bit(idx, bitbit);
-      }
+      if (invert_)
+	    result.invert();

      ptr->send_vec4(result, 0);
 }
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -4491,13 +4491,9 @@ bool of_NAND(vthread_t thr, vvp_code_t)
      vvp_vector4_t valr = thr->pop_vec4();
      vvp_vector4_t&vall = thr->peek_vec4();
      assert(vall.size() == valr.size());
-      unsigned wid = vall.size();

-      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
-	    vvp_bit4_t lb = vall.value(idx);
-	    vvp_bit4_t rb = valr.value(idx);
-	    vall.set_bit(idx, ~(lb&rb));
-      }
+      vall &= valr;
+      vall.invert();

      return true;
 }
@@ -4755,13 +4751,9 @@ bool of_NOR(vthread_t thr, vvp_code_t)
      vvp_vector4_t valr = thr->pop_vec4();
      vvp_vector4_t&vall = thr->peek_vec4();
      assert(vall.size() == valr.size());
-      unsigned wid = vall.size();

-      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
-	    vvp_bit4_t lb = vall.value(idx);
-	    vvp_bit4_t rb = valr.value(idx);
-	    vall.set_bit(idx, ~(lb|rb));
-      }
+      vall |= valr;
+      vall.invert();

      return true;
 }
@@ -6564,14 +6556,9 @@ bool of_XNOR(vthread_t thr, vvp_code_t)
      vvp_vector4_t valr = thr->pop_vec4();
      vvp_vector4_t&vall = thr->peek_vec4();
      assert(vall.size() == valr.size());
-      unsigned wid = vall.size();

-      for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-
-	    vvp_bit4_t lb = vall.value(idx);
-	    vvp_bit4_t rb = valr.value(idx);
-	    vall.set_bit(idx, ~(lb ^ rb));
-      }
+      vall ^= valr;
+      vall.invert();

      return true;
 }
@@ -6584,14 +6571,8 @@ bool of_XOR(vthread_t thr, vvp_code_t)
      vvp_vector4_t valr = thr->pop_vec4();
      vvp_vector4_t&vall = thr->peek_vec4();
      assert(vall.size() == valr.size());
-      unsigned wid = vall.size();

-      for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-
-	    vvp_bit4_t lb = vall.value(idx);
-	    vvp_bit4_t rb = valr.value(idx);
-	    vall.set_bit(idx, lb ^ rb);
-      }
+      vall ^= valr;

      return true;
 }
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@@ -1964,6 +1964,30 @@ vvp_vector4_t& vvp_vector4_t::operator |= (const vvp_vector4_t&that)
      return *this;
 }

+vvp_vector4_t& vvp_vector4_t::operator ^= (const vvp_vector4_t&that)
+{ // In-place XOR of *this with that, over the abits/bbits pair of each bit; returns *this.
+	// The truth table, with each two-digit code read as (bbit, abit), is:
+	//     00 01 11 10
+	//  00 00 01 11 11
+	//  01 01 00 11 11
+	//  11 11 11 11 11
+	//  10 11 11 11 11
+      if (size_ <= BITS_PER_WORD) { // one word: operate on abits_val_/bbits_val_ directly
+	    unsigned long bval = bbits_val_ | that.bbits_val_; // a set bbit in either operand sets the result bbit
+	    bbits_val_ = bval;
+	    abits_val_ = (abits_val_ ^ that.abits_val_) | bval; // plain XOR of abits, forced to 1 wherever bbit is set (code 11)
+      } else { // more than one word: same computation per word through abits_ptr_/bbits_ptr_
+	    unsigned words = (size_ + BITS_PER_WORD - 1) / BITS_PER_WORD; // ceil(size_ / BITS_PER_WORD), from this->size_ only
+	    for (unsigned idx = 0; idx < words ; idx += 1) {
+		  unsigned long bval = bbits_ptr_[idx] | that.bbits_ptr_[idx]; // NOTE(review): assumes that has the same size_ (and so the same storage form) as *this -- confirm at call sites
+		  bbits_ptr_[idx] = bval;
+		  abits_ptr_[idx] = (abits_ptr_[idx] ^ that.abits_ptr_[idx]) | bval; // same rule as the one-word case
+	    }
+      }
+
+      return *this;
+}
+
 /*
 * Add an integer to the vvp_vector4_t in place, bit by bit so that
 * there is no size limitations.
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@@ -323,6 +323,7 @@ class vvp_vector4_t {
      void invert();
      vvp_vector4_t& operator &= (const vvp_vector4_t&that);
      vvp_vector4_t& operator |= (const vvp_vector4_t&that);
+      vvp_vector4_t& operator ^= (const vvp_vector4_t&that);
      vvp_vector4_t& operator += (int64_t);

    private: