Optimize handling of invert gates / Tweak vvp_vector8_t performance

The process of inverting and copying a vector can be collapsed into a
single operation that should run a little faster, and the word-at-a-time
invert is easy for the compiler to vectorize. This should also cut out
some useless vvp_fun_not evaluations, since input events are now
filtered with eq_xz instead of eeq.

Also, include some minor tweaks to the vvp_vector8_t handling of
vector copies. This is mostly to clean up code, although it should
slightly improve performance as well.
Stephen Williams 2010-01-11 11:42:25 -08:00
parent 85e0f8a328
commit d0b1936fb5
4 changed files with 138 additions and 59 deletions
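
As background for the diffs below, here is a minimal standalone sketch
(not the vvp source) of the word-level identity that lets the invert
fold into the copy. It assumes the vvp 4-state encoding where each bit
is an (a,b) pair: 0=(0,0), 1=(1,0), z=(0,1), x=(1,1); the exact bit
assignments are an assumption of the sketch, but they match the
abits' = bbits | ~abits, bbits' = bbits computation in the new
copy_inverted_from_ method.

    // Standalone sketch (not the vvp source) of the word-level identity
    // behind copy_inverted_from_.  Encoding assumed: each bit is an (a,b)
    // pair with 0=(0,0), 1=(1,0), z=(0,1), x=(1,1).
    #include <cassert>

    int main()
    {
          // Pack the four values {0, 1, z, x} into bit positions 0..3.
          unsigned long a = 0xAUL;     // a-bits: 0->0, 1->1, z->0, x->1
          unsigned long b = 0xCUL;     // b-bits: 0->0, 1->0, z->1, x->1
          unsigned long mask = 0xFUL;  // only four bit positions in use

          // Verilog ~ maps 0->1, 1->0, z->x, x->x.  At word granularity
          // that is one OR and one NOT per word -- no per-bit loop.
          unsigned long a_inv = mask & (b | ~a);
          unsigned long b_inv = b;

          assert(a_inv == 0xDUL);      // ~{0,1,z,x} = {1,0,x,x}: a-bits 1101
          assert(b_inv == 0xCUL);      //                         b-bits 1100
          return 0;
    }

Because the transform is plain bitwise logic on unsigned long words, the
per-word loop in copy_inverted_from_ is straightforward for a compiler
to auto-vectorize, which is what the commit message alludes to.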


@@ -399,7 +399,7 @@ void vvp_fun_not::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit,
       if (ptr.port() != 0)
             return;
 
-      if (input_ .eeq( bit ))
+      if (input_ .eq_xz( bit ))
             return;
 
       input_ = bit;
@@ -414,13 +414,7 @@ void vvp_fun_not::run_run()
       vvp_net_t*ptr = net_;
       net_ = 0;
 
-      vvp_vector4_t result (input_);
-      for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
-            vvp_bit4_t bitbit = ~ result.value(idx);
-            result.set_bit(idx, bitbit);
-      }
+      vvp_vector4_t result (input_, true /* invert */);
 
       ptr->send_vec4(result, 0);
 }
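
The eeq-to-eq_xz switch above is safe for a NOT gate because Verilog ~
maps both unknown states to x: an input event that only flips a bit
between x and z cannot change the output, so it can be dropped before a
new evaluation is scheduled. A small sketch of that reasoning (the enum
and helper are hypothetical, not vvp code):

    // Sketch only: under Verilog ~, both unknown states come out x, so
    // an x<->z change on a NOT gate input cannot change its output.
    #include <cassert>

    enum bit4 { B0, B1, BZ, BX };

    static bit4 not4(bit4 v)
    {
          switch (v) {
              case B0: return B1;
              case B1: return B0;
              default: return BX;   // ~x and ~z are both x
          }
    }

    int main()
    {
          assert(not4(BX) == not4(BZ));   // outputs identical...
          assert(not4(B0) != not4(B1));   // ...while 0/1 changes still matter
          return 0;
    }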


@@ -173,7 +173,12 @@ vvp_island_port::~vvp_island_port()
 void vvp_island_port::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
                                 vvp_context_t)
 {
-      recv_vec8(port, vvp_vector8_t(bit, 6, 6));
+      vvp_vector8_t tmp (bit, 6, 6);
+      if (invalue .eeq(tmp))
+            return;
+
+      invalue = tmp;
+      island_->flag_island();
 }
 
 void vvp_island_port::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
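
The island port now latches the strength-extended input in invalue and
only calls island_->flag_island() when the value actually changes,
instead of unconditionally forwarding through recv_vec8. A minimal
sketch of this latch-and-compare pattern (types simplified; names
loosely follow the diff):

    #include <cstring>

    // Stand-in for vvp_vector8_t: just enough to compare for equality.
    struct value8 {
          unsigned char bytes[8];
          bool eeq(const value8&that) const
          { return memcmp(bytes, that.bytes, sizeof bytes) == 0; }
    };

    struct island_port {
          value8 invalue;     // last resolved value seen on this port
          bool flagged;

          island_port() : invalue(), flagged(false) { }

          void recv(const value8&tmp)
          {
                if (invalue.eeq(tmp))
                      return;       // unchanged: nothing to re-evaluate
                invalue = tmp;
                flagged = true;     // stands in for island_->flag_island()
          }
    };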


@@ -583,6 +583,41 @@ void vvp_vector4_t::copy_from_(const vvp_vector4_t&that)
       }
 }
 
+/*
+ * The copy_inverted_from_ method is just like the copy_from_ method,
+ * except that we combine that with an invert. This allows the ~ and
+ * the assignment to be blended in many common cases.
+ */
+void vvp_vector4_t::copy_inverted_from_(const vvp_vector4_t&that)
+{
+      size_ = that.size_;
+      if (size_ > BITS_PER_WORD) {
+            unsigned words = (size_+BITS_PER_WORD-1) / BITS_PER_WORD;
+            abits_ptr_ = new unsigned long[2*words];
+            bbits_ptr_ = abits_ptr_ + words;
+
+            unsigned remaining = size_;
+            unsigned idx = 0;
+            while (remaining >= BITS_PER_WORD) {
+                  abits_ptr_[idx] = that.bbits_ptr_[idx] | ~that.abits_ptr_[idx];
+                  idx += 1;
+                  remaining -= BITS_PER_WORD;
+            }
+            if (remaining > 0) {
+                  unsigned long mask = (1UL<<remaining) - 1UL;
+                  abits_ptr_[idx] = mask & (that.bbits_ptr_[idx] | ~that.abits_ptr_[idx]);
+            }
+
+            for (unsigned idx = 0 ; idx < words ; idx += 1)
+                  bbits_ptr_[idx] = that.bbits_ptr_[idx];
+
+      } else {
+            unsigned long mask = (size_<BITS_PER_WORD)? (1UL<<size_)-1UL : -1UL;
+            abits_val_ = mask & (that.bbits_val_ | ~that.abits_val_);
+            bbits_val_ = that.bbits_val_;
+      }
+}
+
 void vvp_vector4_t::allocate_words_(unsigned wid, unsigned long inita, unsigned long initb)
 {
       if (size_ > BITS_PER_WORD) {
@@ -1238,6 +1273,40 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
       return true;
 }
 
+bool vvp_vector4_t::eq_xz(const vvp_vector4_t&that) const
+{
+      if (size_ != that.size_)
+            return false;
+
+      if (size_ < BITS_PER_WORD) {
+            unsigned long mask = (1UL << size_) - 1;
+            return ((abits_val_|bbits_val_)&mask) == ((that.abits_val_|that.bbits_val_)&mask)
+                   && (bbits_val_&mask) == (that.bbits_val_&mask);
+      }
+
+      if (size_ == BITS_PER_WORD) {
+            return ((abits_val_|bbits_val_) == (that.abits_val_|that.bbits_val_))
+                   && (bbits_val_ == that.bbits_val_);
+      }
+
+      unsigned words = size_ / BITS_PER_WORD;
+      for (unsigned idx = 0 ; idx < words ; idx += 1) {
+            if ((abits_ptr_[idx]|bbits_ptr_[idx]) != (that.abits_ptr_[idx]|that.bbits_ptr_[idx]))
+                  return false;
+            if (bbits_ptr_[idx] != that.bbits_ptr_[idx])
+                  return false;
+      }
+
+      unsigned long mask = size_%BITS_PER_WORD;
+      if (mask > 0) {
+            mask = (1UL << mask) - 1;
+            return ((abits_ptr_[words]|bbits_ptr_[words])&mask) == ((that.abits_ptr_[words]|that.bbits_ptr_[words])&mask)
+                   && (bbits_ptr_[words]&mask) == (that.bbits_ptr_[words]&mask);
+      }
+
+      return true;
+}
+
 bool vvp_vector4_t::has_xz() const
 {
       if (size_ < BITS_PER_WORD) {
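
The per-word test in eq_xz above collapses x and z because both carry a
set b-bit in the assumed (a,b) encoding, so OR-ing the a-word into the
b-word erases the only difference between them, while 0 and 1 remain
distinct. A self-contained check of that claim (helper name
hypothetical):

    #include <cassert>

    // One (a,b)-encoded bit per argument pair; applies the per-word
    // test eq_xz uses: (a1|b1) == (a2|b2) and b1 == b2.
    static bool eq_xz_word(unsigned long a1, unsigned long b1,
                           unsigned long a2, unsigned long b2)
    {
          return ((a1|b1) == (a2|b2)) && (b1 == b2);
    }

    int main()
    {
          // Encoding assumed: 0=(0,0), 1=(1,0), z=(0,1), x=(1,1).
          assert( eq_xz_word(1,1, 0,1));   // x vs z: equal under eq_xz
          assert(!eq_xz_word(1,0, 1,1));   // 1 vs x: still different
          assert(!eq_xz_word(0,0, 0,1));   // 0 vs z: still different
          assert( eq_xz_word(1,0, 1,0));   // 1 vs 1: equal, as expected
          return 0;
    }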
@@ -2557,12 +2626,11 @@ ostream& operator<< (ostream&out, const vvp_vector2_t&that)
 
 vvp_vector8_t::vvp_vector8_t(const vvp_vector8_t&that)
 {
       size_ = that.size_;
-      if (size_ <= PTR_THRESH) {
-            memcpy(val_, that.val_, sizeof(val_));
+      if (size_ <= sizeof val_) {
+            ptr_ = that.ptr_;
       } else {
-            ptr_ = new vvp_scalar_t[size_];
-            for (unsigned idx = 0 ; idx < size_ ; idx += 1)
-                  ptr_[idx] = that.ptr_[idx];
+            ptr_ = new unsigned char[size_];
+            memcpy(ptr_, that.ptr_, size_);
       }
 }
@@ -2573,15 +2641,14 @@ vvp_vector8_t::vvp_vector8_t(const vvp_vector4_t&that,
       if (size_ == 0)
             return;
 
-      vvp_scalar_t*tmp;
-      if (size_ <= PTR_THRESH)
-            tmp = new (val_) vvp_scalar_t[PTR_THRESH];
-      else
-            tmp = ptr_ = new vvp_scalar_t[size_];
-      for (unsigned idx = 0 ; idx < size_ ; idx += 1)
-            tmp[idx] = vvp_scalar_t (that.value(idx), str0, str1);
+      if (size_ <= sizeof val_) {
+            for (unsigned idx = 0 ; idx < size_ ; idx += 1)
+                  val_[idx] = vvp_scalar_t(that.value(idx), str0, str1).raw();
+      } else {
+            ptr_ = new unsigned char[size_];
+            for (unsigned idx = 0 ; idx < size_ ; idx += 1)
+                  ptr_[idx] = vvp_scalar_t(that.value(idx), str0, str1).raw();
+      }
 }
 
 const vvp_vector8_t vvp_vector8_t::nil;
@@ -2589,12 +2656,11 @@ const vvp_vector8_t vvp_vector8_t::nil;
 
 vvp_vector8_t& vvp_vector8_t::operator= (const vvp_vector8_t&that)
 {
         // Assign to self.
-      if (this == &that || (size_ > PTR_THRESH && that.size_ > PTR_THRESH &&
-                            ptr_ == that.ptr_))
+      if (this == &that)
             return *this;
 
       if (size_ != that.size_) {
-            if (size_ > PTR_THRESH)
+            if (size_ > sizeof val_)
                   delete[]ptr_;
             size_ = 0;
       }
@@ -2604,7 +2670,7 @@ vvp_vector8_t& vvp_vector8_t::operator= (const vvp_vector8_t&that)
             return *this;
       }
 
-      if (that.size_ <= PTR_THRESH) {
+      if (that.size_ <= sizeof val_) {
             size_ = that.size_;
             memcpy(val_, that.val_, sizeof(val_));
             return *this;
@@ -2612,11 +2678,10 @@ vvp_vector8_t& vvp_vector8_t::operator= (const vvp_vector8_t&that)
 
       if (size_ == 0) {
             size_ = that.size_;
-            ptr_ = new vvp_scalar_t[size_];
+            ptr_ = new unsigned char[size_];
       }
 
-      for (unsigned idx = 0 ; idx < size_ ; idx += 1)
-            ptr_[idx] = that.ptr_[idx];
+      memcpy(ptr_, that.ptr_, size_);
 
       return *this;
 }
@@ -2625,12 +2690,12 @@ vvp_vector8_t vvp_vector8_t::subvalue(unsigned base, unsigned wid) const
 {
       vvp_vector8_t tmp (wid);
-      vvp_scalar_t* tmp_ptr = tmp.size_<=PTR_THRESH? reinterpret_cast<vvp_scalar_t*>(tmp.val_) : tmp.ptr_;
-      const vvp_scalar_t* ptr = size_<=PTR_THRESH? reinterpret_cast<const vvp_scalar_t*>(val_) : ptr_;
+      unsigned char*tmp_ptr = tmp.size_ <= sizeof val_? tmp.val_ : tmp.ptr_;
+      const unsigned char*use_ptr = size_ <= sizeof val_? val_ : ptr_;
 
       unsigned idx = 0;
       while ((idx < wid) && (base+idx < size_)) {
-            tmp_ptr[idx] = ptr[base+idx];
+            tmp_ptr[idx] = use_ptr[base+idx];
             idx += 1;
       }
@@ -2649,8 +2714,8 @@ vvp_vector8_t part_expand(const vvp_vector8_t&that, unsigned wid, unsigned off)
       assert(off < wid);
 
       vvp_vector8_t tmp (wid);
-      vvp_scalar_t* tmp_ptr = tmp.size_<=vvp_vector8_t::PTR_THRESH? reinterpret_cast<vvp_scalar_t*>(tmp.val_) : tmp.ptr_;
-      const vvp_scalar_t* that_ptr = that.size_<=vvp_vector8_t::PTR_THRESH? reinterpret_cast<const vvp_scalar_t*>(that.val_) : that.ptr_;
+      unsigned char* tmp_ptr = tmp.size_<= sizeof tmp.val_? tmp.val_ : tmp.ptr_;
+      const unsigned char* that_ptr = that.size_<= sizeof that.val_? that.val_ : that.ptr_;
 
       unsigned idx = off;
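
Taken together, the vvp_vector8_t hunks above replace an array of
vvp_scalar_t objects with a plain byte array: each vvp_scalar_t is a
single encoded unsigned char, so the bytes can live inline in a
pointer-sized buffer for small vectors, or on the heap for large ones,
overlaid in a union. Copies then become a memcpy, or a single
pointer-sized assignment in the inline case. A standalone sketch of the
layout with hypothetical names (assignment operator omitted for
brevity):

    #include <cstring>

    class byte_vec {
        public:
          explicit byte_vec(unsigned size) : size_(size)
          {
                if (size_ <= sizeof val_)
                      ptr_ = 0;    // writing the pointer zeroes val_ too
                else {
                      ptr_ = new unsigned char[size_];
                      memset(ptr_, 0, size_);
                }
          }

          byte_vec(const byte_vec&that) : size_(that.size_)
          {
                if (size_ <= sizeof val_)
                      ptr_ = that.ptr_;   // one word copies all inline bytes
                else {
                      ptr_ = new unsigned char[size_];
                      memcpy(ptr_, that.ptr_, size_);
                }
          }

          ~byte_vec()
          {
                if (size_ > sizeof val_)
                      delete[] ptr_;
          }

        private:
          unsigned size_;
          union {
                unsigned char*ptr_;                // heap buffer, large vectors
                unsigned char val_[sizeof(void*)]; // inline bytes, small ones
          };
    };

The size_ <= sizeof val_ test replaces the old PTR_THRESH constant, so
the inline capacity is exactly the space the pointer already occupies
and the union wastes nothing.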


@@ -211,6 +211,7 @@ class vvp_vector4_t {
                     unsigned adr, unsigned wid);
       vvp_vector4_t(const vvp_vector4_t&that);
+      vvp_vector4_t(const vvp_vector4_t&that, bool invert_flag);
       vvp_vector4_t& operator= (const vvp_vector4_t&that);
 
       ~vvp_vector4_t();
@@ -240,6 +241,9 @@ class vvp_vector4_t {
         // Test that the vectors are exactly equal
       bool eeq(const vvp_vector4_t&that) const;
 
+        // Test that the vectors are equal, with xz comparing as equal.
+      bool eq_xz(const vvp_vector4_t&that) const;
+
         // Return true if there is an X or Z anywhere in the vector.
       bool has_xz() const;
@@ -283,6 +287,7 @@ class vvp_vector4_t {
         // Initialize and operator= use this private method to copy
         // the data from that object into this object.
       void copy_from_(const vvp_vector4_t&that);
+      void copy_inverted_from_(const vvp_vector4_t&that);
 
       void allocate_words_(unsigned size, unsigned long inita, unsigned long initb);
@@ -313,6 +318,14 @@ inline vvp_vector4_t::vvp_vector4_t(const vvp_vector4_t&that)
       copy_from_(that);
 }
 
+inline vvp_vector4_t::vvp_vector4_t(const vvp_vector4_t&that, bool invert_flag)
+{
+      if (invert_flag)
+            copy_inverted_from_(that);
+      else
+            copy_from_(that);
+}
+
 inline vvp_vector4_t::vvp_vector4_t(unsigned size__, vvp_bit4_t val)
 : size_(size__)
 {
@@ -442,8 +455,7 @@ inline void vvp_vector4_t::set_bit(unsigned idx, vvp_bit4_t val)
 
 inline vvp_vector4_t operator ~ (const vvp_vector4_t&that)
 {
-      vvp_vector4_t res = that;
-      res.invert();
+      vvp_vector4_t res (that, true);
       return res;
 }
@@ -700,6 +712,14 @@ class vvp_scalar_t {
       bool eeq(vvp_scalar_t that) const { return value_ == that.value_; }
       bool is_hiz() const { return value_ == 0; }
 
+    private:
+        // This class and the vvp_vector8_t class are closely related,
+        // so allow vvp_vector8_t access to the raw encoding so that
+        // it can do compact vectoring of vvp_scalar_t objects.
+      friend class vvp_vector8_t;
+      explicit vvp_scalar_t(unsigned char raw) : value_(raw) { }
+      unsigned char raw() const { return value_; }
+
     private:
       unsigned char value_;
 };
@@ -813,11 +833,10 @@ class vvp_vector8_t {
 
         // This is the number of vvp_scalar_t objects we can keep in
         // the val_ buffer. If the vector8 is bigger then this, then
         // resort to allocations to get a larger buffer.
-      enum { PTR_THRESH = 8 };
       unsigned size_;
       union {
-            vvp_scalar_t*ptr_;
-            char val_[PTR_THRESH * sizeof(vvp_scalar_t)];
+            unsigned char*ptr_;
+            unsigned char val_[sizeof(void*)];
       };
 };
@@ -853,35 +872,36 @@ extern ostream& operator<< (ostream&, const vvp_vector8_t&);
 inline vvp_vector8_t::vvp_vector8_t(unsigned size__)
 : size_(size__)
 {
-      if (size_ <= PTR_THRESH) {
-            new (val_) vvp_scalar_t[PTR_THRESH];
+      if (size_ <= sizeof val_) {
+            ptr_ = 0;
       } else {
-            ptr_ = new vvp_scalar_t[size_];
+            ptr_ = new unsigned char[size_];
+            memset(ptr_, 0, size_);
       }
 }
 
 inline vvp_vector8_t::~vvp_vector8_t()
 {
-      if (size_ > PTR_THRESH)
+      if (size_ > sizeof val_)
             delete[]ptr_;
 }
 
 inline vvp_scalar_t vvp_vector8_t::value(unsigned idx) const
 {
       assert(idx < size_);
-      if (size_ <= PTR_THRESH)
-            return reinterpret_cast<const vvp_scalar_t*>(val_) [idx];
+      if (size_ <= sizeof val_)
+            return vvp_scalar_t(val_[idx]);
       else
-            return ptr_[idx];
+            return vvp_scalar_t(ptr_[idx]);
 }
 
 inline void vvp_vector8_t::set_bit(unsigned idx, vvp_scalar_t val)
 {
       assert(idx < size_);
-      if (size_ <= PTR_THRESH)
-            reinterpret_cast<vvp_scalar_t*>(val_) [idx] = val;
+      if (size_ <= sizeof val_)
+            val_[idx] = val.raw();
       else
-            ptr_[idx] = val;
+            ptr_[idx] = val.raw();
 }
 
 // Exactly-equal for vvp_vector8_t is common and should be as tight
@@ -893,15 +913,10 @@ inline bool vvp_vector8_t::eeq(const vvp_vector8_t&that) const
       if (size_ == 0)
             return true;
 
-      if (size_ <= PTR_THRESH)
-            return 0 == memcmp(val_, that.val_, sizeof(val_));
-
-      for (unsigned idx = 0 ; idx < size_ ; idx += 1) {
-            if (! ptr_[idx] .eeq( that.ptr_[idx] ))
-                  return false;
-      }
-
-      return true;
+      if (size_ <= sizeof val_)
+            return ptr_ == that.ptr_;
+      else
+            return memcmp(ptr_, that.ptr_, size_) == 0;
 }
 
 /*
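
The rewritten eeq above exploits the same union overlay: when the bytes
fit the inline buffer, comparing the pointer member compares every
inline byte in one word-sized operation. This relies on the unused tail
bytes being consistent between operands, which is why the constructor
writes ptr_ = 0 (and memsets the heap buffer) before any bits are set.
A sketch of the trick, using the same type punning the diff relies on
(names hypothetical):

    #include <cstring>

    union rep {
          unsigned char*ptr;
          unsigned char val[sizeof(void*)];
    };

    // Exact equality for a small vector: the inline bytes occupy exactly
    // sizeof(void*) bytes, so one pointer compare stands in for memcmp.
    static bool eeq_small(const rep&a, const rep&b)
    {
          return a.ptr == b.ptr;
    }

    // Large vectors still compare byte-for-byte.
    static bool eeq_large(const rep&a, const rep&b, unsigned size)
    {
          return memcmp(a.ptr, b.ptr, size) == 0;
    }

    int main()
    {
          rep a, b;
          a.ptr = 0;
          b.ptr = 0;                  // zero every inline byte of both
          a.val[0] = 1;
          b.val[0] = 1;               // same payload in the used byte
          return eeq_small(a, b) ? 0 : 1;
          (void)eeq_large;
    }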