Rework scheduling of concat, part, buf/not and resolv for efficiency.

The concat and resolv functors are best evaluated lazily, because each
evaluation is costly and there is a high probability that an evaluation
will be invalidated when new input comes in.

Also optimize the recv_vec4_pv method of the resolver, which is
commonly used, and adjust the order of handling in vvp_fun_part so
that it works more efficiently.
Stephen Williams 2008-06-06 15:31:22 -07:00
parent 2f4e5bf5b6
commit 2e95a740da
7 changed files with 121 additions and 107 deletions
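The pattern applied to these functors, shown here as a minimal self-contained sketch: latch the new input, schedule a single evaluation event, and do the costly work only when that event runs. The names lazy_node and schedule_event below are illustrative stand-ins, not vvp classes; the real code uses vvp_gen_event_s, schedule_generic() and run_run(), as the diffs below show.

# include <iostream>
# include <queue>
# include <string>

struct event_s {
      virtual ~event_s() { }
      virtual void run_run() =0;
};

  /* Stand-in for the vvp scheduler queue: events pushed here run later,
     so several input changes in one time step collapse into a single
     evaluation of the functor. */
static std::queue<event_s*> event_queue;
static void schedule_event(event_s*ev) { event_queue.push(ev); }

class lazy_node : public event_s {

    public:
      lazy_node() : scheduled_(false) { input_[0] = "x"; input_[1] = "x"; }

	/* Latch the new value and schedule at most one evaluation.
	   The expensive output calculation is not done here. */
      void recv(unsigned port, const std::string&bit)
      {
	    if (input_[port] == bit)
		  return;
	    input_[port] = bit;
	    if (! scheduled_) {
		  scheduled_ = true;
		  schedule_event(this);
	    }
      }

    private:
	/* By the time this runs, every change that arrived in the
	   meantime is already latched in input_[], so the evaluation
	   happens once instead of once per input change. */
      void run_run()
      {
	    scheduled_ = false;
	    std::cout << "evaluate {" << input_[1] << "," << input_[0]
		      << "}" << std::endl;
      }

      std::string input_[2];
      bool scheduled_;
};

int main()
{
      lazy_node node;
      node.recv(0, "1010");
      node.recv(1, "11");   /* arrives before the evaluation runs */

      while (! event_queue.empty()) {
	    event_queue.front()->run_run();
	    event_queue.pop();
      }
      return 0;   /* prints a single "evaluate {11,1010}" */
}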


@@ -20,6 +20,7 @@
# include "compile.h"
# include "vvp_net.h"
# include "schedule.h"
# include <stdlib.h>
# include <iostream>
#ifdef HAVE_MALLOC_H
@@ -27,18 +28,41 @@
#endif
# include <assert.h>
/* vvp_fun_concat
* This node function creates vectors (vvp_vector4_t) from the
* concatenation of the inputs. The inputs (4) may be vector or
* vector8 objects, but they are reduced to vector4 values and
* strength information lost.
*
* The expected widths of the input vectors must be given up front so
* that the positions in the output vector (and also the size of the
* output vector) can be worked out. The input vectors must match the
* expected width.
*/
class vvp_fun_concat : public vvp_net_fun_t, private vvp_gen_event_s {
public:
vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat();
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
private:
void run_run();
vvp_net_t*net_;
vvp_vector4_t input_[4];
};
vvp_fun_concat::vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3)
: val_(w0+w1+w2+w3)
: net_(0)
{
wid_[0] = w0;
wid_[1] = w1;
wid_[2] = w2;
wid_[3] = w3;
for (unsigned idx = 0 ; idx < val_.size() ; idx += 1)
val_.set_bit(idx, BIT4_X);
input_[0] = vvp_vector4_t(w0);
input_[1] = vvp_vector4_t(w1);
input_[2] = vvp_vector4_t(w2);
input_[3] = vvp_vector4_t(w3);
}
vvp_fun_concat::~vvp_fun_concat()
@@ -49,22 +73,38 @@ void vvp_fun_concat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
{
unsigned pdx = port.port();
if (bit.size() != wid_[pdx]) {
if (bit.size() != input_[pdx].size()) {
cerr << "internal error: port " << pdx
<< " expects wid=" << wid_[pdx]
<< " expects wid=" << input_[pdx].size()
<< ", got wid=" << bit.size() << endl;
assert(0);
}
unsigned off = 0;
for (unsigned idx = 0 ; idx < pdx ; idx += 1)
off += wid_[idx];
if (input_[pdx] .eeq(bit))
return;
for (unsigned idx = 0 ; idx < wid_[pdx] ; idx += 1) {
val_.set_bit(off+idx, bit.value(idx));
input_[pdx] = bit;
if (net_ == 0) {
net_ = port.ptr();
schedule_generic(this, 0, false);
}
}
void vvp_fun_concat::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
unsigned off = 0;
unsigned owid = input_[0].size() + input_[1].size() + input_[2].size() + input_[3].size();
vvp_vector4_t res (owid);
for (unsigned idx = 0 ; idx < 4 && (off<owid) ; idx += 1) {
res.set_vec(off, input_[idx]);
off += input_[idx].size();
}
vvp_send_vec4(port.ptr()->out, val_);
vvp_send_vec4(ptr->out, res);
}
void compile_concat(char*label, unsigned w0, unsigned w1,
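
As the vvp_fun_concat comment above says, the expected input widths fix both the output width and each port's position in it; the reworked run_run() simply accumulates those widths into offsets. A small standalone sketch of that mapping, with made-up widths and plain C++ rather than the vvp_vector4_t API:

# include <cstdio>

int main()
{
	/* Hypothetical port widths, in the order run_run() walks them:
	   port 0 lands at the least significant bits of the output. */
      unsigned wid[4] = { 2, 3, 1, 2 };

      unsigned off = 0;
      for (unsigned idx = 0 ; idx < 4 ; idx += 1) {
	    printf("port %u -> output bits [%u:%u]\n",
		   idx, off + wid[idx] - 1, off);
	    off += wid[idx];
      }
      printf("output width = %u\n", off);   /* 8 for these widths */
      return 0;
}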


@@ -140,7 +140,6 @@ void vvp_fun_eeq::run_run()
vvp_fun_buf::vvp_fun_buf()
{
net_ = 0;
count_functors_logic += 1;
}
@@ -157,25 +156,12 @@ void vvp_fun_buf::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
if (ptr.port() != 0)
return;
if (input_ .eeq( bit ))
if (input_ .eq_xz( bit ))
return;
input_ = bit;
if (net_ == 0) {
net_ = ptr.ptr();
schedule_generic(this, 0, false);
}
}
void vvp_fun_buf::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
vvp_vector4_t tmp (input_);
tmp.change_z2x();
vvp_send_vec4(ptr->out, tmp);
input_.change_z2x();
vvp_send_vec4(ptr.ptr()->out, input_);
}
vvp_fun_bufz::vvp_fun_bufz()
@@ -394,7 +380,6 @@ void vvp_fun_muxz::run_run()
vvp_fun_not::vvp_fun_not()
{
net_ = 0;
count_functors_logic += 1;
}
@@ -411,30 +396,13 @@ void vvp_fun_not::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
if (ptr.port() != 0)
return;
if (input_ .eeq( bit ))
if (input_ .eq_xz( bit ))
return;
input_ = bit;
if (net_ == 0) {
net_ = ptr.ptr();
schedule_generic(this, 0, false);
}
vvp_send_vec4(ptr.ptr()->out, ~input_);
}
void vvp_fun_not::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
vvp_vector4_t result (input_);
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
vvp_bit4_t bitbit = ~ result.value(idx);
result.set_bit(idx, bitbit);
}
vvp_send_vec4(ptr->out, result);
}
vvp_fun_or::vvp_fun_or(unsigned wid, bool invert)
: vvp_fun_boolean_(wid), invert_(invert)


@@ -69,7 +69,7 @@ class vvp_fun_eeq : public vvp_fun_boolean_ {
* The retransmitted vector has all Z values changed to X, just like
* the buf(Q,D) gate in Verilog.
*/
class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s {
class vvp_fun_buf: public vvp_net_fun_t {
public:
explicit vvp_fun_buf();
@@ -77,12 +77,8 @@ class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s {
void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
private:
void run_run();
private:
vvp_vector4_t input_;
vvp_net_t*net_;
};
/*
@@ -152,7 +148,7 @@ class vvp_fun_muxr : public vvp_net_fun_t, private vvp_gen_event_s {
sel_type select_;
};
class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s {
class vvp_fun_not: public vvp_net_fun_t {
public:
explicit vvp_fun_not();
@@ -160,12 +156,8 @@ class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s {
void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
private:
void run_run();
private:
vvp_vector4_t input_;
vvp_net_t*net_;
};
class vvp_fun_or : public vvp_fun_boolean_ {
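
The comment above describes how vvp_fun_buf retransmits its input with Z changed to X, like a Verilog buf(Q,D) gate. That is also why the new eq_xz() test (declared further down in vvp_net.h) is the right change check for buf and not: an input that only toggles between x and z cannot change the output. A rough sketch of both ideas, using character strings that merely mimic the vvp_vector4_t methods of the same names:

# include <cstdio>
# include <string>

static std::string change_z2x(std::string val)
{
      for (unsigned idx = 0 ; idx < val.size() ; idx += 1)
	    if (val[idx] == 'z')
		  val[idx] = 'x';   /* buf(Q,D) drives x for a z input */
      return val;
}

  /* eq_xz-style compare: x and z count as equal. */
static bool eq_xz(const std::string&a, const std::string&b)
{
      return change_z2x(a) == change_z2x(b);
}

int main()
{
      printf("%s\n", change_z2x("01xz").c_str());       /* prints 01xx */
      printf("%d\n", eq_xz("01xz", "01xx") ? 1 : 0);    /* prints 1 */
      return 0;
}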


@@ -28,7 +28,7 @@
# include <assert.h>
vvp_fun_part::vvp_fun_part(unsigned base, unsigned wid)
: base_(base), wid_(wid)
: base_(base), val_(wid)
{
net_ = 0;
}
@@ -41,10 +41,18 @@ void vvp_fun_part::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
{
assert(port.port() == 0);
if (val_ .eeq( bit ))
vvp_vector4_t tmp = val_;
for (unsigned idx = 0 ; idx < tmp.size() ; idx += 1) {
if ((idx + base_) < bit.size())
tmp.set_bit(idx, bit.value(base_+idx));
else
tmp.set_bit(idx, BIT4_X);
}
if (val_ .eeq( tmp ))
return;
val_ = bit;
val_ = tmp;
if (net_ == 0) {
net_ = port.ptr();
@@ -63,6 +71,11 @@ void vvp_fun_part::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
{
assert(bit.size() == wid);
if (base >= base_+val_.size())
return;
if ((base+wid) <= base_)
return;
vvp_vector4_t tmp = val_;
if (tmp.size() == 0)
tmp = vvp_vector4_t(vwid);
@@ -76,13 +89,7 @@ void vvp_fun_part::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
vvp_vector4_t res (wid_, BIT4_X);
for (unsigned idx = 0 ; idx < wid_ ; idx += 1) {
if (idx + base_ < val_.size())
res.set_bit(idx, val_.value(base_+idx));
}
vvp_send_vec4(ptr->out, res);
vvp_send_vec4(ptr->out, val_);
}
vvp_fun_part_pv::vvp_fun_part_pv(unsigned b, unsigned w, unsigned v)
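
The vvp_fun_part rework above does the part select in recv_vec4() and keeps only the selected bits in val_, so an input change that does not touch the selected range is dropped before anything is scheduled, and run_run() just retransmits val_. A sketch of the selection itself, with made-up values; part_select() is a hypothetical helper and strings stand in for vvp_vector4_t, with index 0 as the LSB:

# include <cstdio>
# include <string>

static std::string part_select(const std::string&bit, unsigned base, unsigned wid)
{
      std::string tmp(wid, 'x');   /* bits past the end of the input read as x */
      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
	    if (base + idx < bit.size())
		  tmp[idx] = bit[base + idx];
      }
      return tmp;
}

int main()
{
	/* Take 4 bits starting at bit 6 of an 8-bit value: the top two
	   positions fall off the end and come back as x. */
      printf("%s\n", part_select("01011010", 6, 4).c_str());   /* prints 10xx */
      return 0;
}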


@@ -44,18 +44,14 @@ void resolv_functor::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned base, unsigned wid, unsigned vwid)
{
assert(bit.size() == wid);
vvp_vector4_t res (vwid);
for (unsigned idx = 0 ; idx < base ; idx += 1)
res.set_bit(idx, BIT4_Z);
vvp_vector8_t tmp (bit,6,6);
vvp_vector8_t tmpw (vwid);
for (unsigned idx = 0 ; idx < wid ; idx += 1)
res.set_bit(idx+base, bit.value(idx));
tmpw.set_bit(idx+base, tmp.value(idx));
for (unsigned idx = base+wid ; idx < vwid ; idx += 1)
res.set_bit(idx, BIT4_Z);
recv_vec4(port, res);
recv_vec8(port, tmpw);
}
void resolv_functor::recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit)


@@ -779,6 +779,41 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
return true;
}
bool vvp_vector4_t::eq_xz(const vvp_vector4_t&that) const
{
if (size_ != that.size_)
return false;
if (size_ < BITS_PER_WORD) {
unsigned long mask = (1UL << size_) - 1;
return ((abits_val_|bbits_val_)&mask) == ((that.abits_val_|that.bbits_val_)&mask)
&& (bbits_val_&mask) == (that.bbits_val_&mask);
}
if (size_ == BITS_PER_WORD) {
return ((abits_val_|bbits_val_) == (that.abits_val_|that.bbits_val_))
&& (bbits_val_ == that.bbits_val_);
}
unsigned words = size_ / BITS_PER_WORD;
for (unsigned idx = 0 ; idx < words ; idx += 1) {
if ((abits_ptr_[idx]|bbits_ptr_[idx]) != (that.abits_ptr_[idx]|that.bbits_ptr_[idx]))
return false;
if (bbits_ptr_[idx] != that.bbits_ptr_[idx])
return false;
}
unsigned long mask = size_%BITS_PER_WORD;
if (mask > 0) {
mask = (1UL << mask) - 1;
return ((abits_ptr_[words]|bbits_ptr_[words])&mask) == ((that.abits_ptr_[words]|that.bbits_ptr_[words])&mask)
&& (bbits_ptr_[words]&mask) == (that.bbits_ptr_[words]&mask);
}
return true;
}
bool vvp_vector4_t::has_xz() const
{
if (size_ < BITS_PER_WORD) {
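
The word-at-a-time comparison in eq_xz() above leans on the vvp_vector4_t storage of one "a" plane and one "b" plane per word. The plane roles are inferred from the code, so treat this as an illustration only: 0 and 1 have the b bit clear, while x and z both have it set and differ only in the a plane, so comparing (a|b) together with b folds x and z into one value while still separating 0, 1 and x/z. A single-word sketch with a hypothetical eq_xz_word() helper:

# include <cstdio>

static bool eq_xz_word(unsigned long a1, unsigned long b1,
		       unsigned long a2, unsigned long b2,
		       unsigned long mask)
{
      return ((a1|b1)&mask) == ((a2|b2)&mask)
	  && (b1&mask) == (b2&mask);
}

int main()
{
	/* Two 4-bit vectors that differ only in bit 2, where one holds x
	   and the other z: eq_xz treats them as equal, eeq would not. */
      unsigned long a1 = 0x5, b1 = 0x4;
      unsigned long a2 = 0x1, b2 = 0x4;
      printf("%d\n", eq_xz_word(a1, b1, a2, b2, 0xf) ? 1 : 0);   /* prints 1 */
      return 0;
}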


@@ -40,7 +40,6 @@ class vvp_net_t;
class vvp_net_fun_t;
/* Core net function types. */
class vvp_fun_concat;
class vvp_fun_drive;
class vvp_fun_part;
@@ -154,6 +153,8 @@ class vvp_vector4_t {
// Test that the vectors are exactly equal
bool eeq(const vvp_vector4_t&that) const;
// Test that the vectors are equal, with x and z comparing equal.
bool eq_xz(const vvp_vector4_t&that) const;
// Return true if there is an X or Z anywhere in the vector.
bool has_xz() const;
@@ -791,31 +792,6 @@ class vvp_net_fun_t {
/* **** Some core net functions **** */
/* vvp_fun_concat
* This node function creates vectors (vvp_vector4_t) from the
* concatenation of the inputs. The inputs (4) may be vector or
* vector8 objects, but they are reduced to vector4 values and
* strength information lost.
*
* The expected widths of the input vectors must be given up front so
* that the positions in the output vector (and also the size of the
* output vector) can be worked out. The input vectors must match the
* expected width.
*/
class vvp_fun_concat : public vvp_net_fun_t {
public:
vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat();
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
private:
unsigned wid_[4];
vvp_vector4_t val_;
};
/* vvp_fun_repeat
* This node function create vectors by repeating the input. The width
* is the width of the output vector, and the repeat is the number of