Rework scheduling of concat, part, buf/not and resolv for efficiency.

The concat and resolv functors are best evaluated lazily, because each evaluation is costly and there is a high probability that an evaluation will be invalidated when new input comes in. Also optimize the recv_vec4_pv method of the resolver, which is commonly used, and adjust the order of handling in vvp_fun_part to work more efficiently.
2008-06-06 15:31:22 -07:00 · 2008-06-06 15:31:22 -07:00 · 2e95a740da
parent 2f4e5bf5b6
commit 2e95a740da
7 changed files with 121 additions and 107 deletions
--- a/vvp/concat.cc
+++ b/vvp/concat.cc
@ -20,6 +20,7 @@
 # include  "compile.h"
 # include  "vvp_net.h"
 # include  "schedule.h"
 # include  <stdlib.h>
 # include  <iostream>
 #ifdef HAVE_MALLOC_H
@ -27,18 +28,41 @@
 #endif
 # include  <assert.h>
 /* vvp_fun_concat
 * This node function creates vectors (vvp_vector4_t) from the
 * concatenation of the inputs. The inputs (4) may be vector or
 * vector8 objects, but they are reduced to vector4 values and
 * strength information lost.
 *
 * The expected widths of the input vectors must be given up front so
 * that the positions in the output vector (and also the size of the
 * output vector) can be worked out. The input vectors must match the
 * expected width.
 */
 class vvp_fun_concat  : public vvp_net_fun_t, private vvp_gen_event_s {
    public:
	// w0..w3 are the expected widths of input ports 0..3. Each
	// input_[N] is created with the matching width.
      vvp_fun_concat(unsigned w0, unsigned w1,
 		     unsigned w2, unsigned w3);
      ~vvp_fun_concat();
	// Receive a new value for one input port. A value of the wrong
	// width is an internal error (assert). A value identical to the
	// one already stored is ignored. Otherwise the value is stored
	// and, if no evaluation is pending yet, one is scheduled.
      void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
    private:
	// Deferred evaluation: builds the concatenated vector from
	// input_[0..3] (port 0 at offset 0) and sends it to the output
	// of the saved net. Presumably this is the vvp_gen_event_s
	// callback run by the scheduler -- confirm against schedule.h.
      void run_run();
	// Net saved by recv_vec4 when an evaluation is scheduled.
	// Zero means that no evaluation is pending.
      vvp_net_t*net_;
	// Most recently received value for each of the four ports.
      vvp_vector4_t input_[4];
 };
 vvp_fun_concat::vvp_fun_concat(unsigned w0, unsigned w1,
 			       unsigned w2, unsigned w3)
-: val_(w0+w1+w2+w3)
+: net_(0)
 {
-      wid_[0] = w0;
+      input_[0] = vvp_vector4_t(w0);
-      wid_[1] = w1;
+      input_[1] = vvp_vector4_t(w1);
-      wid_[2] = w2;
+      input_[2] = vvp_vector4_t(w2);
-      wid_[3] = w3;
+      input_[3] = vvp_vector4_t(w3);
      for (unsigned idx = 0 ;  idx < val_.size() ;  idx += 1)
 	    val_.set_bit(idx, BIT4_X);
 }
 vvp_fun_concat::~vvp_fun_concat()
@ -49,22 +73,38 @@ void vvp_fun_concat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
 {
      unsigned pdx = port.port();
-      if (bit.size() != wid_[pdx]) {
+      if (bit.size() != input_[pdx].size()) {
 	    cerr << "internal error: port " << pdx
-		 << " expects wid=" << wid_[pdx]
+		 << " expects wid=" << input_[pdx].size()
 		 << ", got wid=" << bit.size() << endl;
 	    assert(0);
      }
-      unsigned off = 0;
+      if (input_[pdx] .eeq(bit))
-      for (unsigned idx = 0 ;  idx < pdx ;  idx += 1)
+	    return;
 	    off += wid_[idx];
-      for (unsigned idx = 0 ;  idx < wid_[pdx] ;  idx += 1) {
+      input_[pdx] = bit;
-	    val_.set_bit(off+idx, bit.value(idx));
+      if (net_ == 0) {
 	    net_ = port.ptr();
 	    schedule_generic(this, 0, false);
      }
 }
 void vvp_fun_concat::run_run()
 {
      vvp_net_t*ptr = net_;
      net_ = 0;
      unsigned off = 0;
      unsigned owid = input_[0].size() + input_[1].size() + input_[2].size() + input_[3].size();
      vvp_vector4_t res (owid);
      for (unsigned idx = 0 ; idx < 4 && (off<owid) ; idx += 1) {
 	    res.set_vec(off, input_[idx]);
 	    off += input_[idx].size();
      }
-      vvp_send_vec4(port.ptr()->out, val_);
+      vvp_send_vec4(ptr->out, res);
 }
 void compile_concat(char*label, unsigned w0, unsigned w1,
--- a/vvp/logic.cc
+++ b/vvp/logic.cc
@ -140,7 +140,6 @@ void vvp_fun_eeq::run_run()
 vvp_fun_buf::vvp_fun_buf()
 {
      net_ = 0;
      count_functors_logic += 1;
 }
@ -157,25 +156,12 @@ void vvp_fun_buf::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
      if (ptr.port() != 0)
 	    return;
-      if (input_ .eeq( bit ))
+      if (input_ .eq_xz( bit ))
 	    return;
      input_ = bit;
-
+      input_.change_z2x();
-      if (net_ == 0) {
+      vvp_send_vec4(ptr.ptr()->out, input_);
 	    net_ = ptr.ptr();
 	    schedule_generic(this, 0, false);
      }
 }
 void vvp_fun_buf::run_run()
 {
      vvp_net_t*ptr = net_;
      net_ = 0;
      vvp_vector4_t tmp (input_);
      tmp.change_z2x();
      vvp_send_vec4(ptr->out, tmp);
 }
 vvp_fun_bufz::vvp_fun_bufz()
@ -394,7 +380,6 @@ void vvp_fun_muxz::run_run()
 vvp_fun_not::vvp_fun_not()
 {
      net_ = 0;
      count_functors_logic += 1;
 }
@ -411,30 +396,13 @@ void vvp_fun_not::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
      if (ptr.port() != 0)
 	    return;
-      if (input_ .eeq( bit ))
+      if (input_ .eq_xz( bit ))
 	    return;
      input_ = bit;
-      if (net_ == 0) {
+      vvp_send_vec4(ptr.ptr()->out, ~input_);
 	    net_ = ptr.ptr();
 	    schedule_generic(this, 0, false);
      }
 }
 void vvp_fun_not::run_run()
 {
      vvp_net_t*ptr = net_;
      net_ = 0;
      vvp_vector4_t result (input_);
      for (unsigned idx = 0 ;  idx < result.size() ;  idx += 1) {
 	    vvp_bit4_t bitbit = ~ result.value(idx);
 	    result.set_bit(idx, bitbit);
      }
      vvp_send_vec4(ptr->out, result);
 }
 vvp_fun_or::vvp_fun_or(unsigned wid, bool invert)
 : vvp_fun_boolean_(wid), invert_(invert)
--- a/vvp/logic.h
+++ b/vvp/logic.h
@ -69,7 +69,7 @@ class vvp_fun_eeq  : public vvp_fun_boolean_ {
 * The retransmitted vector has all Z values changed to X, just like
 * the buf(Q,D) gate in Verilog.
 */
-class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s {
+class vvp_fun_buf: public vvp_net_fun_t {
    public:
      explicit vvp_fun_buf();
@ -77,12 +77,8 @@ class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s {
      void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
    private:
      void run_run();
    private:
      vvp_vector4_t input_;
      vvp_net_t*net_;
 };
 /*
@ -152,7 +148,7 @@ class vvp_fun_muxr : public vvp_net_fun_t, private vvp_gen_event_s {
      sel_type select_;
 };
-class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s {
+class vvp_fun_not: public vvp_net_fun_t {
    public:
      explicit vvp_fun_not();
@ -160,12 +156,8 @@ class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s {
      void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
    private:
      void run_run();
    private:
      vvp_vector4_t input_;
      vvp_net_t*net_;
 };
 class vvp_fun_or  : public vvp_fun_boolean_ {
--- a/vvp/part.cc
+++ b/vvp/part.cc
@ -28,7 +28,7 @@
 # include  <assert.h>
 vvp_fun_part::vvp_fun_part(unsigned base, unsigned wid)
-: base_(base), wid_(wid)
+: base_(base), val_(wid)
 {
      net_ = 0;
 }
@ -41,10 +41,18 @@ void vvp_fun_part::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
 {
      assert(port.port() == 0);
-      if (val_ .eeq( bit ))
+      vvp_vector4_t tmp = val_;
      for (unsigned idx = 0 ;  idx < tmp.size() ;  idx += 1) {
 	    if ((idx + base_) < bit.size())
 		  tmp.set_bit(idx, bit.value(base_+idx));
 	    else
 		  tmp.set_bit(idx, BIT4_X);
      }
      if (val_ .eeq( tmp ))
 	    return;
-      val_ = bit;
+      val_ = tmp;
      if (net_ == 0) {
 	    net_ = port.ptr();
@ -63,6 +71,11 @@ void vvp_fun_part::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
 {
      assert(bit.size() == wid);
      if (base >= base_+val_.size())
 	    return;
      if ((base+wid) <= base_)
 	    return;
      vvp_vector4_t tmp = val_;
      if (tmp.size() == 0)
 	    tmp = vvp_vector4_t(vwid);
@ -76,13 +89,7 @@ void vvp_fun_part::run_run()
 {
      vvp_net_t*ptr = net_;
      net_ = 0;
-
+      vvp_send_vec4(ptr->out, val_);
      vvp_vector4_t res (wid_, BIT4_X);
      for (unsigned idx = 0 ;  idx < wid_ ;  idx += 1) {
 	    if (idx + base_ < val_.size())
 		  res.set_bit(idx, val_.value(base_+idx));
      }
      vvp_send_vec4(ptr->out, res);
 }
 vvp_fun_part_pv::vvp_fun_part_pv(unsigned b, unsigned w, unsigned v)
--- a/vvp/resolv.cc
+++ b/vvp/resolv.cc
@ -44,18 +44,14 @@ void resolv_functor::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
 				  unsigned base, unsigned wid, unsigned vwid)
 {
      assert(bit.size() == wid);
      vvp_vector4_t res (vwid);
-      for (unsigned idx = 0 ;  idx < base ;  idx += 1)
+      vvp_vector8_t tmp (bit,6,6);
-	    res.set_bit(idx, BIT4_Z);
+      vvp_vector8_t tmpw (vwid);
      for (unsigned idx = 0 ;  idx < wid ;  idx += 1)
-	    res.set_bit(idx+base, bit.value(idx));
+	    tmpw.set_bit(idx+base, tmp.value(idx));
-      for (unsigned idx = base+wid ;  idx < vwid ;  idx += 1)
+      recv_vec8(port, tmpw);
 	    res.set_bit(idx, BIT4_Z);
      recv_vec4(port, res);
 }
 void resolv_functor::recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit)
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@ -779,6 +779,41 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
      return true;
 }
 /*
  * Compare this vector with "that", treating X and Z as the same
  * value. Vectors of different size are never equal.
  *
  * For each word the test looks at (abits|bbits) and at bbits alone;
  * two bits match when both of those agree. Presumably X and Z both
  * have their bbit set and differ only in the abit, which is why the
  * abits are only compared after being OR-ed with the bbits --
  * confirm against the encoding in vvp_net.h.
  */
 bool vvp_vector4_t::eq_xz(const vvp_vector4_t&that) const
 {
      if (size_ != that.size_)
 	    return false;

	// Vectors that fit in a single word keep their bits inline.
      if (size_ <= BITS_PER_WORD) {
 	    unsigned long differ = (abits_val_|bbits_val_)
 		  ^ (that.abits_val_|that.bbits_val_);
 	    differ |= bbits_val_ ^ that.bbits_val_;

		// Only the low size_ bits are meaningful. A full word
		// needs no mask (and a shift by the word size must be
		// avoided).
 	    if (size_ < BITS_PER_WORD)
 		  differ &= (1UL << size_) - 1;

 	    return differ == 0;
      }

	// Wider vectors: check every complete word first.
      const unsigned full_words = size_ / BITS_PER_WORD;
      for (unsigned wdx = 0 ;  wdx < full_words ;  wdx += 1) {
 	    unsigned long differ = (abits_ptr_[wdx]|bbits_ptr_[wdx])
 		  ^ (that.abits_ptr_[wdx]|that.bbits_ptr_[wdx]);
 	    differ |= bbits_ptr_[wdx] ^ that.bbits_ptr_[wdx];
 	    if (differ != 0)
 		  return false;
      }

	// Then the partial word at the end, if there is one.
      const unsigned long tail_bits = size_ % BITS_PER_WORD;
      if (tail_bits == 0)
 	    return true;

      const unsigned long tail_mask = (1UL << tail_bits) - 1;
      unsigned long differ = (abits_ptr_[full_words]|bbits_ptr_[full_words])
 	    ^ (that.abits_ptr_[full_words]|that.bbits_ptr_[full_words]);
      differ |= bbits_ptr_[full_words] ^ that.bbits_ptr_[full_words];

      return (differ & tail_mask) == 0;
 }
 bool vvp_vector4_t::has_xz() const
 {
      if (size_ < BITS_PER_WORD) {
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@ -40,7 +40,6 @@ class  vvp_net_t;
 class  vvp_net_fun_t;
 /* Core net function types. */
 class  vvp_fun_concat;
 class  vvp_fun_drive;
 class  vvp_fun_part;
@ -154,6 +153,8 @@ class vvp_vector4_t {
 	// Test that the vectors are exactly equal
      bool eeq(const vvp_vector4_t&that) const;
 	// Test that the vectors are equal, with x and z comparing equal.
      bool eq_xz(const vvp_vector4_t&that) const;
 	// Return true if there is an X or Z anywhere in the vector.
      bool has_xz() const;
@ -791,31 +792,6 @@ class vvp_net_fun_t {
 /* **** Some core net functions **** */
 /* vvp_fun_concat
 * This node function creates vectors (vvp_vector4_t) from the
 * concatenation of the inputs. The inputs (4) may be vector or
 * vector8 objects, but they are reduced to vector4 values and
 * strength information lost.
 *
 * The expected widths of the input vectors must be given up front so
 * that the positions in the output vector (and also the size of the
 * output vector) can be worked out. The input vectors must match the
 * expected width.
 */
 class vvp_fun_concat  : public vvp_net_fun_t {
    public:
	// w0..w3 are the expected widths of input ports 0..3.
      vvp_fun_concat(unsigned w0, unsigned w1,
 		     unsigned w2, unsigned w3);
      ~vvp_fun_concat();
	// Receive a new value for one input port. The width of the
	// value must match the expected width for that port.
      void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
    private:
	// Expected width of each of the four input ports.
      unsigned wid_[4];
	// The concatenated output value, w0+w1+w2+w3 bits wide.
      vvp_vector4_t val_;
 };
 /* vvp_fun_repeat
 * This node function create vectors by repeating the input. The width
 * is the width of the output vector, and the repeat is the number of