Merge pull request #1066 from larsclausen/vvp-concat-performance

vvp: Improve concat performance
2024-01-20 10:49:20 -08:00 · 2024-01-20 10:49:20 -08:00 · 6d1a9181bb
parent 192b6aec96 5b509e69f6
commit 6d1a9181bb
7 changed files with 167 additions and 159 deletions
--- a/ivtest/ivltests/br_gh497a.v
+++ b/ivtest/ivltests/br_gh497a.v
@ -25,6 +25,7 @@ assign array6[2:1] = 8'h32;
 reg failed = 0;

 initial begin
+  #0
  $display("%h", array1);
  if (array1 !== 16'h4321) failed = 1;
  $display("%h", array2);
--- a/ivtest/ivltests/part_sel_port.v
+++ b/ivtest/ivltests/part_sel_port.v
@ -33,6 +33,7 @@ mod_test dut(test_string[1:8]);
 mod_test2 dut2(test_string[9:16]);

 initial begin
+    #0
    if(test_string !== "testTESTabcdefgh") begin
        $display("FAILED");
        $finish();
--- a/ivtest/ivltests/pr1002.v
+++ b/ivtest/ivltests/pr1002.v
@ -8,29 +8,11 @@ assign dataout = datain >>> 2;

 reg test_failed;

-initial
-  begin
-     test_failed = 0;
-     #1 datain = 14'h0FFF;
-     #1 datain = 14'h0000;
-     #1 datain = 14'h1FFF;
-     #1 datain = 14'h1000;
-     #1 datain = 14'h2FFF;
-     #1 datain = 14'h2000;
-     #1 datain = 14'h3FFF;
-     #1 datain = 14'h3000;
-     #2;
-     if (test_failed)
-       $display("TEST FAILED :-(");
-     else
-       $display("TEST PASSED :-)");
-  end
-
 wire signed [15:0] expected_dataout;

 assign expected_dataout = ($signed({datain[13:2], 2'b0}) / 4) ;

-always @(dataout)
+task check_data;
  if (expected_dataout != dataout)
    begin
       $display("datain = %d dataout = %h expected = %h ... CHECK FAILED", datain, dataout, expected_dataout);
@ -38,5 +20,32 @@ always @(dataout)
    end
  else
    $display("datain = %d dataout = %d expected = %d ... CHECK PASSED", datain, dataout, expected_dataout);
+endtask
+
+initial
+  begin
+     test_failed = 0;
+     #1 datain = 14'h0FFF;
+     #0 check_data; // #0 delay to allow the wire to resolve
+     #1 datain = 14'h0000;
+     #0 check_data;
+     #1 datain = 14'h1FFF;
+     #0 check_data;
+     #1 datain = 14'h1000;
+     #0 check_data;
+     #1 datain = 14'h2FFF;
+     #0 check_data;
+     #1 datain = 14'h2000;
+     #0 check_data;
+     #1 datain = 14'h3FFF;
+     #0 check_data;
+     #1 datain = 14'h3000;
+     #0 check_data;
+     #2;
+     if (test_failed)
+       $display("TEST FAILED :-(");
+     else
+       $display("TEST PASSED :-)");
+  end

 endmodule // top
--- a/vvp/concat.cc
+++ b/vvp/concat.cc
@ -18,7 +18,7 @@
 */

 # include  "compile.h"
-# include  "vvp_net.h"
+# include  "concat.h"
 # include  <cstdlib>
 # include  <iostream>
 # include  <cassert>
@ -27,15 +27,12 @@ using namespace std;

 vvp_fun_concat::vvp_fun_concat(unsigned w0, unsigned w1,
 			       unsigned w2, unsigned w3)
-: val_(w0+w1+w2+w3)
+: val_(w0+w1+w2+w3, BIT4_Z)
 {
      wid_[0] = w0;
      wid_[1] = w1;
      wid_[2] = w2;
      wid_[3] = w3;
-
-      for (unsigned idx = 0 ;  idx < val_.size() ;  idx += 1)
-	    val_.set_bit(idx, BIT4_Z);
 }

 vvp_fun_concat::~vvp_fun_concat()
@ -43,33 +40,15 @@ vvp_fun_concat::~vvp_fun_concat()
 }

 void vvp_fun_concat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
-                               vvp_context_t)
+                               vvp_context_t context)
 {
-      unsigned pdx = port.port();
-
-      if (bit.size() != wid_[pdx]) {
-	    cerr << "internal error: port " << pdx
-		 << " expects wid=" << wid_[pdx]
-		 << ", got wid=" << bit.size() << endl;
-	    assert(0);
-      }
-
-      unsigned off = 0;
-      for (unsigned idx = 0 ;  idx < pdx ;  idx += 1)
-	    off += wid_[idx];
-
-      for (unsigned idx = 0 ;  idx < wid_[pdx] ;  idx += 1) {
-	    val_.set_bit(off+idx, bit.value(idx));
-      }
-
-      port.ptr()->send_vec4(val_, 0);
+      recv_vec4_pv(port, bit, 0, bit.size(), context);
 }

 void vvp_fun_concat::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
                                  unsigned base, unsigned vwid, vvp_context_t)
 {
      unsigned pdx = port.port();
-      unsigned wid = bit.size();

      if (vwid != wid_[pdx]) {
 	    cerr << "internal error: port " << pdx
@ -78,19 +57,25 @@ void vvp_fun_concat::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
 	    assert(0);
      }

-      unsigned off = 0;
+      unsigned off = base;
      for (unsigned idx = 0 ;  idx < pdx ;  idx += 1)
 	    off += wid_[idx];

-      unsigned limit = off + wid_[pdx];
+      if (!val_.set_vec(off, bit))
+	    return;

-      off += base;
-      for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-            if (off+idx >= limit) break;
-	    val_.set_bit(off+idx, bit.value(idx));
-      }
+      if (net_)
+	    return;

-      port.ptr()->send_vec4(val_, 0);
+      net_ = port.ptr();
+      schedule_functor(this);
+}
+
+void vvp_fun_concat::run_run()
+{
+      vvp_net_t *ptr = net_;
+      net_ = nullptr;
+      ptr->send_vec4(val_, 0);
 }

 void compile_concat(char*label, unsigned w0, unsigned w1,
@ -118,9 +103,6 @@ vvp_fun_concat8::vvp_fun_concat8(unsigned w0, unsigned w1,
      wid_[1] = w1;
      wid_[2] = w2;
      wid_[3] = w3;
-
-      for (unsigned idx = 0 ;  idx < val_.size() ;  idx += 1)
-	    val_.set_bit(idx, vvp_scalar_t(BIT4_Z, 0, 0));
 }

 vvp_fun_concat8::~vvp_fun_concat8()
@ -131,7 +113,7 @@ void vvp_fun_concat8::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
 				vvp_context_t)
 {
      vvp_vector8_t bit8 (bit, 6, 6);
-      recv_vec8(port, bit8);
+      recv_vec8_pv(port, bit8, 0, bit8.size());
 }

 void vvp_fun_concat8::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
@ -143,31 +125,13 @@ void vvp_fun_concat8::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,

 void vvp_fun_concat8::recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit)
 {
-      unsigned pdx = port.port();
-
-      if (bit.size() != wid_[pdx]) {
-	    cerr << "internal error: port " << pdx
-		 << " expects wid=" << wid_[pdx]
-		 << ", got wid=" << bit.size() << endl;
-	    assert(0);
-      }
-
-      unsigned off = 0;
-      for (unsigned idx = 0 ;  idx < pdx ;  idx += 1)
-	    off += wid_[idx];
-
-      for (unsigned idx = 0 ;  idx < wid_[pdx] ;  idx += 1) {
-	    val_.set_bit(off+idx, bit.value(idx));
-      }
-
-      port.ptr()->send_vec8(val_);
+      recv_vec8_pv(port, bit, 0, bit.size());
 }

 void vvp_fun_concat8::recv_vec8_pv(vvp_net_ptr_t port, const vvp_vector8_t&bit,
 				   unsigned base, unsigned vwid)
 {
      unsigned pdx = port.port();
-      unsigned wid = bit.size();

      if (vwid != wid_[pdx]) {
 	    cerr << "internal error: port " << pdx
@ -176,19 +140,24 @@ void vvp_fun_concat8::recv_vec8_pv(vvp_net_ptr_t port, const vvp_vector8_t&bit,
 	    assert(0);
      }

-      unsigned off = 0;
+      unsigned off = base;
      for (unsigned idx = 0 ;  idx < pdx ;  idx += 1)
 	    off += wid_[idx];

-      unsigned limit = off + wid_[pdx];
+      val_.set_vec(off, bit);

-      off += base;
-      for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-            if (off+idx >= limit) break;
-	    val_.set_bit(off+idx, bit.value(idx));
-      }
+      if (net_)
+	    return;

-      port.ptr()->send_vec8(val_);
+      net_ = port.ptr();
+      schedule_functor(this);
+}
+
+void vvp_fun_concat8::run_run()
+{
+      vvp_net_t *ptr = net_;
+      net_ = nullptr;
+      ptr->send_vec8(val_);
 }

 void compile_concat8(char*label, unsigned w0, unsigned w1,
@ -226,9 +195,7 @@ void vvp_fun_repeat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
      for (unsigned rdx = 0 ;  rdx < rep_ ;  rdx += 1) {
 	    unsigned off = rdx * bit.size();

-	    for (unsigned idx = 0 ; idx < bit.size() ;  idx += 1)
-		  val.set_bit(off+idx, bit.value(idx));
-
+	    val.set_vec(off, bit);
      }

      port.ptr()->send_vec4(val, 0);
--- a/vvp/concat.h
+++ b/vvp/concat.h
@ -0,0 +1,102 @@
+#ifndef IVL_concat_H
+#define IVL_concat_H
+/*
+ * Copyright (c) 2004-2024 Stephen Williams (steve@icarus.com)
+ *
+ *    This source code is free software; you can redistribute it
+ *    and/or modify it in source code form under the terms of the GNU
+ *    General Public License as published by the Free Software
+ *    Foundation; either version 2 of the License, or (at your option)
+ *    any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+# include "vvp_net.h"
+
+/* vvp_fun_concat
+ * This node function creates vectors (vvp_vector4_t) from the
+ * concatenation of the inputs. The inputs (4) may be vector or
+ * vector8 objects, but they are reduced to vector4 values and
+ * strength information lost.
+ *
+ * The expected widths of the input vectors must be given up front so
+ * that the positions in the output vector (and also the size of the
+ * output vector) can be worked out. The input vectors must match the
+ * expected width.
+ */
+class vvp_fun_concat  : public vvp_net_fun_t, protected vvp_gen_event_s  {
+
+    public:
+      vvp_fun_concat(unsigned w0, unsigned w1,
+		     unsigned w2, unsigned w3);
+      ~vvp_fun_concat();
+
+      void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
+                     vvp_context_t context) final;
+
+      void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
+			unsigned base, unsigned vwid, vvp_context_t) final;
+    private:
+      void run_run() final;
+
+      unsigned wid_[4];
+      vvp_vector4_t val_;
+      vvp_net_t *net_ = nullptr;
+};
+
+class vvp_fun_concat8  : public vvp_net_fun_t, protected vvp_gen_event_s {
+
+    public:
+      vvp_fun_concat8(unsigned w0, unsigned w1,
+		     unsigned w2, unsigned w3);
+      ~vvp_fun_concat8();
+
+      void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
+                     vvp_context_t context) final;
+      void recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit) final;
+
+      void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
+			unsigned base, unsigned vwid, vvp_context_t) final;
+      void recv_vec8_pv(vvp_net_ptr_t p, const vvp_vector8_t&bit,
+			unsigned base, unsigned vwid) final;
+
+    private:
+      void run_run() final;
+
+      unsigned wid_[4];
+      vvp_vector8_t val_;
+      vvp_net_t *net_ = nullptr;
+};
+
+/* vvp_fun_repeat
+ * This node function create vectors by repeating the input. The width
+ * is the width of the output vector, and the repeat is the number of
+ * times to repeat the input. The width of the input vector is
+ * implicit from these values.
+ */
+class vvp_fun_repeat  : public vvp_net_fun_t {
+
+    public:
+      vvp_fun_repeat(unsigned width, unsigned repeat);
+      ~vvp_fun_repeat();
+
+      void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
+                     vvp_context_t context);
+      void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
+			unsigned int base, unsigned int vwid,
+			vvp_context_t context) final;
+
+    private:
+      unsigned wid_;
+      unsigned rep_;
+};
+
+#endif
--- a/vvp/vpi_priv.cc
+++ b/vvp/vpi_priv.cc
@ -23,6 +23,7 @@
 # include  "schedule.h"
 # include  "logic.h"
 # include  "part.h"
+# include  "concat.h"
 #ifdef CHECK_WITH_VALGRIND
 # include  "vvp_cleanup.h"
 #endif
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@ -52,7 +52,6 @@ class  vvp_net_fun_t;
 class  vvp_net_fil_t;

 /* Core net function types. */
-class  vvp_fun_concat;
 class  vvp_fun_drive;
 class  vvp_fun_part;

@ -1366,55 +1365,6 @@ class vvp_net_fil_t  : public vvp_vpi_callback {

 /* **** Some core net functions **** */

-/* vvp_fun_concat
- * This node function creates vectors (vvp_vector4_t) from the
- * concatenation of the inputs. The inputs (4) may be vector or
- * vector8 objects, but they are reduced to vector4 values and
- * strength information lost.
- *
- * The expected widths of the input vectors must be given up front so
- * that the positions in the output vector (and also the size of the
- * output vector) can be worked out. The input vectors must match the
- * expected width.
- */
-class vvp_fun_concat  : public vvp_net_fun_t {
-
-    public:
-      vvp_fun_concat(unsigned w0, unsigned w1,
-		     unsigned w2, unsigned w3);
-      ~vvp_fun_concat();
-
-      void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
-                     vvp_context_t context);
-
-      void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
-			unsigned base, unsigned vwid, vvp_context_t);
-    private:
-      unsigned wid_[4];
-      vvp_vector4_t val_;
-};
-
-class vvp_fun_concat8  : public vvp_net_fun_t {
-
-    public:
-      vvp_fun_concat8(unsigned w0, unsigned w1,
-		     unsigned w2, unsigned w3);
-      ~vvp_fun_concat8();
-
-      void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
-                     vvp_context_t context);
-      void recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit);
-
-      void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
-			unsigned base, unsigned vwid, vvp_context_t);
-      void recv_vec8_pv(vvp_net_ptr_t p, const vvp_vector8_t&bit,
-			unsigned base, unsigned vwid);
-
-    private:
-      unsigned wid_[4];
-      vvp_vector8_t val_;
-};
-
 /*
 * The vvp_fun_force class objects are net functors that use their input
 * to force the associated filter. They do not actually  have an
@ -1436,29 +1386,6 @@ class vvp_fun_force : public vvp_net_fun_t {
      void recv_real(vvp_net_ptr_t port, double bit, vvp_context_t);
 };

-/* vvp_fun_repeat
- * This node function create vectors by repeating the input. The width
- * is the width of the output vector, and the repeat is the number of
- * times to repeat the input. The width of the input vector is
- * implicit from these values.
- */
-class vvp_fun_repeat  : public vvp_net_fun_t {
-
-    public:
-      vvp_fun_repeat(unsigned width, unsigned repeat);
-      ~vvp_fun_repeat();
-
-      void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
-                     vvp_context_t context);
-      void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
-			unsigned int base, unsigned int vwid,
-			vvp_context_t context) final;
-
-    private:
-      unsigned wid_;
-      unsigned rep_;
-};
-
 /* vvp_fun_drive
 * This node function takes an input vvp_vector4_t as input, and
 * repeats that value as a vvp_vector8_t with all the bits given the