From 9b1ac6ab50adea75396dddfe8aae56e68a930fdc Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 23 Dec 2023 19:55:14 -0800 Subject: [PATCH 1/5] ivtest: Fix `assign` vs `initial` race condition in some tests The first evaluation of an `assign` statement is scheduled at the same time as `initial` statements. There are some test cases that evaluate the result of an `assign` statement in an `initial` statement. This is an inherent race condition and might fail depending on the exact order of evaluation. To fix this add an additional delay in the `initial` block. This will make sure that all `assign` statements get fully resolved first. Signed-off-by: Lars-Peter Clausen --- ivtest/ivltests/br_gh497a.v | 1 + ivtest/ivltests/part_sel_port.v | 1 + 2 files changed, 2 insertions(+) diff --git a/ivtest/ivltests/br_gh497a.v b/ivtest/ivltests/br_gh497a.v index 33319f1a0..dbc5b6e0f 100644 --- a/ivtest/ivltests/br_gh497a.v +++ b/ivtest/ivltests/br_gh497a.v @@ -25,6 +25,7 @@ assign array6[2:1] = 8'h32; reg failed = 0; initial begin + #0 $display("%h", array1); if (array1 !== 16'h4321) failed = 1; $display("%h", array2); diff --git a/ivtest/ivltests/part_sel_port.v b/ivtest/ivltests/part_sel_port.v index 64e502b10..91fce2697 100644 --- a/ivtest/ivltests/part_sel_port.v +++ b/ivtest/ivltests/part_sel_port.v @@ -33,6 +33,7 @@ mod_test dut(test_string[1:8]); mod_test2 dut2(test_string[9:16]); initial begin + #0 if(test_string !== "testTESTabcdefgh") begin $display("FAILED"); $finish(); From 2d611c43477a5c8f7df90c4d48ab7532d800ce3e Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 7 Jan 2024 10:17:10 -0800 Subject: [PATCH 2/5] ivtest: pr1002: Avoid race condition The pr1002 test has a always block with the `dataout` in its sensitivity list. It compares `dataout` to `expected_dataout`. Both `dataout` and `expected_dataout` depend on `datain` and are updated in the same cycle. This means there is no guarantee in which order they are updated and the always block might get scheduled before `expected_dataout` has been updated. This can lead to a test failure. To avoid this slightly change the test to use a task to perform the comparison and add an explicit delay before the task is executed so that all updates have a chance to be fully resolved Signed-off-by: Lars-Peter Clausen --- ivtest/ivltests/pr1002.v | 47 ++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/ivtest/ivltests/pr1002.v b/ivtest/ivltests/pr1002.v index ce11d5a97..ff8ff7b72 100644 --- a/ivtest/ivltests/pr1002.v +++ b/ivtest/ivltests/pr1002.v @@ -8,29 +8,11 @@ assign dataout = datain >>> 2; reg test_failed; -initial - begin - test_failed = 0; - #1 datain = 14'h0FFF; - #1 datain = 14'h0000; - #1 datain = 14'h1FFF; - #1 datain = 14'h1000; - #1 datain = 14'h2FFF; - #1 datain = 14'h2000; - #1 datain = 14'h3FFF; - #1 datain = 14'h3000; - #2; - if (test_failed) - $display("TEST FAILED :-("); - else - $display("TEST PASSED :-)"); - end - wire signed [15:0] expected_dataout; assign expected_dataout = ($signed({datain[13:2], 2'b0}) / 4) ; -always @(dataout) +task check_data; if (expected_dataout != dataout) begin $display("datain = %d dataout = %h expected = %h ... CHECK FAILED", datain, dataout, expected_dataout); @@ -38,5 +20,32 @@ always @(dataout) end else $display("datain = %d dataout = %d expected = %d ... CHECK PASSED", datain, dataout, expected_dataout); +endtask + +initial + begin + test_failed = 0; + #1 datain = 14'h0FFF; + #0 check_data; // #0 delay to allow the wire to resolve + #1 datain = 14'h0000; + #0 check_data; + #1 datain = 14'h1FFF; + #0 check_data; + #1 datain = 14'h1000; + #0 check_data; + #1 datain = 14'h2FFF; + #0 check_data; + #1 datain = 14'h2000; + #0 check_data; + #1 datain = 14'h3FFF; + #0 check_data; + #1 datain = 14'h3000; + #0 check_data; + #2; + if (test_failed) + $display("TEST FAILED :-("); + else + $display("TEST PASSED :-)"); + end endmodule // top From bb1d3c9ac68c136931e435aceca2d85ae58bc58a Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 6 Jan 2024 17:44:29 -0800 Subject: [PATCH 3/5] vvp: Implement concat `recv_vec()` using `recv_vec_pv()` The implementation for partial receive for concat only differs from the regular receive in that it takes an additional offset. The regular receive can easily be implemented by calling the partial receive with an offset of 0. This allows to remove some duplicated code. The overhead of this is negligible, but to help the compiler to optimize this a bit better mark the `recv_vec()` and `recv_vec_pv()` functions as final. Signed-off-by: Lars-Peter Clausen --- vvp/concat.cc | 42 ++++-------------------------------------- vvp/vvp_net.h | 12 ++++++------ 2 files changed, 10 insertions(+), 44 deletions(-) diff --git a/vvp/concat.cc b/vvp/concat.cc index 1f19dd139..951453801 100644 --- a/vvp/concat.cc +++ b/vvp/concat.cc @@ -43,26 +43,9 @@ vvp_fun_concat::~vvp_fun_concat() } void vvp_fun_concat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, - vvp_context_t) + vvp_context_t context) { - unsigned pdx = port.port(); - - if (bit.size() != wid_[pdx]) { - cerr << "internal error: port " << pdx - << " expects wid=" << wid_[pdx] - << ", got wid=" << bit.size() << endl; - assert(0); - } - - unsigned off = 0; - for (unsigned idx = 0 ; idx < pdx ; idx += 1) - off += wid_[idx]; - - for (unsigned idx = 0 ; idx < wid_[pdx] ; idx += 1) { - val_.set_bit(off+idx, bit.value(idx)); - } - - port.ptr()->send_vec4(val_, 0); + recv_vec4_pv(port, bit, 0, bit.size(), context); } void vvp_fun_concat::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, @@ -131,7 +114,7 @@ void vvp_fun_concat8::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, vvp_context_t) { vvp_vector8_t bit8 (bit, 6, 6); - recv_vec8(port, bit8); + recv_vec8_pv(port, bit8, 0, bit8.size()); } void vvp_fun_concat8::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, @@ -143,24 +126,7 @@ void vvp_fun_concat8::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, void vvp_fun_concat8::recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit) { - unsigned pdx = port.port(); - - if (bit.size() != wid_[pdx]) { - cerr << "internal error: port " << pdx - << " expects wid=" << wid_[pdx] - << ", got wid=" << bit.size() << endl; - assert(0); - } - - unsigned off = 0; - for (unsigned idx = 0 ; idx < pdx ; idx += 1) - off += wid_[idx]; - - for (unsigned idx = 0 ; idx < wid_[pdx] ; idx += 1) { - val_.set_bit(off+idx, bit.value(idx)); - } - - port.ptr()->send_vec8(val_); + recv_vec8_pv(port, bit, 0, bit.size()); } void vvp_fun_concat8::recv_vec8_pv(vvp_net_ptr_t port, const vvp_vector8_t&bit, diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h index fdf9f28cc..925bc3deb 100644 --- a/vvp/vvp_net.h +++ b/vvp/vvp_net.h @@ -1385,10 +1385,10 @@ class vvp_fun_concat : public vvp_net_fun_t { ~vvp_fun_concat(); void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, - vvp_context_t context); + vvp_context_t context) final; void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, - unsigned base, unsigned vwid, vvp_context_t); + unsigned base, unsigned vwid, vvp_context_t) final; private: unsigned wid_[4]; vvp_vector4_t val_; @@ -1402,13 +1402,13 @@ class vvp_fun_concat8 : public vvp_net_fun_t { ~vvp_fun_concat8(); void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, - vvp_context_t context); - void recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit); + vvp_context_t context) final; + void recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit) final; void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, - unsigned base, unsigned vwid, vvp_context_t); + unsigned base, unsigned vwid, vvp_context_t) final; void recv_vec8_pv(vvp_net_ptr_t p, const vvp_vector8_t&bit, - unsigned base, unsigned vwid); + unsigned base, unsigned vwid) final; private: unsigned wid_[4]; From 4637b399538e80f5d3d21f323e83d7f65c46bc05 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 28 May 2022 13:56:37 +0200 Subject: [PATCH 4/5] vvp: concat: Avoid using individual bit access The concat functors use individual bit access to initialize and copy values. For initialization pass the initial bit value to the constructor and for coping use set_vec() instead. Both can be a fair bit faster since data is copied word by word rather than bit by bit. Signed-off-by: Lars-Peter Clausen --- vvp/concat.cc | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/vvp/concat.cc b/vvp/concat.cc index 951453801..6379ccb9b 100644 --- a/vvp/concat.cc +++ b/vvp/concat.cc @@ -27,15 +27,12 @@ using namespace std; vvp_fun_concat::vvp_fun_concat(unsigned w0, unsigned w1, unsigned w2, unsigned w3) -: val_(w0+w1+w2+w3) +: val_(w0+w1+w2+w3, BIT4_Z) { wid_[0] = w0; wid_[1] = w1; wid_[2] = w2; wid_[3] = w3; - - for (unsigned idx = 0 ; idx < val_.size() ; idx += 1) - val_.set_bit(idx, BIT4_Z); } vvp_fun_concat::~vvp_fun_concat() @@ -52,7 +49,6 @@ void vvp_fun_concat::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, unsigned base, unsigned vwid, vvp_context_t) { unsigned pdx = port.port(); - unsigned wid = bit.size(); if (vwid != wid_[pdx]) { cerr << "internal error: port " << pdx @@ -61,17 +57,12 @@ void vvp_fun_concat::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, assert(0); } - unsigned off = 0; + unsigned off = base; for (unsigned idx = 0 ; idx < pdx ; idx += 1) off += wid_[idx]; - unsigned limit = off + wid_[pdx]; - - off += base; - for (unsigned idx = 0 ; idx < wid ; idx += 1) { - if (off+idx >= limit) break; - val_.set_bit(off+idx, bit.value(idx)); - } + if (!val_.set_vec(off, bit)) + return; port.ptr()->send_vec4(val_, 0); } @@ -101,9 +92,6 @@ vvp_fun_concat8::vvp_fun_concat8(unsigned w0, unsigned w1, wid_[1] = w1; wid_[2] = w2; wid_[3] = w3; - - for (unsigned idx = 0 ; idx < val_.size() ; idx += 1) - val_.set_bit(idx, vvp_scalar_t(BIT4_Z, 0, 0)); } vvp_fun_concat8::~vvp_fun_concat8() @@ -133,7 +121,6 @@ void vvp_fun_concat8::recv_vec8_pv(vvp_net_ptr_t port, const vvp_vector8_t&bit, unsigned base, unsigned vwid) { unsigned pdx = port.port(); - unsigned wid = bit.size(); if (vwid != wid_[pdx]) { cerr << "internal error: port " << pdx @@ -142,17 +129,11 @@ void vvp_fun_concat8::recv_vec8_pv(vvp_net_ptr_t port, const vvp_vector8_t&bit, assert(0); } - unsigned off = 0; + unsigned off = base; for (unsigned idx = 0 ; idx < pdx ; idx += 1) off += wid_[idx]; - unsigned limit = off + wid_[pdx]; - - off += base; - for (unsigned idx = 0 ; idx < wid ; idx += 1) { - if (off+idx >= limit) break; - val_.set_bit(off+idx, bit.value(idx)); - } + val_.set_vec(off, bit); port.ptr()->send_vec8(val_); } @@ -192,9 +173,7 @@ void vvp_fun_repeat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, for (unsigned rdx = 0 ; rdx < rep_ ; rdx += 1) { unsigned off = rdx * bit.size(); - for (unsigned idx = 0 ; idx < bit.size() ; idx += 1) - val.set_bit(off+idx, bit.value(idx)); - + val.set_vec(off, bit); } port.ptr()->send_vec4(val, 0); From 5b509e69f66fd749cd1776afe8e21d74f275c3ab Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 23 Dec 2023 15:37:27 -0800 Subject: [PATCH 5/5] vvp: concat: Defer update to end of the current simulation cycle A concat typically has multiple inputs. Whenever one of the input values change the output value of the concat is updated and propagated to its downstream consumers. When multiple inputs change within the same cycle each input will cause a update propagation. Depending of the overall structure of the design this can cause a significant performance penalty. E.g. the following synthetic structure has a exponential runtime increase based on the value of N. ``` reg [N-1:0] x; generate for (genvar i = 0; i < N - 1; i++) assign x[i+1] = ^{x[i],x[i]}; endgenerate ``` To improve this defer the value propagation of the concat to the end of the current cycle, this allows multiple input updates to be included in a single output update. For the example in report #1052 this reduced the runtime from 2 minutes to essentially 0. Signed-off-by: Lars-Peter Clausen --- vvp/concat.cc | 28 +++++++++++-- vvp/concat.h | 102 ++++++++++++++++++++++++++++++++++++++++++++++++ vvp/vpi_priv.cc | 1 + vvp/vvp_net.h | 73 ---------------------------------- 4 files changed, 128 insertions(+), 76 deletions(-) create mode 100644 vvp/concat.h diff --git a/vvp/concat.cc b/vvp/concat.cc index 6379ccb9b..4d83b80f2 100644 --- a/vvp/concat.cc +++ b/vvp/concat.cc @@ -18,7 +18,7 @@ */ # include "compile.h" -# include "vvp_net.h" +# include "concat.h" # include # include # include @@ -64,7 +64,18 @@ void vvp_fun_concat::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, if (!val_.set_vec(off, bit)) return; - port.ptr()->send_vec4(val_, 0); + if (net_) + return; + + net_ = port.ptr(); + schedule_functor(this); +} + +void vvp_fun_concat::run_run() +{ + vvp_net_t *ptr = net_; + net_ = nullptr; + ptr->send_vec4(val_, 0); } void compile_concat(char*label, unsigned w0, unsigned w1, @@ -135,7 +146,18 @@ void vvp_fun_concat8::recv_vec8_pv(vvp_net_ptr_t port, const vvp_vector8_t&bit, val_.set_vec(off, bit); - port.ptr()->send_vec8(val_); + if (net_) + return; + + net_ = port.ptr(); + schedule_functor(this); +} + +void vvp_fun_concat8::run_run() +{ + vvp_net_t *ptr = net_; + net_ = nullptr; + ptr->send_vec8(val_); } void compile_concat8(char*label, unsigned w0, unsigned w1, diff --git a/vvp/concat.h b/vvp/concat.h new file mode 100644 index 000000000..1edc9dc00 --- /dev/null +++ b/vvp/concat.h @@ -0,0 +1,102 @@ +#ifndef IVL_concat_H +#define IVL_concat_H +/* + * Copyright (c) 2004-2024 Stephen Williams (steve@icarus.com) + * + * This source code is free software; you can redistribute it + * and/or modify it in source code form under the terms of the GNU + * General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +# include "vvp_net.h" + +/* vvp_fun_concat + * This node function creates vectors (vvp_vector4_t) from the + * concatenation of the inputs. The inputs (4) may be vector or + * vector8 objects, but they are reduced to vector4 values and + * strength information lost. + * + * The expected widths of the input vectors must be given up front so + * that the positions in the output vector (and also the size of the + * output vector) can be worked out. The input vectors must match the + * expected width. + */ +class vvp_fun_concat : public vvp_net_fun_t, protected vvp_gen_event_s { + + public: + vvp_fun_concat(unsigned w0, unsigned w1, + unsigned w2, unsigned w3); + ~vvp_fun_concat(); + + void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, + vvp_context_t context) final; + + void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, + unsigned base, unsigned vwid, vvp_context_t) final; + private: + void run_run() final; + + unsigned wid_[4]; + vvp_vector4_t val_; + vvp_net_t *net_ = nullptr; +}; + +class vvp_fun_concat8 : public vvp_net_fun_t, protected vvp_gen_event_s { + + public: + vvp_fun_concat8(unsigned w0, unsigned w1, + unsigned w2, unsigned w3); + ~vvp_fun_concat8(); + + void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, + vvp_context_t context) final; + void recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit) final; + + void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, + unsigned base, unsigned vwid, vvp_context_t) final; + void recv_vec8_pv(vvp_net_ptr_t p, const vvp_vector8_t&bit, + unsigned base, unsigned vwid) final; + + private: + void run_run() final; + + unsigned wid_[4]; + vvp_vector8_t val_; + vvp_net_t *net_ = nullptr; +}; + +/* vvp_fun_repeat + * This node function create vectors by repeating the input. The width + * is the width of the output vector, and the repeat is the number of + * times to repeat the input. The width of the input vector is + * implicit from these values. + */ +class vvp_fun_repeat : public vvp_net_fun_t { + + public: + vvp_fun_repeat(unsigned width, unsigned repeat); + ~vvp_fun_repeat(); + + void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, + vvp_context_t context); + void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, + unsigned int base, unsigned int vwid, + vvp_context_t context) final; + + private: + unsigned wid_; + unsigned rep_; +}; + +#endif diff --git a/vvp/vpi_priv.cc b/vvp/vpi_priv.cc index eddc0d8fe..26d59ac55 100644 --- a/vvp/vpi_priv.cc +++ b/vvp/vpi_priv.cc @@ -23,6 +23,7 @@ # include "schedule.h" # include "logic.h" # include "part.h" +# include "concat.h" #ifdef CHECK_WITH_VALGRIND # include "vvp_cleanup.h" #endif diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h index 925bc3deb..511154a7b 100644 --- a/vvp/vvp_net.h +++ b/vvp/vvp_net.h @@ -52,7 +52,6 @@ class vvp_net_fun_t; class vvp_net_fil_t; /* Core net function types. */ -class vvp_fun_concat; class vvp_fun_drive; class vvp_fun_part; @@ -1366,55 +1365,6 @@ class vvp_net_fil_t : public vvp_vpi_callback { /* **** Some core net functions **** */ -/* vvp_fun_concat - * This node function creates vectors (vvp_vector4_t) from the - * concatenation of the inputs. The inputs (4) may be vector or - * vector8 objects, but they are reduced to vector4 values and - * strength information lost. - * - * The expected widths of the input vectors must be given up front so - * that the positions in the output vector (and also the size of the - * output vector) can be worked out. The input vectors must match the - * expected width. - */ -class vvp_fun_concat : public vvp_net_fun_t { - - public: - vvp_fun_concat(unsigned w0, unsigned w1, - unsigned w2, unsigned w3); - ~vvp_fun_concat(); - - void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, - vvp_context_t context) final; - - void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, - unsigned base, unsigned vwid, vvp_context_t) final; - private: - unsigned wid_[4]; - vvp_vector4_t val_; -}; - -class vvp_fun_concat8 : public vvp_net_fun_t { - - public: - vvp_fun_concat8(unsigned w0, unsigned w1, - unsigned w2, unsigned w3); - ~vvp_fun_concat8(); - - void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, - vvp_context_t context) final; - void recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit) final; - - void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, - unsigned base, unsigned vwid, vvp_context_t) final; - void recv_vec8_pv(vvp_net_ptr_t p, const vvp_vector8_t&bit, - unsigned base, unsigned vwid) final; - - private: - unsigned wid_[4]; - vvp_vector8_t val_; -}; - /* * The vvp_fun_force class objects are net functors that use their input * to force the associated filter. They do not actually have an @@ -1436,29 +1386,6 @@ class vvp_fun_force : public vvp_net_fun_t { void recv_real(vvp_net_ptr_t port, double bit, vvp_context_t); }; -/* vvp_fun_repeat - * This node function create vectors by repeating the input. The width - * is the width of the output vector, and the repeat is the number of - * times to repeat the input. The width of the input vector is - * implicit from these values. - */ -class vvp_fun_repeat : public vvp_net_fun_t { - - public: - vvp_fun_repeat(unsigned width, unsigned repeat); - ~vvp_fun_repeat(); - - void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit, - vvp_context_t context); - void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit, - unsigned int base, unsigned int vwid, - vvp_context_t context) final; - - private: - unsigned wid_; - unsigned rep_; -}; - /* vvp_fun_drive * This node function takes an input vvp_vector4_t as input, and * repeats that value as a vvp_vector8_t with all the bits given the