Merge pull request #1066 from larsclausen/vvp-concat-performance

vvp: Improve concat performance
This commit is contained in:
Lars-Peter Clausen 2024-01-20 10:49:20 -08:00 committed by GitHub
commit 6d1a9181bb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 167 additions and 159 deletions

View File

@ -25,6 +25,7 @@ assign array6[2:1] = 8'h32;
reg failed = 0;
initial begin
#0
$display("%h", array1);
if (array1 !== 16'h4321) failed = 1;
$display("%h", array2);

View File

@ -33,6 +33,7 @@ mod_test dut(test_string[1:8]);
mod_test2 dut2(test_string[9:16]);
initial begin
#0
if(test_string !== "testTESTabcdefgh") begin
$display("FAILED");
$finish();

View File

@ -8,29 +8,11 @@ assign dataout = datain >>> 2;
reg test_failed;
initial
begin
test_failed = 0;
#1 datain = 14'h0FFF;
#1 datain = 14'h0000;
#1 datain = 14'h1FFF;
#1 datain = 14'h1000;
#1 datain = 14'h2FFF;
#1 datain = 14'h2000;
#1 datain = 14'h3FFF;
#1 datain = 14'h3000;
#2;
if (test_failed)
$display("TEST FAILED :-(");
else
$display("TEST PASSED :-)");
end
wire signed [15:0] expected_dataout;
assign expected_dataout = ($signed({datain[13:2], 2'b0}) / 4) ;
always @(dataout)
task check_data;
if (expected_dataout != dataout)
begin
$display("datain = %d dataout = %h expected = %h ... CHECK FAILED", datain, dataout, expected_dataout);
@ -38,5 +20,32 @@ always @(dataout)
end
else
$display("datain = %d dataout = %d expected = %d ... CHECK PASSED", datain, dataout, expected_dataout);
endtask
// Stimulus and final report. datain is stepped through eight values, one
// time unit apart, and check_data is called after every change to compare
// dataout against expected_dataout.
initial
begin
test_failed = 0;
#1 datain = 14'h0FFF;
#0 check_data; // #0 delay to allow the wire to resolve
#1 datain = 14'h0000;
#0 check_data;
#1 datain = 14'h1FFF;
#0 check_data;
#1 datain = 14'h1000;
#0 check_data;
#1 datain = 14'h2FFF;
#0 check_data;
#1 datain = 14'h2000;
#0 check_data;
#1 datain = 14'h3FFF;
#0 check_data;
#1 datain = 14'h3000;
#0 check_data;
// Wait two more time units after the last check, then print the verdict.
#2;
if (test_failed)
$display("TEST FAILED :-(");
else
$display("TEST PASSED :-)");
end
endmodule // top

View File

@ -18,7 +18,7 @@
*/
# include "compile.h"
# include "vvp_net.h"
# include "concat.h"
# include <cstdlib>
# include <iostream>
# include <cassert>
@ -27,15 +27,12 @@ using namespace std;
vvp_fun_concat::vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3)
: val_(w0+w1+w2+w3)
: val_(w0+w1+w2+w3, BIT4_Z)
{
wid_[0] = w0;
wid_[1] = w1;
wid_[2] = w2;
wid_[3] = w3;
for (unsigned idx = 0 ; idx < val_.size() ; idx += 1)
val_.set_bit(idx, BIT4_Z);
}
vvp_fun_concat::~vvp_fun_concat()
@ -43,33 +40,15 @@ vvp_fun_concat::~vvp_fun_concat()
}
void vvp_fun_concat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
vvp_context_t)
vvp_context_t context)
{
unsigned pdx = port.port();
if (bit.size() != wid_[pdx]) {
cerr << "internal error: port " << pdx
<< " expects wid=" << wid_[pdx]
<< ", got wid=" << bit.size() << endl;
assert(0);
}
unsigned off = 0;
for (unsigned idx = 0 ; idx < pdx ; idx += 1)
off += wid_[idx];
for (unsigned idx = 0 ; idx < wid_[pdx] ; idx += 1) {
val_.set_bit(off+idx, bit.value(idx));
}
port.ptr()->send_vec4(val_, 0);
recv_vec4_pv(port, bit, 0, bit.size(), context);
}
void vvp_fun_concat::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned base, unsigned vwid, vvp_context_t)
{
unsigned pdx = port.port();
unsigned wid = bit.size();
if (vwid != wid_[pdx]) {
cerr << "internal error: port " << pdx
@ -78,19 +57,25 @@ void vvp_fun_concat::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
assert(0);
}
unsigned off = 0;
unsigned off = base;
for (unsigned idx = 0 ; idx < pdx ; idx += 1)
off += wid_[idx];
unsigned limit = off + wid_[pdx];
if (!val_.set_vec(off, bit))
return;
off += base;
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
if (off+idx >= limit) break;
val_.set_bit(off+idx, bit.value(idx));
}
if (net_)
return;
port.ptr()->send_vec4(val_, 0);
net_ = port.ptr();
schedule_functor(this);
}
/*
 * Send the current concatenation value (val_) to the net recorded in
 * net_. NOTE(review): presumably invoked by the scheduler as a result of
 * the schedule_functor(this) call in recv_vec4_pv -- confirm.
 */
void vvp_fun_concat::run_run()
{
vvp_net_t *ptr = net_;
// Clear net_ before sending: recv_vec4_pv tests net_ to decide whether
// this functor still needs to be scheduled.
net_ = nullptr;
ptr->send_vec4(val_, 0);
}
void compile_concat(char*label, unsigned w0, unsigned w1,
@ -118,9 +103,6 @@ vvp_fun_concat8::vvp_fun_concat8(unsigned w0, unsigned w1,
wid_[1] = w1;
wid_[2] = w2;
wid_[3] = w3;
for (unsigned idx = 0 ; idx < val_.size() ; idx += 1)
val_.set_bit(idx, vvp_scalar_t(BIT4_Z, 0, 0));
}
vvp_fun_concat8::~vvp_fun_concat8()
@ -131,7 +113,7 @@ void vvp_fun_concat8::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
vvp_context_t)
{
vvp_vector8_t bit8 (bit, 6, 6);
recv_vec8(port, bit8);
recv_vec8_pv(port, bit8, 0, bit8.size());
}
void vvp_fun_concat8::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
@ -143,31 +125,13 @@ void vvp_fun_concat8::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
void vvp_fun_concat8::recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit)
{
unsigned pdx = port.port();
if (bit.size() != wid_[pdx]) {
cerr << "internal error: port " << pdx
<< " expects wid=" << wid_[pdx]
<< ", got wid=" << bit.size() << endl;
assert(0);
}
unsigned off = 0;
for (unsigned idx = 0 ; idx < pdx ; idx += 1)
off += wid_[idx];
for (unsigned idx = 0 ; idx < wid_[pdx] ; idx += 1) {
val_.set_bit(off+idx, bit.value(idx));
}
port.ptr()->send_vec8(val_);
recv_vec8_pv(port, bit, 0, bit.size());
}
void vvp_fun_concat8::recv_vec8_pv(vvp_net_ptr_t port, const vvp_vector8_t&bit,
unsigned base, unsigned vwid)
{
unsigned pdx = port.port();
unsigned wid = bit.size();
if (vwid != wid_[pdx]) {
cerr << "internal error: port " << pdx
@ -176,19 +140,24 @@ void vvp_fun_concat8::recv_vec8_pv(vvp_net_ptr_t port, const vvp_vector8_t&bit,
assert(0);
}
unsigned off = 0;
unsigned off = base;
for (unsigned idx = 0 ; idx < pdx ; idx += 1)
off += wid_[idx];
unsigned limit = off + wid_[pdx];
val_.set_vec(off, bit);
off += base;
for (unsigned idx = 0 ; idx < wid ; idx += 1) {
if (off+idx >= limit) break;
val_.set_bit(off+idx, bit.value(idx));
}
if (net_)
return;
port.ptr()->send_vec8(val_);
net_ = port.ptr();
schedule_functor(this);
}
/*
 * Send the current concatenation value (val_) as a vector8 to the net
 * recorded in net_. NOTE(review): presumably invoked by the scheduler as
 * a result of the schedule_functor(this) call in recv_vec8_pv -- confirm.
 */
void vvp_fun_concat8::run_run()
{
vvp_net_t *ptr = net_;
// Clear net_ before sending: recv_vec8_pv tests net_ to decide whether
// this functor still needs to be scheduled.
net_ = nullptr;
ptr->send_vec8(val_);
}
void compile_concat8(char*label, unsigned w0, unsigned w1,
@ -226,9 +195,7 @@ void vvp_fun_repeat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
for (unsigned rdx = 0 ; rdx < rep_ ; rdx += 1) {
unsigned off = rdx * bit.size();
for (unsigned idx = 0 ; idx < bit.size() ; idx += 1)
val.set_bit(off+idx, bit.value(idx));
val.set_vec(off, bit);
}
port.ptr()->send_vec4(val, 0);

102
vvp/concat.h Normal file
View File

@ -0,0 +1,102 @@
#ifndef IVL_concat_H
#define IVL_concat_H
/*
* Copyright (c) 2004-2024 Stephen Williams (steve@icarus.com)
*
* This source code is free software; you can redistribute it
* and/or modify it in source code form under the terms of the GNU
* General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
# include "vvp_net.h"
/* vvp_fun_concat
* This node function creates vectors (vvp_vector4_t) from the
* concatenation of the inputs. The inputs (4) may be vector or
* vector8 objects, but they are reduced to vector4 values and
* strength information is lost.
*
* The expected widths of the input vectors must be given up front so
* that the positions in the output vector (and also the size of the
* output vector) can be worked out. The input vectors must match the
* expected width.
*/
class vvp_fun_concat : public vvp_net_fun_t, protected vvp_gen_event_s {
public:
// w0..w3 are the expected widths of the vectors arriving on input
// ports 0..3.
vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat();
// Receive a complete vector for the input port identified by "port".
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
vvp_context_t context) final;
// Receive a part of the vector for an input port. "base" is the offset
// of "bit" within that port's vector and "vwid" is the full width of
// the port's vector.
void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned base, unsigned vwid, vvp_context_t) final;
private:
// Event callback that forwards val_ to the output net.
void run_run() final;
// Expected width of each of the four input ports.
unsigned wid_[4];
// Current value of the concatenated output vector.
vvp_vector4_t val_;
// NOTE(review): appears to be the net with an output update pending;
// nullptr when nothing is scheduled -- confirm against the
// implementation.
vvp_net_t *net_ = nullptr;
};
/*
 * Variant of the concatenation functor that keeps its output value as a
 * vvp_vector8_t instead of a vvp_vector4_t.
 */
class vvp_fun_concat8 : public vvp_net_fun_t, protected vvp_gen_event_s {
public:
// w0..w3 are the expected widths of the vectors arriving on input
// ports 0..3.
vvp_fun_concat8(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat8();
// Receive a complete vector4 / vector8 value for one input port.
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
vvp_context_t context) final;
void recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit) final;
// Receive a part of the vector for an input port. "base" is the offset
// of "bit" within that port's vector and "vwid" is the full width of
// the port's vector.
void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned base, unsigned vwid, vvp_context_t) final;
void recv_vec8_pv(vvp_net_ptr_t p, const vvp_vector8_t&bit,
unsigned base, unsigned vwid) final;
private:
// Event callback that forwards val_ to the output net.
void run_run() final;
// Expected width of each of the four input ports.
unsigned wid_[4];
// Current value of the concatenated output vector.
vvp_vector8_t val_;
// NOTE(review): appears to be the net with an output update pending;
// nullptr when nothing is scheduled -- confirm against the
// implementation.
vvp_net_t *net_ = nullptr;
};
/* vvp_fun_repeat
* This node function creates vectors by repeating the input. The width
* is the width of the output vector, and the repeat is the number of
* times to repeat the input. The width of the input vector is
* implicit from these values.
*/
class vvp_fun_repeat : public vvp_net_fun_t {
public:
vvp_fun_repeat(unsigned width, unsigned repeat);
~vvp_fun_repeat();
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
vvp_context_t context);
void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned int base, unsigned int vwid,
vvp_context_t context) final;
private:
unsigned wid_;
unsigned rep_;
};
#endif

View File

@ -23,6 +23,7 @@
# include "schedule.h"
# include "logic.h"
# include "part.h"
# include "concat.h"
#ifdef CHECK_WITH_VALGRIND
# include "vvp_cleanup.h"
#endif

View File

@ -52,7 +52,6 @@ class vvp_net_fun_t;
class vvp_net_fil_t;
/* Core net function types. */
class vvp_fun_concat;
class vvp_fun_drive;
class vvp_fun_part;
@ -1366,55 +1365,6 @@ class vvp_net_fil_t : public vvp_vpi_callback {
/* **** Some core net functions **** */
/* vvp_fun_concat
* This node function creates vectors (vvp_vector4_t) from the
* concatenation of the inputs. The inputs (4) may be vector or
* vector8 objects, but they are reduced to vector4 values and
* strength information lost.
*
* The expected widths of the input vectors must be given up front so
* that the positions in the output vector (and also the size of the
* output vector) can be worked out. The input vectors must match the
* expected width.
*/
class vvp_fun_concat : public vvp_net_fun_t {
public:
vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat();
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
vvp_context_t context);
void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned base, unsigned vwid, vvp_context_t);
private:
unsigned wid_[4];
vvp_vector4_t val_;
};
class vvp_fun_concat8 : public vvp_net_fun_t {
public:
vvp_fun_concat8(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat8();
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
vvp_context_t context);
void recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit);
void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned base, unsigned vwid, vvp_context_t);
void recv_vec8_pv(vvp_net_ptr_t p, const vvp_vector8_t&bit,
unsigned base, unsigned vwid);
private:
unsigned wid_[4];
vvp_vector8_t val_;
};
/*
* The vvp_fun_force class objects are net functors that use their input
* to force the associated filter. They do not actually have an
@ -1436,29 +1386,6 @@ class vvp_fun_force : public vvp_net_fun_t {
void recv_real(vvp_net_ptr_t port, double bit, vvp_context_t);
};
/* vvp_fun_repeat
* This node function create vectors by repeating the input. The width
* is the width of the output vector, and the repeat is the number of
* times to repeat the input. The width of the input vector is
* implicit from these values.
*/
class vvp_fun_repeat : public vvp_net_fun_t {
public:
vvp_fun_repeat(unsigned width, unsigned repeat);
~vvp_fun_repeat();
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
vvp_context_t context);
void recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned int base, unsigned int vwid,
vvp_context_t context) final;
private:
unsigned wid_;
unsigned rep_;
};
/* vvp_fun_drive
* This node function takes an input vvp_vector4_t as input, and
* repeats that value as a vvp_vector8_t with all the bits given the