Rework scheduling of concat, part, buf/not and resolv for efficiency.
The concat and resolv functors are best evaluated lazily, because each evaluation is costly and there is a high probability that an evaluation will be invalidated when new input comes in. Also optimize the recv_vec4_pv method of the resolver, which is commonly used, and adjust the order of handling of vvp_fun_part to work more efficiently.
This commit is contained in:
parent
2f4e5bf5b6
commit
2e95a740da
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
# include "compile.h"
|
||||
# include "vvp_net.h"
|
||||
# include "schedule.h"
|
||||
# include <stdlib.h>
|
||||
# include <iostream>
|
||||
#ifdef HAVE_MALLOC_H
|
||||
|
|
@ -27,18 +28,41 @@
|
|||
#endif
|
||||
# include <assert.h>
|
||||
|
||||
/* vvp_fun_concat
|
||||
* This node function creates vectors (vvp_vector4_t) from the
|
||||
* concatenation of the inputs. The inputs (4) may be vector or
|
||||
* vector8 objects, but they are reduced to vector4 values and
|
||||
* strength information lost.
|
||||
*
|
||||
* The expected widths of the input vectors must be given up front so
|
||||
* that the positions in the output vector (and also the size of the
|
||||
* output vector) can be worked out. The input vectors must match the
|
||||
* expected width.
|
||||
*/
|
||||
class vvp_fun_concat : public vvp_net_fun_t, private vvp_gen_event_s {
|
||||
|
||||
public:
|
||||
vvp_fun_concat(unsigned w0, unsigned w1,
|
||||
unsigned w2, unsigned w3);
|
||||
~vvp_fun_concat();
|
||||
|
||||
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
|
||||
|
||||
private:
|
||||
void run_run();
|
||||
vvp_net_t*net_;
|
||||
vvp_vector4_t input_[4];
|
||||
};
|
||||
|
||||
|
||||
vvp_fun_concat::vvp_fun_concat(unsigned w0, unsigned w1,
|
||||
unsigned w2, unsigned w3)
|
||||
: val_(w0+w1+w2+w3)
|
||||
: net_(0)
|
||||
{
|
||||
wid_[0] = w0;
|
||||
wid_[1] = w1;
|
||||
wid_[2] = w2;
|
||||
wid_[3] = w3;
|
||||
|
||||
for (unsigned idx = 0 ; idx < val_.size() ; idx += 1)
|
||||
val_.set_bit(idx, BIT4_X);
|
||||
input_[0] = vvp_vector4_t(w0);
|
||||
input_[1] = vvp_vector4_t(w1);
|
||||
input_[2] = vvp_vector4_t(w2);
|
||||
input_[3] = vvp_vector4_t(w3);
|
||||
}
|
||||
|
||||
vvp_fun_concat::~vvp_fun_concat()
|
||||
|
|
@ -49,22 +73,38 @@ void vvp_fun_concat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
|
|||
{
|
||||
unsigned pdx = port.port();
|
||||
|
||||
if (bit.size() != wid_[pdx]) {
|
||||
if (bit.size() != input_[pdx].size()) {
|
||||
cerr << "internal error: port " << pdx
|
||||
<< " expects wid=" << wid_[pdx]
|
||||
<< " expects wid=" << input_[pdx].size()
|
||||
<< ", got wid=" << bit.size() << endl;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
unsigned off = 0;
|
||||
for (unsigned idx = 0 ; idx < pdx ; idx += 1)
|
||||
off += wid_[idx];
|
||||
if (input_[pdx] .eeq(bit))
|
||||
return;
|
||||
|
||||
for (unsigned idx = 0 ; idx < wid_[pdx] ; idx += 1) {
|
||||
val_.set_bit(off+idx, bit.value(idx));
|
||||
input_[pdx] = bit;
|
||||
if (net_ == 0) {
|
||||
net_ = port.ptr();
|
||||
schedule_generic(this, 0, false);
|
||||
}
|
||||
}
|
||||
|
||||
void vvp_fun_concat::run_run()
|
||||
{
|
||||
vvp_net_t*ptr = net_;
|
||||
net_ = 0;
|
||||
|
||||
unsigned off = 0;
|
||||
unsigned owid = input_[0].size() + input_[1].size() + input_[2].size() + input_[3].size();
|
||||
|
||||
vvp_vector4_t res (owid);
|
||||
for (unsigned idx = 0 ; idx < 4 && (off<owid) ; idx += 1) {
|
||||
res.set_vec(off, input_[idx]);
|
||||
off += input_[idx].size();
|
||||
}
|
||||
|
||||
vvp_send_vec4(port.ptr()->out, val_);
|
||||
vvp_send_vec4(ptr->out, res);
|
||||
}
|
||||
|
||||
void compile_concat(char*label, unsigned w0, unsigned w1,
|
||||
|
|
|
|||
42
vvp/logic.cc
42
vvp/logic.cc
|
|
@ -140,7 +140,6 @@ void vvp_fun_eeq::run_run()
|
|||
|
||||
vvp_fun_buf::vvp_fun_buf()
|
||||
{
|
||||
net_ = 0;
|
||||
count_functors_logic += 1;
|
||||
}
|
||||
|
||||
|
|
@ -157,25 +156,12 @@ void vvp_fun_buf::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
|
|||
if (ptr.port() != 0)
|
||||
return;
|
||||
|
||||
if (input_ .eeq( bit ))
|
||||
if (input_ .eq_xz( bit ))
|
||||
return;
|
||||
|
||||
input_ = bit;
|
||||
|
||||
if (net_ == 0) {
|
||||
net_ = ptr.ptr();
|
||||
schedule_generic(this, 0, false);
|
||||
}
|
||||
}
|
||||
|
||||
void vvp_fun_buf::run_run()
|
||||
{
|
||||
vvp_net_t*ptr = net_;
|
||||
net_ = 0;
|
||||
|
||||
vvp_vector4_t tmp (input_);
|
||||
tmp.change_z2x();
|
||||
vvp_send_vec4(ptr->out, tmp);
|
||||
input_.change_z2x();
|
||||
vvp_send_vec4(ptr.ptr()->out, input_);
|
||||
}
|
||||
|
||||
vvp_fun_bufz::vvp_fun_bufz()
|
||||
|
|
@ -394,7 +380,6 @@ void vvp_fun_muxz::run_run()
|
|||
|
||||
vvp_fun_not::vvp_fun_not()
|
||||
{
|
||||
net_ = 0;
|
||||
count_functors_logic += 1;
|
||||
}
|
||||
|
||||
|
|
@ -411,30 +396,13 @@ void vvp_fun_not::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
|
|||
if (ptr.port() != 0)
|
||||
return;
|
||||
|
||||
if (input_ .eeq( bit ))
|
||||
if (input_ .eq_xz( bit ))
|
||||
return;
|
||||
|
||||
input_ = bit;
|
||||
if (net_ == 0) {
|
||||
net_ = ptr.ptr();
|
||||
schedule_generic(this, 0, false);
|
||||
}
|
||||
vvp_send_vec4(ptr.ptr()->out, ~input_);
|
||||
}
|
||||
|
||||
void vvp_fun_not::run_run()
|
||||
{
|
||||
vvp_net_t*ptr = net_;
|
||||
net_ = 0;
|
||||
|
||||
vvp_vector4_t result (input_);
|
||||
|
||||
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
|
||||
vvp_bit4_t bitbit = ~ result.value(idx);
|
||||
result.set_bit(idx, bitbit);
|
||||
}
|
||||
|
||||
vvp_send_vec4(ptr->out, result);
|
||||
}
|
||||
|
||||
vvp_fun_or::vvp_fun_or(unsigned wid, bool invert)
|
||||
: vvp_fun_boolean_(wid), invert_(invert)
|
||||
|
|
|
|||
12
vvp/logic.h
12
vvp/logic.h
|
|
@ -69,7 +69,7 @@ class vvp_fun_eeq : public vvp_fun_boolean_ {
|
|||
* The retransmitted vector has all Z values changed to X, just like
|
||||
* the buf(Q,D) gate in Verilog.
|
||||
*/
|
||||
class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s {
|
||||
class vvp_fun_buf: public vvp_net_fun_t {
|
||||
|
||||
public:
|
||||
explicit vvp_fun_buf();
|
||||
|
|
@ -77,12 +77,8 @@ class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s {
|
|||
|
||||
void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
|
||||
|
||||
private:
|
||||
void run_run();
|
||||
|
||||
private:
|
||||
vvp_vector4_t input_;
|
||||
vvp_net_t*net_;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -152,7 +148,7 @@ class vvp_fun_muxr : public vvp_net_fun_t, private vvp_gen_event_s {
|
|||
sel_type select_;
|
||||
};
|
||||
|
||||
class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s {
|
||||
class vvp_fun_not: public vvp_net_fun_t {
|
||||
|
||||
public:
|
||||
explicit vvp_fun_not();
|
||||
|
|
@ -160,12 +156,8 @@ class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s {
|
|||
|
||||
void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
|
||||
|
||||
private:
|
||||
void run_run();
|
||||
|
||||
private:
|
||||
vvp_vector4_t input_;
|
||||
vvp_net_t*net_;
|
||||
};
|
||||
|
||||
class vvp_fun_or : public vvp_fun_boolean_ {
|
||||
|
|
|
|||
27
vvp/part.cc
27
vvp/part.cc
|
|
@ -28,7 +28,7 @@
|
|||
# include <assert.h>
|
||||
|
||||
vvp_fun_part::vvp_fun_part(unsigned base, unsigned wid)
|
||||
: base_(base), wid_(wid)
|
||||
: base_(base), val_(wid)
|
||||
{
|
||||
net_ = 0;
|
||||
}
|
||||
|
|
@ -41,10 +41,18 @@ void vvp_fun_part::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
|
|||
{
|
||||
assert(port.port() == 0);
|
||||
|
||||
if (val_ .eeq( bit ))
|
||||
vvp_vector4_t tmp = val_;
|
||||
for (unsigned idx = 0 ; idx < tmp.size() ; idx += 1) {
|
||||
if ((idx + base_) < bit.size())
|
||||
tmp.set_bit(idx, bit.value(base_+idx));
|
||||
else
|
||||
tmp.set_bit(idx, BIT4_X);
|
||||
}
|
||||
|
||||
if (val_ .eeq( tmp ))
|
||||
return;
|
||||
|
||||
val_ = bit;
|
||||
val_ = tmp;
|
||||
|
||||
if (net_ == 0) {
|
||||
net_ = port.ptr();
|
||||
|
|
@ -63,6 +71,11 @@ void vvp_fun_part::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
|
|||
{
|
||||
assert(bit.size() == wid);
|
||||
|
||||
if (base >= base_+val_.size())
|
||||
return;
|
||||
if ((base+wid) <= base_)
|
||||
return;
|
||||
|
||||
vvp_vector4_t tmp = val_;
|
||||
if (tmp.size() == 0)
|
||||
tmp = vvp_vector4_t(vwid);
|
||||
|
|
@ -76,13 +89,7 @@ void vvp_fun_part::run_run()
|
|||
{
|
||||
vvp_net_t*ptr = net_;
|
||||
net_ = 0;
|
||||
|
||||
vvp_vector4_t res (wid_, BIT4_X);
|
||||
for (unsigned idx = 0 ; idx < wid_ ; idx += 1) {
|
||||
if (idx + base_ < val_.size())
|
||||
res.set_bit(idx, val_.value(base_+idx));
|
||||
}
|
||||
vvp_send_vec4(ptr->out, res);
|
||||
vvp_send_vec4(ptr->out, val_);
|
||||
}
|
||||
|
||||
vvp_fun_part_pv::vvp_fun_part_pv(unsigned b, unsigned w, unsigned v)
|
||||
|
|
|
|||
|
|
@ -44,18 +44,14 @@ void resolv_functor::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
|
|||
unsigned base, unsigned wid, unsigned vwid)
|
||||
{
|
||||
assert(bit.size() == wid);
|
||||
vvp_vector4_t res (vwid);
|
||||
|
||||
for (unsigned idx = 0 ; idx < base ; idx += 1)
|
||||
res.set_bit(idx, BIT4_Z);
|
||||
vvp_vector8_t tmp (bit,6,6);
|
||||
vvp_vector8_t tmpw (vwid);
|
||||
|
||||
for (unsigned idx = 0 ; idx < wid ; idx += 1)
|
||||
res.set_bit(idx+base, bit.value(idx));
|
||||
tmpw.set_bit(idx+base, tmp.value(idx));
|
||||
|
||||
for (unsigned idx = base+wid ; idx < vwid ; idx += 1)
|
||||
res.set_bit(idx, BIT4_Z);
|
||||
|
||||
recv_vec4(port, res);
|
||||
recv_vec8(port, tmpw);
|
||||
}
|
||||
|
||||
void resolv_functor::recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit)
|
||||
|
|
|
|||
|
|
@ -779,6 +779,41 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool vvp_vector4_t::eq_xz(const vvp_vector4_t&that) const
|
||||
{
|
||||
if (size_ != that.size_)
|
||||
return false;
|
||||
|
||||
if (size_ < BITS_PER_WORD) {
|
||||
unsigned long mask = (1UL << size_) - 1;
|
||||
return ((abits_val_|bbits_val_)&mask) == ((that.abits_val_|that.bbits_val_)&mask)
|
||||
&& (bbits_val_&mask) == (that.bbits_val_&mask);
|
||||
}
|
||||
|
||||
if (size_ == BITS_PER_WORD) {
|
||||
return ((abits_val_|bbits_val_) == (that.abits_val_|that.bbits_val_))
|
||||
&& (bbits_val_ == that.bbits_val_);
|
||||
}
|
||||
|
||||
unsigned words = size_ / BITS_PER_WORD;
|
||||
for (unsigned idx = 0 ; idx < words ; idx += 1) {
|
||||
if ((abits_ptr_[idx]|bbits_ptr_[idx]) != (that.abits_ptr_[idx]|that.bbits_ptr_[idx]))
|
||||
return false;
|
||||
if (bbits_ptr_[idx] != that.bbits_ptr_[idx])
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned long mask = size_%BITS_PER_WORD;
|
||||
if (mask > 0) {
|
||||
mask = (1UL << mask) - 1;
|
||||
return ((abits_ptr_[words]|bbits_ptr_[words])&mask) == ((that.abits_ptr_[words]|that.bbits_ptr_[words])&mask)
|
||||
&& (bbits_ptr_[words]&mask) == (that.bbits_ptr_[words]&mask);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool vvp_vector4_t::has_xz() const
|
||||
{
|
||||
if (size_ < BITS_PER_WORD) {
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ class vvp_net_t;
|
|||
class vvp_net_fun_t;
|
||||
|
||||
/* Core net function types. */
|
||||
class vvp_fun_concat;
|
||||
class vvp_fun_drive;
|
||||
class vvp_fun_part;
|
||||
|
||||
|
|
@ -154,6 +153,8 @@ class vvp_vector4_t {
|
|||
|
||||
// Test that the vectors are exactly equal
|
||||
bool eeq(const vvp_vector4_t&that) const;
|
||||
// Test that the vectors are equal, with x and z comparing equal.
|
||||
bool eq_xz(const vvp_vector4_t&that) const;
|
||||
|
||||
// Return true if there is an X or Z anywhere in the vector.
|
||||
bool has_xz() const;
|
||||
|
|
@ -791,31 +792,6 @@ class vvp_net_fun_t {
|
|||
|
||||
/* **** Some core net functions **** */
|
||||
|
||||
/* vvp_fun_concat
|
||||
* This node function creates vectors (vvp_vector4_t) from the
|
||||
* concatenation of the inputs. The inputs (4) may be vector or
|
||||
* vector8 objects, but they are reduced to vector4 values and
|
||||
* strength information lost.
|
||||
*
|
||||
* The expected widths of the input vectors must be given up front so
|
||||
* that the positions in the output vector (and also the size of the
|
||||
* output vector) can be worked out. The input vectors must match the
|
||||
* expected width.
|
||||
*/
|
||||
class vvp_fun_concat : public vvp_net_fun_t {
|
||||
|
||||
public:
|
||||
vvp_fun_concat(unsigned w0, unsigned w1,
|
||||
unsigned w2, unsigned w3);
|
||||
~vvp_fun_concat();
|
||||
|
||||
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
|
||||
|
||||
private:
|
||||
unsigned wid_[4];
|
||||
vvp_vector4_t val_;
|
||||
};
|
||||
|
||||
/* vvp_fun_repeat
|
||||
* This node function create vectors by repeating the input. The width
|
||||
* is the width of the output vector, and the repeat is the number of
|
||||
|
|
|
|||
Loading…
Reference in New Issue