Rework scheduling of concat, part, buf/not and resolv for efficiency.

The concat and resolv functors are best evaluated lazily, because each
evaluation is costly and there is a high probability that an evaluation
will be invalidated when new input comes in.

Also optimize the recv_vec4_pv method of the resolver, which is
commonly used, and adjust the order of handling in vvp_fun_part so
that it works more efficiently.
Stephen Williams 2008-06-06 15:31:22 -07:00
parent 2f4e5bf5b6
commit 2e95a740da
7 changed files with 121 additions and 107 deletions
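The pattern applied to these functors, shown here as a minimal self-contained sketch: latch the new input, schedule a single evaluation event, and do the costly work only when that event runs. The names lazy_node and schedule_event below are illustrative stand-ins, not vvp classes; the real code uses vvp_gen_event_s, schedule_generic() and run_run(), as the diffs below show.

# include <iostream>
# include <queue>
# include <string>

struct event_s {
      virtual ~event_s() { }
      virtual void run_run() =0;
};

  /* Stand-in for the vvp scheduler queue: events pushed here run later,
     so several input changes in one time step collapse into a single
     evaluation of the functor. */
static std::queue<event_s*> event_queue;
static void schedule_event(event_s*ev) { event_queue.push(ev); }

class lazy_node : public event_s {

    public:
      lazy_node() : scheduled_(false) { input_[0] = "x"; input_[1] = "x"; }

	/* Latch the new value and schedule at most one evaluation.
	   The expensive output calculation is not done here. */
      void recv(unsigned port, const std::string&bit)
      {
	    if (input_[port] == bit)
		  return;
	    input_[port] = bit;
	    if (! scheduled_) {
		  scheduled_ = true;
		  schedule_event(this);
	    }
      }

    private:
	/* By the time this runs, every change that arrived in the
	   meantime is already latched in input_[], so the evaluation
	   happens once instead of once per input change. */
      void run_run()
      {
	    scheduled_ = false;
	    std::cout << "evaluate {" << input_[1] << "," << input_[0]
		      << "}" << std::endl;
      }

      std::string input_[2];
      bool scheduled_;
};

int main()
{
      lazy_node node;
      node.recv(0, "1010");
      node.recv(1, "11");   /* arrives before the evaluation runs */

      while (! event_queue.empty()) {
	    event_queue.front()->run_run();
	    event_queue.pop();
      }
      return 0;   /* prints a single "evaluate {11,1010}" */
}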


@@ -20,6 +20,7 @@
# include "compile.h"
# include "vvp_net.h"
# include "schedule.h"
# include <stdlib.h>
# include <iostream>
#ifdef HAVE_MALLOC_H
@@ -27,18 +28,41 @@
#endif
# include <assert.h>
/* vvp_fun_concat
* This node function creates vectors (vvp_vector4_t) from the
* concatenation of the inputs. The inputs (4) may be vector or
* vector8 objects, but they are reduced to vector4 values and
* strength information lost.
*
* The expected widths of the input vectors must be given up front so
* that the positions in the output vector (and also the size of the
* output vector) can be worked out. The input vectors must match the
* expected width.
*/
class vvp_fun_concat : public vvp_net_fun_t, private vvp_gen_event_s {
public:
vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat();
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
private:
void run_run();
vvp_net_t*net_;
vvp_vector4_t input_[4];
};
vvp_fun_concat::vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3)
: val_(w0+w1+w2+w3)
: net_(0)
{
wid_[0] = w0;
wid_[1] = w1;
wid_[2] = w2;
wid_[3] = w3;
for (unsigned idx = 0 ; idx < val_.size() ; idx += 1)
val_.set_bit(idx, BIT4_X);
input_[0] = vvp_vector4_t(w0);
input_[1] = vvp_vector4_t(w1);
input_[2] = vvp_vector4_t(w2);
input_[3] = vvp_vector4_t(w3);
}
vvp_fun_concat::~vvp_fun_concat()
@@ -49,22 +73,38 @@ void vvp_fun_concat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
{
unsigned pdx = port.port();
if (bit.size() != wid_[pdx]) {
if (bit.size() != input_[pdx].size()) {
cerr << "internal error: port " << pdx
<< " expects wid=" << wid_[pdx]
<< " expects wid=" << input_[pdx].size()
<< ", got wid=" << bit.size() << endl;
assert(0);
}
unsigned off = 0;
for (unsigned idx = 0 ; idx < pdx ; idx += 1)
off += wid_[idx];
if (input_[pdx] .eeq(bit))
return;
for (unsigned idx = 0 ; idx < wid_[pdx] ; idx += 1) {
val_.set_bit(off+idx, bit.value(idx));
input_[pdx] = bit;
if (net_ == 0) {
net_ = port.ptr();
schedule_generic(this, 0, false);
}
}
void vvp_fun_concat::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
unsigned off = 0;
unsigned owid = input_[0].size() + input_[1].size() + input_[2].size() + input_[3].size();
vvp_vector4_t res (owid);
for (unsigned idx = 0 ; idx < 4 && (off<owid) ; idx += 1) {
res.set_vec(off, input_[idx]);
off += input_[idx].size();
}
vvp_send_vec4(port.ptr()->out, val_);
vvp_send_vec4(ptr->out, res);
}
void compile_concat(char*label, unsigned w0, unsigned w1,
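
As the vvp_fun_concat comment above says, the expected input widths fix both the output width and each port's position in it; the reworked run_run() simply accumulates those widths into offsets. A small standalone sketch of that mapping, with made-up widths and plain C++ rather than the vvp_vector4_t API:

# include <cstdio>

int main()
{
	/* Hypothetical port widths, in the order run_run() walks them:
	   port 0 lands at the least significant bits of the output. */
      unsigned wid[4] = { 2, 3, 1, 2 };

      unsigned off = 0;
      for (unsigned idx = 0 ; idx < 4 ; idx += 1) {
	    printf("port %u -> output bits [%u:%u]\n",
		   idx, off + wid[idx] - 1, off);
	    off += wid[idx];
      }
      printf("output width = %u\n", off);   /* 8 for these widths */
      return 0;
}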


@@ -140,7 +140,6 @@ void vvp_fun_eeq::run_run()
vvp_fun_buf::vvp_fun_buf()
{
net_ = 0;
count_functors_logic += 1;
}
@@ -157,25 +156,12 @@ void vvp_fun_buf::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
if (ptr.port() != 0)
return;
if (input_ .eeq( bit ))
if (input_ .eq_xz( bit ))
return;
input_ = bit;
if (net_ == 0) {
net_ = ptr.ptr();
schedule_generic(this, 0, false);
}
}
void vvp_fun_buf::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
vvp_vector4_t tmp (input_);
tmp.change_z2x();
vvp_send_vec4(ptr->out, tmp);
input_.change_z2x();
vvp_send_vec4(ptr.ptr()->out, input_);
}
vvp_fun_bufz::vvp_fun_bufz()
@@ -394,7 +380,6 @@ void vvp_fun_muxz::run_run()
vvp_fun_not::vvp_fun_not()
{
net_ = 0;
count_functors_logic += 1;
}
@@ -411,30 +396,13 @@ void vvp_fun_not::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
if (ptr.port() != 0)
return;
if (input_ .eeq( bit ))
if (input_ .eq_xz( bit ))
return;
input_ = bit;
if (net_ == 0) {
net_ = ptr.ptr();
schedule_generic(this, 0, false);
}
vvp_send_vec4(ptr.ptr()->out, ~input_);
}
void vvp_fun_not::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
vvp_vector4_t result (input_);
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
vvp_bit4_t bitbit = ~ result.value(idx);
result.set_bit(idx, bitbit);
}
vvp_send_vec4(ptr->out, result);
}
vvp_fun_or::vvp_fun_or(unsigned wid, bool invert)
: vvp_fun_boolean_(wid), invert_(invert)


@@ -69,7 +69,7 @@ class vvp_fun_eeq : public vvp_fun_boolean_ {
* The retransmitted vector has all Z values changed to X, just like
* the buf(Q,D) gate in Verilog.
*/
class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s {
class vvp_fun_buf: public vvp_net_fun_t {
public:
explicit vvp_fun_buf();
@@ -77,12 +77,8 @@ class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s {
void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
private:
void run_run();
private:
vvp_vector4_t input_;
vvp_net_t*net_;
};
/*
@@ -152,7 +148,7 @@ class vvp_fun_muxr : public vvp_net_fun_t, private vvp_gen_event_s {
sel_type select_;
};
class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s {
class vvp_fun_not: public vvp_net_fun_t {
public:
explicit vvp_fun_not();
@@ -160,12 +156,8 @@ class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s {
void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
private:
void run_run();
private:
vvp_vector4_t input_;
vvp_net_t*net_;
};
class vvp_fun_or : public vvp_fun_boolean_ {
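
The comment above describes how vvp_fun_buf retransmits its input with Z changed to X, like a Verilog buf(Q,D) gate. That is also why the new eq_xz() test (declared further down in vvp_net.h) is the right change check for buf and not: an input that only toggles between x and z cannot change the output. A rough sketch of both ideas, using character strings that merely mimic the vvp_vector4_t methods of the same names:

# include <cstdio>
# include <string>

static std::string change_z2x(std::string val)
{
      for (unsigned idx = 0 ; idx < val.size() ; idx += 1)
	    if (val[idx] == 'z')
		  val[idx] = 'x';   /* buf(Q,D) drives x for a z input */
      return val;
}

  /* eq_xz-style compare: x and z count as equal. */
static bool eq_xz(const std::string&a, const std::string&b)
{
      return change_z2x(a) == change_z2x(b);
}

int main()
{
      printf("%s\n", change_z2x("01xz").c_str());       /* prints 01xx */
      printf("%d\n", eq_xz("01xz", "01xx") ? 1 : 0);    /* prints 1 */
      return 0;
}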


@@ -28,7 +28,7 @@
# include <assert.h>
vvp_fun_part::vvp_fun_part(unsigned base, unsigned wid)
: base_(base), wid_(wid)
: base_(base), val_(wid)
{
net_ = 0;
}
@@ -41,10 +41,18 @@ void vvp_fun_part::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
{
assert(port.port() == 0);
if (val_ .eeq( bit ))
vvp_vector4_t tmp = val_;
for (unsigned idx = 0 ; idx < tmp.size() ; idx += 1) {
if ((idx + base_) < bit.size())
tmp.set_bit(idx, bit.value(base_+idx));
else
tmp.set_bit(idx, BIT4_X);
}
if (val_ .eeq( tmp ))
return;
val_ = bit;
val_ = tmp;
if (net_ == 0) {
net_ = port.ptr();
@@ -63,6 +71,11 @@ void vvp_fun_part::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
{
assert(bit.size() == wid);
if (base >= base_+val_.size())
return;
if ((base+wid) <= base_)
return;
vvp_vector4_t tmp = val_;
if (tmp.size() == 0)
tmp = vvp_vector4_t(vwid);
@@ -76,13 +89,7 @@ void vvp_fun_part::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
vvp_vector4_t res (wid_, BIT4_X);
for (unsigned idx = 0 ; idx < wid_ ; idx += 1) {
if (idx + base_ < val_.size())
res.set_bit(idx, val_.value(base_+idx));
}
vvp_send_vec4(ptr->out, res);
vvp_send_vec4(ptr->out, val_);
}
vvp_fun_part_pv::vvp_fun_part_pv(unsigned b, unsigned w, unsigned v)
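
The vvp_fun_part rework above does the part select in recv_vec4() and keeps only the selected bits in val_, so an input change that does not touch the selected range is dropped before anything is scheduled, and run_run() just retransmits val_. A sketch of the selection itself, with made-up values; part_select() is a hypothetical helper and strings stand in for vvp_vector4_t, with index 0 as the LSB:

# include <cstdio>
# include <string>

static std::string part_select(const std::string&bit, unsigned base, unsigned wid)
{
      std::string tmp(wid, 'x');   /* bits past the end of the input read as x */
      for (unsigned idx = 0 ; idx < wid ; idx += 1) {
	    if (base + idx < bit.size())
		  tmp[idx] = bit[base + idx];
      }
      return tmp;
}

int main()
{
	/* Take 4 bits starting at bit 6 of an 8-bit value: the top two
	   positions fall off the end and come back as x. */
      printf("%s\n", part_select("01011010", 6, 4).c_str());   /* prints 10xx */
      return 0;
}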


@@ -44,18 +44,14 @@ void resolv_functor::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned base, unsigned wid, unsigned vwid)
{
assert(bit.size() == wid);
vvp_vector4_t res (vwid);
for (unsigned idx = 0 ; idx < base ; idx += 1)
res.set_bit(idx, BIT4_Z);
vvp_vector8_t tmp (bit,6,6);
vvp_vector8_t tmpw (vwid);
for (unsigned idx = 0 ; idx < wid ; idx += 1)
res.set_bit(idx+base, bit.value(idx));
tmpw.set_bit(idx+base, tmp.value(idx));
for (unsigned idx = base+wid ; idx < vwid ; idx += 1)
res.set_bit(idx, BIT4_Z);
recv_vec4(port, res);
recv_vec8(port, tmpw);
}
void resolv_functor::recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit)


@@ -779,6 +779,41 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
return true;
}
bool vvp_vector4_t::eq_xz(const vvp_vector4_t&that) const
{
if (size_ != that.size_)
return false;
if (size_ < BITS_PER_WORD) {
unsigned long mask = (1UL << size_) - 1;
return ((abits_val_|bbits_val_)&mask) == ((that.abits_val_|that.bbits_val_)&mask)
&& (bbits_val_&mask) == (that.bbits_val_&mask);
}
if (size_ == BITS_PER_WORD) {
return ((abits_val_|bbits_val_) == (that.abits_val_|that.bbits_val_))
&& (bbits_val_ == that.bbits_val_);
}
unsigned words = size_ / BITS_PER_WORD;
for (unsigned idx = 0 ; idx < words ; idx += 1) {
if ((abits_ptr_[idx]|bbits_ptr_[idx]) != (that.abits_ptr_[idx]|that.bbits_ptr_[idx]))
return false;
if (bbits_ptr_[idx] != that.bbits_ptr_[idx])
return false;
}
unsigned long mask = size_%BITS_PER_WORD;
if (mask > 0) {
mask = (1UL << mask) - 1;
return ((abits_ptr_[words]|bbits_ptr_[words])&mask) == ((that.abits_ptr_[words]|that.bbits_ptr_[words])&mask)
&& (bbits_ptr_[words]&mask) == (that.bbits_ptr_[words]&mask);
}
return true;
}
bool vvp_vector4_t::has_xz() const
{
if (size_ < BITS_PER_WORD) {
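
The word-at-a-time comparison in eq_xz() above leans on the vvp_vector4_t storage of one "a" plane and one "b" plane per word. The plane roles are inferred from the code, so treat this as an illustration only: 0 and 1 have the b bit clear, while x and z both have it set and differ only in the a plane, so comparing (a|b) together with b folds x and z into one value while still separating 0, 1 and x/z. A single-word sketch with a hypothetical eq_xz_word() helper:

# include <cstdio>

static bool eq_xz_word(unsigned long a1, unsigned long b1,
		       unsigned long a2, unsigned long b2,
		       unsigned long mask)
{
      return ((a1|b1)&mask) == ((a2|b2)&mask)
	  && (b1&mask) == (b2&mask);
}

int main()
{
	/* Two 4-bit vectors that differ only in bit 2, where one holds x
	   and the other z: eq_xz treats them as equal, eeq would not. */
      unsigned long a1 = 0x5, b1 = 0x4;
      unsigned long a2 = 0x1, b2 = 0x4;
      printf("%d\n", eq_xz_word(a1, b1, a2, b2, 0xf) ? 1 : 0);   /* prints 1 */
      return 0;
}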


@@ -40,7 +40,6 @@ class vvp_net_t;
class vvp_net_fun_t;
/* Core net function types. */
class vvp_fun_concat;
class vvp_fun_drive;
class vvp_fun_part;
@@ -154,6 +153,8 @@ class vvp_vector4_t {
// Test that the vectors are exactly equal
bool eeq(const vvp_vector4_t&that) const;
// Test that the vectors are equal, with x and z comparing equal.
bool eq_xz(const vvp_vector4_t&that) const;
// Return true if there is an X or Z anywhere in the vector.
bool has_xz() const;
@@ -791,31 +792,6 @@ class vvp_net_fun_t {
/* **** Some core net functions **** */
/* vvp_fun_concat
* This node function creates vectors (vvp_vector4_t) from the
* concatenation of the inputs. The inputs (4) may be vector or
* vector8 objects, but they are reduced to vector4 values and
* strength information lost.
*
* The expected widths of the input vectors must be given up front so
* that the positions in the output vector (and also the size of the
* output vector) can be worked out. The input vectors must match the
* expected width.
*/
class vvp_fun_concat : public vvp_net_fun_t {
public:
vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat();
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
private:
unsigned wid_[4];
vvp_vector4_t val_;
};
/* vvp_fun_repeat
* This node function create vectors by repeating the input. The width
* is the width of the output vector, and the repeat is the number of