Rework scheduling of concat, part, buf/not and resolv for efficiency.

The concat and resolv functors are best evaluated lazily, because each
evaluation is costly and there is a high probability that an evaluation
will be invalidated when new input comes in.

Also optimize the recv_vec4_pv method of the resolver, which is
commonly used, and adjust the order of handling in vvp_fun_part so
that it works more efficiently.
This commit is contained in:
Stephen Williams 2008-06-06 15:31:22 -07:00
parent 2f4e5bf5b6
commit 2e95a740da
7 changed files with 121 additions and 107 deletions

View File

@ -20,6 +20,7 @@
# include "compile.h" # include "compile.h"
# include "vvp_net.h" # include "vvp_net.h"
# include "schedule.h"
# include <stdlib.h> # include <stdlib.h>
# include <iostream> # include <iostream>
#ifdef HAVE_MALLOC_H #ifdef HAVE_MALLOC_H
@ -27,18 +28,41 @@
#endif #endif
# include <assert.h> # include <assert.h>
/* vvp_fun_concat
* This node function creates vectors (vvp_vector4_t) from the
* concatenation of the inputs. The inputs (4) may be vector or
* vector8 objects, but they are reduced to vector4 values and
* strength information lost.
*
* The expected widths of the input vectors must be given up front so
* that the positions in the output vector (and also the size of the
* output vector) can be worked out. The input vectors must match the
* expected width.
*
* Evaluation is deferred: recv_vec4 only records the new input value
* and, if no evaluation is already pending, schedules this object
* with schedule_generic. The concatenated result is built and sent
* from run_run, so several input changes that arrive before run_run
* is called produce a single output vector.
*/
class vvp_fun_concat : public vvp_net_fun_t, private vvp_gen_event_s {
public:
// w0..w3 are the expected widths of input ports 0..3.
vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat();
// Store the value for the receiving port. A value that is
// identical (eeq) to the one already stored is ignored.
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
private:
// Scheduled callback: concatenate input_[0..3] and send the
// result to the output of the net saved in net_.
void run_run();
// Net to send the result to. This is 0 when no run_run call
// is pending.
vvp_net_t*net_;
// Most recently received value for each input port. Each
// element is created with the expected width for its port.
vvp_vector4_t input_[4];
};
vvp_fun_concat::vvp_fun_concat(unsigned w0, unsigned w1, vvp_fun_concat::vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3) unsigned w2, unsigned w3)
: val_(w0+w1+w2+w3) : net_(0)
{ {
wid_[0] = w0; input_[0] = vvp_vector4_t(w0);
wid_[1] = w1; input_[1] = vvp_vector4_t(w1);
wid_[2] = w2; input_[2] = vvp_vector4_t(w2);
wid_[3] = w3; input_[3] = vvp_vector4_t(w3);
for (unsigned idx = 0 ; idx < val_.size() ; idx += 1)
val_.set_bit(idx, BIT4_X);
} }
vvp_fun_concat::~vvp_fun_concat() vvp_fun_concat::~vvp_fun_concat()
@ -49,22 +73,38 @@ void vvp_fun_concat::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
{ {
unsigned pdx = port.port(); unsigned pdx = port.port();
if (bit.size() != wid_[pdx]) { if (bit.size() != input_[pdx].size()) {
cerr << "internal error: port " << pdx cerr << "internal error: port " << pdx
<< " expects wid=" << wid_[pdx] << " expects wid=" << input_[pdx].size()
<< ", got wid=" << bit.size() << endl; << ", got wid=" << bit.size() << endl;
assert(0); assert(0);
} }
unsigned off = 0; if (input_[pdx] .eeq(bit))
for (unsigned idx = 0 ; idx < pdx ; idx += 1) return;
off += wid_[idx];
for (unsigned idx = 0 ; idx < wid_[pdx] ; idx += 1) { input_[pdx] = bit;
val_.set_bit(off+idx, bit.value(idx)); if (net_ == 0) {
net_ = port.ptr();
schedule_generic(this, 0, false);
}
} }
vvp_send_vec4(port.ptr()->out, val_); void vvp_fun_concat::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
unsigned off = 0;
unsigned owid = input_[0].size() + input_[1].size() + input_[2].size() + input_[3].size();
vvp_vector4_t res (owid);
for (unsigned idx = 0 ; idx < 4 && (off<owid) ; idx += 1) {
res.set_vec(off, input_[idx]);
off += input_[idx].size();
}
vvp_send_vec4(ptr->out, res);
} }
void compile_concat(char*label, unsigned w0, unsigned w1, void compile_concat(char*label, unsigned w0, unsigned w1,

View File

@ -140,7 +140,6 @@ void vvp_fun_eeq::run_run()
vvp_fun_buf::vvp_fun_buf() vvp_fun_buf::vvp_fun_buf()
{ {
net_ = 0;
count_functors_logic += 1; count_functors_logic += 1;
} }
@ -157,25 +156,12 @@ void vvp_fun_buf::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
if (ptr.port() != 0) if (ptr.port() != 0)
return; return;
if (input_ .eeq( bit )) if (input_ .eq_xz( bit ))
return; return;
input_ = bit; input_ = bit;
input_.change_z2x();
if (net_ == 0) { vvp_send_vec4(ptr.ptr()->out, input_);
net_ = ptr.ptr();
schedule_generic(this, 0, false);
}
}
void vvp_fun_buf::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
vvp_vector4_t tmp (input_);
tmp.change_z2x();
vvp_send_vec4(ptr->out, tmp);
} }
vvp_fun_bufz::vvp_fun_bufz() vvp_fun_bufz::vvp_fun_bufz()
@ -394,7 +380,6 @@ void vvp_fun_muxz::run_run()
vvp_fun_not::vvp_fun_not() vvp_fun_not::vvp_fun_not()
{ {
net_ = 0;
count_functors_logic += 1; count_functors_logic += 1;
} }
@ -411,30 +396,13 @@ void vvp_fun_not::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit)
if (ptr.port() != 0) if (ptr.port() != 0)
return; return;
if (input_ .eeq( bit )) if (input_ .eq_xz( bit ))
return; return;
input_ = bit; input_ = bit;
if (net_ == 0) { vvp_send_vec4(ptr.ptr()->out, ~input_);
net_ = ptr.ptr();
schedule_generic(this, 0, false);
}
} }
void vvp_fun_not::run_run()
{
vvp_net_t*ptr = net_;
net_ = 0;
vvp_vector4_t result (input_);
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
vvp_bit4_t bitbit = ~ result.value(idx);
result.set_bit(idx, bitbit);
}
vvp_send_vec4(ptr->out, result);
}
vvp_fun_or::vvp_fun_or(unsigned wid, bool invert) vvp_fun_or::vvp_fun_or(unsigned wid, bool invert)
: vvp_fun_boolean_(wid), invert_(invert) : vvp_fun_boolean_(wid), invert_(invert)

View File

@ -69,7 +69,7 @@ class vvp_fun_eeq : public vvp_fun_boolean_ {
* The retransmitted vector has all Z values changed to X, just like * The retransmitted vector has all Z values changed to X, just like
* the buf(Q,D) gate in Verilog. * the buf(Q,D) gate in Verilog.
*/ */
class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s { class vvp_fun_buf: public vvp_net_fun_t {
public: public:
explicit vvp_fun_buf(); explicit vvp_fun_buf();
@ -77,12 +77,8 @@ class vvp_fun_buf: public vvp_net_fun_t, private vvp_gen_event_s {
void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit); void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
private:
void run_run();
private: private:
vvp_vector4_t input_; vvp_vector4_t input_;
vvp_net_t*net_;
}; };
/* /*
@ -152,7 +148,7 @@ class vvp_fun_muxr : public vvp_net_fun_t, private vvp_gen_event_s {
sel_type select_; sel_type select_;
}; };
class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s { class vvp_fun_not: public vvp_net_fun_t {
public: public:
explicit vvp_fun_not(); explicit vvp_fun_not();
@ -160,12 +156,8 @@ class vvp_fun_not: public vvp_net_fun_t, private vvp_gen_event_s {
void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit); void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
private:
void run_run();
private: private:
vvp_vector4_t input_; vvp_vector4_t input_;
vvp_net_t*net_;
}; };
class vvp_fun_or : public vvp_fun_boolean_ { class vvp_fun_or : public vvp_fun_boolean_ {

View File

@ -28,7 +28,7 @@
# include <assert.h> # include <assert.h>
vvp_fun_part::vvp_fun_part(unsigned base, unsigned wid) vvp_fun_part::vvp_fun_part(unsigned base, unsigned wid)
: base_(base), wid_(wid) : base_(base), val_(wid)
{ {
net_ = 0; net_ = 0;
} }
@ -41,10 +41,18 @@ void vvp_fun_part::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
{ {
assert(port.port() == 0); assert(port.port() == 0);
if (val_ .eeq( bit )) vvp_vector4_t tmp = val_;
for (unsigned idx = 0 ; idx < tmp.size() ; idx += 1) {
if ((idx + base_) < bit.size())
tmp.set_bit(idx, bit.value(base_+idx));
else
tmp.set_bit(idx, BIT4_X);
}
if (val_ .eeq( tmp ))
return; return;
val_ = bit; val_ = tmp;
if (net_ == 0) { if (net_ == 0) {
net_ = port.ptr(); net_ = port.ptr();
@ -63,6 +71,11 @@ void vvp_fun_part::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
{ {
assert(bit.size() == wid); assert(bit.size() == wid);
if (base >= base_+val_.size())
return;
if ((base+wid) <= base_)
return;
vvp_vector4_t tmp = val_; vvp_vector4_t tmp = val_;
if (tmp.size() == 0) if (tmp.size() == 0)
tmp = vvp_vector4_t(vwid); tmp = vvp_vector4_t(vwid);
@ -76,13 +89,7 @@ void vvp_fun_part::run_run()
{ {
vvp_net_t*ptr = net_; vvp_net_t*ptr = net_;
net_ = 0; net_ = 0;
vvp_send_vec4(ptr->out, val_);
vvp_vector4_t res (wid_, BIT4_X);
for (unsigned idx = 0 ; idx < wid_ ; idx += 1) {
if (idx + base_ < val_.size())
res.set_bit(idx, val_.value(base_+idx));
}
vvp_send_vec4(ptr->out, res);
} }
vvp_fun_part_pv::vvp_fun_part_pv(unsigned b, unsigned w, unsigned v) vvp_fun_part_pv::vvp_fun_part_pv(unsigned b, unsigned w, unsigned v)

View File

@ -44,18 +44,14 @@ void resolv_functor::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
unsigned base, unsigned wid, unsigned vwid) unsigned base, unsigned wid, unsigned vwid)
{ {
assert(bit.size() == wid); assert(bit.size() == wid);
vvp_vector4_t res (vwid);
for (unsigned idx = 0 ; idx < base ; idx += 1) vvp_vector8_t tmp (bit,6,6);
res.set_bit(idx, BIT4_Z); vvp_vector8_t tmpw (vwid);
for (unsigned idx = 0 ; idx < wid ; idx += 1) for (unsigned idx = 0 ; idx < wid ; idx += 1)
res.set_bit(idx+base, bit.value(idx)); tmpw.set_bit(idx+base, tmp.value(idx));
for (unsigned idx = base+wid ; idx < vwid ; idx += 1) recv_vec8(port, tmpw);
res.set_bit(idx, BIT4_Z);
recv_vec4(port, res);
} }
void resolv_functor::recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit) void resolv_functor::recv_vec8(vvp_net_ptr_t port, const vvp_vector8_t&bit)

View File

@ -779,6 +779,41 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
return true; return true;
} }
/*
 * Test that two vectors are equal, with X and Z comparing equal.
 *
 * Vectors of different sizes never compare equal. For each word the
 * comparison checks two derived values: (abits|bbits) and bbits. The
 * words match only when both derived values match. Bits beyond size_
 * in a partially used word are masked off before comparing.
 * (Presumably bbits flags the X/Z states -- confirm against the
 * vvp_vector4_t encoding notes.)
 */
bool vvp_vector4_t::eq_xz(const vvp_vector4_t&that) const
{
      if (size_ != that.size_)
            return false;

        // Exactly one full word: compare the in-place values as-is.
      if (size_ == BITS_PER_WORD) {
            if ((abits_val_|bbits_val_) != (that.abits_val_|that.bbits_val_))
                  return false;
            return bbits_val_ == that.bbits_val_;
      }

        // Less than one word: only the low size_ bits are significant.
      if (size_ < BITS_PER_WORD) {
            unsigned long live = (1UL << size_) - 1;
            if (((abits_val_|bbits_val_)&live) != ((that.abits_val_|that.bbits_val_)&live))
                  return false;
            return (bbits_val_&live) == (that.bbits_val_&live);
      }

        // Multiple words: every completely used word must match.
      unsigned full_words = size_ / BITS_PER_WORD;
      unsigned wdx = 0;
      while (wdx < full_words) {
            bool same_ab = (abits_ptr_[wdx]|bbits_ptr_[wdx]) == (that.abits_ptr_[wdx]|that.bbits_ptr_[wdx]);
            bool same_b = bbits_ptr_[wdx] == that.bbits_ptr_[wdx];
            if (!same_ab || !same_b)
                  return false;
            wdx += 1;
      }

        // Then compare the significant bits of the trailing partial
        // word, if there is one.
      unsigned long tail = size_%BITS_PER_WORD;
      if (tail == 0)
            return true;

      tail = (1UL << tail) - 1;
      if (((abits_ptr_[full_words]|bbits_ptr_[full_words])&tail) != ((that.abits_ptr_[full_words]|that.bbits_ptr_[full_words])&tail))
            return false;
      return (bbits_ptr_[full_words]&tail) == (that.bbits_ptr_[full_words]&tail);
}
bool vvp_vector4_t::has_xz() const bool vvp_vector4_t::has_xz() const
{ {
if (size_ < BITS_PER_WORD) { if (size_ < BITS_PER_WORD) {

View File

@ -40,7 +40,6 @@ class vvp_net_t;
class vvp_net_fun_t; class vvp_net_fun_t;
/* Core net function types. */ /* Core net function types. */
class vvp_fun_concat;
class vvp_fun_drive; class vvp_fun_drive;
class vvp_fun_part; class vvp_fun_part;
@ -154,6 +153,8 @@ class vvp_vector4_t {
// Test that the vectors are exactly equal // Test that the vectors are exactly equal
bool eeq(const vvp_vector4_t&that) const; bool eeq(const vvp_vector4_t&that) const;
// Test that the vectors are equal, with x and z comparing equal.
bool eq_xz(const vvp_vector4_t&that) const;
// Return true if there is an X or Z anywhere in the vector. // Return true if there is an X or Z anywhere in the vector.
bool has_xz() const; bool has_xz() const;
@ -791,31 +792,6 @@ class vvp_net_fun_t {
/* **** Some core net functions **** */ /* **** Some core net functions **** */
/* vvp_fun_concat
* This node function creates vectors (vvp_vector4_t) from the
* concatenation of the inputs. The inputs (4) may be vector or
* vector8 objects, but they are reduced to vector4 values and
* strength information lost.
*
* The expected widths of the input vectors must be given up front so
* that the positions in the output vector (and also the size of the
* output vector) can be worked out. The input vectors must match the
* expected width.
*
* In this form of the class, recv_vec4 copies the received bits into
* the stored output vector and sends that vector immediately.
*/
class vvp_fun_concat : public vvp_net_fun_t {
public:
// w0..w3 are the expected widths of input ports 0..3.
vvp_fun_concat(unsigned w0, unsigned w1,
unsigned w2, unsigned w3);
~vvp_fun_concat();
// Receive a new value for one input port. The width of the
// value must equal the expected width for that port.
void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
private:
// Expected width of each of the four inputs.
unsigned wid_[4];
// Current concatenated output value; its width is the sum of
// the four input widths.
vvp_vector4_t val_;
};
/* vvp_fun_repeat /* vvp_fun_repeat
* This node function create vectors by repeating the input. The width * This node function create vectors by repeating the input. The width
* is the width of the output vector, and the repeat is the number of * is the width of the output vector, and the repeat is the number of