Performance optimizations

For the %mov instruction, implement a vvp_vector4_t::mov method to
manipulate the thread vector directly.

For the %load/v instruction, rework the vec4_value() methods to
avoid creating vvp_vector4_t temporaries, and therefore reduce the
copy overhead.
This commit is contained in:
Stephen Williams 2009-11-20 17:54:48 -08:00
parent 0fc136fad9
commit 971179d617
8 changed files with 121 additions and 42 deletions

View File

@ -931,7 +931,8 @@ vvp_vector4_t array_get_word(vvp_array_t arr, unsigned address)
vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (vsig->node->fil);
assert(sig);
vvp_vector4_t val = sig->vec4_value();
vvp_vector4_t val;
sig->vec4_value(val);
return val;
}

View File

@ -304,9 +304,11 @@ static void format_vpiDecStrVal(vvp_signal_value*sig, int base, unsigned wid,
vvp_vector4_t vec4;
if (base == 0 && end == ssize) {
vec4 = sig->vec4_value();
sig->vec4_value(vec4);
} else {
vec4 = sig->vec4_value().subvalue(base, wid);
vvp_vector4_t tmp;
sig->vec4_value(tmp);
vec4 = tmp.subvalue(base, wid);
}
vpip_vec4_to_dec_str(vec4, rbuf, hwid, signed_flag);
@ -317,7 +319,9 @@ static void format_vpiDecStrVal(vvp_signal_value*sig, int base, unsigned wid,
static void format_vpiIntVal(vvp_signal_value*sig, int base, unsigned wid,
int signed_flag, s_vpi_value*vp)
{
vvp_vector4_t sub = sig->vec4_value().subvalue(base, wid);
vvp_vector4_t tmp;
sig->vec4_value(tmp);
vvp_vector4_t sub = tmp.subvalue(base, wid);
long val = 0;
vector4_to_value(sub, val, signed_flag, false);
vp->value.integer = val;

View File

@ -2688,7 +2688,8 @@ bool of_IX_GETV(vthread_t thr, vvp_code_t cp)
}
assert(sig);
vvp_vector4_t vec = sig->vec4_value();
vvp_vector4_t vec;
sig->vec4_value(vec);
unsigned long val;
bool known_flag = vector4_to_value(vec, val);
@ -2717,7 +2718,8 @@ bool of_IX_GETVS(vthread_t thr, vvp_code_t cp)
}
assert(sig);
vvp_vector4_t vec = sig->vec4_value();
vvp_vector4_t vec;
sig->vec4_value(vec);
long val;
bool known_flag = vector4_to_value(vec, val, true, true);
@ -3055,7 +3057,7 @@ bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t cp)
* The functor to read from is the vvp_net_t object pointed to by the
* cp->net pointer.
*/
static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
static void load_base(vthread_t thr, vvp_code_t cp, vvp_vector4_t&dst)
{
vvp_net_t*net = cp->net;
@ -3068,7 +3070,7 @@ static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
assert(sig);
}
return sig->vec4_value();
sig->vec4_value(dst);
}
bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
@ -3076,7 +3078,8 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
unsigned bit = cp->bit_idx[0];
unsigned wid = cp->bit_idx[1];
vvp_vector4_t sig_value = load_base(thr, cp);
vvp_vector4_t sig_value;
load_base(thr, cp, sig_value);
/* Check the address once, before we scan the vector. */
thr_check_addr(thr, bit+wid-1);
@ -3108,7 +3111,10 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
/* We need a vector this wide to make the math work correctly.
* Copy the base bits into the vector, but keep the width. */
vvp_vector4_t sig_value(wid, BIT4_0);
sig_value.copy_bits(load_base(thr, cp));
vvp_vector4_t tmp;
load_base(thr, cp, tmp);
sig_value.copy_bits(tmp);
load_vp0_common(thr, cp, sig_value);
return true;
@ -3118,7 +3124,8 @@ bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp)
{
unsigned wid = cp->bit_idx[1];
vvp_vector4_t tmp (load_base(thr, cp));
vvp_vector4_t tmp;
load_base(thr, cp, tmp);
/* We need a vector this wide to make the math work correctly.
* Copy the base bits into the vector, but keep the width. */
@ -3483,10 +3490,8 @@ static bool of_MOV_(vthread_t thr, vvp_code_t cp)
thr_check_addr(thr, cp->bit_idx[0]+cp->number-1);
thr_check_addr(thr, cp->bit_idx[1]+cp->number-1);
// Read the source vector out
vvp_vector4_t tmp (thr->bits4, cp->bit_idx[1], cp->number);
// Write it in the new place.
thr->bits4.set_vec(cp->bit_idx[0], tmp);
thr->bits4.mov(cp->bit_idx[0], cp->bit_idx[1], cp->number);
return true;
}
@ -4262,15 +4267,17 @@ bool of_SHIFTR_I0(vthread_t thr, vvp_code_t cp)
vvp_vector4_t tmp (wid, BIT4_X);
thr->bits4.set_vec(base, tmp);
} else if (shift > wid) {
// Shift so far that the entire vector is shifted out.
vvp_vector4_t tmp (wid, BIT4_0);
thr->bits4.set_vec(base, tmp);
} else if (shift > 0) {
unsigned idx;
for (idx = 0 ; (idx+shift) < wid ; idx += 1) {
unsigned src = base + idx + shift;
unsigned dst = base + idx;
thr_put_bit(thr, dst, thr_get_bit(thr, src));
}
for ( ; idx < wid ; idx += 1)
thr_put_bit(thr, base+idx, BIT4_0);
// The mov method should handle overlapped source/dest
thr->bits4.mov(base, base+shift, wid-shift);
vvp_vector4_t tmp (shift, BIT4_0);
thr->bits4.set_vec(base+wid-shift, tmp);
} else if (shift < -(long)wid) {
// Negative shift is so far that all the value is shifted out.

View File

@ -1131,6 +1131,63 @@ void vvp_vector4_t::set_vec(unsigned adr, const vvp_vector4_t&that)
}
/*
 * Move cnt bits within this vector: the bits at positions
 * [src, src+cnt) are copied to positions [dst, dst+cnt). Both the
 * "a" and "b" bit planes are moved. Bits outside the destination
 * range are left untouched.
 *
 * The source and destination ranges may overlap in either direction;
 * the result is as if the source bits had first been copied to a
 * temporary (memmove-like semantics).
 *
 * Both ranges must lie entirely inside the vector (asserted).
 */
void vvp_vector4_t::mov(unsigned dst, unsigned src, unsigned cnt)
{
      assert(dst+cnt <= size_);
      assert(src+cnt <= size_);

        // Nothing to move. Bailing out here also guarantees that
        // src and dst are valid bit positions for the shifts below
        // (with cnt==0 they are allowed to equal size_).
      if (cnt == 0 || dst == src)
            return;

      if (size_ <= BITS_PER_WORD) {
              // The whole vector is held in the single abits_val_ /
              // bbits_val_ words. The source field is read in full
              // before the destination is written, so overlap is
              // harmless here. Shifting by the full word width is
              // undefined, so build the all-ones mask explicitly.
            const unsigned long vmask = (cnt < BITS_PER_WORD)
                  ? (1UL << cnt) - 1UL
                  : ~0UL;
            unsigned long tmp;

            tmp = (abits_val_ >> src) & vmask;
            abits_val_ &= ~ (vmask << dst);
            abits_val_ |= tmp << dst;

            tmp = (bbits_val_ >> src) & vmask;
            bbits_val_ &= ~ (vmask << dst);
            bbits_val_ |= tmp << dst;
            return;
      }

        // Multi-word case: move the bits in chunks, where each chunk
        // fits inside one source word and one destination word. When
        // the destination is above the source, work from the top of
        // the range downwards so that a chunk never overwrites
        // source bits that a later chunk still has to read.
      const bool descending = dst > src;

      while (cnt > 0) {
            unsigned trans = cnt;
            unsigned spos;
            unsigned dpos;

            if (descending) {
                    // Bits available at and below the last bit of the
                    // remaining range, within that bit's word.
                  unsigned savail = (src + cnt - 1) % BITS_PER_WORD + 1;
                  unsigned davail = (dst + cnt - 1) % BITS_PER_WORD + 1;
                  if (trans > savail) trans = savail;
                  if (trans > davail) trans = davail;

                  spos = src + cnt - trans;
                  dpos = dst + cnt - trans;
            } else {
                    // Bits available from the first bit of the
                    // remaining range up to the end of its word.
                  unsigned savail = BITS_PER_WORD - src % BITS_PER_WORD;
                  unsigned davail = BITS_PER_WORD - dst % BITS_PER_WORD;
                  if (trans > savail) trans = savail;
                  if (trans > davail) trans = davail;

                  spos = src;
                  dpos = dst;
                  src += trans;
                  dst += trans;
            }

            const unsigned sptr = spos / BITS_PER_WORD;
            const unsigned soff = spos % BITS_PER_WORD;
            const unsigned dptr = dpos / BITS_PER_WORD;
            const unsigned doff = dpos % BITS_PER_WORD;

              // trans equals BITS_PER_WORD when a whole aligned word
              // is moved; (1UL << BITS_PER_WORD) is undefined, so
              // that case gets the all-ones mask directly.
            const unsigned long vmask = (trans < BITS_PER_WORD)
                  ? (1UL << trans) - 1UL
                  : ~0UL;
            unsigned long tmp;

            tmp = (abits_ptr_[sptr] >> soff) & vmask;
            abits_ptr_[dptr] &= ~ (vmask << doff);
            abits_ptr_[dptr] |= tmp << doff;

            tmp = (bbits_ptr_[sptr] >> soff) & vmask;
            bbits_ptr_[dptr] &= ~ (vmask << doff);
            bbits_ptr_[dptr] |= tmp << doff;

            cnt -= trans;
      }
}
bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
{
if (size_ != that.size_)

View File

@ -234,6 +234,9 @@ class vvp_vector4_t {
// Get the bits from another vector, but keep my size.
void copy_bits(const vvp_vector4_t&that);
// Move bits within this vector.
void mov(unsigned dst, unsigned src, unsigned cnt);
// Test that the vectors are exactly equal
bool eeq(const vvp_vector4_t&that) const;

View File

@ -380,17 +380,19 @@ vvp_scalar_t vvp_fun_signal4_aa::scalar_value(unsigned idx) const
return vvp_scalar_t(bits4->value(idx), 6, 6);
}
vvp_vector4_t vvp_fun_signal4_aa::vec4_value() const
void vvp_fun_signal4_aa::vec4_value(vvp_vector4_t&val) const
{
vvp_vector4_t*bits4 = static_cast<vvp_vector4_t*>
(vthread_get_rd_context_item(context_idx_));
return *bits4;
val = *bits4;
}
vvp_vector4_t vvp_fun_signal4_aa::vec4_unfiltered_value() const
{
return vec4_value();
vvp_vector4_t tmp;
vec4_value(tmp);
return tmp;
}
void vvp_fun_signal4_aa::operator delete(void*)
@ -597,7 +599,7 @@ vvp_scalar_t vvp_fun_signal_real_aa::scalar_value(unsigned idx) const
assert(0);
}
vvp_vector4_t vvp_fun_signal_real_aa::vec4_value() const
void vvp_fun_signal_real_aa::vec4_value(vvp_vector4_t&) const
{
assert(0);
}
@ -787,12 +789,14 @@ vvp_scalar_t vvp_wire_vec4::scalar_value(unsigned idx) const
return vvp_scalar_t(value(idx),6,6);
}
vvp_vector4_t vvp_wire_vec4::vec4_value() const
void vvp_wire_vec4::vec4_value(vvp_vector4_t&val) const
{
vvp_vector4_t tmp = bits4_;
val = bits4_;
if (test_force_mask_is_zero())
return;
for (unsigned idx = 0 ; idx < bits4_.size() ; idx += 1)
tmp.set_bit(idx, filtered_value_(idx));
return tmp;
val.set_bit(idx, filtered_value_(idx));
}
vvp_wire_vec8::vvp_wire_vec8(unsigned wid)
@ -930,9 +934,9 @@ vvp_vector8_t vvp_wire_vec8::vec8_value() const
return tmp;
}
vvp_vector4_t vvp_wire_vec8::vec4_value() const
void vvp_wire_vec8::vec4_value(vvp_vector4_t&val) const
{
return reduce4(vec8_value());
val = reduce4(vec8_value());
}
vvp_wire_real::vvp_wire_real()
@ -1011,7 +1015,7 @@ vvp_scalar_t vvp_wire_real::scalar_value(unsigned idx) const
assert(0);
}
vvp_vector4_t vvp_wire_real::vec4_value() const
void vvp_wire_real::vec4_value(vvp_vector4_t&) const
{
assert(0);
}

View File

@ -99,7 +99,7 @@ class vvp_signal_value {
virtual unsigned value_size() const =0;
virtual vvp_bit4_t value(unsigned idx) const =0;
virtual vvp_scalar_t scalar_value(unsigned idx) const =0;
virtual vvp_vector4_t vec4_value() const =0;
virtual void vec4_value(vvp_vector4_t&) const =0;
virtual double real_value() const;
virtual void get_signal_value(struct t_vpi_value*vp);
@ -182,7 +182,7 @@ class vvp_fun_signal4_aa : public vvp_fun_signal_vec, public automatic_signal_ba
unsigned value_size() const;
vvp_bit4_t value(unsigned idx) const;
vvp_scalar_t scalar_value(unsigned idx) const;
vvp_vector4_t vec4_value() const;
void vec4_value(vvp_vector4_t&) const;
vvp_vector4_t vec4_unfiltered_value() const;
public: // These objects are only permallocated.
@ -268,7 +268,7 @@ class vvp_fun_signal_real_aa : public vvp_fun_signal_real, public automatic_sign
unsigned value_size() const;
vvp_bit4_t value(unsigned idx) const;
vvp_scalar_t scalar_value(unsigned idx) const;
vvp_vector4_t vec4_value() const;
void vec4_value(vvp_vector4_t&) const;
double real_value() const;
void get_signal_value(struct t_vpi_value*vp);
@ -322,7 +322,7 @@ class vvp_wire_vec4 : public vvp_wire_base {
unsigned value_size() const;
vvp_bit4_t value(unsigned idx) const;
vvp_scalar_t scalar_value(unsigned idx) const;
vvp_vector4_t vec4_value() const;
void vec4_value(vvp_vector4_t&) const;
private:
vvp_bit4_t filtered_value_(unsigned idx) const;
@ -358,7 +358,7 @@ class vvp_wire_vec8 : public vvp_wire_base {
unsigned value_size() const;
vvp_bit4_t value(unsigned idx) const;
vvp_scalar_t scalar_value(unsigned idx) const;
vvp_vector4_t vec4_value() const;
void vec4_value(vvp_vector4_t&) const;
// This is new to vvp_wire_vec8
vvp_vector8_t vec8_value() const;
@ -393,7 +393,7 @@ class vvp_wire_real : public vvp_wire_base {
unsigned value_size() const;
vvp_bit4_t value(unsigned idx) const;
vvp_scalar_t scalar_value(unsigned idx) const;
vvp_vector4_t vec4_value() const;
void vec4_value(vvp_vector4_t&) const;
double real_value() const;
void get_signal_value(struct t_vpi_value*vp);

View File

@ -118,8 +118,11 @@ static void __compile_var(char*label, char*name,
if (name) {
assert(!array);
if (obj) vpip_attach_to_current_scope(obj);
if (!vpip_peek_current_scope()->is_automatic)
schedule_init_vector(vvp_net_ptr_t(net,0), vfil->vec4_value());
if (!vpip_peek_current_scope()->is_automatic) {
vvp_vector4_t tmp;
vfil->vec4_value(tmp);
schedule_init_vector(vvp_net_ptr_t(net,0), tmp);
}
}
// If this is an array word, then it does not have a name, and
// it is attached to the addressed array.