Performance optimizations
For the %mov instruction, implement a vvp_vector4_t::mov method to manipulate the thread vector directly. For the %load/v instruction, rework the vec4_value() methods to avoid creating vvp_vector4_t temporaries, and therefore reduce the copy overhead.
This commit is contained in:
parent
0fc136fad9
commit
971179d617
|
|
@ -931,7 +931,8 @@ vvp_vector4_t array_get_word(vvp_array_t arr, unsigned address)
|
|||
vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (vsig->node->fil);
|
||||
assert(sig);
|
||||
|
||||
vvp_vector4_t val = sig->vec4_value();
|
||||
vvp_vector4_t val;
|
||||
sig->vec4_value(val);
|
||||
return val;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -304,9 +304,11 @@ static void format_vpiDecStrVal(vvp_signal_value*sig, int base, unsigned wid,
|
|||
|
||||
vvp_vector4_t vec4;
|
||||
if (base == 0 && end == ssize) {
|
||||
vec4 = sig->vec4_value();
|
||||
sig->vec4_value(vec4);
|
||||
} else {
|
||||
vec4 = sig->vec4_value().subvalue(base, wid);
|
||||
vvp_vector4_t tmp;
|
||||
sig->vec4_value(tmp);
|
||||
vec4 = tmp.subvalue(base, wid);
|
||||
}
|
||||
|
||||
vpip_vec4_to_dec_str(vec4, rbuf, hwid, signed_flag);
|
||||
|
|
@ -317,7 +319,9 @@ static void format_vpiDecStrVal(vvp_signal_value*sig, int base, unsigned wid,
|
|||
static void format_vpiIntVal(vvp_signal_value*sig, int base, unsigned wid,
|
||||
int signed_flag, s_vpi_value*vp)
|
||||
{
|
||||
vvp_vector4_t sub = sig->vec4_value().subvalue(base, wid);
|
||||
vvp_vector4_t tmp;
|
||||
sig->vec4_value(tmp);
|
||||
vvp_vector4_t sub = tmp.subvalue(base, wid);
|
||||
long val = 0;
|
||||
vector4_to_value(sub, val, signed_flag, false);
|
||||
vp->value.integer = val;
|
||||
|
|
|
|||
|
|
@ -2688,7 +2688,8 @@ bool of_IX_GETV(vthread_t thr, vvp_code_t cp)
|
|||
}
|
||||
assert(sig);
|
||||
|
||||
vvp_vector4_t vec = sig->vec4_value();
|
||||
vvp_vector4_t vec;
|
||||
sig->vec4_value(vec);
|
||||
unsigned long val;
|
||||
bool known_flag = vector4_to_value(vec, val);
|
||||
|
||||
|
|
@ -2717,7 +2718,8 @@ bool of_IX_GETVS(vthread_t thr, vvp_code_t cp)
|
|||
}
|
||||
assert(sig);
|
||||
|
||||
vvp_vector4_t vec = sig->vec4_value();
|
||||
vvp_vector4_t vec;
|
||||
sig->vec4_value(vec);
|
||||
long val;
|
||||
bool known_flag = vector4_to_value(vec, val, true, true);
|
||||
|
||||
|
|
@ -3055,7 +3057,7 @@ bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t cp)
|
|||
* The functor to read from is the vvp_net_t object pointed to by the
|
||||
* cp->net pointer.
|
||||
*/
|
||||
static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
|
||||
static void load_base(vthread_t thr, vvp_code_t cp, vvp_vector4_t&dst)
|
||||
{
|
||||
vvp_net_t*net = cp->net;
|
||||
|
||||
|
|
@ -3068,7 +3070,7 @@ static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
|
|||
assert(sig);
|
||||
}
|
||||
|
||||
return sig->vec4_value();
|
||||
sig->vec4_value(dst);
|
||||
}
|
||||
|
||||
bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
|
||||
|
|
@ -3076,7 +3078,8 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
|
|||
unsigned bit = cp->bit_idx[0];
|
||||
unsigned wid = cp->bit_idx[1];
|
||||
|
||||
vvp_vector4_t sig_value = load_base(thr, cp);
|
||||
vvp_vector4_t sig_value;
|
||||
load_base(thr, cp, sig_value);
|
||||
|
||||
/* Check the address once, before we scan the vector. */
|
||||
thr_check_addr(thr, bit+wid-1);
|
||||
|
|
@ -3108,7 +3111,10 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
|||
/* We need a vector this wide to make the math work correctly.
|
||||
* Copy the base bits into the vector, but keep the width. */
|
||||
vvp_vector4_t sig_value(wid, BIT4_0);
|
||||
sig_value.copy_bits(load_base(thr, cp));
|
||||
|
||||
vvp_vector4_t tmp;
|
||||
load_base(thr, cp, tmp);
|
||||
sig_value.copy_bits(tmp);
|
||||
|
||||
load_vp0_common(thr, cp, sig_value);
|
||||
return true;
|
||||
|
|
@ -3118,7 +3124,8 @@ bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp)
|
|||
{
|
||||
unsigned wid = cp->bit_idx[1];
|
||||
|
||||
vvp_vector4_t tmp (load_base(thr, cp));
|
||||
vvp_vector4_t tmp;
|
||||
load_base(thr, cp, tmp);
|
||||
|
||||
/* We need a vector this wide to make the math work correctly.
|
||||
* Copy the base bits into the vector, but keep the width. */
|
||||
|
|
@ -3483,10 +3490,8 @@ static bool of_MOV_(vthread_t thr, vvp_code_t cp)
|
|||
|
||||
thr_check_addr(thr, cp->bit_idx[0]+cp->number-1);
|
||||
thr_check_addr(thr, cp->bit_idx[1]+cp->number-1);
|
||||
// Read the source vector out
|
||||
vvp_vector4_t tmp (thr->bits4, cp->bit_idx[1], cp->number);
|
||||
// Write it in the new place.
|
||||
thr->bits4.set_vec(cp->bit_idx[0], tmp);
|
||||
|
||||
thr->bits4.mov(cp->bit_idx[0], cp->bit_idx[1], cp->number);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -4262,15 +4267,17 @@ bool of_SHIFTR_I0(vthread_t thr, vvp_code_t cp)
|
|||
vvp_vector4_t tmp (wid, BIT4_X);
|
||||
thr->bits4.set_vec(base, tmp);
|
||||
|
||||
} else if (shift > wid) {
|
||||
// Shift so far that the entire vector is shifted out.
|
||||
vvp_vector4_t tmp (wid, BIT4_0);
|
||||
thr->bits4.set_vec(base, tmp);
|
||||
|
||||
} else if (shift > 0) {
|
||||
unsigned idx;
|
||||
for (idx = 0 ; (idx+shift) < wid ; idx += 1) {
|
||||
unsigned src = base + idx + shift;
|
||||
unsigned dst = base + idx;
|
||||
thr_put_bit(thr, dst, thr_get_bit(thr, src));
|
||||
}
|
||||
for ( ; idx < wid ; idx += 1)
|
||||
thr_put_bit(thr, base+idx, BIT4_0);
|
||||
// The mov method should handle overlapped source/dest
|
||||
thr->bits4.mov(base, base+shift, wid-shift);
|
||||
|
||||
vvp_vector4_t tmp (shift, BIT4_0);
|
||||
thr->bits4.set_vec(base+wid-shift, tmp);
|
||||
|
||||
} else if (shift < -(long)wid) {
|
||||
// Negative shift is so far that all the value is shifted out.
|
||||
|
|
|
|||
|
|
@ -1131,6 +1131,63 @@ void vvp_vector4_t::set_vec(unsigned adr, const vvp_vector4_t&that)
|
|||
|
||||
}
|
||||
|
||||
void vvp_vector4_t::mov(unsigned dst, unsigned src, unsigned cnt)
|
||||
{
|
||||
assert(dst+cnt <= size_);
|
||||
assert(src+cnt <= size_);
|
||||
|
||||
if (size_ <= BITS_PER_WORD) {
|
||||
unsigned long vmask = (1UL << cnt) - 1;
|
||||
unsigned long tmp;
|
||||
|
||||
tmp = (abits_val_ >> src) & vmask;
|
||||
abits_val_ &= ~ (vmask << dst);
|
||||
abits_val_ |= tmp << dst;
|
||||
|
||||
tmp = (bbits_val_ >> src) & vmask;
|
||||
bbits_val_ &= ~ (vmask << dst);
|
||||
bbits_val_ |= tmp << dst;
|
||||
|
||||
} else {
|
||||
unsigned sptr = src / BITS_PER_WORD;
|
||||
unsigned dptr = dst / BITS_PER_WORD;
|
||||
unsigned soff = src % BITS_PER_WORD;
|
||||
unsigned doff = dst % BITS_PER_WORD;
|
||||
|
||||
while (cnt > 0) {
|
||||
unsigned trans = cnt;
|
||||
if ((soff+trans) > BITS_PER_WORD)
|
||||
trans = BITS_PER_WORD - soff;
|
||||
|
||||
if ((doff+trans) > BITS_PER_WORD)
|
||||
trans = BITS_PER_WORD - doff;
|
||||
|
||||
unsigned long vmask = (1UL << trans) - 1;
|
||||
unsigned long tmp;
|
||||
|
||||
tmp = (abits_ptr_[sptr] >> soff) & vmask;
|
||||
abits_ptr_[dptr] &= ~ (vmask << doff);
|
||||
abits_ptr_[dptr] |= tmp << doff;
|
||||
|
||||
tmp = (bbits_ptr_[sptr] >> soff) & vmask;
|
||||
bbits_ptr_[dptr] &= ~ (vmask << doff);
|
||||
bbits_ptr_[dptr] |= tmp << doff;
|
||||
|
||||
cnt -= trans;
|
||||
soff += trans;
|
||||
if (soff >= BITS_PER_WORD) {
|
||||
soff = 0;
|
||||
sptr += 1;
|
||||
}
|
||||
doff += trans;
|
||||
if (doff >= BITS_PER_WORD) {
|
||||
doff = 0;
|
||||
dptr += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
|
||||
{
|
||||
if (size_ != that.size_)
|
||||
|
|
|
|||
|
|
@ -234,6 +234,9 @@ class vvp_vector4_t {
|
|||
// Get the bits from another vector, but keep my size.
|
||||
void copy_bits(const vvp_vector4_t&that);
|
||||
|
||||
// Move bits within this vector.
|
||||
void mov(unsigned dst, unsigned src, unsigned cnt);
|
||||
|
||||
// Test that the vectors are exactly equal
|
||||
bool eeq(const vvp_vector4_t&that) const;
|
||||
|
||||
|
|
|
|||
|
|
@ -380,17 +380,19 @@ vvp_scalar_t vvp_fun_signal4_aa::scalar_value(unsigned idx) const
|
|||
return vvp_scalar_t(bits4->value(idx), 6, 6);
|
||||
}
|
||||
|
||||
vvp_vector4_t vvp_fun_signal4_aa::vec4_value() const
|
||||
void vvp_fun_signal4_aa::vec4_value(vvp_vector4_t&val) const
|
||||
{
|
||||
vvp_vector4_t*bits4 = static_cast<vvp_vector4_t*>
|
||||
(vthread_get_rd_context_item(context_idx_));
|
||||
|
||||
return *bits4;
|
||||
val = *bits4;
|
||||
}
|
||||
|
||||
vvp_vector4_t vvp_fun_signal4_aa::vec4_unfiltered_value() const
|
||||
{
|
||||
return vec4_value();
|
||||
vvp_vector4_t tmp;
|
||||
vec4_value(tmp);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void vvp_fun_signal4_aa::operator delete(void*)
|
||||
|
|
@ -597,7 +599,7 @@ vvp_scalar_t vvp_fun_signal_real_aa::scalar_value(unsigned idx) const
|
|||
assert(0);
|
||||
}
|
||||
|
||||
vvp_vector4_t vvp_fun_signal_real_aa::vec4_value() const
|
||||
void vvp_fun_signal_real_aa::vec4_value(vvp_vector4_t&) const
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
|
@ -787,12 +789,14 @@ vvp_scalar_t vvp_wire_vec4::scalar_value(unsigned idx) const
|
|||
return vvp_scalar_t(value(idx),6,6);
|
||||
}
|
||||
|
||||
vvp_vector4_t vvp_wire_vec4::vec4_value() const
|
||||
void vvp_wire_vec4::vec4_value(vvp_vector4_t&val) const
|
||||
{
|
||||
vvp_vector4_t tmp = bits4_;
|
||||
val = bits4_;
|
||||
if (test_force_mask_is_zero())
|
||||
return;
|
||||
|
||||
for (unsigned idx = 0 ; idx < bits4_.size() ; idx += 1)
|
||||
tmp.set_bit(idx, filtered_value_(idx));
|
||||
return tmp;
|
||||
val.set_bit(idx, filtered_value_(idx));
|
||||
}
|
||||
|
||||
vvp_wire_vec8::vvp_wire_vec8(unsigned wid)
|
||||
|
|
@ -930,9 +934,9 @@ vvp_vector8_t vvp_wire_vec8::vec8_value() const
|
|||
return tmp;
|
||||
}
|
||||
|
||||
vvp_vector4_t vvp_wire_vec8::vec4_value() const
|
||||
void vvp_wire_vec8::vec4_value(vvp_vector4_t&val) const
|
||||
{
|
||||
return reduce4(vec8_value());
|
||||
val = reduce4(vec8_value());
|
||||
}
|
||||
|
||||
vvp_wire_real::vvp_wire_real()
|
||||
|
|
@ -1011,7 +1015,7 @@ vvp_scalar_t vvp_wire_real::scalar_value(unsigned idx) const
|
|||
assert(0);
|
||||
}
|
||||
|
||||
vvp_vector4_t vvp_wire_real::vec4_value() const
|
||||
void vvp_wire_real::vec4_value(vvp_vector4_t&) const
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ class vvp_signal_value {
|
|||
virtual unsigned value_size() const =0;
|
||||
virtual vvp_bit4_t value(unsigned idx) const =0;
|
||||
virtual vvp_scalar_t scalar_value(unsigned idx) const =0;
|
||||
virtual vvp_vector4_t vec4_value() const =0;
|
||||
virtual void vec4_value(vvp_vector4_t&) const =0;
|
||||
virtual double real_value() const;
|
||||
|
||||
virtual void get_signal_value(struct t_vpi_value*vp);
|
||||
|
|
@ -182,7 +182,7 @@ class vvp_fun_signal4_aa : public vvp_fun_signal_vec, public automatic_signal_ba
|
|||
unsigned value_size() const;
|
||||
vvp_bit4_t value(unsigned idx) const;
|
||||
vvp_scalar_t scalar_value(unsigned idx) const;
|
||||
vvp_vector4_t vec4_value() const;
|
||||
void vec4_value(vvp_vector4_t&) const;
|
||||
vvp_vector4_t vec4_unfiltered_value() const;
|
||||
|
||||
public: // These objects are only permallocated.
|
||||
|
|
@ -268,7 +268,7 @@ class vvp_fun_signal_real_aa : public vvp_fun_signal_real, public automatic_sign
|
|||
unsigned value_size() const;
|
||||
vvp_bit4_t value(unsigned idx) const;
|
||||
vvp_scalar_t scalar_value(unsigned idx) const;
|
||||
vvp_vector4_t vec4_value() const;
|
||||
void vec4_value(vvp_vector4_t&) const;
|
||||
double real_value() const;
|
||||
void get_signal_value(struct t_vpi_value*vp);
|
||||
|
||||
|
|
@ -322,7 +322,7 @@ class vvp_wire_vec4 : public vvp_wire_base {
|
|||
unsigned value_size() const;
|
||||
vvp_bit4_t value(unsigned idx) const;
|
||||
vvp_scalar_t scalar_value(unsigned idx) const;
|
||||
vvp_vector4_t vec4_value() const;
|
||||
void vec4_value(vvp_vector4_t&) const;
|
||||
|
||||
private:
|
||||
vvp_bit4_t filtered_value_(unsigned idx) const;
|
||||
|
|
@ -358,7 +358,7 @@ class vvp_wire_vec8 : public vvp_wire_base {
|
|||
unsigned value_size() const;
|
||||
vvp_bit4_t value(unsigned idx) const;
|
||||
vvp_scalar_t scalar_value(unsigned idx) const;
|
||||
vvp_vector4_t vec4_value() const;
|
||||
void vec4_value(vvp_vector4_t&) const;
|
||||
// This is new to vvp_wire_vec8
|
||||
vvp_vector8_t vec8_value() const;
|
||||
|
||||
|
|
@ -393,7 +393,7 @@ class vvp_wire_real : public vvp_wire_base {
|
|||
unsigned value_size() const;
|
||||
vvp_bit4_t value(unsigned idx) const;
|
||||
vvp_scalar_t scalar_value(unsigned idx) const;
|
||||
vvp_vector4_t vec4_value() const;
|
||||
void vec4_value(vvp_vector4_t&) const;
|
||||
double real_value() const;
|
||||
|
||||
void get_signal_value(struct t_vpi_value*vp);
|
||||
|
|
|
|||
|
|
@ -118,8 +118,11 @@ static void __compile_var(char*label, char*name,
|
|||
if (name) {
|
||||
assert(!array);
|
||||
if (obj) vpip_attach_to_current_scope(obj);
|
||||
if (!vpip_peek_current_scope()->is_automatic)
|
||||
schedule_init_vector(vvp_net_ptr_t(net,0), vfil->vec4_value());
|
||||
if (!vpip_peek_current_scope()->is_automatic) {
|
||||
vvp_vector4_t tmp;
|
||||
vfil->vec4_value(tmp);
|
||||
schedule_init_vector(vvp_net_ptr_t(net,0), tmp);
|
||||
}
|
||||
}
|
||||
// If this is an array word, then it does not have a name, and
|
||||
// it is attached to the addressed array.
|
||||
|
|
|
|||
Loading…
Reference in New Issue