Some instructions can do stack manipultations in place.
By doing some stack manipulations in place, certain instructions can eliminate, or optimize, vector copies.
This commit is contained in:
parent
c2ca9c3b73
commit
aadd67cd3b
|
|
@ -132,6 +132,12 @@ struct vthread_s {
|
|||
unsigned use_index = stack_vec4_.size()-1-depth;
|
||||
return stack_vec4_[use_index];
|
||||
}
|
||||
inline vvp_vector4_t& peek_vec4(void)
|
||||
{
|
||||
assert(! stack_vec4_.empty());
|
||||
unsigned use_index = stack_vec4_.size()-1;
|
||||
return stack_vec4_[use_index];
|
||||
}
|
||||
inline void pop_vec4(unsigned cnt)
|
||||
{
|
||||
while (cnt > 0) {
|
||||
|
|
@ -894,10 +900,23 @@ bool of_AND(vthread_t thr, vvp_code_t)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* %add
|
||||
*
|
||||
* Pop r,
|
||||
* Pop l,
|
||||
* Push l+r
|
||||
*
|
||||
* Pop 2 and push 1 is the same as pop 1 and replace the remaining top
|
||||
* of the stack with a new value. That is what we will do.
|
||||
*/
|
||||
bool of_ADD(vthread_t thr, vvp_code_t)
|
||||
{
|
||||
vvp_vector4_t r = thr->pop_vec4();
|
||||
vvp_vector4_t l = thr->pop_vec4();
|
||||
// Rather then pop l, use it directly from the stack. When we
|
||||
// assign to 'l', that will edit the top of the stack, which
|
||||
// replaces a pop and a pull.
|
||||
vvp_vector4_t&l = thr->peek_vec4();
|
||||
|
||||
unsigned wid = l.size();
|
||||
assert(wid == r.size());
|
||||
|
|
@ -914,8 +933,6 @@ bool of_ADD(vthread_t thr, vvp_code_t)
|
|||
|
||||
l.setarray(0,wid,lva);
|
||||
|
||||
thr->push_vec4(l);
|
||||
|
||||
delete[]lva;
|
||||
delete[]lvb;
|
||||
return true;
|
||||
|
|
@ -925,7 +942,7 @@ bool of_ADD(vthread_t thr, vvp_code_t)
|
|||
delete[]lvb;
|
||||
|
||||
vvp_vector4_t tmp (wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
l = tmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1504,8 +1521,11 @@ bool of_CMPS(vthread_t thr, vvp_code_t)
|
|||
vvp_bit4_t eeq = BIT4_1;
|
||||
vvp_bit4_t lt = BIT4_0;
|
||||
|
||||
vvp_vector4_t rval = thr->pop_vec4();
|
||||
vvp_vector4_t lval = thr->pop_vec4();
|
||||
// We are going to pop these and push nothing in their
|
||||
// place, but for now it is more efficient to use a constant
|
||||
// reference. When we finish, pop the stack without copies.
|
||||
const vvp_vector4_t&rval = thr->peek_vec4(0);
|
||||
const vvp_vector4_t&lval = thr->peek_vec4(1);
|
||||
|
||||
assert(rval.size() == lval.size());
|
||||
|
||||
|
|
@ -1516,6 +1536,7 @@ bool of_CMPS(vthread_t thr, vvp_code_t)
|
|||
thr->flags[4] = BIT4_X; // eq
|
||||
thr->flags[5] = BIT4_X; // lt
|
||||
thr->flags[6] = lval.eeq(rval)? BIT4_1 : BIT4_0;
|
||||
thr->pop_vec4(2);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1559,6 +1580,7 @@ bool of_CMPS(vthread_t thr, vvp_code_t)
|
|||
thr->flags[5] = lt;
|
||||
thr->flags[6] = eeq;
|
||||
|
||||
thr->pop_vec4(2);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -2736,10 +2758,17 @@ bool of_FREE(vthread_t thr, vvp_code_t cp)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* %inv
|
||||
*
|
||||
* Logically, this pops a value, inverts is (Verilog style, with Z and
|
||||
* X converted to X) and pushes the result. We can more efficiently
|
||||
* just to the invert in place.
|
||||
*/
|
||||
bool of_INV(vthread_t thr, vvp_code_t)
|
||||
{
|
||||
vvp_vector4_t val = thr->pop_vec4();
|
||||
thr->push_vec4(~val);
|
||||
vvp_vector4_t&val = thr->peek_vec4();
|
||||
val.invert();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -3419,7 +3448,7 @@ bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp)
|
|||
}
|
||||
|
||||
static void do_verylong_mod(vthread_t thr,
|
||||
const vvp_vector4_t&vala, const vvp_vector4_t&valb,
|
||||
vvp_vector4_t&vala, const vvp_vector4_t&valb,
|
||||
bool left_is_neg, bool right_is_neg)
|
||||
{
|
||||
bool out_is_neg = left_is_neg;
|
||||
|
|
@ -3447,7 +3476,7 @@ static void do_verylong_mod(vthread_t thr,
|
|||
delete []z;
|
||||
delete []a;
|
||||
vvp_vector4_t tmp(len, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
vala = tmp;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -3489,7 +3518,7 @@ static void do_verylong_mod(vthread_t thr,
|
|||
delete []z;
|
||||
delete []a;
|
||||
vvp_vector4_t tmpx (len, BIT4_X);
|
||||
thr->push_vec4(tmpx);
|
||||
vala = tmpx;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -3529,7 +3558,7 @@ static void do_verylong_mod(vthread_t thr,
|
|||
}
|
||||
tmp.set_bit(idx, ob?BIT4_1:BIT4_0);
|
||||
}
|
||||
thr->push_vec4(tmp);
|
||||
vala = tmp;
|
||||
delete []t;
|
||||
delete []z;
|
||||
delete []a;
|
||||
|
|
@ -3568,7 +3597,7 @@ bool of_MIN_WR(vthread_t thr, vvp_code_t)
|
|||
bool of_MOD(vthread_t thr, vvp_code_t)
|
||||
{
|
||||
vvp_vector4_t valb = thr->pop_vec4();
|
||||
vvp_vector4_t vala = thr->pop_vec4();
|
||||
vvp_vector4_t&vala = thr->peek_vec4();
|
||||
|
||||
assert(vala.size()==valb.size());
|
||||
unsigned wid = vala.size();
|
||||
|
|
@ -3596,7 +3625,6 @@ bool of_MOD(vthread_t thr, vvp_code_t)
|
|||
vala.set_bit(idx, (lv&1)?BIT4_1 : BIT4_0);
|
||||
lv >>= 1;
|
||||
}
|
||||
thr->push_vec4(vala);
|
||||
|
||||
return true;
|
||||
|
||||
|
|
@ -3606,8 +3634,7 @@ bool of_MOD(vthread_t thr, vvp_code_t)
|
|||
}
|
||||
|
||||
x_out:
|
||||
vvp_vector4_t tmp (wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
vala = vvp_vector4_t(wid, BIT4_X);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -3617,7 +3644,7 @@ bool of_MOD(vthread_t thr, vvp_code_t)
|
|||
bool of_MOD_S(vthread_t thr, vvp_code_t)
|
||||
{
|
||||
vvp_vector4_t valb = thr->pop_vec4();
|
||||
vvp_vector4_t vala = thr->pop_vec4();
|
||||
vvp_vector4_t&vala = thr->peek_vec4();
|
||||
|
||||
assert(vala.size()==valb.size());
|
||||
unsigned wid = vala.size();
|
||||
|
|
@ -3655,7 +3682,9 @@ bool of_MOD_S(vthread_t thr, vvp_code_t)
|
|||
vala.set_bit(idx, (lv&1)? BIT4_1 : BIT4_0);
|
||||
lv >>= 1;
|
||||
}
|
||||
thr->push_vec4(vala);
|
||||
|
||||
// vala is the top of the stack, edited in place, so we
|
||||
// do not need to push the result.
|
||||
|
||||
return true;
|
||||
|
||||
|
|
@ -3668,8 +3697,7 @@ bool of_MOD_S(vthread_t thr, vvp_code_t)
|
|||
}
|
||||
|
||||
x_out:
|
||||
vvp_vector4_t tmp (wid, BIT4_X);
|
||||
thr->push_vec4(tmp);
|
||||
vala = vvp_vector4_t(wid, BIT4_X);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue