Optimize load-add with load/add instruction
Where an expression is an immediate value added to a signal value, it is possible to optimize it into a single instruction that performs the load and the add together.
This commit is contained in:
parent
68a9526fec
commit
8f519531f3
|
|
@ -27,6 +27,8 @@
|
|||
|
||||
static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
|
||||
int ok_flags);
|
||||
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
||||
int add_index);
|
||||
|
||||
int number_is_unknown(ivl_expr_t ex)
|
||||
{
|
||||
|
|
@ -998,6 +1000,28 @@ static struct vector_info draw_binary_expr_lrs(ivl_expr_t exp, unsigned wid)
|
|||
return lv;
|
||||
}
|
||||
|
||||
/*
 * Generate code for "signal + immediate" as a single combined load/add.
 *
 *   le  - the signal operand; it is handed to draw_signal_dest.
 *   re  - the constant operand; its value is read with
 *         get_number_immediate.
 *   wid - the number of thread bits to allocate for the result.
 *
 * Returns the vector_info describing the allocated result vector.
 *
 * NOTE(review): draw_signal_dest emits %load/vp0 using the width of the
 * signal expression, not wid. Confirm that a carry out of the signal
 * width is handled correctly when wid is wider than the signal.
 */
static struct vector_info draw_load_add_immediate(ivl_expr_t le,
                                                  ivl_expr_t re,
                                                  unsigned wid)
{
      struct vector_info res;
      unsigned long addend = get_number_immediate(re);

      /* Put the immediate value into word register 0. */
      fprintf(vvp_out, " %%ix/load 0, %lu;\n", addend);

      res.base = allocate_vector(wid);
      res.wid = wid;

      /* Passing add_index 0 asks draw_signal_dest for the load variant
         that adds word register 0 to the value being loaded. */
      draw_signal_dest(le, res, 0);

      return res;
}
|
||||
|
||||
static struct vector_info draw_add_immediate(ivl_expr_t le,
|
||||
ivl_expr_t re,
|
||||
unsigned wid)
|
||||
|
|
@ -1098,6 +1122,26 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid)
|
|||
|
||||
const char*sign_string = ivl_expr_signed(exp)? "/s" : "";
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
|
||||
&& (ivl_expr_type(re) == IVL_EX_ULONG))
|
||||
return draw_load_add_immediate(le, re, wid);
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
|
||||
&& (ivl_expr_type(re) == IVL_EX_NUMBER))
|
||||
return draw_load_add_immediate(le, re, wid);
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
|
||||
&& (ivl_expr_type(le) == IVL_EX_ULONG))
|
||||
return draw_load_add_immediate(re, le, wid);
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
|
||||
&& (ivl_expr_type(le) == IVL_EX_NUMBER))
|
||||
return draw_load_add_immediate(re, le, wid);
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(re) == IVL_EX_ULONG))
|
||||
return draw_add_immediate(le, re, wid);
|
||||
|
|
@ -1663,8 +1707,12 @@ static void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned s
|
|||
* into the thread bits. Remember to account for the part select by
|
||||
* offsetting the read from the lsi (least significant index) of the
|
||||
* signal.
|
||||
*
|
||||
* If the add_index is >=0, then generate a %load/vp0 to add the
|
||||
* word0 value to the loaded value before storing it into the destination.
|
||||
*/
|
||||
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res)
|
||||
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
||||
int add_index)
|
||||
{
|
||||
unsigned swid = ivl_expr_width(exp);
|
||||
ivl_signal_t sig = ivl_expr_signal(exp);
|
||||
|
|
@ -1679,6 +1727,7 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res)
|
|||
if (ivl_signal_array_count(sig) > 1) {
|
||||
ivl_expr_t ix = ivl_expr_oper1(exp);
|
||||
if (!number_is_immediate(ix, 8*sizeof(unsigned long))) {
|
||||
assert(add_index < 0);
|
||||
draw_eval_expr_into_integer(ix, 3);
|
||||
fprintf(vvp_out, " %%load/av %u, v%p, %u;\n",
|
||||
res.base, sig, swid);
|
||||
|
|
@ -1694,11 +1743,20 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res)
|
|||
|
||||
if (ivl_signal_data_type(sig) == IVL_VT_REAL) {
|
||||
|
||||
assert(add_index < 0);
|
||||
int tmp = allocate_word();
|
||||
fprintf(vvp_out, " %%load/wr %d, v%p_%u;\n", tmp, sig, word);
|
||||
fprintf(vvp_out, " %%cvt/vr %u, %d, %u;\n", res.base, tmp, res.wid);
|
||||
clr_word(tmp);
|
||||
|
||||
} else if (add_index >= 0) {
|
||||
|
||||
assert(add_index == 0);
|
||||
|
||||
/* Load the signal value and add word register 0 to it in one instruction. */
|
||||
fprintf(vvp_out, " %%load/vp0 %u, v%p_%u, %u;\n",
|
||||
res.base, sig, word, swid);
|
||||
|
||||
} else {
|
||||
|
||||
/* If this is a REG (a variable) then I can do a vector read. */
|
||||
|
|
@ -1730,7 +1788,7 @@ static struct vector_info draw_signal_expr(ivl_expr_t exp, unsigned wid,
|
|||
res.wid = wid;
|
||||
save_expression_lookaside(res.base, exp, wid);
|
||||
|
||||
draw_signal_dest(exp, res);
|
||||
draw_signal_dest(exp, res, -1);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
@ -2232,7 +2290,7 @@ static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
|
|||
switch (ivl_expr_type(exp)) {
|
||||
|
||||
case IVL_EX_SIGNAL:
|
||||
draw_signal_dest(exp, dest);
|
||||
draw_signal_dest(exp, dest, -1);
|
||||
return;
|
||||
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@ extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
|
|||
extern bool of_LOAD_MV(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_NX(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_X(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_XP(vthread_t thr, vvp_code_t code);
|
||||
|
|
|
|||
|
|
@ -141,6 +141,7 @@ const static struct opcode_table_s opcode_table[] = {
|
|||
{ "%load/mv",of_LOAD_MV,3, {OA_BIT1, OA_MEM_PTR, OA_BIT2} },
|
||||
{ "%load/nx",of_LOAD_NX,3, {OA_BIT1, OA_VPI_PTR, OA_BIT2} },
|
||||
{ "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||
{ "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||
{ "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} },
|
||||
{ "%load/x", of_LOAD_X, 3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||
{ "%load/x.p",of_LOAD_XP, 3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||
|
|
|
|||
|
|
@ -2188,13 +2188,14 @@ bool of_LOAD_NX(vthread_t thr, vvp_code_t cp)
|
|||
* The functor to read from is the vvp_net_t object pointed to by the
|
||||
* cp->net pointer.
|
||||
*/
|
||||
bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
|
||||
vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
assert(cp->bit_idx[0] >= 4);
|
||||
assert(cp->bit_idx[1] > 0);
|
||||
|
||||
unsigned bit = cp->bit_idx[0];
|
||||
unsigned wid = cp->bit_idx[1];
|
||||
int64_t addend = thr->words[0].w_int;
|
||||
vvp_net_t*net = cp->net;
|
||||
|
||||
/* For the %load to work, the functor must actually be a
|
||||
|
|
@ -2209,6 +2210,40 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
|
|||
vvp_vector4_t sig_value = sig->vec4_value();
|
||||
sig_value.resize(wid);
|
||||
|
||||
return sig_value;
|
||||
}
|
||||
|
||||
/*
 * Implement the plain vector load: fetch the value with load_base and
 * copy it into the thread bits starting at cp->bit_idx[0]. The width
 * of the destination is cp->bit_idx[1]. Always returns true.
 */
bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
{
      const unsigned dst_base = cp->bit_idx[0];
      const unsigned dst_wid = cp->bit_idx[1];

      vvp_vector4_t loaded = load_base(thr, cp);

      /* One address check for the last bit of the destination range
         covers the whole range. */
      thr_check_addr(thr, dst_base+dst_wid-1);

      /* Write the vector into bits4 directly, so that no further
         per-bit calls to thr_check_addr are made. */
      thr->bits4.set_vec(dst_base, loaded);

      return true;
}
|
||||
|
||||
/*
|
||||
* This is like of_LOAD_VEC, but includes an add of an integer value.
|
||||
*/
|
||||
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
unsigned bit = cp->bit_idx[0];
|
||||
unsigned wid = cp->bit_idx[1];
|
||||
int64_t addend = thr->words[0].w_int;
|
||||
|
||||
vvp_vector4_t sig_value = load_base(thr, cp);
|
||||
|
||||
/* Add the addend value */
|
||||
sig_value += addend;
|
||||
|
||||
/* Check the address once, before we scan the vector. */
|
||||
thr_check_addr(thr, bit+wid-1);
|
||||
|
||||
|
|
|
|||
|
|
@ -557,6 +557,31 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool vvp_vector4_t::has_xz() const
|
||||
{
|
||||
if (size_ < BITS_PER_WORD) {
|
||||
unsigned long mask = WORD_X_BITS >> 2*(BITS_PER_WORD - size_);
|
||||
return 0 != (bits_val_&mask);
|
||||
}
|
||||
|
||||
if (size_ == BITS_PER_WORD) {
|
||||
return 0 != (bits_val_&WORD_X_BITS);
|
||||
}
|
||||
|
||||
unsigned words = size_ / BITS_PER_WORD;
|
||||
for (unsigned idx = 0 ; idx < words ; idx += 1) {
|
||||
if (bits_ptr_[idx] & WORD_X_BITS)
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned long mask = size_%BITS_PER_WORD;
|
||||
if (mask > 0) {
|
||||
mask = WORD_X_BITS >> 2*(BITS_PER_WORD - mask);
|
||||
return 0 != bits_ptr_[words]&mask;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void vvp_vector4_t::change_z2x()
|
||||
{
|
||||
|
|
@ -602,6 +627,40 @@ char* vvp_vector4_t::as_string(char*buf, size_t buf_len)
|
|||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add an integer to the vvp_vector4_t in place, bit by bit so that
|
||||
* there is no size limitations.
|
||||
*/
|
||||
vvp_vector4_t& vvp_vector4_t::operator += (int64_t that)
|
||||
{
|
||||
vvp_bit4_t carry = BIT4_0;
|
||||
unsigned idx;
|
||||
|
||||
if (has_xz()) {
|
||||
vvp_vector4_t xxx (size(), BIT4_X);
|
||||
*this = xxx;
|
||||
return *this;
|
||||
}
|
||||
|
||||
for (idx = 0 ; idx < size() ; idx += 1) {
|
||||
if (that == 0 && carry==BIT4_0)
|
||||
break;
|
||||
|
||||
vvp_bit4_t that_bit = (that&1)? BIT4_1 : BIT4_0;
|
||||
that >>= 1;
|
||||
|
||||
if (that_bit==BIT4_0 && carry==BIT4_0)
|
||||
continue;
|
||||
|
||||
vvp_bit4_t bit = value(idx);
|
||||
bit = add_with_carry(bit, that_bit, carry);
|
||||
|
||||
set_bit(idx, bit);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
ostream& operator<< (ostream&out, const vvp_vector4_t&that)
|
||||
{
|
||||
out << that.size() << "'b";
|
||||
|
|
|
|||
|
|
@ -121,12 +121,17 @@ class vvp_vector4_t {
|
|||
// Test that the vectors are exactly equal
|
||||
bool eeq(const vvp_vector4_t&that) const;
|
||||
|
||||
// Return true if there is an X or Z anywhere in the vector.
|
||||
bool has_xz() const;
|
||||
|
||||
// Change all Z bits to X bits.
|
||||
void change_z2x();
|
||||
|
||||
// Display the value into the buf as a string.
|
||||
char*as_string(char*buf, size_t buf_len);
|
||||
|
||||
vvp_vector4_t& operator += (int64_t);
|
||||
|
||||
private:
|
||||
// Number of vvp_bit4_t bits that can be shoved into a word.
|
||||
enum { BITS_PER_WORD = 8*sizeof(unsigned long)/2 };
|
||||
|
|
|
|||
Loading…
Reference in New Issue