Optimize load-add with load/add instruction

Where an expression is an immediate value added to a signal value,
it is possible to optimize it to a single instruction that combines
the load with the add.
This commit is contained in:
Stephen Williams 2007-12-04 19:15:15 -08:00
parent 68a9526fec
commit 8f519531f3
6 changed files with 163 additions and 4 deletions

View File

@ -27,6 +27,8 @@
static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
int ok_flags);
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
int add_index);
int number_is_unknown(ivl_expr_t ex)
{
@ -998,6 +1000,28 @@ static struct vector_info draw_binary_expr_lrs(ivl_expr_t exp, unsigned wid)
return lv;
}
/*
 * Generate code for "signal + immediate" as a single combined
 * load-and-add.
 *
 *   le  - the signal operand; it is handed to draw_signal_dest.
 *   re  - the constant operand; its value is fetched with
 *         get_number_immediate.
 *   wid - the width of the result vector to allocate.
 *
 * The immediate is first written to index register 0 with %ix/load,
 * then draw_signal_dest is called with add_index == 0 so that the
 * load it emits adds that register to the loaded value. The returned
 * vector_info describes the newly allocated result vector.
 */
static struct vector_info draw_load_add_immediate(ivl_expr_t le,
                                                  ivl_expr_t re,
                                                  unsigned wid)
{
      unsigned long addend = get_number_immediate(re);
      struct vector_info res;

      /* Put the immediate value into word register 0. */
      fprintf(vvp_out, " %%ix/load 0, %lu;\n", addend);

      /* Reserve thread bits for the sum. */
      res.base = allocate_vector(wid);
      res.wid = wid;

      /* Emit the load of the signal; passing 0 as the add_index
         selects the load form that adds index register 0. */
      draw_signal_dest(le, res, 0);

      return res;
}
static struct vector_info draw_add_immediate(ivl_expr_t le,
ivl_expr_t re,
unsigned wid)
@ -1098,6 +1122,26 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid)
const char*sign_string = ivl_expr_signed(exp)? "/s" : "";
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
&& (ivl_expr_type(re) == IVL_EX_ULONG))
return draw_load_add_immediate(le, re, wid);
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
&& (ivl_expr_type(re) == IVL_EX_NUMBER))
return draw_load_add_immediate(le, re, wid);
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
&& (ivl_expr_type(le) == IVL_EX_ULONG))
return draw_load_add_immediate(re, le, wid);
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
&& (ivl_expr_type(le) == IVL_EX_NUMBER))
return draw_load_add_immediate(re, le, wid);
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(re) == IVL_EX_ULONG))
return draw_add_immediate(le, re, wid);
@ -1663,8 +1707,12 @@ static void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned s
* into the thread bits. Remember to account for the part select by
* offsetting the read from the lsi (least significant index) of the
* signal.
*
* If add_index is >= 0, then generate a %load/vp0 to add the
* word0 value to the loaded value before storing it into the destination.
*/
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res)
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
int add_index)
{
unsigned swid = ivl_expr_width(exp);
ivl_signal_t sig = ivl_expr_signal(exp);
@ -1679,6 +1727,7 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res)
if (ivl_signal_array_count(sig) > 1) {
ivl_expr_t ix = ivl_expr_oper1(exp);
if (!number_is_immediate(ix, 8*sizeof(unsigned long))) {
assert(add_index < 0);
draw_eval_expr_into_integer(ix, 3);
fprintf(vvp_out, " %%load/av %u, v%p, %u;\n",
res.base, sig, swid);
@ -1694,11 +1743,20 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res)
if (ivl_signal_data_type(sig) == IVL_VT_REAL) {
assert(add_index < 0);
int tmp = allocate_word();
fprintf(vvp_out, " %%load/wr %d, v%p_%u;\n", tmp, sig, word);
fprintf(vvp_out, " %%cvt/vr %u, %d, %u;\n", res.base, tmp, res.wid);
clr_word(tmp);
} else if (add_index >= 0) {
assert(add_index == 0);
/* If this is a REG (a variable) then I can do a vector read. */
fprintf(vvp_out, " %%load/vp0 %u, v%p_%u, %u;\n",
res.base, sig, word, swid);
} else {
/* If this is a REG (a variable) then I can do a vector read. */
@ -1730,7 +1788,7 @@ static struct vector_info draw_signal_expr(ivl_expr_t exp, unsigned wid,
res.wid = wid;
save_expression_lookaside(res.base, exp, wid);
draw_signal_dest(exp, res);
draw_signal_dest(exp, res, -1);
return res;
}
@ -2232,7 +2290,7 @@ static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
switch (ivl_expr_type(exp)) {
case IVL_EX_SIGNAL:
draw_signal_dest(exp, dest);
draw_signal_dest(exp, dest, -1);
return;
default:

View File

@ -95,6 +95,7 @@ extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_MV(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_NX(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_X(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_XP(vthread_t thr, vvp_code_t code);

View File

@ -141,6 +141,7 @@ const static struct opcode_table_s opcode_table[] = {
{ "%load/mv",of_LOAD_MV,3, {OA_BIT1, OA_MEM_PTR, OA_BIT2} },
{ "%load/nx",of_LOAD_NX,3, {OA_BIT1, OA_VPI_PTR, OA_BIT2} },
{ "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} },
{ "%load/x", of_LOAD_X, 3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/x.p",of_LOAD_XP, 3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },

View File

@ -2188,13 +2188,14 @@ bool of_LOAD_NX(vthread_t thr, vvp_code_t cp)
* The functor to read from is the vvp_net_t object pointed to by the
* cp->net pointer.
*/
bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
{
assert(cp->bit_idx[0] >= 4);
assert(cp->bit_idx[1] > 0);
unsigned bit = cp->bit_idx[0];
unsigned wid = cp->bit_idx[1];
int64_t addend = thr->words[0].w_int;
vvp_net_t*net = cp->net;
/* For the %load to work, the functor must actually be a
@ -2209,6 +2210,40 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
vvp_vector4_t sig_value = sig->vec4_value();
sig_value.resize(wid);
return sig_value;
}
/*
 * Load a vector into the thread bits. The value to store is obtained
 * from load_base(); cp->bit_idx[0] is the destination base address in
 * the thread bit space and cp->bit_idx[1] is the width. Always
 * returns true.
 */
bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
{
      const unsigned dst_base = cp->bit_idx[0];
      const unsigned dst_wid = cp->bit_idx[1];

      vvp_vector4_t loaded = load_base(thr, cp);

      /* Validate only the highest destination address, once, up
         front, instead of checking for every bit copied. */
      thr_check_addr(thr, dst_base + dst_wid - 1);

      /* Write the whole vector into bits4 in a single operation,
         which skips the excess calls to thr_check_addr. */
      thr->bits4.set_vec(dst_base, loaded);

      return true;
}
/*
* This is like of_LOAD_VEC, but includes an add of an integer value.
*/
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
{
unsigned bit = cp->bit_idx[0];
unsigned wid = cp->bit_idx[1];
int64_t addend = thr->words[0].w_int;
vvp_vector4_t sig_value = load_base(thr, cp);
/* Add the addend value */
sig_value += addend;
/* Check the address once, before we scan the vector. */
thr_check_addr(thr, bit+wid-1);

View File

@ -557,6 +557,31 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
return true;
}
/*
 * Return true if there is an X or Z anywhere in the vector.
 *
 * Vectors of up to BITS_PER_WORD bits are tested in bits_val_; longer
 * vectors are tested word by word in bits_ptr_. Each word is tested
 * against WORD_X_BITS (presumably the pattern that selects the bit of
 * each 2-bit cell that is set for X and Z -- confirm against its
 * definition). A partially used word is tested with that pattern
 * shifted right so that only the cells actually in use are examined.
 */
bool vvp_vector4_t::has_xz() const
{
      /* An empty vector has no bits, so it cannot hold an X or Z.
         Handling it here also keeps the shift count below from
         reaching the full width of the word, which is undefined. */
      if (size_ == 0)
            return false;

      if (size_ < BITS_PER_WORD) {
            unsigned long mask = WORD_X_BITS >> 2*(BITS_PER_WORD - size_);
            return 0 != (bits_val_&mask);
      }

      if (size_ == BITS_PER_WORD) {
            return 0 != (bits_val_&WORD_X_BITS);
      }

      /* Test all the completely filled words first. */
      unsigned words = size_ / BITS_PER_WORD;
      for (unsigned idx = 0 ; idx < words ; idx += 1) {
            if (bits_ptr_[idx] & WORD_X_BITS)
                  return true;
      }

      /* Then test the cells in use in the trailing partial word. */
      unsigned long tail = size_%BITS_PER_WORD;
      if (tail > 0) {
            unsigned long mask = WORD_X_BITS >> 2*(BITS_PER_WORD - tail);
            /* The parentheses are required: != binds tighter than &,
               so without them the mask would be applied to the 0/1
               result of the comparison instead of to the word. */
            return 0 != (bits_ptr_[words]&mask);
      }

      return false;
}
void vvp_vector4_t::change_z2x()
{
@ -602,6 +627,40 @@ char* vvp_vector4_t::as_string(char*buf, size_t buf_len)
return res;
}
/*
 * Add an integer to the vvp_vector4_t in place. The sum is computed
 * bit by bit, so there are no limitations on the size of the vector.
 * If the vector contains any X or Z bit, the result is all X.
 */
vvp_vector4_t& vvp_vector4_t::operator += (int64_t that)
{
      /* An unknown bit anywhere makes the whole sum unknown. */
      if (has_xz()) {
            vvp_vector4_t all_x (size(), BIT4_X);
            *this = all_x;
            return *this;
      }

      /* carry is handed to add_with_carry at every step; presumably
         that function updates it with the carry-out of the bit sum. */
      vvp_bit4_t carry = BIT4_0;

      for (unsigned pos = 0 ; pos < size() ; pos += 1) {
              /* Once the addend is used up and no carry is pending,
                 the remaining bits cannot change. */
            if (carry == BIT4_0 && that == 0)
                  break;

              /* Peel off the low bit of the addend. */
            const bool low_set = (that & 1) != 0;
            that >>= 1;

              /* Only bits that receive a 1 from the addend or from
                 the carry need to be recomputed. */
            if (low_set || carry != BIT4_0) {
                  vvp_bit4_t addend_bit = low_set ? BIT4_1 : BIT4_0;
                  vvp_bit4_t cur = value(pos);
                  cur = add_with_carry(cur, addend_bit, carry);
                  set_bit(pos, cur);
            }
      }

      return *this;
}
ostream& operator<< (ostream&out, const vvp_vector4_t&that)
{
out << that.size() << "'b";

View File

@ -121,12 +121,17 @@ class vvp_vector4_t {
// Test that the vectors are exactly equal
bool eeq(const vvp_vector4_t&that) const;
// Return true if there is an X or Z anywhere in the vector.
bool has_xz() const;
// Change all Z bits to X bits.
void change_z2x();
// Display the value into the buf as a string.
char*as_string(char*buf, size_t buf_len);
vvp_vector4_t& operator += (int64_t);
private:
// Number of vvp_bit4_t bits that can be shoved into a word.
enum { BITS_PER_WORD = 8*sizeof(unsigned long)/2 };