Load_add_immediate to work with signed expressions

The %load/vp0 instruction adds a signed value to the signal value being
loaded, but it doesn't allow for a signed source vector. Add the
%load/vp0/s instruction that pads the loaded vector, and add the code
generator details to properly use it.
This commit is contained in:
Stephen Williams 2008-06-13 20:23:40 -07:00
parent 62d7c081dc
commit 6f0d8e8dda
5 changed files with 70 additions and 37 deletions

View File

@ -28,7 +28,7 @@
static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest, static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
int ok_flags); int ok_flags);
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res, static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
int add_index, unsigned long immediate); int add_index, long immediate);
int number_is_unknown(ivl_expr_t ex) int number_is_unknown(ivl_expr_t ex)
{ {
@ -1158,12 +1158,11 @@ static struct vector_info draw_binary_expr_lrs(ivl_expr_t exp, unsigned wid)
static struct vector_info draw_load_add_immediate(ivl_expr_t le, static struct vector_info draw_load_add_immediate(ivl_expr_t le,
ivl_expr_t re, ivl_expr_t re,
unsigned wid) unsigned wid,
int signed_flag)
{ {
struct vector_info lv; struct vector_info lv;
unsigned long imm; long imm = get_number_immediate(re);
imm = get_number_immediate(re);
lv.base = allocate_vector(wid); lv.base = allocate_vector(wid);
lv.wid = wid; lv.wid = wid;
if (lv.base == 0) { if (lv.base == 0) {
@ -1176,7 +1175,7 @@ static struct vector_info draw_load_add_immediate(ivl_expr_t le,
/* Load the signal value with a %load that adds the index /* Load the signal value with a %load that adds the index
register to the value being loaded. */ register to the value being loaded. */
draw_signal_dest(le, lv, 0, imm); draw_signal_dest(le, lv, signed_flag, imm);
return lv; return lv;
} }
@ -1319,25 +1318,27 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid)
const char*sign_string = ivl_expr_signed(le) && ivl_expr_signed(re)? "/s" : ""; const char*sign_string = ivl_expr_signed(le) && ivl_expr_signed(re)? "/s" : "";
int signed_flag = ivl_expr_signed(exp)? 1 : 0;
if ((ivl_expr_opcode(exp) == '+') if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(le) == IVL_EX_SIGNAL) && (ivl_expr_type(le) == IVL_EX_SIGNAL)
&& (ivl_expr_type(re) == IVL_EX_ULONG)) && (ivl_expr_type(re) == IVL_EX_ULONG))
return draw_load_add_immediate(le, re, wid); return draw_load_add_immediate(le, re, wid, signed_flag);
if ((ivl_expr_opcode(exp) == '+') if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(le) == IVL_EX_SIGNAL) && (ivl_expr_type(le) == IVL_EX_SIGNAL)
&& (ivl_expr_type(re) == IVL_EX_NUMBER)) && (ivl_expr_type(re) == IVL_EX_NUMBER))
return draw_load_add_immediate(le, re, wid); return draw_load_add_immediate(le, re, wid, signed_flag);
if ((ivl_expr_opcode(exp) == '+') if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(re) == IVL_EX_SIGNAL) && (ivl_expr_type(re) == IVL_EX_SIGNAL)
&& (ivl_expr_type(le) == IVL_EX_ULONG)) && (ivl_expr_type(le) == IVL_EX_ULONG))
return draw_load_add_immediate(re, le, wid); return draw_load_add_immediate(re, le, wid, signed_flag);
if ((ivl_expr_opcode(exp) == '+') if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(re) == IVL_EX_SIGNAL) && (ivl_expr_type(re) == IVL_EX_SIGNAL)
&& (ivl_expr_type(le) == IVL_EX_NUMBER)) && (ivl_expr_type(le) == IVL_EX_NUMBER))
return draw_load_add_immediate(re, le, wid); return draw_load_add_immediate(re, le, wid, signed_flag);
if ((ivl_expr_opcode(exp) == '+') if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(re) == IVL_EX_ULONG)) && (ivl_expr_type(re) == IVL_EX_ULONG))
@ -1963,11 +1964,13 @@ void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned swid)
* offsetting the read from the lsi (least significant index) of the * offsetting the read from the lsi (least significant index) of the
* signal. * signal.
* *
* If the add_index is >=0, then generate a %load/vp0 to add the * If the add_index is 0, then generate a %load/vp0 to add the
* word0 value to the loaded value before storing it into the destination. * word0 value to the loaded value before storing it into the
* destination. If the add_index is 1, then generate a %load/vp0/s to
* do a signed load.
*/ */
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res, static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
int add_index, unsigned long immediate) int add_index, long immediate)
{ {
unsigned swid = ivl_expr_width(exp); unsigned swid = ivl_expr_width(exp);
ivl_signal_t sig = ivl_expr_signal(exp); ivl_signal_t sig = ivl_expr_signal(exp);
@ -2009,13 +2012,17 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
} else if (add_index >= 0) { } else if (add_index >= 0) {
assert(add_index == 0); const char*sign_flag = add_index==1? "/s" : "";
/* If this is a REG (a variable) then I can do a vector read. */ /* If this is a REG (a variable) then I can do a vector read. */
fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate); if (immediate >= 0) {
fprintf(vvp_out, " %%ix/load 2, %u;\n", res.wid); fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate);
fprintf(vvp_out, " %%load/vp0 %u, v%p_%u, %u;\n", } else {
res.base, sig, word, swid); fprintf(vvp_out, " %%ix/load 0, 0; immediate=%ld\n", immediate);
fprintf(vvp_out, " %%ix/sub 0, %ld;\n", -immediate);
}
fprintf(vvp_out, " %%load/vp0%s %u, v%p_%u, %u;\n", sign_flag,
res.base, sig,word, res.wid);
swid = res.wid; swid = res.wid;
} else { } else {

View File

@ -100,6 +100,7 @@ extern bool of_LOAD_AVP0(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code); extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code); extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code); extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code); extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code); extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code);
extern bool of_LOADI_WR(vthread_t thr, vvp_code_t code); extern bool of_LOADI_WR(vthread_t thr, vvp_code_t code);

View File

@ -143,6 +143,7 @@ const static struct opcode_table_s opcode_table[] = {
{ "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} }, { "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} },
{ "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/vp0/s",of_LOAD_VP0_S,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} }, { "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} },
{ "%load/x1p",of_LOAD_X1P,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/x1p",of_LOAD_X1P,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%loadi/wr",of_LOADI_WR,3,{OA_BIT1, OA_NUMBER, OA_BIT2} }, { "%loadi/wr",of_LOADI_WR,3,{OA_BIT1, OA_NUMBER, OA_BIT2} },

View File

@ -437,18 +437,21 @@ the specified thread register bit. The functor-label can refer to a
from the least significant up to <wid> bits, is loaded starting at from the least significant up to <wid> bits, is loaded starting at
thread bit <bit>. It is OK for the width to not match the vector thread bit <bit>. It is OK for the width to not match the vector
width at the functor. If the <wid> is less than the width at the width at the functor. If the <wid> is less than the width at the
functor, then the most significant bits are dropped. functor, then the most significant bits are dropped. If the <wid> is
more than the width at the functor, the value is padded with X bits.
* %load/vp0 <bit>, <functor-label>, <wid> * %load/vp0 <bit>, <functor-label>, <wid>
* %load/vp0/s <bit>, <functor-label>, <wid>
This instruction is the same as %load/v above, except that it also This instruction is similar to %load/v above, except that it also
adds the integer value is index register 0 into the loaded value. The adds the signed integer value in index register 0 into the loaded
addition is a Verilog-style add, which means that if any of the input value. The addition is a Verilog-style add, which means that if any of
bits are X or Z, the entire result is turned into a vector of X bits. the input bits are X or Z, the entire result is turned into a vector
of X bits.
Index register 2 contains the result width. The addition of the loaded The <wid> is, like the %load/v, the result width. But unlike the
value and the index are done at this width to avoid the problem of a %load/v, the vector is padded with 0s (%load/vp0) or sign extended
small vector with a large immediate offset indexing an array. (%load/vp0/s) to the desired width.
* %load/wr <bit>, <vpi-label> * %load/wr <bit>, <vpi-label>

View File

@ -2467,9 +2467,6 @@ bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t cp)
*/ */
static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp) static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
{ {
assert(cp->bit_idx[0] >= 4);
assert(cp->bit_idx[1] > 0);
vvp_net_t*net = cp->net; vvp_net_t*net = cp->net;
/* For the %load to work, the functor must actually be a /* For the %load to work, the functor must actually be a
@ -2501,6 +2498,8 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
directly to skip the excess calls to thr_check_addr. */ directly to skip the excess calls to thr_check_addr. */
thr->bits4.set_vec(bit, sig_value); thr->bits4.set_vec(bit, sig_value);
/* If the source is shorter than the desired width, then pad
with BIT4_X values. */
for (unsigned idx = sig_value.size() ; idx < wid ; idx += 1) for (unsigned idx = sig_value.size() ; idx < wid ; idx += 1)
thr->bits4.set_bit(bit+idx, BIT4_X); thr->bits4.set_bit(bit+idx, BIT4_X);
@ -2511,16 +2510,12 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
* This is like of_LOAD_VEC, but includes an add of an integer value from * This is like of_LOAD_VEC, but includes an add of an integer value from
* index 0. The <wid> is the expected result width not the vector width. * index 0. The <wid> is the expected result width not the vector width.
*/ */
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value)
{ {
unsigned bit = cp->bit_idx[0]; unsigned bit = cp->bit_idx[0];
unsigned wid = cp->bit_idx[1];
int64_t addend = thr->words[0].w_int; int64_t addend = thr->words[0].w_int;
unsigned wid = thr->words[2].w_int;
/* We need a vector this wide to make the math work correctly.
* Copy the base bits into the vector, but keep the width. */
vvp_vector4_t sig_value(wid, BIT4_0);
sig_value.copy_bits(load_base(thr, cp));
/* Check the address once, before we scan the vector. */ /* Check the address once, before we scan the vector. */
thr_check_addr(thr, bit+wid-1); thr_check_addr(thr, bit+wid-1);
@ -2529,7 +2524,7 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
if (val == 0) { if (val == 0) {
vvp_vector4_t tmp(wid, BIT4_X); vvp_vector4_t tmp(wid, BIT4_X);
thr->bits4.set_vec(bit, tmp); thr->bits4.set_vec(bit, tmp);
return true; return;
} }
unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS; unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;
@ -2551,7 +2546,33 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
directly to skip the excess calls to thr_check_addr. */ directly to skip the excess calls to thr_check_addr. */
thr->bits4.setarray(bit, wid, val); thr->bits4.setarray(bit, wid, val);
delete[]val; delete[]val;
}
/*
 * %load/vp0 <bit>, <functor-label>, <wid>
 *
 * Unsigned variant of the load-and-add-index-0 instruction. The result
 * width <wid> is taken from cp->bit_idx[1]. The value returned by
 * load_base() is copied into a <wid>-wide vector that starts out as all
 * BIT4_0, so any bits above the loaded value are zero padded. The
 * addition and the write into the thread bits are done by
 * load_vp0_common(). Always returns true.
 */
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
{
unsigned wid = cp->bit_idx[1];
/* We need a vector this wide to make the math work correctly.
* Copy the base bits into the vector, but keep the width. The
* BIT4_0 fill is what zero-pads a source narrower than <wid>. */
vvp_vector4_t sig_value(wid, BIT4_0);
sig_value.copy_bits(load_base(thr, cp));
load_vp0_common(thr, cp, sig_value);
return true;
}
/*
 * %load/vp0/s <bit>, <functor-label>, <wid>
 *
 * Signed variant of %load/vp0. The result width <wid> is taken from
 * cp->bit_idx[1]. The value returned by load_base() is copied into a
 * <wid>-wide vector whose initial fill is the most significant bit of
 * the loaded value, so a source narrower than <wid> is sign extended
 * rather than zero padded. The addition and the write into the thread
 * bits are done by load_vp0_common(). Always returns true.
 */
bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp)
{
unsigned wid = cp->bit_idx[1];
vvp_vector4_t tmp (load_base(thr, cp));
/* We need a vector this wide to make the math work correctly.
* Fill it with the top bit of the loaded value (the sign bit),
* then copy the base bits into the vector, keeping the width.
* NOTE(review): tmp.size()-1 assumes the loaded value is at
* least one bit wide -- confirm load_base() never returns an
* empty vector. */
vvp_vector4_t sig_value(wid, tmp.value(tmp.size()-1));
sig_value.copy_bits(tmp);
load_vp0_common(thr, cp, sig_value);
return true; return true;
} }