Load_add_immediate to work with signed expressions

The %load/vp0 instruction adds a signed value to the signal value being
loaded, but it doesn't allow for a signed source vector. Add the
%load/vp0/s instruction that pads the loaded vector, and add the code
generator details to properly use it.
This commit is contained in:
Stephen Williams 2008-06-13 20:23:40 -07:00
parent 62d7c081dc
commit 6f0d8e8dda
5 changed files with 70 additions and 37 deletions

View File

@ -28,7 +28,7 @@
static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
int ok_flags);
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
int add_index, unsigned long immediate);
int add_index, long immediate);
int number_is_unknown(ivl_expr_t ex)
{
@ -1158,12 +1158,11 @@ static struct vector_info draw_binary_expr_lrs(ivl_expr_t exp, unsigned wid)
static struct vector_info draw_load_add_immediate(ivl_expr_t le,
ivl_expr_t re,
unsigned wid)
unsigned wid,
int signed_flag)
{
struct vector_info lv;
unsigned long imm;
imm = get_number_immediate(re);
long imm = get_number_immediate(re);
lv.base = allocate_vector(wid);
lv.wid = wid;
if (lv.base == 0) {
@ -1176,7 +1175,7 @@ static struct vector_info draw_load_add_immediate(ivl_expr_t le,
/* Load the signal value with a %load that adds the index
register to the value being loaded. */
draw_signal_dest(le, lv, 0, imm);
draw_signal_dest(le, lv, signed_flag, imm);
return lv;
}
@ -1319,25 +1318,27 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid)
const char*sign_string = ivl_expr_signed(le) && ivl_expr_signed(re)? "/s" : "";
int signed_flag = ivl_expr_signed(exp)? 1 : 0;
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
&& (ivl_expr_type(re) == IVL_EX_ULONG))
return draw_load_add_immediate(le, re, wid);
return draw_load_add_immediate(le, re, wid, signed_flag);
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
&& (ivl_expr_type(re) == IVL_EX_NUMBER))
return draw_load_add_immediate(le, re, wid);
return draw_load_add_immediate(le, re, wid, signed_flag);
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
&& (ivl_expr_type(le) == IVL_EX_ULONG))
return draw_load_add_immediate(re, le, wid);
return draw_load_add_immediate(re, le, wid, signed_flag);
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
&& (ivl_expr_type(le) == IVL_EX_NUMBER))
return draw_load_add_immediate(re, le, wid);
return draw_load_add_immediate(re, le, wid, signed_flag);
if ((ivl_expr_opcode(exp) == '+')
&& (ivl_expr_type(re) == IVL_EX_ULONG))
@ -1963,11 +1964,13 @@ void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned swid)
* offsetting the read from the lsi (least significant index) of the
* signal.
*
* If the add_index is >=0, then generate a %load/vp0 to add the
* word0 value to the loaded value before storing it into the destination.
* If the add_index is 0, then generate a %load/vp0 to add the
* word0 value to the loaded value before storing it into the
* destination. If the add_index is 1, then generate a %load/vp0/s to
* do a signed load.
*/
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
int add_index, unsigned long immediate)
int add_index, long immediate)
{
unsigned swid = ivl_expr_width(exp);
ivl_signal_t sig = ivl_expr_signal(exp);
@ -2009,13 +2012,17 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
} else if (add_index >= 0) {
assert(add_index == 0);
const char*sign_flag = add_index==1? "/s" : "";
/* If this is a REG (a variable) then I can do a vector read. */
fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate);
fprintf(vvp_out, " %%ix/load 2, %u;\n", res.wid);
fprintf(vvp_out, " %%load/vp0 %u, v%p_%u, %u;\n",
res.base, sig, word, swid);
if (immediate >= 0) {
fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate);
} else {
fprintf(vvp_out, " %%ix/load 0, 0; immediate=%ld\n", immediate);
fprintf(vvp_out, " %%ix/sub 0, %ld;\n", -immediate);
}
fprintf(vvp_out, " %%load/vp0%s %u, v%p_%u, %u;\n", sign_flag,
res.base, sig,word, res.wid);
swid = res.wid;
} else {

View File

@ -100,6 +100,7 @@ extern bool of_LOAD_AVP0(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code);
extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code);
extern bool of_LOADI_WR(vthread_t thr, vvp_code_t code);

View File

@ -143,6 +143,7 @@ const static struct opcode_table_s opcode_table[] = {
{ "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} },
{ "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/vp0/s",of_LOAD_VP0_S,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} },
{ "%load/x1p",of_LOAD_X1P,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
{ "%loadi/wr",of_LOADI_WR,3,{OA_BIT1, OA_NUMBER, OA_BIT2} },

View File

@ -437,18 +437,21 @@ the specified thread register bit. The functor-label can refer to a
from the least significant up to <wid> bits, is loaded starting at
thread bit <bit>. It is OK for the width to not match the vector
width at the functor. If the <wid> is less than the width at the
functor, then the most significant bits are dropped.
functor, then the most significant bits are dropped. If the <wid> is
more than the width at the functor, the value is padded with X bits.
* %load/vp0 <bit>, <functor-label>, <wid>
* %load/vp0/s <bit>, <functor-label>, <wid>
This instruction is the same as %load/v above, except that it also
adds the integer value is index register 0 into the loaded value. The
addition is a Verilog-style add, which means that if any of the input
bits are X or Z, the entire result is turned into a vector of X bits.
This instruction is similar to %load/v above, except that it also
adds the signed integer value in index register 0 into the loaded
value. The addition is a Verilog-style add, which means that if any of
the input bits are X or Z, the entire result is turned into a vector
of X bits.
Index register 2 contains the result width. The addition of the loaded
value and the index are done at this width to avoid the problem of a
small vector with a large immediate offset indexing an array.
The <wid> is, like the %load/v, the result width. But unlike the
%load/v, the vector is padded with 0s (%load/vp0) or sign extended
(%load/vp0/s) to the desired width.
* %load/wr <bit>, <vpi-label>

View File

@ -2467,9 +2467,6 @@ bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t cp)
*/
static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
{
assert(cp->bit_idx[0] >= 4);
assert(cp->bit_idx[1] > 0);
vvp_net_t*net = cp->net;
/* For the %load to work, the functor must actually be a
@ -2501,6 +2498,8 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
directly to skip the excess calls to thr_check_addr. */
thr->bits4.set_vec(bit, sig_value);
/* If the source is shorter than the desired width, then pad
with BIT4_X values. */
for (unsigned idx = sig_value.size() ; idx < wid ; idx += 1)
thr->bits4.set_bit(bit+idx, BIT4_X);
@ -2511,16 +2510,12 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
* This is like of_LOAD_VEC, but includes an add of an integer value from
* index 0. The <wid> is the expected result width not the vector width.
*/
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value)
{
unsigned bit = cp->bit_idx[0];
unsigned wid = cp->bit_idx[1];
int64_t addend = thr->words[0].w_int;
unsigned wid = thr->words[2].w_int;
/* We need a vector this wide to make the math work correctly.
* Copy the base bits into the vector, but keep the width. */
vvp_vector4_t sig_value(wid, BIT4_0);
sig_value.copy_bits(load_base(thr, cp));
/* Check the address once, before we scan the vector. */
thr_check_addr(thr, bit+wid-1);
@ -2529,7 +2524,7 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
if (val == 0) {
vvp_vector4_t tmp(wid, BIT4_X);
thr->bits4.set_vec(bit, tmp);
return true;
return;
}
unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;
@ -2551,7 +2546,33 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
directly to skip the excess calls to thr_check_addr. */
thr->bits4.setarray(bit, wid, val);
delete[]val;
}
/*
 * %load/vp0 <bit>, <functor-label>, <wid>
 *
 * Unsigned flavor of the load-and-add: the loaded value is widened to
 * <wid> bits with zero padding, then handed to load_vp0_common to
 * finish the instruction. Always returns true.
 */
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
{
      const unsigned result_wid = cp->bit_idx[1];

	/* Build the operand at the full result width, pre-filled
	   with 0 bits, and then drop the loaded bits into it. The
	   operand keeps the result width. */
      vvp_vector4_t padded(result_wid, BIT4_0);
      padded.copy_bits(load_base(thr, cp));

      load_vp0_common(thr, cp, padded);
      return true;
}
/*
 * %load/vp0/s <bit>, <functor-label>, <wid>
 *
 * Signed flavor of the load-and-add: the loaded value is widened to
 * <wid> bits by replicating its most significant bit, then handed to
 * load_vp0_common to finish the instruction. Always returns true.
 */
bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp)
{
      const unsigned result_wid = cp->bit_idx[1];

	/* Fetch the source value first; its top bit is the fill
	   value for the widened operand. */
      vvp_vector4_t loaded (load_base(thr, cp));

	/* Build the operand at the full result width, pre-filled
	   with the sign bit, and then drop the loaded bits into it.
	   The operand keeps the result width. */
      vvp_vector4_t extended(result_wid, loaded.value(loaded.size()-1));
      extended.copy_bits(loaded);

      load_vp0_common(thr, cp, extended);
      return true;
}