Fix load_add_immediate to work with signed expressions
The %load/vp0 instruction adds a signed value to the signal value being loaded, but it doesn't allow for a signed source vector: the loaded vector is always zero-padded to the result width. Add the %load/vp0/s instruction, which sign-extends the loaded vector instead, and add the code generator details to properly use it.
This commit is contained in:
parent
62d7c081dc
commit
6f0d8e8dda
|
|
@ -28,7 +28,7 @@
|
|||
static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
|
||||
int ok_flags);
|
||||
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
||||
int add_index, unsigned long immediate);
|
||||
int add_index, long immediate);
|
||||
|
||||
int number_is_unknown(ivl_expr_t ex)
|
||||
{
|
||||
|
|
@ -1158,12 +1158,11 @@ static struct vector_info draw_binary_expr_lrs(ivl_expr_t exp, unsigned wid)
|
|||
|
||||
static struct vector_info draw_load_add_immediate(ivl_expr_t le,
|
||||
ivl_expr_t re,
|
||||
unsigned wid)
|
||||
unsigned wid,
|
||||
int signed_flag)
|
||||
{
|
||||
struct vector_info lv;
|
||||
unsigned long imm;
|
||||
|
||||
imm = get_number_immediate(re);
|
||||
long imm = get_number_immediate(re);
|
||||
lv.base = allocate_vector(wid);
|
||||
lv.wid = wid;
|
||||
if (lv.base == 0) {
|
||||
|
|
@ -1176,7 +1175,7 @@ static struct vector_info draw_load_add_immediate(ivl_expr_t le,
|
|||
|
||||
/* Load the signal value with a %load that adds the index
|
||||
register to the value being loaded. */
|
||||
draw_signal_dest(le, lv, 0, imm);
|
||||
draw_signal_dest(le, lv, signed_flag, imm);
|
||||
|
||||
return lv;
|
||||
}
|
||||
|
|
@ -1319,25 +1318,27 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid)
|
|||
|
||||
const char*sign_string = ivl_expr_signed(le) && ivl_expr_signed(re)? "/s" : "";
|
||||
|
||||
int signed_flag = ivl_expr_signed(exp)? 1 : 0;
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
|
||||
&& (ivl_expr_type(re) == IVL_EX_ULONG))
|
||||
return draw_load_add_immediate(le, re, wid);
|
||||
return draw_load_add_immediate(le, re, wid, signed_flag);
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
|
||||
&& (ivl_expr_type(re) == IVL_EX_NUMBER))
|
||||
return draw_load_add_immediate(le, re, wid);
|
||||
return draw_load_add_immediate(le, re, wid, signed_flag);
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
|
||||
&& (ivl_expr_type(le) == IVL_EX_ULONG))
|
||||
return draw_load_add_immediate(re, le, wid);
|
||||
return draw_load_add_immediate(re, le, wid, signed_flag);
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
|
||||
&& (ivl_expr_type(le) == IVL_EX_NUMBER))
|
||||
return draw_load_add_immediate(re, le, wid);
|
||||
return draw_load_add_immediate(re, le, wid, signed_flag);
|
||||
|
||||
if ((ivl_expr_opcode(exp) == '+')
|
||||
&& (ivl_expr_type(re) == IVL_EX_ULONG))
|
||||
|
|
@ -1963,11 +1964,13 @@ void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned swid)
|
|||
* offsetting the read from the lsi (least significant index) of the
|
||||
* signal.
|
||||
*
|
||||
* If the add_index is >=0, then generate a %load/vp0 to add the
|
||||
* word0 value to the loaded value before storing it into the destination.
|
||||
* If the add_index is 0, then generate a %load/vp0 to add the
|
||||
* word0 value to the loaded value before storing it into the
|
||||
* destination. If the add_index is 1, then generate a %load/vp0/s to
|
||||
* do a signed load.
|
||||
*/
|
||||
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
||||
int add_index, unsigned long immediate)
|
||||
int add_index, long immediate)
|
||||
{
|
||||
unsigned swid = ivl_expr_width(exp);
|
||||
ivl_signal_t sig = ivl_expr_signal(exp);
|
||||
|
|
@ -2009,13 +2012,17 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
|||
|
||||
} else if (add_index >= 0) {
|
||||
|
||||
assert(add_index == 0);
|
||||
const char*sign_flag = add_index==1? "/s" : "";
|
||||
|
||||
/* If this is a REG (a variable) then I can do a vector read. */
|
||||
fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate);
|
||||
fprintf(vvp_out, " %%ix/load 2, %u;\n", res.wid);
|
||||
fprintf(vvp_out, " %%load/vp0 %u, v%p_%u, %u;\n",
|
||||
res.base, sig, word, swid);
|
||||
if (immediate >= 0) {
|
||||
fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate);
|
||||
} else {
|
||||
fprintf(vvp_out, " %%ix/load 0, 0; immediate=%ld\n", immediate);
|
||||
fprintf(vvp_out, " %%ix/sub 0, %ld;\n", -immediate);
|
||||
}
|
||||
fprintf(vvp_out, " %%load/vp0%s %u, v%p_%u, %u;\n", sign_flag,
|
||||
res.base, sig,word, res.wid);
|
||||
swid = res.wid;
|
||||
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -100,6 +100,7 @@ extern bool of_LOAD_AVP0(vthread_t thr, vvp_code_t code);
|
|||
extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_LOADI_WR(vthread_t thr, vvp_code_t code);
|
||||
|
|
|
|||
|
|
@ -143,6 +143,7 @@ const static struct opcode_table_s opcode_table[] = {
|
|||
{ "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} },
|
||||
{ "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||
{ "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||
{ "%load/vp0/s",of_LOAD_VP0_S,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||
{ "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} },
|
||||
{ "%load/x1p",of_LOAD_X1P,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||
{ "%loadi/wr",of_LOADI_WR,3,{OA_BIT1, OA_NUMBER, OA_BIT2} },
|
||||
|
|
|
|||
|
|
@ -437,18 +437,21 @@ the specified thread register bit. The functor-label can refer to a
|
|||
from the least significant up to <wid> bits, is loaded starting at
|
||||
thread bit <bit>. It is OK for the width to not match the vector
|
||||
width at the functor. If the <wid> is less than the width at the
|
||||
functor, then the most significant bits are dropped.
|
||||
functor, then the most significant bits are dropped. If the <wid> is
|
||||
more than the width at the functor, the value is padded with X bits.
|
||||
|
||||
* %load/vp0 <bit>, <functor-label>, <wid>
|
||||
* %load/vp0/s <bit>, <functor-label>, <wid>
|
||||
|
||||
This instruction is the same as %load/v above, except that it also
|
||||
adds the integer value is index register 0 into the loaded value. The
|
||||
addition is a Verilog-style add, which means that if any of the input
|
||||
bits are X or Z, the entire result is turned into a vector of X bits.
|
||||
This instruction is similar to %load/v above, except that it also
|
||||
adds the signed integer value in index register 0 into the loaded
|
||||
value. The addition is a Verilog-style add, which means that if any of
|
||||
the input bits are X or Z, the entire result is turned into a vector
|
||||
of X bits.
|
||||
|
||||
Index register 2 contains the result width. The addition of the loaded
|
||||
value and the index are done at this width to avoid the problem of a
|
||||
small vector with a large immediate offset indexing an array.
|
||||
The <wid> is, like the %load/v, the result width. But unlike the
|
||||
%load/v, the vector is padded with 0s (%load/vp0) or sign extended
|
||||
(%load/vp0/s) to the desired width.
|
||||
|
||||
* %load/wr <bit>, <vpi-label>
|
||||
|
||||
|
|
|
|||
|
|
@ -2467,9 +2467,6 @@ bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t cp)
|
|||
*/
|
||||
static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
assert(cp->bit_idx[0] >= 4);
|
||||
assert(cp->bit_idx[1] > 0);
|
||||
|
||||
vvp_net_t*net = cp->net;
|
||||
|
||||
/* For the %load to work, the functor must actually be a
|
||||
|
|
@ -2501,6 +2498,8 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
|
|||
directly to skip the excess calls to thr_check_addr. */
|
||||
thr->bits4.set_vec(bit, sig_value);
|
||||
|
||||
/* If the source is shorter than the desired width, then pad
|
||||
with BIT4_X values. */
|
||||
for (unsigned idx = sig_value.size() ; idx < wid ; idx += 1)
|
||||
thr->bits4.set_bit(bit+idx, BIT4_X);
|
||||
|
||||
|
|
@ -2511,16 +2510,12 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
|
|||
* This is like of_LOAD_VEC, but includes an add of an integer value from
|
||||
* index 0. The <wid> is the expected result width not the vector width.
|
||||
*/
|
||||
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
||||
|
||||
static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value)
|
||||
{
|
||||
unsigned bit = cp->bit_idx[0];
|
||||
unsigned wid = cp->bit_idx[1];
|
||||
int64_t addend = thr->words[0].w_int;
|
||||
unsigned wid = thr->words[2].w_int;
|
||||
|
||||
/* We need a vector this wide to make the math work correctly.
|
||||
* Copy the base bits into the vector, but keep the width. */
|
||||
vvp_vector4_t sig_value(wid, BIT4_0);
|
||||
sig_value.copy_bits(load_base(thr, cp));
|
||||
|
||||
/* Check the address once, before we scan the vector. */
|
||||
thr_check_addr(thr, bit+wid-1);
|
||||
|
|
@ -2529,7 +2524,7 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
|||
if (val == 0) {
|
||||
vvp_vector4_t tmp(wid, BIT4_X);
|
||||
thr->bits4.set_vec(bit, tmp);
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;
|
||||
|
|
@ -2551,7 +2546,33 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
|||
directly to skip the excess calls to thr_check_addr. */
|
||||
thr->bits4.setarray(bit, wid, val);
|
||||
delete[]val;
|
||||
}
|
||||
|
||||
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
unsigned wid = cp->bit_idx[1];
|
||||
|
||||
/* We need a vector this wide to make the math work correctly.
|
||||
* Copy the base bits into the vector, but keep the width. */
|
||||
vvp_vector4_t sig_value(wid, BIT4_0);
|
||||
sig_value.copy_bits(load_base(thr, cp));
|
||||
|
||||
load_vp0_common(thr, cp, sig_value);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
unsigned wid = cp->bit_idx[1];
|
||||
|
||||
vvp_vector4_t tmp (load_base(thr, cp));
|
||||
|
||||
/* We need a vector this wide to make the math work correctly.
|
||||
* Copy the base bits into the vector, but keep the width. */
|
||||
vvp_vector4_t sig_value(wid, tmp.value(tmp.size()-1));
|
||||
sig_value.copy_bits(tmp);
|
||||
|
||||
load_vp0_common(thr, cp, sig_value);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue