Load_add_immediate to work with signed expressions
The %load/vp0 instruction adds a signed value to the signal value being loaded, but it doesn't allow for a signed source vector. Add the %load/vp0/s instruction that pads the loaded vector, and add the code generator details to properly use it.
This commit is contained in:
parent
62d7c081dc
commit
6f0d8e8dda
|
|
@ -28,7 +28,7 @@
|
||||||
static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
|
static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest,
|
||||||
int ok_flags);
|
int ok_flags);
|
||||||
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
||||||
int add_index, unsigned long immediate);
|
int add_index, long immediate);
|
||||||
|
|
||||||
int number_is_unknown(ivl_expr_t ex)
|
int number_is_unknown(ivl_expr_t ex)
|
||||||
{
|
{
|
||||||
|
|
@ -1158,12 +1158,11 @@ static struct vector_info draw_binary_expr_lrs(ivl_expr_t exp, unsigned wid)
|
||||||
|
|
||||||
static struct vector_info draw_load_add_immediate(ivl_expr_t le,
|
static struct vector_info draw_load_add_immediate(ivl_expr_t le,
|
||||||
ivl_expr_t re,
|
ivl_expr_t re,
|
||||||
unsigned wid)
|
unsigned wid,
|
||||||
|
int signed_flag)
|
||||||
{
|
{
|
||||||
struct vector_info lv;
|
struct vector_info lv;
|
||||||
unsigned long imm;
|
long imm = get_number_immediate(re);
|
||||||
|
|
||||||
imm = get_number_immediate(re);
|
|
||||||
lv.base = allocate_vector(wid);
|
lv.base = allocate_vector(wid);
|
||||||
lv.wid = wid;
|
lv.wid = wid;
|
||||||
if (lv.base == 0) {
|
if (lv.base == 0) {
|
||||||
|
|
@ -1176,7 +1175,7 @@ static struct vector_info draw_load_add_immediate(ivl_expr_t le,
|
||||||
|
|
||||||
/* Load the signal value with a %load that adds the index
|
/* Load the signal value with a %load that adds the index
|
||||||
register to the value being loaded. */
|
register to the value being loaded. */
|
||||||
draw_signal_dest(le, lv, 0, imm);
|
draw_signal_dest(le, lv, signed_flag, imm);
|
||||||
|
|
||||||
return lv;
|
return lv;
|
||||||
}
|
}
|
||||||
|
|
@ -1319,25 +1318,27 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid)
|
||||||
|
|
||||||
const char*sign_string = ivl_expr_signed(le) && ivl_expr_signed(re)? "/s" : "";
|
const char*sign_string = ivl_expr_signed(le) && ivl_expr_signed(re)? "/s" : "";
|
||||||
|
|
||||||
|
int signed_flag = ivl_expr_signed(exp)? 1 : 0;
|
||||||
|
|
||||||
if ((ivl_expr_opcode(exp) == '+')
|
if ((ivl_expr_opcode(exp) == '+')
|
||||||
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
|
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
|
||||||
&& (ivl_expr_type(re) == IVL_EX_ULONG))
|
&& (ivl_expr_type(re) == IVL_EX_ULONG))
|
||||||
return draw_load_add_immediate(le, re, wid);
|
return draw_load_add_immediate(le, re, wid, signed_flag);
|
||||||
|
|
||||||
if ((ivl_expr_opcode(exp) == '+')
|
if ((ivl_expr_opcode(exp) == '+')
|
||||||
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
|
&& (ivl_expr_type(le) == IVL_EX_SIGNAL)
|
||||||
&& (ivl_expr_type(re) == IVL_EX_NUMBER))
|
&& (ivl_expr_type(re) == IVL_EX_NUMBER))
|
||||||
return draw_load_add_immediate(le, re, wid);
|
return draw_load_add_immediate(le, re, wid, signed_flag);
|
||||||
|
|
||||||
if ((ivl_expr_opcode(exp) == '+')
|
if ((ivl_expr_opcode(exp) == '+')
|
||||||
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
|
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
|
||||||
&& (ivl_expr_type(le) == IVL_EX_ULONG))
|
&& (ivl_expr_type(le) == IVL_EX_ULONG))
|
||||||
return draw_load_add_immediate(re, le, wid);
|
return draw_load_add_immediate(re, le, wid, signed_flag);
|
||||||
|
|
||||||
if ((ivl_expr_opcode(exp) == '+')
|
if ((ivl_expr_opcode(exp) == '+')
|
||||||
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
|
&& (ivl_expr_type(re) == IVL_EX_SIGNAL)
|
||||||
&& (ivl_expr_type(le) == IVL_EX_NUMBER))
|
&& (ivl_expr_type(le) == IVL_EX_NUMBER))
|
||||||
return draw_load_add_immediate(re, le, wid);
|
return draw_load_add_immediate(re, le, wid, signed_flag);
|
||||||
|
|
||||||
if ((ivl_expr_opcode(exp) == '+')
|
if ((ivl_expr_opcode(exp) == '+')
|
||||||
&& (ivl_expr_type(re) == IVL_EX_ULONG))
|
&& (ivl_expr_type(re) == IVL_EX_ULONG))
|
||||||
|
|
@ -1963,11 +1964,13 @@ void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned swid)
|
||||||
* offsetting the read from the lsi (least significant index) of the
|
* offsetting the read from the lsi (least significant index) of the
|
||||||
* signal.
|
* signal.
|
||||||
*
|
*
|
||||||
* If the add_index is >=0, then generate a %load/vp0 to add the
|
* If the add_index is 0, then generate a %load/vp0 to add the
|
||||||
* word0 value to the loaded value before storing it into the destination.
|
* word0 value to the loaded value before storing it into the
|
||||||
|
* destination. If the add_index is 1, then generate a %load/vp0/s to
|
||||||
|
* do a signed load.
|
||||||
*/
|
*/
|
||||||
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
||||||
int add_index, unsigned long immediate)
|
int add_index, long immediate)
|
||||||
{
|
{
|
||||||
unsigned swid = ivl_expr_width(exp);
|
unsigned swid = ivl_expr_width(exp);
|
||||||
ivl_signal_t sig = ivl_expr_signal(exp);
|
ivl_signal_t sig = ivl_expr_signal(exp);
|
||||||
|
|
@ -2009,13 +2012,17 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res,
|
||||||
|
|
||||||
} else if (add_index >= 0) {
|
} else if (add_index >= 0) {
|
||||||
|
|
||||||
assert(add_index == 0);
|
const char*sign_flag = add_index==1? "/s" : "";
|
||||||
|
|
||||||
/* If this is a REG (a variable) then I can do a vector read. */
|
/* If this is a REG (a variable) then I can do a vector read. */
|
||||||
fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate);
|
if (immediate >= 0) {
|
||||||
fprintf(vvp_out, " %%ix/load 2, %u;\n", res.wid);
|
fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate);
|
||||||
fprintf(vvp_out, " %%load/vp0 %u, v%p_%u, %u;\n",
|
} else {
|
||||||
res.base, sig, word, swid);
|
fprintf(vvp_out, " %%ix/load 0, 0; immediate=%ld\n", immediate);
|
||||||
|
fprintf(vvp_out, " %%ix/sub 0, %ld;\n", -immediate);
|
||||||
|
}
|
||||||
|
fprintf(vvp_out, " %%load/vp0%s %u, v%p_%u, %u;\n", sign_flag,
|
||||||
|
res.base, sig,word, res.wid);
|
||||||
swid = res.wid;
|
swid = res.wid;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -100,6 +100,7 @@ extern bool of_LOAD_AVP0(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
|
extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code);
|
extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
|
extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code);
|
||||||
|
extern bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code);
|
extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code);
|
extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_LOADI_WR(vthread_t thr, vvp_code_t code);
|
extern bool of_LOADI_WR(vthread_t thr, vvp_code_t code);
|
||||||
|
|
|
||||||
|
|
@ -143,6 +143,7 @@ const static struct opcode_table_s opcode_table[] = {
|
||||||
{ "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} },
|
{ "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} },
|
||||||
{ "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
{ "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||||
{ "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
{ "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||||
|
{ "%load/vp0/s",of_LOAD_VP0_S,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||||
{ "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} },
|
{ "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} },
|
||||||
{ "%load/x1p",of_LOAD_X1P,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
{ "%load/x1p",of_LOAD_X1P,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} },
|
||||||
{ "%loadi/wr",of_LOADI_WR,3,{OA_BIT1, OA_NUMBER, OA_BIT2} },
|
{ "%loadi/wr",of_LOADI_WR,3,{OA_BIT1, OA_NUMBER, OA_BIT2} },
|
||||||
|
|
|
||||||
|
|
@ -437,18 +437,21 @@ the specified thread register bit. The functor-label can refer to a
|
||||||
from the least significant up to <wid> bits, is loaded starting at
|
from the least significant up to <wid> bits, is loaded starting at
|
||||||
thread bit <bit>. It is OK for the width to not match the vector
|
thread bit <bit>. It is OK for the width to not match the vector
|
||||||
width at the functor. If the <wid> is less than the width at the
|
width at the functor. If the <wid> is less than the width at the
|
||||||
functor, then the most significant bits are dropped.
|
functor, then the most significant bits are dropped. If the <wid> is
|
||||||
|
more than the width at the functor, the value is padded with X bits.
|
||||||
|
|
||||||
* %load/vp0 <bit>, <functor-label>, <wid>
|
* %load/vp0 <bit>, <functor-label>, <wid>
|
||||||
|
* %load/vp0/s <bit>, <functor-label>, <wid>
|
||||||
|
|
||||||
This instruction is the same as %load/v above, except that it also
|
This instruction is similar to %load/v above, except that it also
|
||||||
adds the integer value is index register 0 into the loaded value. The
|
adds the signed integer value in index register 0 into the loaded
|
||||||
addition is a Verilog-style add, which means that if any of the input
|
value. The addition is a Verilog-style add, which means that if any of
|
||||||
bits are X or Z, the entire result is turned into a vector of X bits.
|
the input bits are X or Z, the entire result is turned into a vector
|
||||||
|
of X bits.
|
||||||
|
|
||||||
Index register 2 contains the result width. The addition of the loaded
|
The <wid> is, like the %load/v, the result width. But unlike the
|
||||||
value and the index are done at this width to avoid the problem of a
|
%load/v, the vector is padded with 0s (%load/vp0) or sign extended
|
||||||
small vector with a large immediate offset indexing an array.
|
(%load/vp0/s) to the desired width.
|
||||||
|
|
||||||
* %load/wr <bit>, <vpi-label>
|
* %load/wr <bit>, <vpi-label>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2467,9 +2467,6 @@ bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t cp)
|
||||||
*/
|
*/
|
||||||
static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
|
static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp)
|
||||||
{
|
{
|
||||||
assert(cp->bit_idx[0] >= 4);
|
|
||||||
assert(cp->bit_idx[1] > 0);
|
|
||||||
|
|
||||||
vvp_net_t*net = cp->net;
|
vvp_net_t*net = cp->net;
|
||||||
|
|
||||||
/* For the %load to work, the functor must actually be a
|
/* For the %load to work, the functor must actually be a
|
||||||
|
|
@ -2501,6 +2498,8 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
|
||||||
directly to skip the excess calls to thr_check_addr. */
|
directly to skip the excess calls to thr_check_addr. */
|
||||||
thr->bits4.set_vec(bit, sig_value);
|
thr->bits4.set_vec(bit, sig_value);
|
||||||
|
|
||||||
|
/* If the source is shorter than the desired width, then pad
|
||||||
|
with BIT4_X values. */
|
||||||
for (unsigned idx = sig_value.size() ; idx < wid ; idx += 1)
|
for (unsigned idx = sig_value.size() ; idx < wid ; idx += 1)
|
||||||
thr->bits4.set_bit(bit+idx, BIT4_X);
|
thr->bits4.set_bit(bit+idx, BIT4_X);
|
||||||
|
|
||||||
|
|
@ -2511,16 +2510,12 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp)
|
||||||
* This is like of_LOAD_VEC, but includes an add of an integer value from
|
* This is like of_LOAD_VEC, but includes an add of an integer value from
|
||||||
* index 0. The <wid> is the expected result width not the vector width.
|
* index 0. The <wid> is the expected result width not the vector width.
|
||||||
*/
|
*/
|
||||||
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
|
||||||
|
static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value)
|
||||||
{
|
{
|
||||||
unsigned bit = cp->bit_idx[0];
|
unsigned bit = cp->bit_idx[0];
|
||||||
|
unsigned wid = cp->bit_idx[1];
|
||||||
int64_t addend = thr->words[0].w_int;
|
int64_t addend = thr->words[0].w_int;
|
||||||
unsigned wid = thr->words[2].w_int;
|
|
||||||
|
|
||||||
/* We need a vector this wide to make the math work correctly.
|
|
||||||
* Copy the base bits into the vector, but keep the width. */
|
|
||||||
vvp_vector4_t sig_value(wid, BIT4_0);
|
|
||||||
sig_value.copy_bits(load_base(thr, cp));
|
|
||||||
|
|
||||||
/* Check the address once, before we scan the vector. */
|
/* Check the address once, before we scan the vector. */
|
||||||
thr_check_addr(thr, bit+wid-1);
|
thr_check_addr(thr, bit+wid-1);
|
||||||
|
|
@ -2529,7 +2524,7 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
||||||
if (val == 0) {
|
if (val == 0) {
|
||||||
vvp_vector4_t tmp(wid, BIT4_X);
|
vvp_vector4_t tmp(wid, BIT4_X);
|
||||||
thr->bits4.set_vec(bit, tmp);
|
thr->bits4.set_vec(bit, tmp);
|
||||||
return true;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;
|
unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;
|
||||||
|
|
@ -2551,7 +2546,33 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
||||||
directly to skip the excess calls to thr_check_addr. */
|
directly to skip the excess calls to thr_check_addr. */
|
||||||
thr->bits4.setarray(bit, wid, val);
|
thr->bits4.setarray(bit, wid, val);
|
||||||
delete[]val;
|
delete[]val;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp)
|
||||||
|
{
|
||||||
|
unsigned wid = cp->bit_idx[1];
|
||||||
|
|
||||||
|
/* We need a vector this wide to make the math work correctly.
|
||||||
|
* Copy the base bits into the vector, but keep the width. */
|
||||||
|
vvp_vector4_t sig_value(wid, BIT4_0);
|
||||||
|
sig_value.copy_bits(load_base(thr, cp));
|
||||||
|
|
||||||
|
load_vp0_common(thr, cp, sig_value);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp)
|
||||||
|
{
|
||||||
|
unsigned wid = cp->bit_idx[1];
|
||||||
|
|
||||||
|
vvp_vector4_t tmp (load_base(thr, cp));
|
||||||
|
|
||||||
|
/* We need a vector this wide to make the math work correctly.
|
||||||
|
* Copy the base bits into the vector, but keep the width. */
|
||||||
|
vvp_vector4_t sig_value(wid, tmp.value(tmp.size()-1));
|
||||||
|
sig_value.copy_bits(tmp);
|
||||||
|
|
||||||
|
load_vp0_common(thr, cp, sig_value);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue