diff --git a/tgt-vvp/eval_expr.c b/tgt-vvp/eval_expr.c index 9efebb366..37f6af26c 100644 --- a/tgt-vvp/eval_expr.c +++ b/tgt-vvp/eval_expr.c @@ -28,7 +28,7 @@ static void draw_eval_expr_dest(ivl_expr_t exp, struct vector_info dest, int ok_flags); static void draw_signal_dest(ivl_expr_t exp, struct vector_info res, - int add_index, unsigned long immediate); + int add_index, long immediate); int number_is_unknown(ivl_expr_t ex) { @@ -1158,12 +1158,11 @@ static struct vector_info draw_binary_expr_lrs(ivl_expr_t exp, unsigned wid) static struct vector_info draw_load_add_immediate(ivl_expr_t le, ivl_expr_t re, - unsigned wid) + unsigned wid, + int signed_flag) { struct vector_info lv; - unsigned long imm; - - imm = get_number_immediate(re); + long imm = get_number_immediate(re); lv.base = allocate_vector(wid); lv.wid = wid; if (lv.base == 0) { @@ -1176,7 +1175,7 @@ static struct vector_info draw_load_add_immediate(ivl_expr_t le, /* Load the signal value with a %load that adds the index register to the value being loaded. */ - draw_signal_dest(le, lv, 0, imm); + draw_signal_dest(le, lv, signed_flag, imm); return lv; } @@ -1319,25 +1318,27 @@ static struct vector_info draw_binary_expr_arith(ivl_expr_t exp, unsigned wid) const char*sign_string = ivl_expr_signed(le) && ivl_expr_signed(re)? "/s" : ""; + int signed_flag = ivl_expr_signed(exp)? 
1 : 0; + if ((ivl_expr_opcode(exp) == '+') && (ivl_expr_type(le) == IVL_EX_SIGNAL) && (ivl_expr_type(re) == IVL_EX_ULONG)) - return draw_load_add_immediate(le, re, wid); + return draw_load_add_immediate(le, re, wid, signed_flag); if ((ivl_expr_opcode(exp) == '+') && (ivl_expr_type(le) == IVL_EX_SIGNAL) && (ivl_expr_type(re) == IVL_EX_NUMBER)) - return draw_load_add_immediate(le, re, wid); + return draw_load_add_immediate(le, re, wid, signed_flag); if ((ivl_expr_opcode(exp) == '+') && (ivl_expr_type(re) == IVL_EX_SIGNAL) && (ivl_expr_type(le) == IVL_EX_ULONG)) - return draw_load_add_immediate(re, le, wid); + return draw_load_add_immediate(re, le, wid, signed_flag); if ((ivl_expr_opcode(exp) == '+') && (ivl_expr_type(re) == IVL_EX_SIGNAL) && (ivl_expr_type(le) == IVL_EX_NUMBER)) - return draw_load_add_immediate(re, le, wid); + return draw_load_add_immediate(re, le, wid, signed_flag); if ((ivl_expr_opcode(exp) == '+') && (ivl_expr_type(re) == IVL_EX_ULONG)) @@ -1963,11 +1964,13 @@ void pad_expr_in_place(ivl_expr_t exp, struct vector_info res, unsigned swid) * offsetting the read from the lsi (least significant index) of the * signal. * - * If the add_index is >=0, then generate a %load/vp0 to add the - * word0 value to the loaded value before storing it into the destination. + * If the add_index is 0, then generate a %load/vp0 to add the + * word0 value to the loaded value before storing it into the + * destination. If the add_index is 1, then generate a %load/vp0/s to + * do a signed load. */ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res, - int add_index, unsigned long immediate) + int add_index, long immediate) { unsigned swid = ivl_expr_width(exp); ivl_signal_t sig = ivl_expr_signal(exp); @@ -2009,13 +2012,17 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res, } else if (add_index >= 0) { - assert(add_index == 0); + const char*sign_flag = add_index==1? "/s" : ""; /* If this is a REG (a variable) then I can do a vector read. 
*/ - fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate); - fprintf(vvp_out, " %%ix/load 2, %u;\n", res.wid); - fprintf(vvp_out, " %%load/vp0 %u, v%p_%u, %u;\n", - res.base, sig, word, swid); + if (immediate >= 0) { + fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate); + } else { + fprintf(vvp_out, " %%ix/load 0, 0; immediate=%ld\n", immediate); + fprintf(vvp_out, " %%ix/sub 0, %ld;\n", -immediate); + } + fprintf(vvp_out, " %%load/vp0%s %u, v%p_%u, %u;\n", sign_flag, + res.base, sig,word, res.wid); swid = res.wid; } else { diff --git a/vvp/codes.h b/vvp/codes.h index 37341e6a1..58e1ee4be 100644 --- a/vvp/codes.h +++ b/vvp/codes.h @@ -100,6 +100,7 @@ extern bool of_LOAD_AVP0(vthread_t thr, vvp_code_t code); extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code); extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code); extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code); +extern bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t code); extern bool of_LOAD_WR(vthread_t thr, vvp_code_t code); extern bool of_LOAD_X1P(vthread_t thr, vvp_code_t code); extern bool of_LOADI_WR(vthread_t thr, vvp_code_t code); diff --git a/vvp/compile.cc b/vvp/compile.cc index 0dce76fe5..1b426498d 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -143,6 +143,7 @@ const static struct opcode_table_s opcode_table[] = { { "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} }, { "%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, + { "%load/vp0/s",of_LOAD_VP0_S,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/wr",of_LOAD_WR,2, {OA_BIT1, OA_VPI_PTR, OA_BIT2} }, { "%load/x1p",of_LOAD_X1P,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%loadi/wr",of_LOADI_WR,3,{OA_BIT1, OA_NUMBER, OA_BIT2} }, diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt index f72fe2917..97fc6ec50 100644 --- a/vvp/opcodes.txt +++ b/vvp/opcodes.txt @@ -437,18 +437,21 @@ the specified thread register bit. 
The functor-label can refer to a from the least significant up to bits, is loaded starting at thread bit . It is an OK for the width to not match the vector width at the functor. If the is less than the width at the -functor, then the most significant bits are dropped. +functor, then the most significant bits are dropped. If the is +more than the width at the functor, the value is padded with X bits. * %load/vp0 , , +* %load/vp0/s , , -This instruction is the same as %load/v above, except that it also -adds the integer value is index register 0 into the loaded value. The -addition is a Verilog-style add, which means that if any of the input -bits are X or Z, the entire result is turned into a vector of X bits. +This instruction is similar to %load/v above, except that it also +adds the signed integer value in index register 0 into the loaded +value. The addition is a Verilog-style add, which means that if any of +the input bits are X or Z, the entire result is turned into a vector +of X bits. -Index register 2 contains the result width. The addition of the loaded -value and the index are done at this width to avoid the problem of a -small vector with a large immediate offset indexing an array. +The is, like the %load/v, the result width. But unlike the +%load/v, the vector is padded with 0s (%load/vp0) or sign extended +(%load/vp0/s) to the desired width. * %load/wr , diff --git a/vvp/vthread.cc b/vvp/vthread.cc index fa52151b4..839471ada 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -2467,9 +2467,6 @@ bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t cp) */ static vvp_vector4_t load_base(vthread_t thr, vvp_code_t cp) { - assert(cp->bit_idx[0] >= 4); - assert(cp->bit_idx[1] > 0); - vvp_net_t*net = cp->net; /* For the %load to work, the functor must actually be a @@ -2501,6 +2498,8 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp) directly to skip the excess calls to thr_check_addr. 
*/ thr->bits4.set_vec(bit, sig_value); + /* If the source is shorter than the desired width, then pad + with BIT4_X values. */ for (unsigned idx = sig_value.size() ; idx < wid ; idx += 1) thr->bits4.set_bit(bit+idx, BIT4_X); @@ -2511,16 +2510,12 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp) * This is like of_LOAD_VEC, but includes an add of an integer value from * index 0. The is the expected result width not the vector width. */ -bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp) + +static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value) { unsigned bit = cp->bit_idx[0]; + unsigned wid = cp->bit_idx[1]; int64_t addend = thr->words[0].w_int; - unsigned wid = thr->words[2].w_int; - - /* We need a vector this wide to make the math work correctly. - * Copy the base bits into the vector, but keep the width. */ - vvp_vector4_t sig_value(wid, BIT4_0); - sig_value.copy_bits(load_base(thr, cp)); /* Check the address once, before we scan the vector. */ thr_check_addr(thr, bit+wid-1); @@ -2529,7 +2524,7 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp) if (val == 0) { vvp_vector4_t tmp(wid, BIT4_X); thr->bits4.set_vec(bit, tmp); - return true; + return; } unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS; @@ -2551,7 +2546,33 @@ bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp) directly to skip the excess calls to thr_check_addr. */ thr->bits4.setarray(bit, wid, val); delete[]val; +} +bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp) +{ + unsigned wid = cp->bit_idx[1]; + + /* We need a vector this wide to make the math work correctly. + * Copy the base bits into the vector, but keep the width. */ + vvp_vector4_t sig_value(wid, BIT4_0); + sig_value.copy_bits(load_base(thr, cp)); + + load_vp0_common(thr, cp, sig_value); + return true; +} + +bool of_LOAD_VP0_S(vthread_t thr, vvp_code_t cp) +{ + unsigned wid = cp->bit_idx[1]; + + vvp_vector4_t tmp (load_base(thr, cp)); + + /* We need a vector this wide to make the math work correctly. 
+ * Copy the base bits into the vector, but keep the width. */ + vvp_vector4_t sig_value(wid, tmp.value(tmp.size()-1)); + sig_value.copy_bits(tmp); + + load_vp0_common(thr, cp, sig_value); return true; }