diff --git a/tgt-vvp/eval_expr.c b/tgt-vvp/eval_expr.c index 37f6af26c..a9931423e 100644 --- a/tgt-vvp/eval_expr.c +++ b/tgt-vvp/eval_expr.c @@ -1987,17 +1987,17 @@ static void draw_signal_dest(ivl_expr_t exp, struct vector_info res, draw_eval_expr_into_integer(ix, 3); if (add_index < 0) { - fprintf(vvp_out, " %%load/av %u, v%p, %u;\n", + fprintf(vvp_out, " %%load/av %u, v%p, %u;\n", res.base, sig, swid); + pad_expr_in_place(exp, res, swid); } else { - assert(add_index == 0); + const char*sign_flag = (add_index>0)? "/s" : ""; /* Add an immediate value to an array value. */ - fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate); - fprintf(vvp_out, " %%load/avp0 %u, v%p, %u;\n", - res.base, sig, swid); + fprintf(vvp_out, " %%ix/load 0, %lu;\n", immediate); + fprintf(vvp_out, " %%load/avp0%s %u, v%p, %u;\n", + sign_flag, res.base, sig, res.wid); } - pad_expr_in_place(exp, res, swid); return; } diff --git a/vvp/codes.h b/vvp/codes.h index 58e1ee4be..6fe3f2489 100644 --- a/vvp/codes.h +++ b/vvp/codes.h @@ -97,6 +97,7 @@ extern bool of_JMP1(vthread_t thr, vvp_code_t code); extern bool of_JOIN(vthread_t thr, vvp_code_t code); extern bool of_LOAD_AV(vthread_t thr, vvp_code_t code); extern bool of_LOAD_AVP0(vthread_t thr, vvp_code_t code); +extern bool of_LOAD_AVP0_S(vthread_t thr, vvp_code_t code); extern bool of_LOAD_AVX_P(vthread_t thr, vvp_code_t code); extern bool of_LOAD_VEC(vthread_t thr, vvp_code_t code); extern bool of_LOAD_VP0(vthread_t thr, vvp_code_t code); diff --git a/vvp/compile.cc b/vvp/compile.cc index 1b426498d..18174c937 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -140,6 +140,7 @@ const static struct opcode_table_s opcode_table[] = { { "%join", of_JOIN, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%load/av",of_LOAD_AV,3, {OA_BIT1, OA_ARR_PTR, OA_BIT2} }, { "%load/avp0",of_LOAD_AVP0,3, {OA_BIT1, OA_ARR_PTR, OA_BIT2} }, + { "%load/avp0/s",of_LOAD_AVP0_S,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} }, { "%load/avx.p",of_LOAD_AVX_P,3,{OA_BIT1, OA_ARR_PTR, OA_BIT2} }, { 
"%load/v", of_LOAD_VEC,3, {OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, { "%load/vp0",of_LOAD_VP0,3,{OA_BIT1, OA_FUNC_PTR, OA_BIT2} }, diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt index 97fc6ec50..292d7fb53 100644 --- a/vvp/opcodes.txt +++ b/vvp/opcodes.txt @@ -418,10 +418,16 @@ address is in index register 3. The width should match the width of the array word. * %load/avp0 <bit>, <array-label>, <wid> +* %load/avp0/s <bit>, <array-label>, <wid> This instruction is a mix of %load/av and %load/vp0. It loads an array value like %load/av and then adds a value from index register 0 to the -result like %load/vp0. +result like %load/vp0. The loaded value is zero-extended to <wid>, +then added arithmetically to the signed index register 0. The result +is then stored in <bit>. + +The %load/avp0/s instruction is the same, except that the loaded +vector is sign extended (instead of 0-extended) before the addition. * %load/avx.p , , diff --git a/vvp/vthread.cc b/vvp/vthread.cc index 839471ada..591040e5f 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -2381,6 +2381,46 @@ bool of_LOAD_AV(vthread_t thr, vvp_code_t cp) return true; } +/* + * %load/vp0, %load/vp0/s, %load/avp0 and %load/avp0/s share this function. +*/ +static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value) +{ + unsigned bit = cp->bit_idx[0]; + unsigned wid = cp->bit_idx[1]; + int64_t addend = thr->words[0].w_int; + + /* Check the address once, before we scan the vector. 
*/ + thr_check_addr(thr, bit+wid-1); + + unsigned long*val = sig_value.subarray(0, wid); + if (val == 0) { + vvp_vector4_t tmp(wid, BIT4_X); + thr->bits4.set_vec(bit, tmp); + return; + } + + unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS; + unsigned long carry = 0; + unsigned long imm = addend; + if (addend >= 0) { + for (unsigned idx = 0 ; idx < words ; idx += 1) { + val[idx] = add_with_carry(val[idx], imm, carry); + imm = 0UL; + } + } else { + for (unsigned idx = 0 ; idx < words ; idx += 1) { + val[idx] = add_with_carry(val[idx], imm, carry); + imm = -1UL; + } + } + + /* Copy the vector bits into the bits4 vector. Do the copy + directly to skip the excess calls to thr_check_addr. */ + thr->bits4.setarray(bit, wid, val); + delete[]val; +} + /* * %load/avp0 <bit>, <array-label>, <wid> ; * @@ -2393,30 +2433,31 @@ bool of_LOAD_AV(vthread_t thr, vvp_code_t cp) */ bool of_LOAD_AVP0(vthread_t thr, vvp_code_t cp) { - unsigned bit = cp->bit_idx[0]; unsigned wid = cp->bit_idx[1]; - int64_t addend = thr->words[0].w_int; unsigned adr = thr->words[3].w_int; - vvp_vector4_t word = array_get_word(cp->array, adr); + /* We need a vector this wide to make the math work correctly. + * Copy the base bits into the vector, but keep the width. */ + vvp_vector4_t sig_value(wid, BIT4_0); + sig_value.copy_bits(array_get_word(cp->array, adr)); - if (word.size() != wid) { - fprintf(stderr, "internal error: array width=%u, word.size()=%u, wid=%u\n", - 0, word.size(), wid); - } - assert(word.size() == wid); + load_vp0_common(thr, cp, sig_value); + return true; +} - /* Add the addend value */ - word += addend; +bool of_LOAD_AVP0_S(vthread_t thr, vvp_code_t cp) +{ + unsigned wid = cp->bit_idx[1]; + unsigned adr = thr->words[3].w_int; - /* Check the address once, before we scan the vector. */ - thr_check_addr(thr, bit+wid-1); - - /* Copy the vector bits into the bits4 vector. Do the copy - directly to skip the excess calls to thr_check_addr. 
*/ - thr->bits4.set_vec(bit, word); + vvp_vector4_t tmp (array_get_word(cp->array, adr)); + /* We need a vector this wide to make the math work correctly. + * Copy the base bits into the vector, but keep the width. */ + vvp_vector4_t sig_value(wid, tmp.value(tmp.size()-1)); + sig_value.copy_bits(tmp); + load_vp0_common(thr, cp, sig_value); return true; } @@ -2511,43 +2552,6 @@ bool of_LOAD_VEC(vthread_t thr, vvp_code_t cp) * index 0. The <wid> is the expected result width not the vector width. */ -static void load_vp0_common(vthread_t thr, vvp_code_t cp, const vvp_vector4_t&sig_value) -{ - unsigned bit = cp->bit_idx[0]; - unsigned wid = cp->bit_idx[1]; - int64_t addend = thr->words[0].w_int; - - /* Check the address once, before we scan the vector. */ - thr_check_addr(thr, bit+wid-1); - - unsigned long*val = sig_value.subarray(0, wid); - if (val == 0) { - vvp_vector4_t tmp(wid, BIT4_X); - thr->bits4.set_vec(bit, tmp); - return; - } - - unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS; - unsigned long carry = 0; - unsigned long imm = addend; - if (addend >= 0) { - for (unsigned idx = 0 ; idx < words ; idx += 1) { - val[idx] = add_with_carry(val[idx], imm, carry); - imm = 0UL; - } - } else { - for (unsigned idx = 0 ; idx < words ; idx += 1) { - val[idx] = add_with_carry(val[idx], imm, carry); - imm = -1UL; - } - } - - /* Copy the vector bits into the bits4 vector. Do the copy - directly to skip the excess calls to thr_check_addr. */ - thr->bits4.setarray(bit, wid, val); - delete[]val; -} - bool of_LOAD_VP0(vthread_t thr, vvp_code_t cp) { unsigned wid = cp->bit_idx[1];