diff --git a/tgt-vvp/eval_vec4.c b/tgt-vvp/eval_vec4.c index 81c2b140a..8564d271a 100644 --- a/tgt-vvp/eval_vec4.c +++ b/tgt-vvp/eval_vec4.c @@ -62,7 +62,7 @@ int test_immediate_vec4_ok(ivl_expr_t re) return 1; } -void draw_immediate_vec4(ivl_expr_t re, const char*opcode) +static void make_immediate_vec4_words(ivl_expr_t re, unsigned long*val0p, unsigned long*valxp, unsigned*widp) { unsigned long val0 = 0; unsigned long valx = 0; @@ -95,6 +95,16 @@ void draw_immediate_vec4(ivl_expr_t re, const char*opcode) } } + *val0p = val0; + *valxp = valx; + *widp = wid; +} + +void draw_immediate_vec4(ivl_expr_t re, const char*opcode) +{ + unsigned long val0, valx; + unsigned wid; + make_immediate_vec4_words(re, &val0, &valx, &wid); fprintf(vvp_out, " %s %lu, %lu, %u;\n", opcode, val0, valx, wid); } @@ -879,9 +889,21 @@ static void draw_select_vec4(ivl_expr_t expr) return; } - draw_eval_vec4(subexpr); - draw_eval_vec4(base); - fprintf(vvp_out, " %%part/%c %u;\n", sign_suff, wid); + if (test_immediate_vec4_ok(base)) { + unsigned long val0, valx; + unsigned base_wid; + make_immediate_vec4_words(base, &val0, &valx, &base_wid); + assert(valx == 0); + + draw_eval_vec4(subexpr); + fprintf(vvp_out, " %%parti/%c %u, %lu, %u;\n", + sign_suff, wid, val0, base_wid); + + } else { + draw_eval_vec4(subexpr); + draw_eval_vec4(base); + fprintf(vvp_out, " %%part/%c %u;\n", sign_suff, wid); + } } static void draw_select_pad_vec4(ivl_expr_t expr) diff --git a/vvp/codes.h b/vvp/codes.h index 74733f849..598fe2cb7 100644 --- a/vvp/codes.h +++ b/vvp/codes.h @@ -164,6 +164,8 @@ extern bool of_PAD_S(vthread_t thr, vvp_code_t code); extern bool of_PAD_U(vthread_t thr, vvp_code_t code); extern bool of_PART_S(vthread_t thr, vvp_code_t code); extern bool of_PART_U(vthread_t thr, vvp_code_t code); +extern bool of_PARTI_S(vthread_t thr, vvp_code_t code); +extern bool of_PARTI_U(vthread_t thr, vvp_code_t code); extern bool of_POP_OBJ(vthread_t thr, vvp_code_t code); extern bool of_POP_REAL(vthread_t thr, vvp_code_t code); extern bool of_POP_STR(vthread_t thr, vvp_code_t code); diff --git a/vvp/compile.cc b/vvp/compile.cc index c1c58df44..c43b57033 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -212,6 +212,8 @@ static const struct opcode_table_s opcode_table[] = { { "%pad/u", of_PAD_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%part/s", of_PART_S, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%part/u", of_PART_U, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, + { "%parti/s",of_PARTI_S,3, {OA_NUMBER, OA_BIT1, OA_BIT2} }, + { "%parti/u",of_PARTI_U,3, {OA_NUMBER, OA_BIT1, OA_BIT2} }, { "%pop/obj", of_POP_OBJ, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%pop/real",of_POP_REAL,1, {OA_NUMBER, OA_NONE, OA_NONE} }, { "%pop/str", of_POP_STR, 1, {OA_NUMBER, OA_NONE, OA_NONE} }, diff --git a/vvp/vthread.cc b/vvp/vthread.cc index ac5bf1adf..344d03438 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -2406,7 +2406,7 @@ static void negate_words(unsigned long*val, unsigned words) bool of_DIV_S(vthread_t thr, vvp_code_t) { vvp_vector4_t valb = thr->pop_vec4(); - vvp_vector4_t vala = thr->pop_vec4(); + vvp_vector4_t&vala = thr->peek_vec4(); assert(vala.size()== valb.size()); unsigned wid = vala.size(); @@ -2418,7 +2418,7 @@ bool of_DIV_S(vthread_t thr, vvp_code_t) unsigned long*ap = vala.subarray(0, wid); if (ap == 0) { vvp_vector4_t tmp(wid, BIT4_X); - thr->push_vec4(tmp); + vala = tmp; return true; } @@ -2426,7 +2426,7 @@ bool of_DIV_S(vthread_t thr, vvp_code_t) if (bp == 0) { delete[]ap; vvp_vector4_t tmp(wid, BIT4_X); - thr->push_vec4(tmp); + vala = tmp; return true; } @@ -2444,14 +2444,13 @@ bool of_DIV_S(vthread_t thr, vvp_code_t) if (wid <= CPU_WORD_BITS) { if (bp[0] == 0) { vvp_vector4_t tmp(wid, BIT4_X); - thr->push_vec4(tmp); + vala = tmp; } else { long tmpa = (long) ap[0]; long tmpb = (long) bp[0]; long res = tmpa / tmpb; ap[0] = ((unsigned long)res) & ~sign_mask; vala.setarray(0, wid, ap); - thr->push_vec4(vala); } delete[]ap; delete[]bp; @@ -2475,7 +2474,7 @@ bool of_DIV_S(vthread_t thr, vvp_code_t) delete[]ap; delete[]bp; vvp_vector4_t tmp(wid, BIT4_X); - thr->push_vec4(tmp); + vala = tmp; return true; } @@ -2486,7 +2485,6 @@ bool of_DIV_S(vthread_t thr, vvp_code_t) result[words-1] &= ~sign_mask; vala.setarray(0, wid, result); - thr->push_vec4(vala); delete[]ap; delete[]bp; delete[]result; @@ -3787,7 +3785,7 @@ static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag) unsigned wid = cp->number; vvp_vector4_t base4 = thr->pop_vec4(); - vvp_vector4_t value = thr->pop_vec4(); + vvp_vector4_t&value = thr->peek_vec4(); vvp_vector4_t res (wid, BIT4_X); @@ -3795,17 +3793,17 @@ static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag) int32_t base; bool value_ok = vector4_to_value(base4, base, signed_flag); if (! value_ok) { - thr->push_vec4(res); + value = res; return true; } if (base >= (int32_t)value.size()) { - thr->push_vec4(res); + value = res; return true; } if ((base+(int)wid) <= 0) { - thr->push_vec4(res); + value = res; return true; } @@ -3821,7 +3819,7 @@ static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag) } res .set_vec(vbase, value.subvalue(base, wid)); - thr->push_vec4(res); + value = res; return true; } @@ -3836,6 +3834,65 @@ bool of_PART_U(vthread_t thr, vvp_code_t cp) return of_PART_base(thr, cp, false); } +/* + * %parti/s , , + * %parti/u , , + * + * Pop the value to be selected. The result is pushed back to the stack. + */ +static bool of_PARTI_base(vthread_t thr, vvp_code_t cp, bool signed_flag) +{ + unsigned wid = cp->number; + uint32_t base = cp->bit_idx[0]; + uint32_t bwid = cp->bit_idx[1]; + + vvp_vector4_t&value = thr->peek_vec4(); + + vvp_vector4_t res (wid, BIT4_X); + + // NOTE: This is treating the vector as signed. Is that correct? + int32_t use_base = base; + if (signed_flag && bwid < 32 && (base&(1<<(bwid-1)))) { + use_base |= (-1) << bwid; + } + + if (use_base >= (int32_t)value.size()) { + value = res; + return true; + } + + if ((use_base+(int32_t)wid) <= 0) { + value = res; + return true; + } + + long vbase = 0; + if (use_base < 0) { + vbase = -use_base; + wid -= vbase; + use_base = 0; + } + + if ((use_base+wid) > value.size()) { + wid = value.size() - use_base; + } + + res .set_vec(vbase, value.subvalue(use_base, wid)); + value = res; + + return true; +} + +bool of_PARTI_S(vthread_t thr, vvp_code_t cp) +{ + return of_PARTI_base(thr, cp, true); +} + +bool of_PARTI_U(vthread_t thr, vvp_code_t cp) +{ + return of_PARTI_base(thr, cp, false); +} + /* * %mov/wu , */ @@ -3853,15 +3910,15 @@ bool of_MOV_WU(vthread_t thr, vvp_code_t cp) */ bool of_MUL(vthread_t thr, vvp_code_t) { - vvp_vector4_t vala = thr->pop_vec4(); vvp_vector4_t valb = thr->pop_vec4(); + vvp_vector4_t&vala = thr->peek_vec4(); assert(vala.size() == valb.size()); unsigned wid = vala.size(); unsigned long*ap = vala.subarray(0, wid); if (ap == 0) { vvp_vector4_t tmp(wid, BIT4_X); - thr->push_vec4(tmp); + vala = tmp; return true; } @@ -3869,7 +3926,7 @@ bool of_MUL(vthread_t thr, vvp_code_t) if (bp == 0) { delete[]ap; vvp_vector4_t tmp(wid, BIT4_X); - thr->push_vec4(tmp); + vala = tmp; return true; } @@ -3877,7 +3934,6 @@ bool of_MUL(vthread_t thr, vvp_code_t) if (wid <= CPU_WORD_BITS) { ap[0] *= bp[0]; vala.setarray(0, wid, ap); - thr->push_vec4(vala); delete[]ap; delete[]bp; return true; @@ -3903,7 +3959,6 @@ bool of_MUL(vthread_t thr, vvp_code_t) } vala.setarray(0, wid, res); - thr->push_vec4(vala); delete[]ap; delete[]bp; delete[]res; @@ -5284,7 +5339,7 @@ bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp) int off = off_index? thr->words[off_index].w_int : 0; const int sig_value_size = sig->value_size(); - vvp_vector4_t val = thr->pop_vec4(); + vvp_vector4_t&val = thr->peek_vec4(); if (val.size() < (unsigned)wid) { cerr << "XXXX Internal error: val.size()=" << val.size() @@ -5296,13 +5351,19 @@ bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp) // If there is a problem loading the index register, flags-4 // will be set to 1, and we know here to skip the actual assignment. - if (off_index!=0 && thr->flags[4] == BIT4_1) + if (off_index!=0 && thr->flags[4] == BIT4_1) { + thr->pop_vec4(1); return true; + } - if (off <= -wid) + if (off <= -wid) { + thr->pop_vec4(1); return true; - if (off >= sig_value_size) + } + if (off >= sig_value_size) { + thr->pop_vec4(1); return true; + } // If the index is below the vector, then only assign the high // bits that overlap with the target. @@ -5327,6 +5388,7 @@ bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp) else vvp_send_vec4_pv(ptr, val, off, wid, sig_value_size, thr->wt_context); + thr->pop_vec4(1); return true; }