diff --git a/tgt-vvp/eval_vec4.c b/tgt-vvp/eval_vec4.c index 48d1377ba..bf7eb6d30 100644 --- a/tgt-vvp/eval_vec4.c +++ b/tgt-vvp/eval_vec4.c @@ -138,6 +138,9 @@ static void draw_binary_vec4_arith(ivl_expr_t expr) case '+': draw_immediate_vec4(re, "%addi"); return; + case '*': + draw_immediate_vec4(re, "%muli"); + return; default: break; } diff --git a/vvp/codes.h b/vvp/codes.h index b1a68eb88..b8f5eb295 100644 --- a/vvp/codes.h +++ b/vvp/codes.h @@ -151,6 +151,7 @@ extern bool of_MOD_S(vthread_t thr, vvp_code_t code); extern bool of_MOD_WR(vthread_t thr, vvp_code_t code); extern bool of_MOV_WU(vthread_t thr, vvp_code_t code); extern bool of_MUL(vthread_t thr, vvp_code_t code); +extern bool of_MULI(vthread_t thr, vvp_code_t code); extern bool of_MUL_WR(vthread_t thr, vvp_code_t code); extern bool of_NAND(vthread_t thr, vvp_code_t code); extern bool of_NANDR(vthread_t thr, vvp_code_t code); diff --git a/vvp/compile.cc b/vvp/compile.cc index 830565362..95a11a691 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -200,6 +200,7 @@ static const struct opcode_table_s opcode_table[] = { { "%mov/wu", of_MOV_WU, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%mul", of_MUL, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%mul/wr", of_MUL_WR, 0, {OA_NONE, OA_NONE, OA_NONE} }, + { "%muli", of_MULI, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%nand", of_NAND, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%nand/r", of_NANDR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%new/cobj", of_NEW_COBJ, 1, {OA_VPI_PTR,OA_NONE, OA_NONE} }, diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt index 08c315643..89353899c 100644 --- a/vvp/opcodes.txt +++ b/vvp/opcodes.txt @@ -754,6 +754,7 @@ This opcode is the real-valued modulus of the two real values. * %mov/wu , * %mul +* %muli <vala>, <valb>, <wid> This instruction multiplies the left vector by the right vector, the vectors pare popped from the vec4 stack and have the same width. 
If diff --git a/vvp/vthread.cc b/vvp/vthread.cc index 5d408c874..bb7d6116b 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -3676,7 +3676,7 @@ static void do_verylong_mod(vvp_vector4_t&vala, const vvp_vector4_t&valb, vvp_vector4_t tmp (len, BIT4_X); carry = out_is_neg? 1 : 0; - for (unsigned idx = 0 ; idx < len ; idx += 1) { + for (int idx = 0 ; idx < len ; idx += 1) { unsigned ob = z[idx]; if (out_is_neg) { ob = (1-ob) + carry; @@ -4004,13 +4004,8 @@ bool of_MOV_WU(vthread_t thr, vvp_code_t cp) return true; } -/* - * %mul - */ -bool of_MUL(vthread_t thr, vvp_code_t) +static bool do_MUL(vvp_vector4_t&vala, const vvp_vector4_t&valb) { - vvp_vector4_t valb = thr->pop_vec4(); - vvp_vector4_t&vala = thr->peek_vec4(); assert(vala.size() == valb.size()); unsigned wid = vala.size(); @@ -4065,6 +4060,41 @@ bool of_MUL(vthread_t thr, vvp_code_t) return true; } +/* + * %mul + */ +bool of_MUL(vthread_t thr, vvp_code_t) +{ + vvp_vector4_t r = thr->pop_vec4(); + // Rather than pop l, use it directly from the stack. When we + // assign to 'l', that will edit the top of the stack, which + // replaces a pop and a push. + vvp_vector4_t&l = thr->peek_vec4(); + + return do_MUL(l, r); +} + +/* + * %muli <vala>, <valb>, <wid> + * + * Pop one operand, get the other operand from the arguments, and push + * the result. + */ +bool of_MULI(vthread_t thr, vvp_code_t cp) +{ + unsigned wid = cp->number; + + vvp_vector4_t&l = thr->peek_vec4(); + + // I expect that most of the bits of an immediate value are + // going to be zero, so start the result vector with all zero + // bits. Then we only need to replace the bits that are different. + vvp_vector4_t r (wid, BIT4_0); + get_immediate_rval (cp, r); + + return do_MUL(l, r); +} + bool of_MUL_WR(vthread_t thr, vvp_code_t) { double r = thr->pop_real();