diff --git a/tgt-vvp/eval_condit.c b/tgt-vvp/eval_condit.c index 508beb292..1d0d9d79f 100644 --- a/tgt-vvp/eval_condit.c +++ b/tgt-vvp/eval_condit.c @@ -86,11 +86,11 @@ static int draw_condition_binary_compare(ivl_expr_t expr) if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) { /* Special case: If the right operand can be handled as an immediate operand, then use that instead. */ - draw_immediate_vec4(re, "%cmpi/u"); + draw_immediate_vec4(re, "%cmpi/e"); } else { draw_eval_vec4(re); resize_vec4_wid(re, use_wid); - fprintf(vvp_out, " %%cmp/u;\n"); + fprintf(vvp_out, " %%cmp/e;\n"); } switch (ivl_expr_opcode(expr)) { diff --git a/tgt-vvp/eval_vec4.c b/tgt-vvp/eval_vec4.c index 8564d271a..48d1377ba 100644 --- a/tgt-vvp/eval_vec4.c +++ b/tgt-vvp/eval_vec4.c @@ -375,20 +375,20 @@ static void draw_binary_vec4_compare(ivl_expr_t expr) switch (ivl_expr_opcode(expr)) { case 'e': /* == */ - fprintf(vvp_out, " %%cmp/u;\n"); + fprintf(vvp_out, " %%cmp/e;\n"); fprintf(vvp_out, " %%flag_get/vec4 4;\n"); break; case 'n': /* != */ - fprintf(vvp_out, " %%cmp/u;\n"); + fprintf(vvp_out, " %%cmp/e;\n"); fprintf(vvp_out, " %%flag_get/vec4 4;\n"); fprintf(vvp_out, " %%inv;\n"); break; case 'E': /* === */ - fprintf(vvp_out, " %%cmp/u;\n"); + fprintf(vvp_out, " %%cmp/e;\n"); fprintf(vvp_out, " %%flag_get/vec4 6;\n"); break; case 'N': /* !== */ - fprintf(vvp_out, " %%cmp/u;\n"); + fprintf(vvp_out, " %%cmp/e;\n"); fprintf(vvp_out, " %%flag_get/vec4 6;\n"); fprintf(vvp_out, " %%inv;\n"); break; diff --git a/vvp/codes.h b/vvp/codes.h index 598fe2cb7..b1a68eb88 100644 --- a/vvp/codes.h +++ b/vvp/codes.h @@ -61,6 +61,8 @@ extern bool of_CASSIGN_VEC4(vthread_t thr, vvp_code_t code); extern bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t code); extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code); extern bool of_CAST2(vthread_t thr, vvp_code_t code); +extern bool of_CMPE(vthread_t thr, vvp_code_t code); +extern bool of_CMPIE(vthread_t thr, vvp_code_t code); extern bool 
of_CMPS(vthread_t thr, vvp_code_t code); extern bool of_CMPIS(vthread_t thr, vvp_code_t code); extern bool of_CMPSTR(vthread_t thr, vvp_code_t code); diff --git a/vvp/compile.cc b/vvp/compile.cc index c43b57033..830565362 100644 --- a/vvp/compile.cc +++ b/vvp/compile.cc @@ -112,6 +112,7 @@ static const struct opcode_table_s opcode_table[] = { { "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1, OA_NONE} }, { "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} }, { "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} }, + { "%cmp/e", of_CMPE, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} }, @@ -120,6 +121,7 @@ static const struct opcode_table_s opcode_table[] = { { "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} }, + { "%cmpi/e", of_CMPIE, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} }, diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt index 1f5a342f9..08c315643 100644 --- a/vvp/opcodes.txt +++ b/vvp/opcodes.txt @@ -252,8 +252,10 @@ vector2 (binary) value, and push the result. * %cmp/s * %cmp/u +* %cmp/e * %cmpi/s , , * %cmpi/u , , +* %cmpi/e , , These instructions perform a generic comparison of two vectors of equal size. Two values are pulled from the top of the stack, and not @@ -282,6 +284,9 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The compare. In either case, if either operand contains x or z, then lt bit gets the x value. +The %cmp/e and %cmpi/e variants are the same, but they do not bother +to calculate the lt flag. These are faster if the lt flag is not needed.
+ * %cmp/wr Compare real values for equality and less-then. This opcode pops to diff --git a/vvp/vthread.cc b/vvp/vthread.cc index 141bb4bc2..720f06d38 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -1477,6 +1477,93 @@ bool of_CAST2(vthread_t thr, vvp_code_t) return true; } +static void do_CMPE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval) +{ + assert(rval.size() == lval.size()); + + if (lval.has_xz() || rval.has_xz()) { + + unsigned wid = lval.size(); + vvp_bit4_t eq = BIT4_1; + vvp_bit4_t eeq = BIT4_1; + + for (unsigned idx = 0 ; idx < wid ; idx += 1) { + vvp_bit4_t lv = lval.value(idx); + vvp_bit4_t rv = rval.value(idx); + + if (lv != rv) + eeq = BIT4_0; + + if (eq==BIT4_1 && (bit4_is_xz(lv) || bit4_is_xz(rv))) + eq = BIT4_X; + if ((lv == BIT4_0) && (rv==BIT4_1)) + eq = BIT4_0; + if ((lv == BIT4_1) && (rv==BIT4_0)) + eq = BIT4_0; + + if (eq == BIT4_0) + break; + } + + thr->flags[4] = eq; + thr->flags[6] = eeq; + + } else { + // If there are no XZ bits anywhere, then the results of + // == match the === test. + thr->flags[4] = thr->flags[6] = (lval.eeq(rval)? BIT4_1 : BIT4_0); + } +} + +/* + * %cmp/e + * + * Pop the operands from the stack, and do not replace them. The + * results are written to flag bits: + * + * 4: eq (equal) + * + * 6: eeq (case equal) + */ +bool of_CMPE(vthread_t thr, vvp_code_t) +{ + // We are going to pop these and push nothing in their + // place, but for now it is more efficient to use a constant + // reference. When we finish, pop the stack without copies. + const vvp_vector4_t&rval = thr->peek_vec4(0); + const vvp_vector4_t&lval = thr->peek_vec4(1); + + do_CMPE(thr, lval, rval); + + thr->pop_vec4(2); + return true; +} + +/* + * %cmpi/e , , + * + * Pop one operand, get the other operand from the arguments.
+ */ +bool of_CMPIE(vthread_t thr, vvp_code_t cp) +{ + unsigned wid = cp->number; + + vvp_vector4_t&lval = thr->peek_vec4(); + + // I expect that most of the bits of an immediate value are + // going to be zero, so start the result vector with all zero + // bits. Then we only need to replace the bits that are different. + vvp_vector4_t rval (wid, BIT4_0); + get_immediate_rval (cp, rval); + + do_CMPE(thr, lval, rval); + + thr->pop_vec4(1); + return true; +} + + + static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval) { vvp_bit4_t eq = BIT4_1;