From 85c7b07a9bd2888d7d7f4ba015e8d89e96d2c48b Mon Sep 17 00:00:00 2001
From: Stephen Williams <steve@icarus.com>
Date: Wed, 3 Dec 2014 11:06:11 -0800
Subject: [PATCH] Implement %cmp/ne and %cmpi/ne

These pull in the inversion of the output flags so that they more
efficiently implement != and !==, without %flag_inv instructions.
---
 tgt-vvp/eval_condit.c | 17 +++++++++++------
 vvp/codes.h           |  2 ++
 vvp/compile.cc        |  2 ++
 vvp/opcodes.txt       |  9 ++++++++-
 vvp/vthread.cc        | 38 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/tgt-vvp/eval_condit.c b/tgt-vvp/eval_condit.c
index 2e78e503b..d4cd104a6 100644
--- a/tgt-vvp/eval_condit.c
+++ b/tgt-vvp/eval_condit.c
@@ -83,26 +83,31 @@ static int draw_condition_binary_compare(ivl_expr_t expr)
       draw_eval_vec4(le);
       resize_vec4_wid(le, use_wid);

+      char use_opcode = ivl_expr_opcode(expr);
+
+
       if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) {
             /* Special case: If the right operand can be handled as
                an immediate operand, then use that instead. */
-           draw_immediate_vec4(re, "%cmpi/e");
+           if (use_opcode=='n' || use_opcode=='N')
+                 draw_immediate_vec4(re, "%cmpi/ne");
+           else
+                 draw_immediate_vec4(re, "%cmpi/e");
       } else {
            draw_eval_vec4(re);
            resize_vec4_wid(re, use_wid);
-           fprintf(vvp_out, "    %%cmp/e;\n");
+           if (use_opcode=='n' || use_opcode=='N')
+                 fprintf(vvp_out, "    %%cmp/ne;\n");
+           else
+                 fprintf(vvp_out, "    %%cmp/e;\n");
       }

       switch (ivl_expr_opcode(expr)) {
           case 'n': /* != */
-           fprintf(vvp_out, "    %%flag_inv 4;\n");
-           ; /* fall through.. */
           case 'e': /* == */
            return 4;
            break;
           case 'N': /* !== */
-           fprintf(vvp_out, "    %%flag_inv 6;\n");
-           ; /* fall through.. */
           case 'E': /* === */
            return 6;
           default:
diff --git a/vvp/codes.h b/vvp/codes.h
index b8f5eb295..e31dd9cae 100644
--- a/vvp/codes.h
+++ b/vvp/codes.h
@@ -63,6 +63,8 @@ extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
 extern bool of_CAST2(vthread_t thr, vvp_code_t code);
 extern bool of_CMPE(vthread_t thr, vvp_code_t code);
 extern bool of_CMPIE(vthread_t thr, vvp_code_t code);
+extern bool of_CMPINE(vthread_t thr, vvp_code_t code);
+extern bool of_CMPNE(vthread_t thr, vvp_code_t code);
 extern bool of_CMPS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
 extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);
diff --git a/vvp/compile.cc b/vvp/compile.cc
index 95a11a691..ea71009a8 100644
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@@ -113,6 +113,7 @@ static const struct opcode_table_s opcode_table[] = {
       { "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} },
       { "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} },
       { "%cmp/e", of_CMPE, 0, {OA_NONE, OA_NONE, OA_NONE} },
+      { "%cmp/ne", of_CMPNE, 0, {OA_NONE, OA_NONE, OA_NONE} },
       { "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} },
       { "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} },
       { "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} },
@@ -122,6 +123,7 @@ static const struct opcode_table_s opcode_table[] = {
       { "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
       { "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
       { "%cmpi/e", of_CMPIE, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
+      { "%cmpi/ne",of_CMPINE, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
      { "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
       { "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
       { "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },
diff --git a/vvp/opcodes.txt b/vvp/opcodes.txt
index 89353899c..b1fba8fc8 100644
--- a/vvp/opcodes.txt
+++ b/vvp/opcodes.txt
@@ -253,9 +253,11 @@ vector2 (binary) value, and push the result.
 * %cmp/s
 * %cmp/u
 * %cmp/e
+* %cmp/ne
 * %cmpi/s <vala>, <valb>, <wid>
 * %cmpi/u <vala>, <valb>, <wid>
 * %cmpi/e <vala>, <valb>, <wid>
+* %cmpi/ne <vala>, <valb>, <wid>

 These instructions perform a generic comparison of two vectors of
 equal size. Two values are pulled from the top of the stack, and not
@@ -284,9 +286,14 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
 compare. In either case, if either operand contains x or z, then lt
 bit gets the x value.

-Thje %cmp/e and %cmpi/e variants are the same, but they do not bother
+The %cmp/e and %cmpi/e variants are the same, but they do not bother
 to calculate the lt flag. These are faster if the lt flag is not
 needed.
+The %cmp/ne and %cmpi/ne variants are the same as the %cmp/e and
+%cmpi/e variants, but the 4 and 6 flags are inverted in order to
+eliminate the need for a %flag_inv instruction to implement != and !==
+operations.
+
 * %cmp/wr

 Compare real values for equality and less-then. This opcode pops to
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index bb7d6116b..11061e2e6 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -1560,6 +1560,23 @@ bool of_CMPE(vthread_t thr, vvp_code_t)
       return true;
 }

+bool of_CMPNE(vthread_t thr, vvp_code_t)
+{
+      // We are going to pop these and push nothing in their
+      // place, but for now it is more efficient to use a constant
+      // reference. When we finish, pop the stack without copies.
+      const vvp_vector4_t&rval = thr->peek_vec4(0);
+      const vvp_vector4_t&lval = thr->peek_vec4(1);
+
+      do_CMPE(thr, lval, rval);
+
+      thr->flags[4] = ~thr->flags[4];
+      thr->flags[6] = ~thr->flags[6];
+
+      thr->pop_vec4(2);
+      return true;
+}
+
 /*
  * %cmpi/e <vala>, <valb>, <wid>
  *
@@ -1583,6 +1600,27 @@ bool of_CMPIE(vthread_t thr, vvp_code_t cp)
       return true;
 }

+bool of_CMPINE(vthread_t thr, vvp_code_t cp)
+{
+      unsigned wid = cp->number;
+
+      vvp_vector4_t&lval = thr->peek_vec4();
+
+      // I expect that most of the bits of an immediate value are
+      // going to be zero, so start the result vector with all zero
+      // bits. Then we only need to replace the bits that are different.
+      vvp_vector4_t rval (wid, BIT4_0);
+      get_immediate_rval (cp, rval);
+
+      do_CMPE(thr, lval, rval);
+
+      thr->flags[4] = ~thr->flags[4];
+      thr->flags[6] = ~thr->flags[6];
+
+      thr->pop_vec4(1);
+      return true;
+}
+
 static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval,
                     const vvp_vector4_t&rval)
 {
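
A note on what the new opcodes buy: with this patch, a Verilog "a != b"
condition compiles to a single "%cmp/ne" (the generated code then tests
flag 4), where the old code emitted "%cmp/e" followed by "%flag_inv 4".
The sketch below is a minimal standalone model of the flag semantics
involved, and is not taken from the vvp sources: bit4, not4, cmp_flags,
do_cmpe, and do_cmpne are simplified stand-ins invented for illustration
(the real code uses vvp_bit4_t, do_CMPE, and thr->flags). It demonstrates
the one subtlety that makes the fold safe: the inversion is a 4-state
NOT, so an x result stays x.

#include <cstdio>
#include <vector>

// Standalone sketch; illustrative only, not part of the patch.
// Simplified stand-in for vvp's 4-state bit values.
enum bit4 { B0, B1, BX, BZ };

static char bit4_chr(bit4 b) { return "01xz"[b]; }

// 4-state NOT, modeling how ~ behaves on a 4-state flag:
// 0<->1, and both x and z invert to x.
static bit4 not4(bit4 b)
{
      switch (b) {
          case B0: return B1;
          case B1: return B0;
          default: return BX;
      }
}

// Model of the flags that %cmp/e leaves behind: eq stands in for
// flag 4, the logical equality (x when an x/z bit hides the answer),
// and eeq for flag 6, the case equality (===), which compares x and
// z bits literally.
struct cmp_flags { bit4 eq, eeq; };

static cmp_flags do_cmpe(const std::vector<bit4>&lval,
                         const std::vector<bit4>&rval)
{
      cmp_flags f = { B1, B1 };
      for (size_t idx = 0 ; idx < lval.size() ; idx += 1) {
            if (lval[idx] != rval[idx])
                  f.eeq = B0;
            bool lxz = lval[idx]==BX || lval[idx]==BZ;
            bool rxz = rval[idx]==BX || rval[idx]==BZ;
            if (lxz || rxz) {
                  if (f.eq == B1) f.eq = BX; // unknown, unless already 0
            } else if (lval[idx] != rval[idx]) {
                  f.eq = B0;                 // a definite mismatch wins
            }
      }
      return f;
}

// %cmp/ne modeled as %cmp/e with flags 4 and 6 inverted, standing in
// for the separate %flag_inv instructions the old code emitted.
static cmp_flags do_cmpne(const std::vector<bit4>&lval,
                          const std::vector<bit4>&rval)
{
      cmp_flags f = do_cmpe(lval, rval);
      f.eq  = not4(f.eq);   // flag 4 now answers !=
      f.eeq = not4(f.eeq);  // flag 6 now answers !==
      return f;
}

int main()
{
      std::vector<bit4> a = { B1, B0, BX };
      std::vector<bit4> b = { B1, B0, B0 };
      cmp_flags f = do_cmpne(a, b);
      // a != b is x (the x bit hides the answer); a !== b is 1.
      std::printf("flag4=%c flag6=%c\n", bit4_chr(f.eq), bit4_chr(f.eeq));
      return 0;
}

The same reasoning covers !==: flag 6 carries the case-equality answer,
and of_CMPNE/of_CMPINE invert both flags unconditionally, so the code
generator can test either flag downstream, exactly as the 'n' and 'N'
cases in draw_condition_binary_compare now do.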