Implement %cmp/ne and %cmpi/ne

These pull in the inversion of the output flags so that they more
efficiently implement != and !==, without %flag_inv instructions.
This commit is contained in:
Stephen Williams 2014-12-03 11:06:11 -08:00
parent 0c5ed2b60f
commit 85c7b07a9b
5 changed files with 61 additions and 7 deletions

View File

@ -83,26 +83,31 @@ static int draw_condition_binary_compare(ivl_expr_t expr)
draw_eval_vec4(le);
resize_vec4_wid(le, use_wid);
char use_opcode = ivl_expr_opcode(expr);
if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) {
/* Special case: If the right operand can be handled as
an immediate operand, then use that instead. */
draw_immediate_vec4(re, "%cmpi/e");
if (use_opcode=='n' || use_opcode=='N')
draw_immediate_vec4(re, "%cmpi/ne");
else
draw_immediate_vec4(re, "%cmpi/e");
} else {
draw_eval_vec4(re);
resize_vec4_wid(re, use_wid);
fprintf(vvp_out, " %%cmp/e;\n");
if (use_opcode=='n' || use_opcode=='N')
fprintf(vvp_out, " %%cmp/ne;\n");
else
fprintf(vvp_out, " %%cmp/e;\n");
}
switch (ivl_expr_opcode(expr)) {
case 'n': /* != */
fprintf(vvp_out, " %%flag_inv 4;\n");
; /* fall through.. */
case 'e': /* == */
return 4;
break;
case 'N': /* !== */
fprintf(vvp_out, " %%flag_inv 6;\n");
; /* fall through.. */
case 'E': /* === */
return 6;
default:

View File

@ -63,6 +63,8 @@ extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
extern bool of_CAST2(vthread_t thr, vvp_code_t code);
extern bool of_CMPE(vthread_t thr, vvp_code_t code);
extern bool of_CMPIE(vthread_t thr, vvp_code_t code);
/* Handlers for the %cmpi/ne and %cmp/ne opcodes: the same compare as
   %cmpi/e and %cmp/e, with flags 4 and 6 inverted afterwards. */
extern bool of_CMPINE(vthread_t thr, vvp_code_t code);
extern bool of_CMPNE(vthread_t thr, vvp_code_t code);
extern bool of_CMPS(vthread_t thr, vvp_code_t code);
extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);

View File

@ -113,6 +113,7 @@ static const struct opcode_table_s opcode_table[] = {
{ "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} },
{ "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/e", of_CMPE, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/ne", of_CMPNE, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} },
@ -122,6 +123,7 @@ static const struct opcode_table_s opcode_table[] = {
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmpi/e", of_CMPIE, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%cmpi/ne",of_CMPINE, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },

View File

@ -253,9 +253,11 @@ vector2 (binary) value, and push the result.
* %cmp/s
* %cmp/u
* %cmp/e
* %cmp/ne
* %cmpi/s <vala>, <valb>, <wid>
* %cmpi/u <vala>, <valb>, <wid>
* %cmpi/e <vala>, <valb>, <wid>
* %cmpi/ne <vala>, <valb>, <wid>
These instructions perform a generic comparison of two vectors of
equal size. Two values are pulled from the top of the stack, and not
@ -284,9 +286,14 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
compare. In either case, if either operand contains x or z, then lt
bit gets the x value.
Thje %cmp/e and %cmpi/e variants are the same, but they do not bother
The %cmp/e and %cmpi/e variants are the same, but they do not bother
to calculate the lt flag. These are faster if the lt flag is not needed.
The %cmp/ne and %cmpi/ne variants are the same as the %cmp/e and
%cmpi/e variants, but the 4 and 6 flags are inverted in order to
eliminate the need for a %flag_inv instruction to implement != and !==
operations.
* %cmp/wr
Compare real values for equality and less-than. This opcode pops to

View File

@ -1560,6 +1560,23 @@ bool of_CMPE(vthread_t thr, vvp_code_t)
return true;
}
/*
 * %cmp/ne
 *
 * Performs the same comparison as %cmp/e, then inverts flags 4 and 6
 * so that != and !== need no separate %flag_inv instruction.
 */
bool of_CMPNE(vthread_t thr, vvp_code_t)
{
      // Borrow the two operands where they sit on the vec4 stack
      // instead of copying them out. Nothing is pushed back, so both
      // entries are simply discarded once the flags have been set.
      const vvp_vector4_t&right = thr->peek_vec4(0);
      const vvp_vector4_t&left = thr->peek_vec4(1);

      // Run the equality compare, then flip its two result flags.
      do_CMPE(thr, left, right);
      thr->flags[4] = ~thr->flags[4];
      thr->flags[6] = ~thr->flags[6];

      thr->pop_vec4(2);
      return true;
}
/*
* %cmpi/e <vala>, <valb>, <wid>
*
@ -1583,6 +1600,27 @@ bool of_CMPIE(vthread_t thr, vvp_code_t cp)
return true;
}
/*
 * %cmpi/ne <vala>, <valb>, <wid>
 *
 * Immediate form of %cmp/ne: the right operand is built from the
 * instruction itself, the left operand is taken from the vec4 stack,
 * and flags 4 and 6 are inverted after the equality compare.
 */
bool of_CMPINE(vthread_t thr, vvp_code_t cp)
{
      const unsigned width = cp->number;

      vvp_vector4_t&left = thr->peek_vec4();

      // Immediate values are expected to be mostly zero bits, so the
      // operand starts out all zeros and get_immediate_rval only has
      // to overwrite the bits that differ.
      vvp_vector4_t immediate (width, BIT4_0);
      get_immediate_rval (cp, immediate);

      // Run the equality compare, then flip its two result flags.
      do_CMPE(thr, left, immediate);
      thr->flags[4] = ~thr->flags[4];
      thr->flags[6] = ~thr->flags[6];

      // Only the left operand was on the stack; drop it.
      thr->pop_vec4(1);
      return true;
}
static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)