Add the %cmp/e instructions, and put them to use.

When testing for == and ===, there is no need to also calculate <,
so it makes sense to have a special instruction for these cases.
This commit is contained in:
Stephen Williams 2014-11-21 16:45:27 -08:00
parent bea03db25d
commit 663c79d4af
6 changed files with 102 additions and 6 deletions

View File

@ -86,11 +86,11 @@ static int draw_condition_binary_compare(ivl_expr_t expr)
if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) { if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) {
/* Special case: If the right operand can be handled as /* Special case: If the right operand can be handled as
an immediate operand, then use that instead. */ an immediate operand, then use that instead. */
draw_immediate_vec4(re, "%cmpi/u"); draw_immediate_vec4(re, "%cmpi/e");
} else { } else {
draw_eval_vec4(re); draw_eval_vec4(re);
resize_vec4_wid(re, use_wid); resize_vec4_wid(re, use_wid);
fprintf(vvp_out, " %%cmp/u;\n"); fprintf(vvp_out, " %%cmp/e;\n");
} }
switch (ivl_expr_opcode(expr)) { switch (ivl_expr_opcode(expr)) {

View File

@ -375,20 +375,20 @@ static void draw_binary_vec4_compare(ivl_expr_t expr)
switch (ivl_expr_opcode(expr)) { switch (ivl_expr_opcode(expr)) {
case 'e': /* == */ case 'e': /* == */
fprintf(vvp_out, " %%cmp/u;\n"); fprintf(vvp_out, " %%cmp/e;\n");
fprintf(vvp_out, " %%flag_get/vec4 4;\n"); fprintf(vvp_out, " %%flag_get/vec4 4;\n");
break; break;
case 'n': /* != */ case 'n': /* != */
fprintf(vvp_out, " %%cmp/u;\n"); fprintf(vvp_out, " %%cmp/e;\n");
fprintf(vvp_out, " %%flag_get/vec4 4;\n"); fprintf(vvp_out, " %%flag_get/vec4 4;\n");
fprintf(vvp_out, " %%inv;\n"); fprintf(vvp_out, " %%inv;\n");
break; break;
case 'E': /* === */ case 'E': /* === */
fprintf(vvp_out, " %%cmp/u;\n"); fprintf(vvp_out, " %%cmp/e;\n");
fprintf(vvp_out, " %%flag_get/vec4 6;\n"); fprintf(vvp_out, " %%flag_get/vec4 6;\n");
break; break;
case 'N': /* !== */ case 'N': /* !== */
fprintf(vvp_out, " %%cmp/u;\n"); fprintf(vvp_out, " %%cmp/e;\n");
fprintf(vvp_out, " %%flag_get/vec4 6;\n"); fprintf(vvp_out, " %%flag_get/vec4 6;\n");
fprintf(vvp_out, " %%inv;\n"); fprintf(vvp_out, " %%inv;\n");
break; break;

View File

@ -61,6 +61,8 @@ extern bool of_CASSIGN_VEC4(vthread_t thr, vvp_code_t code);
extern bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t code); extern bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t code);
extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code); extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
extern bool of_CAST2(vthread_t thr, vvp_code_t code); extern bool of_CAST2(vthread_t thr, vvp_code_t code);
extern bool of_CMPE(vthread_t thr, vvp_code_t code);
extern bool of_CMPIE(vthread_t thr, vvp_code_t code);
extern bool of_CMPS(vthread_t thr, vvp_code_t code); extern bool of_CMPS(vthread_t thr, vvp_code_t code);
extern bool of_CMPIS(vthread_t thr, vvp_code_t code); extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
extern bool of_CMPSTR(vthread_t thr, vvp_code_t code); extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);

View File

@ -112,6 +112,7 @@ static const struct opcode_table_s opcode_table[] = {
{ "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1, OA_NONE} }, { "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1, OA_NONE} },
{ "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} }, { "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} },
{ "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/e", of_CMPE, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} },
@ -120,6 +121,7 @@ static const struct opcode_table_s opcode_table[] = {
{ "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} }, { "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} }, { "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmpi/e", of_CMPIE, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} }, { "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} }, { "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },

View File

@ -252,8 +252,10 @@ vector2 (binary) value, and push the result.
* %cmp/s * %cmp/s
* %cmp/u * %cmp/u
* %cmp/e
* %cmpi/s <vala>, <valb>, <wid> * %cmpi/s <vala>, <valb>, <wid>
* %cmpi/u <vala>, <valb>, <wid> * %cmpi/u <vala>, <valb>, <wid>
* %cmpi/e <vala>, <valb>, <wid>
These instructions perform a generic comparison of two vectors of These instructions perform a generic comparison of two vectors of
equal size. Two values are pulled from the top of the stack, and not equal size. Two values are pulled from the top of the stack, and not
@ -282,6 +284,9 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
compare. In either case, if either operand contains x or z, then lt compare. In either case, if either operand contains x or z, then lt
bit gets the x value. bit gets the x value.
The %cmp/e and %cmpi/e variants are the same, but they do not bother
to calculate the lt flag. These are faster if the lt flag is not needed.
* %cmp/wr * %cmp/wr
Compare real values for equality and less-than. This opcode pops to Compare real values for equality and less-than. This opcode pops to

View File

@ -1477,6 +1477,93 @@ bool of_CAST2(vthread_t thr, vvp_code_t)
return true; return true;
} }
/*
 * Shared worker for the %cmp/e and %cmpi/e instructions. Compares two
 * equal-width vectors and records the outcome in the thread flags:
 *
 *   flags[4]: eq  -- result of ==  (1, 0, or x)
 *   flags[6]: eeq -- result of === (1 or 0)
 *
 * No "less than" result is produced here.
 */
static void do_CMPE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
{
      assert(rval.size() == lval.size());

      // Fast path: with no x/z bits in either operand the == and ===
      // tests give the same answer, so one whole-vector compare
      // settles both flags.
      if (!lval.has_xz() && !rval.has_xz()) {
            const vvp_bit4_t same = lval.eeq(rval) ? BIT4_1 : BIT4_0;
            thr->flags[6] = same;
            thr->flags[4] = same;
            return;
      }

      // Slow path: walk the bits, tracking both results as we go.
      vvp_bit4_t eq_flag = BIT4_1;
      vvp_bit4_t case_eq_flag = BIT4_1;
      const unsigned nbits = lval.size();

      for (unsigned bit = 0; bit < nbits; bit += 1) {
            const vvp_bit4_t lbit = lval.value(bit);
            const vvp_bit4_t rbit = rval.value(bit);

            // Any difference at all, x/z included, defeats ===.
            if (lbit != rbit)
                  case_eq_flag = BIT4_0;

            // A definite 0-vs-1 mismatch makes == false. The bits
            // differ, so === is already false too and nothing later
            // in the vector can change either result.
            const bool definite_mismatch =
                  (lbit == BIT4_0 && rbit == BIT4_1) ||
                  (lbit == BIT4_1 && rbit == BIT4_0);
            if (definite_mismatch) {
                  eq_flag = BIT4_0;
                  break;
            }

            // An x/z bit makes == unknown, unless a later bit turns
            // out to be a definite mismatch.
            if (eq_flag == BIT4_1 && (bit4_is_xz(lbit) || bit4_is_xz(rbit)))
                  eq_flag = BIT4_X;
      }

      thr->flags[4] = eq_flag;
      thr->flags[6] = case_eq_flag;
}
/*
 * %cmp/e
 *
 * Compare the two vectors on top of the vec4 stack for equality. Both
 * operands are removed from the stack and nothing is pushed in their
 * place; the outcome is reported through the flag bits:
 *
 *   4: eq  (equal)
 *
 *   6: eeq (case equal)
 */
bool of_CMPE(vthread_t thr, vvp_code_t)
{
      // Look at the operands in place through constant references
      // instead of copying them out. The stack entries are dropped
      // only after the comparison is done.
      const vvp_vector4_t&right = thr->peek_vec4(0);
      const vvp_vector4_t&left = thr->peek_vec4(1);

      do_CMPE(thr, left, right);

      thr->pop_vec4(2);
      return true;
}
/*
 * %cmpi/e <vala>, <valb>, <wid>
 *
 * Pop one operand from the vec4 stack and take the other operand from
 * the instruction's immediate arguments. The comparison results are
 * written to the flag bits by do_CMPE; nothing is pushed back.
 */
bool of_CMPIE(vthread_t thr, vvp_code_t cp)
{
      const unsigned wid = cp->number;

      // The stacked operand is only read here, so bind it as a
      // constant reference, the same way of_CMPE does.
      const vvp_vector4_t&lval = thr->peek_vec4();

      // I expect that most of the bits of an immediate value are
      // going to be zero, so start the result vector with all zero
      // bits. Then we only need to replace the bits that are different.
      vvp_vector4_t rval (wid, BIT4_0);
      get_immediate_rval (cp, rval);

      do_CMPE(thr, lval, rval);

      // Drop the stacked operand only after the comparison, since
      // lval refers to the stack entry itself.
      thr->pop_vec4(1);
      return true;
}
static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval) static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
{ {
vvp_bit4_t eq = BIT4_1; vvp_bit4_t eq = BIT4_1;