Add the %cmp/e instructions, and put them to use.

When testing for == and ===, there is no need to also calculate <,
so it makes sense to have a special instruction for these cases.
This commit is contained in:
Stephen Williams 2014-11-21 16:45:27 -08:00
parent bea03db25d
commit 663c79d4af
6 changed files with 102 additions and 6 deletions

View File

@ -86,11 +86,11 @@ static int draw_condition_binary_compare(ivl_expr_t expr)
if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) {
/* Special case: If the right operand can be handled as
an immediate operand, then use that instead. */
draw_immediate_vec4(re, "%cmpi/u");
draw_immediate_vec4(re, "%cmpi/e");
} else {
draw_eval_vec4(re);
resize_vec4_wid(re, use_wid);
fprintf(vvp_out, " %%cmp/u;\n");
fprintf(vvp_out, " %%cmp/e;\n");
}
switch (ivl_expr_opcode(expr)) {

View File

@ -375,20 +375,20 @@ static void draw_binary_vec4_compare(ivl_expr_t expr)
switch (ivl_expr_opcode(expr)) {
case 'e': /* == */
fprintf(vvp_out, " %%cmp/u;\n");
fprintf(vvp_out, " %%cmp/e;\n");
fprintf(vvp_out, " %%flag_get/vec4 4;\n");
break;
case 'n': /* != */
fprintf(vvp_out, " %%cmp/u;\n");
fprintf(vvp_out, " %%cmp/e;\n");
fprintf(vvp_out, " %%flag_get/vec4 4;\n");
fprintf(vvp_out, " %%inv;\n");
break;
case 'E': /* === */
fprintf(vvp_out, " %%cmp/u;\n");
fprintf(vvp_out, " %%cmp/e;\n");
fprintf(vvp_out, " %%flag_get/vec4 6;\n");
break;
case 'N': /* !== */
fprintf(vvp_out, " %%cmp/u;\n");
fprintf(vvp_out, " %%cmp/e;\n");
fprintf(vvp_out, " %%flag_get/vec4 6;\n");
fprintf(vvp_out, " %%inv;\n");
break;

View File

@ -61,6 +61,8 @@ extern bool of_CASSIGN_VEC4(vthread_t thr, vvp_code_t code);
extern bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t code);
extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
extern bool of_CAST2(vthread_t thr, vvp_code_t code);
extern bool of_CMPE(vthread_t thr, vvp_code_t code);
extern bool of_CMPIE(vthread_t thr, vvp_code_t code);
extern bool of_CMPS(vthread_t thr, vvp_code_t code);
extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);

View File

@ -112,6 +112,7 @@ static const struct opcode_table_s opcode_table[] = {
{ "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1, OA_NONE} },
{ "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} },
{ "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/e", of_CMPE, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} },
@ -120,6 +121,7 @@ static const struct opcode_table_s opcode_table[] = {
{ "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
{ "%cmpi/e", of_CMPIE, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },

View File

@ -252,8 +252,10 @@ vector2 (binary) value, and push the result.
* %cmp/s
* %cmp/u
* %cmp/e
* %cmpi/s <vala>, <valb>, <wid>
* %cmpi/u <vala>, <valb>, <wid>
* %cmpi/e <vala>, <valb>, <wid>
These instructions perform a generic comparison of two vectors of
equal size. Two values are pulled from the top of the stack, and not
@ -282,6 +284,9 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
compare. In either case, if either operand contains x or z, then lt
bit gets the x value.
The %cmp/e and %cmpi/e variants are the same, but they do not bother
to calculate the lt flag. These are faster if the lt flag is not needed.
* %cmp/wr
Compare real values for equality and less-than. This opcode pops to

View File

@ -1477,6 +1477,93 @@ bool of_CAST2(vthread_t thr, vvp_code_t)
return true;
}
/*
 * Shared worker for the %cmp/e and %cmpi/e instructions. Compare two
 * vectors of identical width and store the outcome in the thread
 * flags: flag 4 receives the == result and flag 6 receives the ===
 * result. No other flag is written here.
 */
static void do_CMPE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
{
      assert(rval.size() == lval.size());

      // Fast path: with no X/Z bits in either operand, == and ===
      // give the same answer, so a single whole-vector compare
      // settles both flags.
      if (!lval.has_xz() && !rval.has_xz()) {
	    const vvp_bit4_t same = lval.eeq(rval)? BIT4_1 : BIT4_0;
	    thr->flags[6] = same;
	    thr->flags[4] = same;
	    return;
      }

      // Slow path: at least one operand carries X/Z bits, so walk the
      // vectors bit by bit and track the two results separately.
      vvp_bit4_t logic_eq = BIT4_1;
      vvp_bit4_t case_eq  = BIT4_1;
      const unsigned width = lval.size();

      for (unsigned bit = 0 ; bit < width ; bit += 1) {
	    const vvp_bit4_t lbit = lval.value(bit);
	    const vvp_bit4_t rbit = rval.value(bit);

	      // Any difference at all defeats case equality.
	    if (lbit != rbit)
		  case_eq = BIT4_0;

	      // A 0-vs-1 difference makes == definitely false. The
	      // case_eq result is already 0 at this point, so there
	      // is nothing left to learn from the remaining bits.
	    const bool definite_mismatch =
		  (lbit == BIT4_0 && rbit == BIT4_1) ||
		  (lbit == BIT4_1 && rbit == BIT4_0);
	    if (definite_mismatch) {
		  logic_eq = BIT4_0;
		  break;
	    }

	      // An X/Z bit on either side downgrades a so-far-true ==
	      // result to X.
	    if (logic_eq == BIT4_1 && (bit4_is_xz(lbit) || bit4_is_xz(rbit)))
		  logic_eq = BIT4_X;
      }

      thr->flags[4] = logic_eq;
      thr->flags[6] = case_eq;
}
/*
 * %cmp/e
 *
 * Compare the two vectors on top of the vec4 stack. Both operands
 * are removed from the stack and nothing is pushed back. The results
 * are delivered in the thread flag bits:
 *
 *    4: eq  (equal)
 *
 *    6: eeq (case equal)
 */
bool of_CMPE(vthread_t thr, vvp_code_t)
{
	// Look at the operands in place through constant references
	// instead of copying them out. The right operand is on top
	// of the stack and the left operand sits just below it.
      const vvp_vector4_t&right = thr->peek_vec4(0);
      const vvp_vector4_t&left  = thr->peek_vec4(1);

      do_CMPE(thr, left, right);

	// Both operands are consumed; drop them without any copies.
      thr->pop_vec4(2);
      return true;
}
/*
 * %cmpi/e <vala>, <valb>, <wid>
 *
 * Pop one operand from the vec4 stack and compare it with an
 * immediate operand that is built from the instruction arguments.
 * The results are written to the flags by do_CMPE.
 */
bool of_CMPIE(vthread_t thr, vvp_code_t cp)
{
      const unsigned width = cp->number;

	// Build the immediate operand. Most bits of an immediate value
	// are expected to be zero, so start from an all-zero vector
	// and let get_immediate_rval replace only the bits that differ.
      vvp_vector4_t immediate (width, BIT4_0);
      get_immediate_rval (cp, immediate);

	// The left operand is inspected in place on top of the stack
	// and only discarded once the comparison is done.
      const vvp_vector4_t&stacked = thr->peek_vec4();
      do_CMPE(thr, stacked, immediate);

      thr->pop_vec4(1);
      return true;
}
static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
{
vvp_bit4_t eq = BIT4_1;