Add the %cmp/e instructions, and put them to use.
When testing for == and ===, there is no need to also calculate <, so it makes sense to have a special instruction for these cases.
This commit is contained in:
parent
bea03db25d
commit
663c79d4af
|
|
@ -86,11 +86,11 @@ static int draw_condition_binary_compare(ivl_expr_t expr)
|
|||
if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) {
|
||||
/* Special case: If the right operand can be handled as
|
||||
an immediate operand, then use that instead. */
|
||||
draw_immediate_vec4(re, "%cmpi/u");
|
||||
draw_immediate_vec4(re, "%cmpi/e");
|
||||
} else {
|
||||
draw_eval_vec4(re);
|
||||
resize_vec4_wid(re, use_wid);
|
||||
fprintf(vvp_out, " %%cmp/u;\n");
|
||||
fprintf(vvp_out, " %%cmp/e;\n");
|
||||
}
|
||||
|
||||
switch (ivl_expr_opcode(expr)) {
|
||||
|
|
|
|||
|
|
@ -375,20 +375,20 @@ static void draw_binary_vec4_compare(ivl_expr_t expr)
|
|||
|
||||
switch (ivl_expr_opcode(expr)) {
|
||||
case 'e': /* == */
|
||||
fprintf(vvp_out, " %%cmp/u;\n");
|
||||
fprintf(vvp_out, " %%cmp/e;\n");
|
||||
fprintf(vvp_out, " %%flag_get/vec4 4;\n");
|
||||
break;
|
||||
case 'n': /* != */
|
||||
fprintf(vvp_out, " %%cmp/u;\n");
|
||||
fprintf(vvp_out, " %%cmp/e;\n");
|
||||
fprintf(vvp_out, " %%flag_get/vec4 4;\n");
|
||||
fprintf(vvp_out, " %%inv;\n");
|
||||
break;
|
||||
case 'E': /* === */
|
||||
fprintf(vvp_out, " %%cmp/u;\n");
|
||||
fprintf(vvp_out, " %%cmp/e;\n");
|
||||
fprintf(vvp_out, " %%flag_get/vec4 6;\n");
|
||||
break;
|
||||
case 'N': /* !== */
|
||||
fprintf(vvp_out, " %%cmp/u;\n");
|
||||
fprintf(vvp_out, " %%cmp/e;\n");
|
||||
fprintf(vvp_out, " %%flag_get/vec4 6;\n");
|
||||
fprintf(vvp_out, " %%inv;\n");
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -61,6 +61,8 @@ extern bool of_CASSIGN_VEC4(vthread_t thr, vvp_code_t code);
|
|||
extern bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CAST2(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CMPE(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CMPIE(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CMPS(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
|
||||
extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);
|
||||
|
|
|
|||
|
|
@ -112,6 +112,7 @@ static const struct opcode_table_s opcode_table[] = {
|
|||
{ "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1, OA_NONE} },
|
||||
{ "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} },
|
||||
{ "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%cmp/e", of_CMPE, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
|
|
@ -120,6 +121,7 @@ static const struct opcode_table_s opcode_table[] = {
|
|||
{ "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
|
||||
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||
{ "%cmpi/e", of_CMPIE, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||
{ "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||
{ "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },
|
||||
|
|
|
|||
|
|
@ -252,8 +252,10 @@ vector2 (binary) value, and push the result.
|
|||
|
||||
* %cmp/s
|
||||
* %cmp/u
|
||||
* %cmp/e
|
||||
* %cmpi/s <vala>, <valb>, <wid>
|
||||
* %cmpi/u <vala>, <valb>, <wid>
|
||||
* %cmpi/e <vala>, <valb>, <wid>
|
||||
|
||||
These instructions perform a generic comparison of two vectors of
|
||||
equal size. Two values are pulled from the top of the stack, and not
|
||||
|
|
@ -282,6 +284,9 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
|
|||
compare. In either case, if either operand contains x or z, then lt
|
||||
bit gets the x value.
|
||||
|
||||
The %cmp/e and %cmpi/e variants are the same, but they do not bother
|
||||
to calculate the lt flag. These are faster if the lt flag is not needed.
|
||||
|
||||
* %cmp/wr
|
||||
|
||||
Compare real values for equality and less-than. This opcode pops to
|
||||
|
|
|
|||
|
|
@ -1477,6 +1477,93 @@ bool of_CAST2(vthread_t thr, vvp_code_t)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
 * Shared worker for the equality-only compares. Given two vectors of
 * the same width, write the logical equality result (==) to thread
 * flag 4 and the case equality result (===) to thread flag 6. No
 * other flag is written here.
 */
static void do_CMPE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
{
      assert(rval.size() == lval.size());

	// With no x/z bits in either operand, == and === give the
	// same answer, so a single vector compare settles both flags.
      if (!lval.has_xz() && !rval.has_xz()) {
	    const vvp_bit4_t same = lval.eeq(rval)? BIT4_1 : BIT4_0;
	    thr->flags[6] = same;
	    thr->flags[4] = same;
	    return;
      }

	// Otherwise walk the bits and track both results.
      vvp_bit4_t flag_eq  = BIT4_1;
      vvp_bit4_t flag_eeq = BIT4_1;
      const unsigned nbits = lval.size();

      for (unsigned bit = 0 ; bit < nbits ; bit += 1) {
	    const vvp_bit4_t lbit = lval.value(bit);
	    const vvp_bit4_t rbit = rval.value(bit);

	      // Any difference at all defeats case equality.
	    if (lbit != rbit)
		  flag_eeq = BIT4_0;

	      // A definite 0-vs-1 mismatch makes == false. The eeq flag
	      // was already cleared just above for this same bit, so
	      // there is nothing more to learn and we can stop.
	    const bool definite_mismatch =
		  ((lbit == BIT4_0) && (rbit == BIT4_1)) ||
		  ((lbit == BIT4_1) && (rbit == BIT4_0));
	    if (definite_mismatch) {
		  flag_eq = BIT4_0;
		  break;
	    }

	      // An x/z bit downgrades a so-far-true == result to x.
	    if (flag_eq == BIT4_1 && (bit4_is_xz(lbit) || bit4_is_xz(rbit)))
		  flag_eq = BIT4_X;
      }

      thr->flags[4] = flag_eq;
      thr->flags[6] = flag_eeq;
}
|
||||
|
||||
/*
|
||||
* %cmp/e
|
||||
*
|
||||
* Pop the operands from the stack, and do not replace them. The
|
||||
* results are written to flag bits:
|
||||
*
|
||||
* 4: eq (equal)
|
||||
*
|
||||
* 6: eeq (case equal)
|
||||
*/
|
||||
bool of_CMPE(vthread_t thr, vvp_code_t)
|
||||
{
|
||||
// We are going to pop these and push nothing in their
|
||||
// place, but for now it is more efficient to use a constant
|
||||
// reference. When we finish, pop the stack without copies.
|
||||
const vvp_vector4_t&rval = thr->peek_vec4(0);
|
||||
const vvp_vector4_t&lval = thr->peek_vec4(1);
|
||||
|
||||
do_CMPE(thr, lval, rval);
|
||||
|
||||
thr->pop_vec4(2);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* %cmpi/e <vala>, <valb>, <wid>
|
||||
*
|
||||
* Pop1 operand, get the other operand from the arguments.
|
||||
*/
|
||||
bool of_CMPIE(vthread_t thr, vvp_code_t cp)
|
||||
{
|
||||
unsigned wid = cp->number;
|
||||
|
||||
vvp_vector4_t&lval = thr->peek_vec4();
|
||||
|
||||
// I expect that most of the bits of an immediate value are
|
||||
// going to be zero, so start the result vector with all zero
|
||||
// bits. Then we only need to replace the bits that are different.
|
||||
vvp_vector4_t rval (wid, BIT4_0);
|
||||
get_immediate_rval (cp, rval);
|
||||
|
||||
do_CMPE(thr, lval, rval);
|
||||
|
||||
thr->pop_vec4(1);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
|
||||
{
|
||||
vvp_bit4_t eq = BIT4_1;
|
||||
|
|
|
|||
Loading…
Reference in New Issue