Add the %cmp/e instructions, and put them to use.
When testing for == and ===, there is no need to also calculate <, so it makes sense to have a special instruction for these cases.
This commit is contained in:
parent
bea03db25d
commit
663c79d4af
|
|
@ -86,11 +86,11 @@ static int draw_condition_binary_compare(ivl_expr_t expr)
|
||||||
if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) {
|
if (ivl_expr_width(re)==use_wid && test_immediate_vec4_ok(re)) {
|
||||||
/* Special case: If the right operand can be handled as
|
/* Special case: If the right operand can be handled as
|
||||||
an immediate operand, then use that instead. */
|
an immediate operand, then use that instead. */
|
||||||
draw_immediate_vec4(re, "%cmpi/u");
|
draw_immediate_vec4(re, "%cmpi/e");
|
||||||
} else {
|
} else {
|
||||||
draw_eval_vec4(re);
|
draw_eval_vec4(re);
|
||||||
resize_vec4_wid(re, use_wid);
|
resize_vec4_wid(re, use_wid);
|
||||||
fprintf(vvp_out, " %%cmp/u;\n");
|
fprintf(vvp_out, " %%cmp/e;\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (ivl_expr_opcode(expr)) {
|
switch (ivl_expr_opcode(expr)) {
|
||||||
|
|
|
||||||
|
|
@ -375,20 +375,20 @@ static void draw_binary_vec4_compare(ivl_expr_t expr)
|
||||||
|
|
||||||
switch (ivl_expr_opcode(expr)) {
|
switch (ivl_expr_opcode(expr)) {
|
||||||
case 'e': /* == */
|
case 'e': /* == */
|
||||||
fprintf(vvp_out, " %%cmp/u;\n");
|
fprintf(vvp_out, " %%cmp/e;\n");
|
||||||
fprintf(vvp_out, " %%flag_get/vec4 4;\n");
|
fprintf(vvp_out, " %%flag_get/vec4 4;\n");
|
||||||
break;
|
break;
|
||||||
case 'n': /* != */
|
case 'n': /* != */
|
||||||
fprintf(vvp_out, " %%cmp/u;\n");
|
fprintf(vvp_out, " %%cmp/e;\n");
|
||||||
fprintf(vvp_out, " %%flag_get/vec4 4;\n");
|
fprintf(vvp_out, " %%flag_get/vec4 4;\n");
|
||||||
fprintf(vvp_out, " %%inv;\n");
|
fprintf(vvp_out, " %%inv;\n");
|
||||||
break;
|
break;
|
||||||
case 'E': /* === */
|
case 'E': /* === */
|
||||||
fprintf(vvp_out, " %%cmp/u;\n");
|
fprintf(vvp_out, " %%cmp/e;\n");
|
||||||
fprintf(vvp_out, " %%flag_get/vec4 6;\n");
|
fprintf(vvp_out, " %%flag_get/vec4 6;\n");
|
||||||
break;
|
break;
|
||||||
case 'N': /* !== */
|
case 'N': /* !== */
|
||||||
fprintf(vvp_out, " %%cmp/u;\n");
|
fprintf(vvp_out, " %%cmp/e;\n");
|
||||||
fprintf(vvp_out, " %%flag_get/vec4 6;\n");
|
fprintf(vvp_out, " %%flag_get/vec4 6;\n");
|
||||||
fprintf(vvp_out, " %%inv;\n");
|
fprintf(vvp_out, " %%inv;\n");
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,8 @@ extern bool of_CASSIGN_VEC4(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t code);
|
extern bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
|
extern bool of_CASSIGN_WR(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CAST2(vthread_t thr, vvp_code_t code);
|
extern bool of_CAST2(vthread_t thr, vvp_code_t code);
|
||||||
|
extern bool of_CMPE(vthread_t thr, vvp_code_t code);
|
||||||
|
extern bool of_CMPIE(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CMPS(vthread_t thr, vvp_code_t code);
|
extern bool of_CMPS(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
|
extern bool of_CMPIS(vthread_t thr, vvp_code_t code);
|
||||||
extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);
|
extern bool of_CMPSTR(vthread_t thr, vvp_code_t code);
|
||||||
|
|
|
||||||
|
|
@ -112,6 +112,7 @@ static const struct opcode_table_s opcode_table[] = {
|
||||||
{ "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1, OA_NONE} },
|
{ "%cassign/vec4/off",of_CASSIGN_VEC4_OFF,2,{OA_FUNC_PTR,OA_BIT1, OA_NONE} },
|
||||||
{ "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} },
|
{ "%cassign/wr", of_CASSIGN_WR, 1,{OA_FUNC_PTR,OA_NONE, OA_NONE} },
|
||||||
{ "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%cast2", of_CAST2, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
|
{ "%cmp/e", of_CMPE, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%cmp/s", of_CMPS, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%cmp/str",of_CMPSTR, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%cmp/u", of_CMPU, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
|
|
@ -120,6 +121,7 @@ static const struct opcode_table_s opcode_table[] = {
|
||||||
{ "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
|
{ "%cmp/wu", of_CMPWU, 2, {OA_BIT1, OA_BIT2, OA_NONE} },
|
||||||
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%cmp/x", of_CMPX, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
{ "%cmp/z", of_CMPZ, 0, {OA_NONE, OA_NONE, OA_NONE} },
|
||||||
|
{ "%cmpi/e", of_CMPIE, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||||
{ "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
{ "%cmpi/s", of_CMPIS, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||||
{ "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
{ "%cmpi/u", of_CMPIU, 3, {OA_BIT1, OA_BIT2, OA_NUMBER} },
|
||||||
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },
|
{ "%concat/str", of_CONCAT_STR, 0,{OA_NONE, OA_NONE, OA_NONE} },
|
||||||
|
|
|
||||||
|
|
@ -252,8 +252,10 @@ vector2 (binary) value, and push the result.
|
||||||
|
|
||||||
* %cmp/s
|
* %cmp/s
|
||||||
* %cmp/u
|
* %cmp/u
|
||||||
|
* %cmp/e
|
||||||
* %cmpi/s <vala>, <valb>, <wid>
|
* %cmpi/s <vala>, <valb>, <wid>
|
||||||
* %cmpi/u <vala>, <valb>, <wid>
|
* %cmpi/u <vala>, <valb>, <wid>
|
||||||
|
* %cmpi/e <vala>, <valb>, <wid>
|
||||||
|
|
||||||
These instructions perform a generic comparison of two vectors of
|
These instructions perform a generic comparison of two vectors of
|
||||||
equal size. Two values are pulled from the top of the stack, and not
|
equal size. Two values are pulled from the top of the stack, and not
|
||||||
|
|
@ -282,6 +284,9 @@ The %cmp/u and %cmp/s differ only in the handling of the lt bit. The
|
||||||
compare. In either case, if either operand contains x or z, then lt
|
compare. In either case, if either operand contains x or z, then lt
|
||||||
bit gets the x value.
|
bit gets the x value.
|
||||||
|
|
||||||
|
The %cmp/e and %cmpi/e variants are the same, but they do not bother
|
||||||
|
to calculate the lt flag. These are faster if the lt flag is not needed.
|
||||||
|
|
||||||
* %cmp/wr
|
* %cmp/wr
|
||||||
|
|
||||||
Compare real values for equality and less-than. This opcode pops to
|
Compare real values for equality and less-than. This opcode pops to
|
||||||
|
|
|
||||||
|
|
@ -1477,6 +1477,93 @@ bool of_CAST2(vthread_t thr, vvp_code_t)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Shared worker for the %cmp/e and %cmpi/e opcodes. Compares two
 * vectors of identical width and stores the outcome in the thread
 * flags:
 *
 *   flags[4]: eq  (logical equality, may be BIT4_X)
 *   flags[6]: eeq (case equality, always BIT4_0 or BIT4_1)
 *
 * No other flag is written by this function.
 */
static void do_CMPE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
{
      assert(rval.size() == lval.size());

	// Fast path: with no x/z bits in either operand, the == and
	// === answers coincide, so one whole-vector compare gives both.
      if (!lval.has_xz() && !rval.has_xz()) {
            const vvp_bit4_t same = lval.eeq(rval) ? BIT4_1 : BIT4_0;
            thr->flags[6] = same;
            thr->flags[4] = same;
            return;
      }

	// Slow path: walk the bits and track both answers.
      vvp_bit4_t eq_flag  = BIT4_1;
      vvp_bit4_t eeq_flag = BIT4_1;
      const unsigned nbits = lval.size();

      for (unsigned bit = 0 ; bit < nbits ; ++bit) {
            const vvp_bit4_t lbit = lval.value(bit);
            const vvp_bit4_t rbit = rval.value(bit);

	      // Any difference at all, x/z included, defeats ===.
            if (lbit != rbit)
                  eeq_flag = BIT4_0;

	      // A definite 0-versus-1 mismatch settles == as false. The
	      // eeq flag was already cleared just above for this bit,
	      // so there is nothing left to learn and we can stop.
            const bool definite_mismatch =
                  (lbit == BIT4_0 && rbit == BIT4_1) ||
                  (lbit == BIT4_1 && rbit == BIT4_0);
            if (definite_mismatch) {
                  eq_flag = BIT4_0;
                  break;
            }

	      // An x/z bit on either side downgrades a so-far-true ==
	      // to unknown; later bits may still force it to false.
            if (eq_flag == BIT4_1 && (bit4_is_xz(lbit) || bit4_is_xz(rbit)))
                  eq_flag = BIT4_X;
      }

      thr->flags[4] = eq_flag;
      thr->flags[6] = eeq_flag;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* %cmp/e
|
||||||
|
*
|
||||||
|
* Pop the operands from the stack, and do not replace them. The
|
||||||
|
* results are written to flag bits:
|
||||||
|
*
|
||||||
|
* 4: eq (equal)
|
||||||
|
*
|
||||||
|
* 6: eeq (case equal)
|
||||||
|
*/
|
||||||
|
bool of_CMPE(vthread_t thr, vvp_code_t)
|
||||||
|
{
|
||||||
|
// We are going to pop these and push nothing in their
|
||||||
|
// place, but for now it is more efficient to use a constant
|
||||||
|
// reference. When we finish, pop the stack without copies.
|
||||||
|
const vvp_vector4_t&rval = thr->peek_vec4(0);
|
||||||
|
const vvp_vector4_t&lval = thr->peek_vec4(1);
|
||||||
|
|
||||||
|
do_CMPE(thr, lval, rval);
|
||||||
|
|
||||||
|
thr->pop_vec4(2);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* %cmpi/e <vala>, <valb>, <wid>
|
||||||
|
*
|
||||||
|
* Pop1 operand, get the other operand from the arguments.
|
||||||
|
*/
|
||||||
|
bool of_CMPIE(vthread_t thr, vvp_code_t cp)
|
||||||
|
{
|
||||||
|
unsigned wid = cp->number;
|
||||||
|
|
||||||
|
vvp_vector4_t&lval = thr->peek_vec4();
|
||||||
|
|
||||||
|
// I expect that most of the bits of an immediate value are
|
||||||
|
// going to be zero, so start the result vector with all zero
|
||||||
|
// bits. Then we only need to replace the bits that are different.
|
||||||
|
vvp_vector4_t rval (wid, BIT4_0);
|
||||||
|
get_immediate_rval (cp, rval);
|
||||||
|
|
||||||
|
do_CMPE(thr, lval, rval);
|
||||||
|
|
||||||
|
thr->pop_vec4(1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
|
static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
|
||||||
{
|
{
|
||||||
vvp_bit4_t eq = BIT4_1;
|
vvp_bit4_t eq = BIT4_1;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue