Optimize the vec4-stack %cmp/s and %cmpi/s instructions.

Magnitude compare is called a LOT, so it is worth putting some
special effort into it.
This commit is contained in:
Stephen Williams 2014-12-04 10:42:48 -08:00
parent eb070b061b
commit 86139c855d
2 changed files with 39 additions and 41 deletions

View File

@ -1625,11 +1625,6 @@ bool of_CMPINE(vthread_t thr, vvp_code_t cp)
static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
{
vvp_bit4_t eq = BIT4_1;
vvp_bit4_t eeq = BIT4_1;
vvp_bit4_t lt = BIT4_0;
assert(rval.size() == lval.size());
// If either value has XZ bits, then the eq and lt values are
@ -1649,38 +1644,48 @@ static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t
const vvp_bit4_t sig1 = lval.value(wid-1);
const vvp_bit4_t sig2 = rval.value(wid-1);
for (unsigned idx = 0 ; idx < (wid-1) ; idx += 1) {
vvp_bit4_t lv = lval.value(idx);
vvp_bit4_t rv = rval.value(idx);
if (lv==BIT4_0 && rv==BIT4_1) {
eeq = eq = BIT4_0;
lt = BIT4_1;
} else if (lv==BIT4_1 && rv==BIT4_0) {
eeq = eq = BIT4_0;
lt = BIT4_0;
}
}
/* Correct the lt bit to account for the sign of the parameters. */
// If the first is negative and the last positive, then
// a < b for certain.
// If the lval is <0 and the rval is >=0, then we know the result.
if ((sig1 == BIT4_1) && (sig2 == BIT4_0)) {
lt = BIT4_1;
eeq = eq = BIT4_0;
thr->flags[4] = BIT4_0; // eq;
thr->flags[5] = BIT4_1; // lt;
thr->flags[6] = BIT4_0; // eeq
return;
}
// If the first is positive and the last negative, then
// a > b for certain.
// If the lval is >=0 and the rval is <0, then we know the result.
if ((sig1 == BIT4_0) && (sig2 == BIT4_1)) {
lt = BIT4_0;
eeq = eq = BIT4_0;
thr->flags[4] = BIT4_0; // eq;
thr->flags[5] = BIT4_0; // lt;
thr->flags[6] = BIT4_0; // eeq
return;
}
thr->flags[4] = eq;
thr->flags[5] = lt;
thr->flags[6] = eeq;
// The values have the same sign, so we have to look at the
// actual value. Scan from the MSB down. As soon as we find a
// bit that differs, we know the result.
for (unsigned idx = 1 ; idx < wid ; idx += 1) {
vvp_bit4_t lv = lval.value(wid-1-idx);
vvp_bit4_t rv = rval.value(wid-1-idx);
if (lv == rv)
continue;
thr->flags[4] = BIT4_0; // eq
thr->flags[6] = BIT4_0; // eeq
if (lv==BIT4_0) {
thr->flags[5] = BIT4_1; // lt
} else {
thr->flags[5] = BIT4_0; // lt
}
return;
}
// If we survive the loop above, then the values must be equal.
thr->flags[4] = BIT4_1;
thr->flags[5] = BIT4_0;
thr->flags[6] = BIT4_1;
}
/*

View File

@ -413,7 +413,7 @@ inline vvp_bit4_t vvp_vector4_t::value(unsigned idx) const
if (idx >= size_)
return BIT4_X;
unsigned long off;
unsigned off;
unsigned long abits, bbits;
if (size_ > BITS_PER_WORD) {
@ -430,15 +430,8 @@ inline vvp_bit4_t vvp_vector4_t::value(unsigned idx) const
abits >>= off;
bbits >>= off;
int tmp = ((bbits&1) << 1) + (abits&1);
static const vvp_bit4_t bits_bit4_map[4] = {
BIT4_0, // bbit==0, abit==0
BIT4_1, // bbit==0, abit==1
BIT4_Z, // bbit==1, abit==0
BIT4_X // bbit==1, abit==1
};
// This map converts the bit-pattern to a vvp_bit4_t value.
return bits_bit4_map[tmp];
// This cast works since b==1,a==1 is X and b==1,a==0 is Z.
return (vvp_bit4_t)tmp;
}
inline vvp_vector4_t vvp_vector4_t::subvalue(unsigned adr, unsigned wid) const