Fix for GitHub issue 9 part 1 : Efficiency of vvp_vector2_t::pow() function.

The vvp_vector2_t::pow() function is recursive, and performs a multiplication
operation on each step. The multiplication operator was expanding the result
vector to accommodate the maximum possible result value for the given operand
vectors, thus causing the execution time of the power operation to grow
exponentially with the exponent value. Both in this case and
in general, it is unnecessary for the multiplication result vector to be
expanded, as the compiler has already determined the required vector width
during elaboration, and sizes the operand vectors to match.
This commit is contained in:
Martin Whitaker 2014-02-15 21:40:55 +00:00
parent 68f8de28af
commit 5853f7d867
2 changed files with 10 additions and 27 deletions

View File

@ -4557,19 +4557,8 @@ bool of_POW(vthread_t thr, vvp_code_t cp)
return true;
}
/* To make the result more manageable trim off the extra bits. */
xv2.trim();
yv2.trim();
vvp_vector2_t result = pow(xv2, yv2);
/* If the result is too small zero pad it. */
if (result.size() < wid) {
for (unsigned jdx = wid-1; jdx >= result.size(); jdx -= 1)
thr_put_bit(thr, cp->bit_idx[0]+jdx, BIT4_0);
wid = result.size();
}
/* Copy only what we need of the result. */
for (unsigned jdx = 0; jdx < wid; jdx += 1)
thr_put_bit(thr, cp->bit_idx[0]+jdx,

View File

@ -2362,23 +2362,21 @@ bool vvp_vector2_t::is_zero() const
*/
vvp_vector2_t pow(const vvp_vector2_t&x, vvp_vector2_t&y)
{
/*
 * Computes x raised to the power y by recursion on y: an odd
 * exponent is handled as x * x^(y-1), an even one as the square
 * of x^(y/2). Each non-terminal step performs one multiplication.
 *
 * y is a non-const reference and is modified in place (bit 0 is
 * cleared, and the value is shifted right), so the caller's
 * exponent vector does not keep its value across the call.
 *
 * NOTE(review): this listing appears to be a diff shown without
 * +/- markers, so some adjacent lines look like the old and the
 * new version of the same statement -- confirm against the real
 * file before relying on any single line below.
 */
/* If we have a zero exponent just return a 1 bit wide 1. */
/* If we have a zero exponent just return 1. */
if (y == vvp_vector2_t(0L, 1)) {
// NOTE(review): two consecutive returns; as written only the first
// (a 1 bit wide result) can execute. Presumably the x.size() wide
// return is the intended replacement, so that the result has the
// same width as x when it is fed to operator * -- confirm.
return vvp_vector2_t(1L, 1);
return vvp_vector2_t(1L, x.size());
}
/* Is the value odd? */
if (y.value(0) == 1) {
y.set_bit(0, 0); // A quick subtract by 1.
// y is now even; the recursive call continues to consume it.
vvp_vector2_t res = x * pow(x, y);
// NOTE(review): presumably only needed when operator * widens its
// result -- confirm whether this trim is still wanted.
res.trim(); // To keep the size under control trim extra zeros.
return res;
}
y >>= 1; // A fast divide by two. We know the LSB is zero.
// Compute x^(y/2) once and square it rather than recursing twice.
vvp_vector2_t z = pow(x, y);
vvp_vector2_t res = z * z;
// NOTE(review): same question as the trim in the odd branch above.
res.trim(); // To keep the size under control trim extra zeros.
return res;
}
@ -2422,25 +2420,22 @@ static void multiply_long(unsigned long a, unsigned long b,
low = (res[1] << 4UL*sizeof(unsigned long)) | res[0];
}
/*
* Multiplication of two vector2 vectors returns a product as wide as
* the sum of the widths of the input vectors.
*/
vvp_vector2_t operator * (const vvp_vector2_t&a, const vvp_vector2_t&b)
{
const unsigned bits_per_word = 8 * sizeof(a.vec_[0]);
vvp_vector2_t r (0, a.size() + b.size());
unsigned awords = (a.wid_ + bits_per_word - 1) / bits_per_word;
unsigned bwords = (b.wid_ + bits_per_word - 1) / bits_per_word;
unsigned rwords = (r.wid_ + bits_per_word - 1) / bits_per_word;
// The compiler ensures that the two operands are of equal size.
assert(a.size() == b.size());
vvp_vector2_t r (0, a.size());
for (unsigned bdx = 0 ; bdx < bwords ; bdx += 1) {
unsigned words = (r.wid_ + bits_per_word - 1) / bits_per_word;
for (unsigned bdx = 0 ; bdx < words ; bdx += 1) {
unsigned long tmpb = b.vec_[bdx];
if (tmpb == 0)
continue;
for (unsigned adx = 0 ; adx < awords ; adx += 1) {
for (unsigned adx = 0 ; adx < words ; adx += 1) {
unsigned long tmpa = a.vec_[adx];
if (tmpa == 0)
continue;
@ -2450,7 +2445,7 @@ vvp_vector2_t operator * (const vvp_vector2_t&a, const vvp_vector2_t&b)
unsigned long carry = 0;
for (unsigned sdx = 0
; (adx+bdx+sdx) < rwords
; (adx+bdx+sdx) < words
; sdx += 1) {
r.vec_[adx+bdx+sdx] = add_carry(r.vec_[adx+bdx+sdx],
@ -2461,7 +2456,6 @@ vvp_vector2_t operator * (const vvp_vector2_t&a, const vvp_vector2_t&b)
}
}
return r;
}