From 5853f7d8678544d20348cee2bce2cde0ca53c885 Mon Sep 17 00:00:00 2001 From: Martin Whitaker Date: Sat, 15 Feb 2014 21:40:55 +0000 Subject: [PATCH] Fix for GitHub issue 9 part 1 : Efficiency of vvp_vector2_t::pow() function. The vvp_vector2_t::pow() function is recursive, and performs a multiplication operation on each step. The multiplication operator was expanding the result vector to accommodate the maximum possible result value for the given operand vectors, thus causing the execution time of the power operation to grow exponentially with the exponent value. Both in this case and in general, it is unnecessary for the multiplication result vector to be expanded, as the compiler has already determined the required vector width during elaboration, and sizes the operand vectors to match. --- vvp/vthread.cc | 11 ----------- vvp/vvp_net.cc | 26 ++++++++++---------------- 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/vvp/vthread.cc b/vvp/vthread.cc index b2b9fd8ce..73d9b41f5 100644 --- a/vvp/vthread.cc +++ b/vvp/vthread.cc @@ -4557,19 +4557,8 @@ bool of_POW(vthread_t thr, vvp_code_t cp) return true; } - /* To make the result more manageable trim off the extra bits. */ - xv2.trim(); - yv2.trim(); - vvp_vector2_t result = pow(xv2, yv2); - /* If the result is too small zero pad it. */ - if (result.size() < wid) { - for (unsigned jdx = wid-1; jdx >= result.size(); jdx -= 1) - thr_put_bit(thr, cp->bit_idx[0]+jdx, BIT4_0); - wid = result.size(); - } - /* Copy only what we need of the result. */ for (unsigned jdx = 0; jdx < wid; jdx += 1) thr_put_bit(thr, cp->bit_idx[0]+jdx, diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc index 3e9f32695..c87e26ef9 100644 --- a/vvp/vvp_net.cc +++ b/vvp/vvp_net.cc @@ -2362,23 +2362,21 @@ bool vvp_vector2_t::is_zero() const */ vvp_vector2_t pow(const vvp_vector2_t&x, vvp_vector2_t&y) { - /* If we have a zero exponent just return a 1 bit wide 1. */ + /* If we have a zero exponent just return 1. 
*/ if (y == vvp_vector2_t(0L, 1)) { - return vvp_vector2_t(1L, 1); + return vvp_vector2_t(1L, x.size()); } /* Is the value odd? */ if (y.value(0) == 1) { y.set_bit(0, 0); // A quick subtract by 1. vvp_vector2_t res = x * pow(x, y); - res.trim(); // To keep the size under control trim extra zeros. return res; } y >>= 1; // A fast divide by two. We know the LSB is zero. vvp_vector2_t z = pow(x, y); vvp_vector2_t res = z * z; - res.trim(); // To keep the size under control trim extra zeros. return res; } @@ -2422,25 +2420,22 @@ static void multiply_long(unsigned long a, unsigned long b, low = (res[1] << 4UL*sizeof(unsigned long)) | res[0]; } -/* - * Multiplication of two vector2 vectors returns a product as wide as - * the sum of the widths of the input vectors. - */ vvp_vector2_t operator * (const vvp_vector2_t&a, const vvp_vector2_t&b) { const unsigned bits_per_word = 8 * sizeof(a.vec_[0]); - vvp_vector2_t r (0, a.size() + b.size()); - unsigned awords = (a.wid_ + bits_per_word - 1) / bits_per_word; - unsigned bwords = (b.wid_ + bits_per_word - 1) / bits_per_word; - unsigned rwords = (r.wid_ + bits_per_word - 1) / bits_per_word; + // The compiler ensures that the two operands are of equal size. 
+ assert(a.size() == b.size()); + vvp_vector2_t r (0, a.size()); - for (unsigned bdx = 0 ; bdx < bwords ; bdx += 1) { + unsigned words = (r.wid_ + bits_per_word - 1) / bits_per_word; + + for (unsigned bdx = 0 ; bdx < words ; bdx += 1) { unsigned long tmpb = b.vec_[bdx]; if (tmpb == 0) continue; - for (unsigned adx = 0 ; adx < awords ; adx += 1) { + for (unsigned adx = 0 ; adx < words ; adx += 1) { unsigned long tmpa = a.vec_[adx]; if (tmpa == 0) continue; @@ -2450,7 +2445,7 @@ vvp_vector2_t operator * (const vvp_vector2_t&a, const vvp_vector2_t&b) unsigned long carry = 0; for (unsigned sdx = 0 - ; (adx+bdx+sdx) < rwords + ; (adx+bdx+sdx) < words ; sdx += 1) { r.vec_[adx+bdx+sdx] = add_carry(r.vec_[adx+bdx+sdx], @@ -2461,7 +2456,6 @@ vvp_vector2_t operator * (const vvp_vector2_t&a, const vvp_vector2_t&b) } } - return r; }