From 60b9121c6cf8c0f6c314c29da1cbb74a1ddc988d Mon Sep 17 00:00:00 2001 From: steve Date: Mon, 27 Jun 2005 21:13:14 +0000 Subject: [PATCH] Make vector2 multiply more portable. --- vvp/vvp_net.cc | 92 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 77 insertions(+), 15 deletions(-) diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc index e1825db96..011d59109 100644 --- a/vvp/vvp_net.cc +++ b/vvp/vvp_net.cc @@ -16,7 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ -#ident "$Id: vvp_net.cc,v 1.39 2005/06/26 01:57:22 steve Exp $" +#ident "$Id: vvp_net.cc,v 1.40 2005/06/27 21:13:14 steve Exp $" # include "config.h" # include "vvp_net.h" @@ -24,6 +24,7 @@ # include # include # include +# include # include /* *** BIT operations *** */ @@ -351,7 +352,7 @@ vvp_vector2_t::vvp_vector2_t(const vvp_vector4_t&that) case BIT4_0: break; case BIT4_1: - vec_[addr] |= 1 << shift; + vec_[addr] |= 1UL << shift; break; default: delete[]vec_; @@ -392,6 +393,63 @@ bool vvp_vector2_t::is_NaN() const return wid_ == 0; } +static unsigned long add_carry(unsigned long a, unsigned long b, + unsigned long&carry) +{ + unsigned long out = carry; + carry = 0; + + if ((ULONG_MAX - out) < a) + carry += 1; + out += a; + + if ((ULONG_MAX - out) < b) + carry += 1; + out += b; + + return out; +} + +static void multiply_long(unsigned long a, unsigned long b, + unsigned long&low, unsigned long&high) +{ + assert(sizeof(unsigned long) %2 == 0); + + const unsigned long word_mask = (1UL << 4UL*sizeof(a)) - 1UL; + unsigned long tmpa; + unsigned long tmpb; + unsigned long res[4]; + + tmpa = a & word_mask; + tmpb = b & word_mask; + res[0] = tmpa * tmpb; + res[1] = res[0] >> 4UL*sizeof(unsigned long); + res[0] &= word_mask; + + tmpa = (a >> 4UL*sizeof(unsigned long)) & word_mask; + tmpb = b & word_mask; + res[1] += tmpa * tmpb; + res[2] = res[1] >> 4UL*sizeof(unsigned long); + res[1] &= word_mask; + + tmpa = a & word_mask; + tmpb = (b >> 4UL*sizeof(unsigned long)) & word_mask; + res[1] += tmpa * tmpb; + res[2] += res[1] >> 4UL*sizeof(unsigned long); + res[3] = res[2] >> 4UL*sizeof(unsigned long); + res[1] &= word_mask; + res[2] &= word_mask; + + tmpa = (a >> 4UL*sizeof(unsigned long)) & word_mask; + tmpb = (b >> 4UL*sizeof(unsigned long)) & word_mask; + res[2] += tmpa * tmpb; + res[3] += res[2] >> 4UL*sizeof(unsigned long); + res[2] &= word_mask; + + high = (res[3] << 4UL*sizeof(unsigned long)) | res[2]; + low = (res[1] << 4UL*sizeof(unsigned long)) | res[0]; +} + /* * Multiplication of two vector2 vectors returns a product as wide as * the sum of the widths of the input vectors. @@ -401,31 +459,32 @@ vvp_vector2_t operator * (const vvp_vector2_t&a, const vvp_vector2_t&b) const unsigned bits_per_word = 8 * sizeof(a.vec_[0]); vvp_vector2_t r (0, a.size() + b.size()); - assert(sizeof(unsigned long long) >= 2*sizeof(a.vec_[0])); - unsigned long long word_mask = (1ULL << bits_per_word) - 1ULL; - unsigned awords = (a.wid_ + bits_per_word - 1) / bits_per_word; unsigned bwords = (b.wid_ + bits_per_word - 1) / bits_per_word; unsigned rwords = (r.wid_ + bits_per_word - 1) / bits_per_word; for (unsigned bdx = 0 ; bdx < bwords ; bdx += 1) { - unsigned long long tmpb = b.vec_[bdx]; + unsigned long tmpb = b.vec_[bdx]; if (tmpb == 0) continue; for (unsigned adx = 0 ; adx < awords ; adx += 1) { - unsigned long long tmpa = a.vec_[adx]; - unsigned long long tmpr = tmpb * tmpa; + unsigned long tmpa = a.vec_[adx]; + if (tmpa == 0) + continue; + unsigned long low, hig; + multiply_long(tmpa, tmpb, low, hig); + + unsigned long carry = 0; for (unsigned sdx = 0 - ; (adx+bdx+sdx) < rwords && tmpr > 0 + ; (adx+bdx+sdx) < rwords ; sdx += 1) { - unsigned long long sum = r.vec_[adx+bdx+sdx]; - sum += tmpr & word_mask; - r.vec_[adx+bdx+sdx] = sum & word_mask; - sum >>= bits_per_word; - tmpr >>= bits_per_word; - tmpr += sum; + + r.vec_[adx+bdx+sdx] = add_carry(r.vec_[adx+bdx+sdx], + low, carry); + low = hig; + hig = 0; } } } @@ -1270,6 +1329,9 @@ vvp_bit4_t compare_gtge_signed(const vvp_vector4_t&a, /* * $Log: vvp_net.cc,v $ + * Revision 1.40 2005/06/27 21:13:14 steve + * Make vector2 multiply more portable. + * * Revision 1.39 2005/06/26 01:57:22 steve * Make bit masks of vector4_t 64bit aware. *