diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index 0d01a687d..70f514b69 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -842,60 +842,20 @@ static void get_immediate_rval(vvp_code_t cp, vvp_vector4_t&val)
       for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
             uint32_t ba = 0;
-              // Convert the vala/valb bits to a ba number that can be
-              // used to select what goes into the value.
+              // Convert the vala/valb bits to a ba number that
+              // matches the encoding of the vvp_bit4_t enumeration.
             ba = (valb & 1) << 1;
             ba |= vala & 1;
-            switch (ba) {
-                case 1:
-                  val.set_bit(idx, BIT4_1);
-                  break;
-                case 2:
-                  val.set_bit(idx, BIT4_Z);
-                  break;
-                case 3:
-                  val.set_bit(idx, BIT4_X);
-                  break;
-                default:
-                  break;
-            }
+              // Note that val is already pre-filled with BIT4_0
+              // bits, so we only need to set non-zero bit values.
+            if (ba) val.set_bit(idx, (vvp_bit4_t)ba);
             vala >>= 1;
             valb >>= 1;
       }
 }
 
-static bool do_ADD(vvp_vector4_t&l, const vvp_vector4_t&r)
-{
-      unsigned wid = l.size();
-      assert(wid == r.size());
-
-      unsigned long*lva = l.subarray(0,wid);
-      unsigned long*lvb = r.subarray(0,wid);
-      if (lva==0 || lvb==0)
-            goto x_out;
-
-      unsigned long carry;
-      carry = 0;
-      for (unsigned idx = 0 ; (idx*CPU_WORD_BITS) < wid ; idx += 1)
-            lva[idx] = add_with_carry(lva[idx], lvb[idx], carry);
-
-      l.setarray(0,wid,lva);
-
-      delete[]lva;
-      delete[]lvb;
-      return true;
-
- x_out:
-      delete[]lva;
-      delete[]lvb;
-
-      vvp_vector4_t tmp (wid, BIT4_X);
-      l = tmp;
-      return true;
-}
-
 /*
  * %add
  *
@@ -914,7 +874,9 @@ bool of_ADD(vthread_t thr, vvp_code_t)
         // replaces a pop and a pull.
       vvp_vector4_t&l = thr->peek_vec4();
 
-      return do_ADD(l, r);
+      l.add(r);
+
+      return true;
 }
 
 /*
@@ -935,7 +897,9 @@ bool of_ADDI(vthread_t thr, vvp_code_t cp)
       vvp_vector4_t r (wid, BIT4_0);
       get_immediate_rval (cp, r);
 
-      return do_ADD(l, r);
+      l.add(r);
+
+      return true;
 }
 
 bool of_ADD_WR(vthread_t thr, vvp_code_t)
@@ -1184,7 +1148,7 @@ bool of_ASSIGN_VEC4_OFF_E(vthread_t thr, vvp_code_t cp)
             return true;
 
       int use_off = -off;
-      assert(wid > use_off);
+      assert((int)wid > use_off);
       unsigned use_wid = wid - use_off;
       val = val.subvalue(use_off, use_wid);
       off = 0;
diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc
index edf7be50a..6a2508327 100644
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@@ -510,6 +510,25 @@ int edge(vvp_bit4_t from, vvp_bit4_t to)
       return 0;
 }
 
+/*
+ * Some of the instructions do wide addition to arrays of long. They
+ * use this add_with_carry function to help.
+ */
+static inline unsigned long add_with_carry(unsigned long a, unsigned long b,
+                                           unsigned long&carry)
+{
+      unsigned long tmp = b + carry;
+      unsigned long sum = a + tmp;
+      carry = 0;
+      if (tmp < b)
+            carry = 1;
+      if (sum < tmp)
+            carry = 1;
+      if (sum < a)
+            carry = 1;
+      return sum;
+}
+
 void vvp_send_vec8(vvp_net_ptr_t ptr, const vvp_vector8_t&val)
 {
       while (vvp_net_t*cur = ptr.ptr()) {
@@ -1366,6 +1385,72 @@ bool vvp_vector4_t::set_vec(unsigned adr, const vvp_vector4_t&that)
       return diff_flag;
 }
 
+/*
+ * Add that vector to this vector. Do it in the Verilog way, which
+ * means if we detect any X or Z bits, change the entire result to
+ * all X.
+ *
+ * Assume both vectors are the same size.
+ */
+void vvp_vector4_t::add(const vvp_vector4_t&that)
+{
+      assert(size_ == that.size_);
+
+      if (size_ < BITS_PER_WORD) {
+            unsigned long mask = ~(-1UL << size_);
+            if ((bbits_val_|that.bbits_val_) & mask) {
+                  abits_val_ |= mask;
+                  bbits_val_ |= mask;
+                  return;
+            }
+
+            abits_val_ += that.abits_val_;
+            abits_val_ &= mask;
+            return;
+      }
+
+      if (size_ == BITS_PER_WORD) {
+            if (bbits_val_ | that.bbits_val_) {
+                  abits_val_ = WORD_X_ABITS;
+                  bbits_val_ = WORD_X_BBITS;
+            } else {
+                  abits_val_ += that.abits_val_;
+            }
+            return;
+      }
+
+      int cnt = size_ / BITS_PER_WORD;
+      unsigned long carry = 0;
+      for (int idx = 0 ; idx < cnt ; idx += 1) {
+            if (bbits_ptr_[idx] | that.bbits_ptr_[idx])
+                  goto x_out;
+
+            abits_ptr_[idx] = add_with_carry(abits_ptr_[idx], that.abits_ptr_[idx], carry);
+      }
+
+      if (unsigned tail = size_ % BITS_PER_WORD) {
+            unsigned long mask = ~(-1UL << tail);
+            if ((bbits_ptr_[cnt] | that.bbits_ptr_[cnt]) & mask)
+                  goto x_out;
+
+            abits_ptr_[cnt] = add_with_carry(abits_ptr_[cnt], that.abits_ptr_[cnt], carry);
+            abits_ptr_[cnt] &= mask;
+      }
+
+      return;
+
+ x_out:
+      for (int idx = 0 ; idx < cnt ; idx += 1) {
+            abits_ptr_[idx] = WORD_X_ABITS;
+            bbits_ptr_[idx] = WORD_X_BBITS;
+      }
+      if (unsigned tail = size_ % BITS_PER_WORD) {
+            unsigned long mask = ~(-1UL << tail);
+            abits_ptr_[cnt] = WORD_X_ABITS & mask;
+            bbits_ptr_[cnt] = WORD_X_BBITS & mask;
+      }
+}
+
 void vvp_vector4_t::mov(unsigned dst, unsigned src, unsigned cnt)
 {
       assert(dst+cnt <= size_);
diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h
index 757bb4981..16a37a953 100644
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@@ -130,6 +130,9 @@ struct automatic_hooks_s {
  * values. The enumeration has fixed numeric values that can be
  * expressed in 2 real bits, so that some of the internal classes can
  * pack them tightly.
+ *
+ * WARNING: Many things rely on this encoding for the BIT4_* enumeration
+ * values, so treat these values as cast in stone.
  */
 enum vvp_bit4_t {
       BIT4_0 = 0,
@@ -268,6 +271,9 @@ class vvp_vector4_t {
         // Move bits within this vector.
       void mov(unsigned dst, unsigned src, unsigned cnt);
 
+        // Add that to this in the Verilog way.
+      void add(const vvp_vector4_t&that);
+
         // Test that the vectors are exactly equal
       bool eeq(const vvp_vector4_t&that) const;
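
Notes on the patch:

On the get_immediate_rval() rewrite: folding the old switch statement
into a single cast is only sound because the packed value
ba = (valb << 1) | (vala & 1) lines up exactly with the numeric values
of the vvp_bit4_t enumeration. That is the invariant the new WARNING
comment in vvp_net.h pins down. The mapping, as implied by the switch
statement this patch deletes:

      //   valb  vala   ba   vvp_bit4_t
      //   ----  ----   --   ----------
      //     0     0     0   BIT4_0  (pre-filled, never set explicitly)
      //     0     1     1   BIT4_1
      //     1     0     2   BIT4_Z
      //     1     1     3   BIT4_X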
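
On the of_ASSIGN_VEC4_OFF_E() change: the old assert compared the
unsigned wid against the signed use_off, and the usual arithmetic
conversions turn such a comparison into an unsigned one, so a negative
right-hand side would compare as a huge positive number. The (int) cast
keeps the comparison signed (and silences the compiler's sign-compare
warning). A tiny illustration, with hypothetical values and assuming a
32-bit int:

      unsigned wid = 8;
      int use_off = -4;
      // (wid > use_off)      -> false: use_off converts to 0xfffffffc
      // ((int)wid > use_off) -> true: an ordinary signed comparison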
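
On the add_with_carry() helper added to vvp_net.cc: it adds one word of
each operand per call and threads the overflow through its carry
argument, so a caller can sum arbitrarily wide values word by word.
A minimal standalone sketch of the ripple, with the helper body
reproduced from the patch and hypothetical two-word operands:

      #include <cassert>

      static inline unsigned long add_with_carry(unsigned long a, unsigned long b,
                                                 unsigned long&carry)
      {
            unsigned long tmp = b + carry;
            unsigned long sum = a + tmp;
            carry = 0;
            if (tmp < b) carry = 1;
            if (sum < tmp) carry = 1;
            if (sum < a) carry = 1;
            return sum;
      }

      int main()
      {
              // a is all ones in the low word, b is 1, so the low-word
              // sum wraps to zero and carries into the high word.
            unsigned long a[2] = { ~0UL, 0UL };
            unsigned long b[2] = {  1UL, 0UL };
            unsigned long carry = 0;
            for (int idx = 0 ; idx < 2 ; idx += 1)
                  a[idx] = add_with_carry(a[idx], b[idx], carry);
            assert(a[0] == 0UL && a[1] == 1UL);
            return 0;
      }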
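
On the new vvp_vector4_t::add() method: it implements the standard
Verilog arithmetic rule that any X or Z bit in either operand makes the
entire result X; otherwise the operands are summed and the result is
truncated to the vector width. A short sketch of the intended behavior,
using only constructors and methods that appear in this diff (the 4-bit
values are hypothetical):

      #include "vvp_net.h"

      void add_demo(void)
      {
            vvp_vector4_t l (4, BIT4_0);
            l.set_bit(0, BIT4_1);            // l = 4'b0001
            vvp_vector4_t r (4, BIT4_0);
            r.set_bit(1, BIT4_X);            // r = 4'b00x0
            l.add(r);                        // l = 4'bxxxx: one X/Z bit
                                             // poisons the whole sum

            l = vvp_vector4_t (4, BIT4_0);
            l.set_bit(0, BIT4_1);            // l = 4'b0001
            r = vvp_vector4_t (4, BIT4_1);   // r = 4'b1111
            l.add(r);                        // l = 4'b0000: the carry out
                                             // of bit 3 is masked away, so
                                             // the sum wraps at the width
      }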