From 5e300169101679f844528ac9d79707871f45de9f Mon Sep 17 00:00:00 2001
From: Stephen Williams <steve@icarus.com>
Date: Sun, 20 Apr 2008 16:30:27 -0700
Subject: [PATCH 1/4] Rework vvp_vector4_t to use planar a/b bits instead of
 interleaved.

The vvp_vector4_t holds 4-value logic. This patch changes the encoding
of 4-value bits in the vector to use separate A-bit and B-bit vectors,
with a set B-bit signaling that the corresponding bit is not 0/1
(i.e. it is X or Z). This allows rapid conversion to 2-value logic,
and rapid tests for X and Z values.
---
 vvp/vvp_net.cc | 277 +++++++++++++++++++++++++++----------------------
 vvp/vvp_net.h  | 124 ++++++++++++++++------
 2 files changed, 247 insertions(+), 154 deletions(-)

diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc
index 3697e6c12..f9e6498fe 100644
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@@ -228,26 +228,34 @@ void vvp_vector4_t::copy_from_(const vvp_vector4_t&that)
       size_ = that.size_;
       if (size_ > BITS_PER_WORD) {
 	    unsigned words = (size_+BITS_PER_WORD-1) / BITS_PER_WORD;
-	    bits_ptr_ = new unsigned long[words];
+	    abits_ptr_ = new unsigned long[2*words];
+	    bbits_ptr_ = abits_ptr_ + words;
 
 	    for (unsigned idx = 0 ;  idx < words ;  idx += 1)
-		  bits_ptr_[idx] = that.bits_ptr_[idx];
+		  abits_ptr_[idx] = that.abits_ptr_[idx];
+	    for (unsigned idx = 0 ;  idx < words ;  idx += 1)
+		  bbits_ptr_[idx] = that.bbits_ptr_[idx];
 
       } else {
-	    bits_val_ = that.bits_val_;
+	    abits_val_ = that.abits_val_;
+	    bbits_val_ = that.bbits_val_;
       }
 }
 
-void vvp_vector4_t::allocate_words_(unsigned wid, unsigned long init)
+void vvp_vector4_t::allocate_words_(unsigned wid, unsigned long inita, unsigned long initb)
 {
       if (size_ > BITS_PER_WORD) {
 	    unsigned cnt = (size_ + BITS_PER_WORD - 1) / BITS_PER_WORD;
-	    bits_ptr_ = new unsigned long[cnt];
+	    abits_ptr_ = new unsigned long[2*cnt];
+	    bbits_ptr_ = abits_ptr_ + cnt;
 	    for (unsigned idx = 0 ;  idx < cnt ;  idx += 1)
-		  bits_ptr_[idx] = init;
+		  abits_ptr_[idx] = inita;
+	    for (unsigned idx = 0 ;  idx < cnt ;  idx += 1)
+		  bbits_ptr_[idx] = initb;
 
       } else {
-	    bits_val_ = init;
+	    abits_val_ = inita;
+	    bbits_val_ = initb;
       }
 }
 
@@ -257,20 +265,21 @@ vvp_vector4_t::vvp_vector4_t(const vvp_vector4_t&that,
       size_ = wid;
       assert((adr + wid) <= that.size_);
 
-      allocate_words_(wid, WORD_X_BITS);
+      allocate_words_(wid, WORD_X_ABITS, WORD_X_BBITS);
 
       if (wid > BITS_PER_WORD) {
 	      /* In this case, the subvector and the source vector are
 		 long. Do the transfer reasonably efficiently. */
 	    unsigned ptr = adr / BITS_PER_WORD;
-	    unsigned off = adr % BITS_PER_WORD;
-	    unsigned noff = BITS_PER_WORD - off;
-	    unsigned long lmask = (1UL << 2UL*off) - 1UL;
+	    unsigned long off = adr % BITS_PER_WORD;
+	    unsigned long noff = BITS_PER_WORD - off;
+	    unsigned long lmask = (1UL << off) - 1UL;
 	    unsigned trans = 0;
 	    unsigned dst = 0;
 	    while (trans < wid) {
 		    // The low bits of the result.
-		  bits_ptr_[dst] = (that.bits_ptr_[ptr] & ~lmask) >> 2UL*off;
+		  abits_ptr_[dst] = (that.abits_ptr_[ptr] & ~lmask) >> off;
+		  bbits_ptr_[dst] = (that.bbits_ptr_[ptr] & ~lmask) >> off;
 		  trans += noff;
 
 		  if (trans >= wid)
@@ -281,7 +290,8 @@ vvp_vector4_t::vvp_vector4_t(const vvp_vector4_t&that,
 		    // The high bits of the result. Skip this if the
 		    // source and destination are perfectly aligned.
 		  if (noff != BITS_PER_WORD) {
-			bits_ptr_[dst] |= (that.bits_ptr_[ptr]&lmask) << 2*noff;
+			abits_ptr_[dst] |= (that.abits_ptr_[ptr]&lmask) << noff;
+			bbits_ptr_[dst] |= (that.bbits_ptr_[ptr]&lmask) << noff;
 			trans += off;
 		  }
 
@@ -293,6 +303,7 @@ vvp_vector4_t::vvp_vector4_t(const vvp_vector4_t&that,
 		  set_bit(idx, that.value(adr+idx));
 	    }
       }
+
 }
 
 /*
@@ -308,7 +319,7 @@ void vvp_vector4_t::resize(unsigned newsize)
 
       if (newsize > BITS_PER_WORD) {
 	    unsigned newcnt = (newsize + BITS_PER_WORD - 1) / BITS_PER_WORD;
-	    unsigned long*newbits = new unsigned long[newcnt];
+	    unsigned long*newbits = new unsigned long[2*newcnt];
 
 	    if (cnt > 1) {
 		  unsigned trans = cnt;
@@ -316,26 +327,33 @@ void vvp_vector4_t::resize(unsigned newsize)
 			trans = newcnt;
 
 		  for (unsigned idx = 0 ;  idx < trans ;  idx += 1)
-			newbits[idx] = bits_ptr_[idx];
+			newbits[idx] = abits_ptr_[idx];
+		  for (unsigned idx = 0 ;  idx < trans ;  idx += 1)
+			newbits[newcnt+idx] = bbits_ptr_[idx];
 
-		  delete[]bits_ptr_;
+		  delete[]abits_ptr_;
 
 	    } else {
-		  newbits[0] = bits_val_;
+		  newbits[0] = abits_val_;
+		  newbits[newcnt] = bbits_val_;
 	    }
 
 	    for (unsigned idx = cnt ;  idx < newcnt ;  idx += 1)
-		  newbits[idx] = WORD_X_BITS;
+		  newbits[idx] = WORD_X_ABITS;
+	    for (unsigned idx = cnt ;  idx < newcnt ;  idx += 1)
+		  newbits[newcnt+idx] = WORD_X_BBITS;
 
 	    size_ = newsize;
-	    bits_ptr_ = newbits;
+	    abits_ptr_ = newbits;
+	    bbits_ptr_ = newbits + newcnt;
 
       } else {
-	    unsigned long newval;
 	    if (cnt > 1) {
-		  newval = bits_ptr_[0];
-		  delete[]bits_ptr_;
-		  bits_val_ = newval;
+		  unsigned long newvala = abits_ptr_[0];
+		  unsigned long newvalb = bbits_ptr_[0];
+		  delete[]abits_ptr_;
+		  abits_val_ = newvala;
+		  bbits_val_ = newvalb;
 	    }
 
 	    size_ = newsize;
@@ -354,69 +372,49 @@ unsigned long* vvp_vector4_t::subarray(unsigned adr, unsigned wid) const
 
       if (size_ <= BITS_PER_WORD) {
 	      /* Handle the special case that the array is small. The
-		 entire value of the vector4 is within the bits_val_
+		 entire value of the vector4 is within the xbits_val_
 		 so we know that the result is a single word, the
 		 source is a single word, and we just have to loop
 		 through that word. */
-	    unsigned long tmp = bits_val_ >> 2UL*adr;
-	    tmp &= (1UL << 2*wid) - 1;
-	    if (tmp & WORD_X_BITS)
-		  goto x_out;
+	    unsigned long atmp = abits_val_ >> adr;
+	    unsigned long btmp = bbits_val_ >> adr;
+	    atmp &= (1UL << wid) - 1;
+	    btmp &= (1UL << wid) - 1;
+	    if (btmp) goto x_out;
 
-	    unsigned long mask1 = 1;
-	    for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-		  if (tmp & 1) val[0] |= mask1;
-		  mask1 <<= 1UL;
-		  tmp >>= 2UL;
-	    }
+	    val[0] = atmp;
 	    return val;
 
       } else {
 
+	    unsigned val_ptr = 0;
+	    unsigned val_off = 0;
+
 	      /* Get the first word we are scanning. We may in fact be
 		 somewhere in the middle of that word. */
-	    unsigned long tmp = bits_ptr_[adr/BITS_PER_WORD];
-	    unsigned long off = adr%BITS_PER_WORD;
-	    tmp >>= 2UL * off;
+	    while (wid > 0) {
+		  unsigned long atmp = abits_ptr_[adr/BITS_PER_WORD];
+		  unsigned long btmp = bbits_ptr_[adr/BITS_PER_WORD];
+		  unsigned long off = adr%BITS_PER_WORD;
+		  atmp >>= off;
+		  btmp >>= off;
 
-	      // Test for X bits but not beyond the desired wid.
-	    if (wid < (BITS_PER_WORD-off))
-		  tmp &= ~(-1UL << 2*wid);
-	    if (tmp & WORD_X_BITS)
-		  goto x_out;
+		  unsigned long trans = BITS_PER_WORD - off;
+		  if (trans > (8*sizeof(val[0]) - val_off))
+			trans = 8*sizeof(val[0]) - val_off;
+		  if (wid < trans)
+			trans = wid;
+		  atmp &= (1UL << trans) - 1;
+		  btmp &= (1UL << trans) - 1;
+		  if (btmp) goto x_out;
 
-	      // Where in the target array to write the next bit.
-	    unsigned long mask1 = 1;
-	    const unsigned long mask1_last = 1UL << (BIT2_PER_WORD-1);
-	    unsigned long*val_ptr = val;
-	      // Track where the source bit is in the source word.
-	    unsigned adr_bit = adr%BITS_PER_WORD;
-	      // Scan...
-	    for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-		    /* Starting a new word? */
-		  if (adr_bit == BITS_PER_WORD) {
-			tmp = bits_ptr_[adr/BITS_PER_WORD];
-			  // If this is the last word, then only test
-			  // for X in the valid bits.
-			if ((wid-idx) < BITS_PER_WORD)
-			      tmp &= ~(WORD_Z_BITS<<2*(wid-idx));
-			if (tmp & WORD_X_BITS)
-			      goto x_out;
-			adr_bit = 0;
-		  }
-
-		  if (tmp&1)
-			*val_ptr |= mask1;
-
-		  adr += 1;
-		  adr_bit += 1;
-		  tmp >>= 2UL;
-
-		  if (mask1 == mask1_last) {
+		  val[val_ptr] |= atmp << val_off;
+		  adr += trans;
+		  wid -= trans;
+		  val_off += trans;
+		  if (val_off == 8*sizeof(val[0])) {
 			val_ptr += 1;
-			mask1 = 1;
-		  } else {
-			mask1 <<= 1;
+			val_off = 0;
 		  }
 	    }
       }
@@ -447,50 +445,59 @@ void vvp_vector4_t::set_vec(unsigned adr, const vvp_vector4_t&that)
 		 for all the bits that are to come from that. Do the
 		 job by some shifting, masking and OR. */
 
-	    unsigned long lmask = (1UL << 2UL*adr) - 1;
+	    unsigned long lmask = (1UL << adr) - 1;
 	    unsigned long hmask;
 	    unsigned long hshift = adr+that.size_;
 	    if (hshift >= BITS_PER_WORD)
 		  hmask = -1UL;
 	    else
-		  hmask = (1UL << 2UL*(adr+that.size_)) - 1;
+		  hmask = (1UL << (adr+that.size_)) - 1;
 	    unsigned long mask = hmask & ~lmask;
 
-	    bits_val_ =
-		  (bits_val_ & ~mask)
-		  | ((that.bits_val_<<2UL*adr) & mask);
+	    abits_val_ =
+		  (abits_val_ & ~mask)
+		  | ((that.abits_val_<<adr) & mask);
+	    bbits_val_ =
+		  (bbits_val_ & ~mask)
+		  | ((that.bbits_val_<<adr) & mask);
 
       } else if (that.size_ <= BITS_PER_WORD) {
 
 	      /* This vector is more than a word, but that vector is
 		 still small. Write into the destination, possibly
-		 spanning two destination works, depending on whether
+		 spanning two destination words, depending on whether
 		 the source vector spans a word transition. */
 	    unsigned long dptr = adr / BITS_PER_WORD;
 	    unsigned long doff = adr % BITS_PER_WORD;
 
-	    unsigned long lmask = (1UL << 2UL*doff) - 1;
+	    unsigned long lmask = (1UL << doff) - 1;
 	    unsigned long hshift = doff+that.size_;
 	    unsigned long hmask;
 	    if (hshift >= BITS_PER_WORD)
 		  hmask = -1UL;
 	    else
-		  hmask = (1UL << 2*hshift) - 1UL;
+		  hmask = (1UL << hshift) - 1UL;
 
 	    unsigned long mask = hmask & ~lmask;
 
-	    bits_ptr_[dptr] =
-		  (bits_ptr_[dptr] & ~mask)
-		  | ((that.bits_val_ << 2UL*doff) & mask);
+	    abits_ptr_[dptr] =
+		  (abits_ptr_[dptr] & ~mask)
+		  | ((that.abits_val_ << doff) & mask);
+	    bbits_ptr_[dptr] =
+		  (bbits_ptr_[dptr] & ~mask)
+		  | ((that.bbits_val_ << doff) & mask);
 
 	    if ((doff + that.size_) > BITS_PER_WORD) {
 		  unsigned tail = doff + that.size_ - BITS_PER_WORD;
-		  mask = (1UL << 2UL*tail) - 1;
+		  mask = (1UL << tail) - 1;
 
 		  dptr += 1;
-		  bits_ptr_[dptr] =
-			(bits_ptr_[dptr] & ~mask)
-			| ((that.bits_val_ >> 2UL*(that.size_-tail)) & mask);
+		  abits_ptr_[dptr] =
+			(abits_ptr_[dptr] & ~mask)
+			| ((that.abits_val_ >> (that.size_-tail)) & mask);
+		  bbits_ptr_[dptr] =
+			(bbits_ptr_[dptr] & ~mask)
+			| ((that.bbits_val_ >> (that.size_-tail)) & mask);
 	    }
 
       } else if (adr%BITS_PER_WORD == 0) {
@@ -503,15 +510,21 @@ void vvp_vector4_t::set_vec(unsigned adr, const vvp_vector4_t&that)
 	    unsigned sptr = 0;
 	    unsigned dptr = adr / BITS_PER_WORD;
 	    while (remain >= BITS_PER_WORD) {
-		  bits_ptr_[dptr++] = that.bits_ptr_[sptr++];
+		  abits_ptr_[dptr] = that.abits_ptr_[sptr];
+		  bbits_ptr_[dptr] = that.bbits_ptr_[sptr];
+		  dptr += 1;
+		  sptr += 1;
 		  remain -= BITS_PER_WORD;
 	    }
 
 	    if (remain > 0) {
-		  unsigned long mask = (1UL << 2UL*remain) - 1;
-		  bits_ptr_[dptr] =
-			(bits_ptr_[dptr] & ~mask)
-			| (that.bits_ptr_[sptr] & mask);
+		  unsigned long mask = (1UL << remain) - 1;
+		  abits_ptr_[dptr] =
+			(abits_ptr_[dptr] & ~mask)
+			| (that.abits_ptr_[sptr] & mask);
+		  bbits_ptr_[dptr] =
+			(bbits_ptr_[dptr] & ~mask)
+			| (that.bbits_ptr_[sptr] & mask);
 	    }
 
       } else {
@@ -523,17 +536,23 @@ void vvp_vector4_t::set_vec(unsigned adr, const vvp_vector4_t&that)
 	    unsigned sptr = 0;
 	    unsigned dptr = adr / BITS_PER_WORD;
 	    unsigned doff = adr % BITS_PER_WORD;
-	    unsigned long lmask = (1UL << 2UL*doff) - 1;
+	    unsigned long lmask = (1UL << doff) - 1;
 	    unsigned ndoff = BITS_PER_WORD - doff;
 	    while (remain >= BITS_PER_WORD) {
-		  bits_ptr_[dptr] =
-			(bits_ptr_[dptr] & lmask)
-			| ((that.bits_ptr_[sptr] << 2UL*doff) & ~lmask);
+		  abits_ptr_[dptr] =
+			(abits_ptr_[dptr] & lmask)
+			| ((that.abits_ptr_[sptr] << doff) & ~lmask);
+		  bbits_ptr_[dptr] =
+			(bbits_ptr_[dptr] & lmask)
+			| ((that.bbits_ptr_[sptr] << doff) & ~lmask);
 		  dptr += 1;
 
-		  bits_ptr_[dptr] =
-			(bits_ptr_[dptr] & ~lmask)
-			| ((that.bits_ptr_[sptr] >> 2UL*ndoff) & lmask);
+		  abits_ptr_[dptr] =
+			(abits_ptr_[dptr] & ~lmask)
+			| ((that.abits_ptr_[sptr] >> ndoff) & lmask);
+		  bbits_ptr_[dptr] =
+			(bbits_ptr_[dptr] & ~lmask)
+			| ((that.bbits_ptr_[sptr] >> ndoff) & lmask);
 
 		  remain -= BITS_PER_WORD;
 		  sptr += 1;
@@ -545,28 +564,32 @@ void vvp_vector4_t::set_vec(unsigned adr, const vvp_vector4_t&that)
 		  if (hshift >= BITS_PER_WORD)
 			hmask = -1UL;
 		  else
-			hmask = (1UL << 2UL*(doff+remain)) - 1;
+			hmask = (1UL << (doff+remain)) - 1;
 
 		  unsigned long mask = hmask & ~lmask;
 
-		  bits_ptr_[dptr] = (bits_ptr_[dptr] & ~mask)
-		        | ((that.bits_ptr_[sptr] << 2UL*doff) & mask);
+		  abits_ptr_[dptr] = (abits_ptr_[dptr] & ~mask)
+		        | ((that.abits_ptr_[sptr] << doff) & mask);
+		  bbits_ptr_[dptr] = (bbits_ptr_[dptr] & ~mask)
+		        | ((that.bbits_ptr_[sptr] << doff) & mask);
 
 		  if ((doff + remain) > BITS_PER_WORD) {
 			unsigned tail = doff + remain - BITS_PER_WORD;
 			if (tail >= BITS_PER_WORD)
 			      mask = -1UL;
 			else
-			      mask = (1UL << 2UL*tail) - 1;
+			      mask = (1UL << tail) - 1;
 
 			dptr += 1;
-			bits_ptr_[dptr] = (bits_ptr_[dptr] & ~mask) |
-			      ((that.bits_ptr_[sptr] >> 2UL*
-			        (remain-tail))&mask);
+			abits_ptr_[dptr] = (abits_ptr_[dptr] & ~mask) |
+			      ((that.abits_ptr_[sptr] >> (remain-tail))&mask);
+			bbits_ptr_[dptr] = (bbits_ptr_[dptr] & ~mask) |
+			      ((that.bbits_ptr_[sptr] >> (remain-tail))&mask);
 		  }
 	    }
 
       }
+
 }
 
 bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
@@ -575,24 +598,29 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
 	    return false;
 
       if (size_ < BITS_PER_WORD) {
-	    unsigned long mask = (1UL << 2UL * size_) - 1;
-	    return (bits_val_&mask) == (that.bits_val_&mask);
+	    unsigned long mask = (1UL << size_) - 1;
+	    return (abits_val_&mask) == (that.abits_val_&mask)
+		  && (bbits_val_&mask) == (that.bbits_val_&mask);
       }
 
       if (size_ == BITS_PER_WORD) {
-	    return bits_val_ == that.bits_val_;
+	    return (abits_val_ == that.abits_val_)
+		  && (bbits_val_ == that.bbits_val_);
       }
 
       unsigned words = size_ / BITS_PER_WORD;
       for (unsigned idx = 0 ;  idx < words ;  idx += 1) {
-	    if (bits_ptr_[idx] != that.bits_ptr_[idx])
+	    if (abits_ptr_[idx] != that.abits_ptr_[idx])
+		  return false;
+	    if (bbits_ptr_[idx] != that.bbits_ptr_[idx])
 		  return false;
       }
 
       unsigned long mask = size_%BITS_PER_WORD;
       if (mask > 0) {
-	    mask = (1UL << 2UL*mask) - 1;
-	    return (bits_ptr_[words]&mask) == (that.bits_ptr_[words]&mask);
+	    mask = (1UL << mask) - 1;
+	    return (abits_ptr_[words]&mask) == (that.abits_ptr_[words]&mask)
+		  && (bbits_ptr_[words]&mask) == (that.bbits_ptr_[words]&mask);
       }
 
       return true;
@@ -601,24 +629,24 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
 bool vvp_vector4_t::has_xz() const
 {
       if (size_ < BITS_PER_WORD) {
-	    unsigned long mask = WORD_X_BITS >> 2*(BITS_PER_WORD - size_);
-	    return 0 != (bits_val_&mask);
+	    unsigned long mask = -1UL >> (BITS_PER_WORD - size_);
+	    return bbits_val_&mask;
       }
 
       if (size_ == BITS_PER_WORD) {
-	    return 0 != (bits_val_&WORD_X_BITS);
+	    return bbits_val_;
       }
 
       unsigned words = size_ / BITS_PER_WORD;
       for (unsigned idx = 0 ; idx < words ; idx += 1) {
-	    if (bits_ptr_[idx] & WORD_X_BITS)
+	    if (bbits_ptr_[idx])
 		  return true;
       }
 
       unsigned long mask = size_%BITS_PER_WORD;
       if (mask > 0) {
-	    mask = WORD_X_BITS >> 2*(BITS_PER_WORD - mask);
-	    return 0 != (bits_ptr_[words]&mask);
+	    mask = -1UL >> (BITS_PER_WORD - mask);
+	    return bbits_ptr_[words]&mask;
       }
 
       return false;
@@ -626,15 +654,18 @@ bool vvp_vector4_t::has_xz() const
 
 void vvp_vector4_t::change_z2x()
 {
-      assert(BIT4_Z == 3 && BIT4_X == 2);
-# define Z2X(val) do{ (val) = (val) & ~(((val)&WORD_X_BITS) >> 1UL); }while(0)
+	// This method relies on the fact that both BIT4_X and BIT4_Z
+	// have the bbit set in the vector4 encoding, and also that
+	// the BIT4_X has abit set in the vector4 encoding. By simply
+	// or-ing the bbit into the abit, BIT4_X and BIT4_Z both
+	// become BIT4_X.
 
       if (size_ <= BITS_PER_WORD) {
-	    Z2X(bits_val_);
+	    abits_val_ |= bbits_val_;
       } else {
 	    unsigned words = (size_+BITS_PER_WORD-1) / BITS_PER_WORD;
 	    for (unsigned idx = 0 ;  idx < words ;  idx += 1)
-		  Z2X(bits_ptr_[idx]);
+		  abits_ptr_[idx] |= bbits_ptr_[idx];
       }
 }
 
diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h
index 6c169a19e..c4c98d5dd 100644
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@@ -136,31 +136,50 @@ class vvp_vector4_t {
 
     private:
 	// Number of vvp_bit4_t bits that can be shoved into a word.
-      enum { BITS_PER_WORD = 8*sizeof(unsigned long)/2 };
+      enum { BITS_PER_WORD = 8*sizeof(unsigned long) };
 #if SIZEOF_UNSIGNED_LONG == 8
-      enum { WORD_0_BITS = 0x0000000000000000UL };
-      enum { WORD_1_BITS = 0x5555555555555555UL };
-      enum { WORD_X_BITS = 0xaaaaaaaaaaaaaaaaUL };
-      enum { WORD_Z_BITS = 0xffffffffffffffffUL };
+      enum { WORD_0_ABITS = 0x0000000000000000UL,
+	     WORD_0_BBITS = 0x0000000000000000UL };
+      enum { WORD_1_ABITS = 0xFFFFFFFFFFFFFFFFUL,
+	     WORD_1_BBITS = 0x0000000000000000UL };
+      enum { WORD_X_ABITS = 0xFFFFFFFFFFFFFFFFUL,
+	     WORD_X_BBITS = 0xFFFFFFFFFFFFFFFFUL };
+      enum { WORD_Z_ABITS = 0x0000000000000000UL,
+             WORD_Z_BBITS = 0xFFFFFFFFFFFFFFFFUL };
 #elif SIZEOF_UNSIGNED_LONG == 4
-      enum { WORD_0_BITS = 0x00000000UL };
-      enum { WORD_1_BITS = 0x55555555UL };
-      enum { WORD_X_BITS = 0xaaaaaaaaUL };
-      enum { WORD_Z_BITS = 0xffffffffUL };
+      enum { WORD_0_ABITS = 0x00000000UL, WORD_0_BBITS = 0x00000000UL };
+      enum { WORD_1_ABITS = 0xFFFFFFFFUL, WORD_1_BBITS = 0x00000000UL };
+      enum { WORD_X_ABITS = 0xFFFFFFFFUL, WORD_X_BBITS = 0xFFFFFFFFUL };
+      enum { WORD_Z_ABITS = 0x00000000UL, WORD_Z_BBITS = 0xFFFFFFFFUL };
 #else
-#error "WORD_X_BITS not defined for this architecture?"
+#error "WORD_X_xBITS not defined for this architecture?"
 #endif
 
 	// Initialize and operator= use this private method to copy
 	// the data from that object into this object.
       void copy_from_(const vvp_vector4_t&that);
 
-      void allocate_words_(unsigned size, unsigned long init);
+      void allocate_words_(unsigned size, unsigned long inita, unsigned long initb);
+
+	// Values in the vvp_vector4_t are stored split accross two
+	// arrays. For each bit in the vector, there is an abit and a
+	// bbit. the encoding of a vvp_vector4_t is:
+	//
+	//         abit bbit
+	//         ---- ----
+	// BIT4_0    0    0   (Note that for BIT4_0 and BIT4_1, the bbit
+	// BIT4_1    1    0    value is 0. This makes detecting XZ fast.)
+	// BIT4_X    1    1
+	// BIT4_Z    0    1
 
       unsigned size_;
       union {
-	    unsigned long bits_val_;
-	    unsigned long*bits_ptr_;
+	    unsigned long abits_val_;
+	    unsigned long*abits_ptr_;
+      };
+      union {
+	    unsigned long bbits_val_;
+	    unsigned long*bbits_ptr_;
       };
 };
 
@@ -173,19 +192,26 @@ inline vvp_vector4_t::vvp_vector4_t(unsigned size, vvp_bit4_t val)
 : size_(size)
 {
 	/* note: this relies on the bit encoding for the vvp_bit4_t. */
-      const static unsigned long init_table[4] = {
-	    WORD_0_BITS,
-	    WORD_1_BITS,
-	    WORD_X_BITS,
-	    WORD_Z_BITS };
+      const static unsigned long init_atable[4] = {
+	    WORD_0_ABITS,
+	    WORD_1_ABITS,
+	    WORD_X_ABITS,
+	    WORD_Z_ABITS };
+      const static unsigned long init_btable[4] = {
+	    WORD_0_BBITS,
+	    WORD_1_BBITS,
+	    WORD_X_BBITS,
+	    WORD_Z_BBITS };
 
-      allocate_words_(size, init_table[val]);
+      allocate_words_(size, init_atable[val], init_btable[val]);
 }
 
 inline vvp_vector4_t::~vvp_vector4_t()
 {
       if (size_ > BITS_PER_WORD) {
-	    delete[] bits_ptr_;
+	    delete[] abits_ptr_;
+	      // bbits_ptr_ actually points half-way into a
+	      // double-length array started at abits_ptr_
       }
 }
 
@@ -195,7 +221,7 @@ inline vvp_vector4_t& vvp_vector4_t::operator= (const vvp_vector4_t&that)
 	    return *this;
 
       if (size_ > BITS_PER_WORD)
-	    delete[] bits_ptr_;
+	    delete[] abits_ptr_;
 
       copy_from_(that);
 
@@ -211,18 +237,28 @@ inline vvp_bit4_t vvp_vector4_t::value(unsigned idx) const
       unsigned wdx = idx / BITS_PER_WORD;
       unsigned long off = idx % BITS_PER_WORD;
 
-      unsigned long bits;
+      unsigned long abits, bbits;
       if (size_ > BITS_PER_WORD) {
-	    bits = bits_ptr_[wdx];
+	    abits = abits_ptr_[wdx];
+	    bbits = bbits_ptr_[wdx];
       } else {
-	    bits = bits_val_;
+	    abits = abits_val_;
+	    bbits = bbits_val_;
       }
 
-      bits >>= (off * 2UL);
+      abits >>= off;
+      bbits >>= off;
+      int tmp = ((bbits&1) << 1) + (abits&1);
+      static const vvp_bit4_t bits_bit4_map[4] = {
+	    BIT4_0, // bbit==0, abit==0
+	    BIT4_1, // bbit==0, abit==1
+	    BIT4_Z, // bbit==1, abit==0
+	    BIT4_X  // bbit==1, abit==1
+      };
 
 	/* Casting is evil, but this cast matches the un-cast done
 	   when the vvp_bit4_t value is put into the vector. */
-      return (vvp_bit4_t) (bits & 3);
+      return bits_bit4_map[tmp];
 }
 
 inline vvp_vector4_t vvp_vector4_t::subvalue(unsigned adr, unsigned wid) const
@@ -235,15 +271,41 @@ inline void vvp_vector4_t::set_bit(unsigned idx, vvp_bit4_t val)
       assert(idx < size_);
 
       unsigned long off = idx % BITS_PER_WORD;
-      unsigned long mask = 3UL << (2UL*off);
+      unsigned long amask = 0, bmask = 0;
+      switch (val) {
+	  case BIT4_0:
+	    amask = 0;
+	    bmask = 0;
+	    break;
+	  case BIT4_1:
+	    amask = 1;
+	    bmask = 0;
+	    break;
+	  case BIT4_X:
+	    amask = 1;
+	    bmask = 1;
+	    break;
+	  case BIT4_Z:
+	    amask = 0;
+	    bmask = 1;
+	    break;
+      }
+
+      unsigned long mask = 1UL << off;
+      amask <<= off;
+      bmask <<= off;
 
       if (size_ > BITS_PER_WORD) {
 	    unsigned wdx = idx / BITS_PER_WORD;
-	    bits_ptr_[wdx] &= ~mask;
-	    bits_ptr_[wdx] |= (unsigned long)val << (2UL*off);
+	    abits_ptr_[wdx] &= ~mask;
+	    abits_ptr_[wdx] |= amask;
+	    bbits_ptr_[wdx] &= ~mask;
+	    bbits_ptr_[wdx] |= bmask;
       } else {
-	    bits_val_ &= ~mask;
-	    bits_val_ |=  (unsigned long)val << (2UL*off);
+	    abits_val_ &= ~mask;
+	    abits_val_ |= amask;
+	    bbits_val_ &= ~mask;
+	    bbits_val_ |= bmask;
       }
 }
 

From 40fd07d46e47a7823557e2f0a141483f2e534b6f Mon Sep 17 00:00:00 2001
From: Stephen Williams <steve@icarus.com>
Date: Sun, 20 Apr 2008 19:21:41 -0700
Subject: [PATCH 2/4] Remove most dependencies on vvp_bit4_t encoding.

Remove dependencies on the vvp_bit4_t encoding outside of the vvp_net
core types. The table_functor_s class was the worst offender and
was barely used, so it is now removed completely. There are a few
opcodes in vthread.cc that also made vvp_bit4_t encoding
assumptions (and used casts) and those have been fixed. There
were also various VPI interface functions that have been fixed.
---
 vvp/compile.cc         |   4 +-
 vvp/draw_tt.c          | 710 +----------------------------------------
 vvp/logic.cc           |  49 ---
 vvp/logic.h            |  27 --
 vvp/resolv.cc          |  81 +++++
 vvp/resolv.h           |  68 ++--
 vvp/vpi_priv.cc        |  15 +-
 vvp/vpi_signal.cc      |  47 ++-
 vvp/vpi_vthr_vector.cc |  20 +-
 vvp/vthread.cc         |  10 +-
 10 files changed, 199 insertions(+), 832 deletions(-)

diff --git a/vvp/compile.cc b/vvp/compile.cc
index e84f2e6ab..ccd1d7795 100644
--- a/vvp/compile.cc
+++ b/vvp/compile.cc
@@ -1444,10 +1444,10 @@ void compile_resolver(char*label, char*type, unsigned argc, struct symb_s*argv)
 	    obj = new resolv_functor(vvp_scalar_t(BIT4_1, 5));
 
       } else if (strcmp(type,"triand") == 0) {
-	    obj = new table_functor_s(ft_TRIAND);
+	    obj = new resolv_triand;
 
       } else if (strcmp(type,"trior") == 0) {
-	    obj = new table_functor_s(ft_TRIOR);
+	    obj = new resolv_trior;
 
       } else {
 	    fprintf(stderr, "invalid resolver type: %s\n", type);
diff --git a/vvp/draw_tt.c b/vvp/draw_tt.c
index 0f04cd1f3..85e3b6237 100644
--- a/vvp/draw_tt.c
+++ b/vvp/draw_tt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001 Stephen Williams (steve@icarus.com)
+ * Copyright (c) 2001-2008 Stephen Williams (steve@icarus.com)
  *
  *    This source code is free software; you can redistribute it
  *    and/or modify it in source code form under the terms of the GNU
@@ -16,639 +16,9 @@
  *    along with this program; if not, write to the Free Software
  *    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  */
-#ifdef HAVE_CVS_IDENT
-#ident "$Id: draw_tt.c,v 1.23 2006/11/28 05:57:20 steve Exp $"
-#endif
 
 # include  <stdio.h>
 
-#if 0
-static void draw_AND(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_AND[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if ((i0 == 0) || (i1 == 0) ||
-				  (i2 == 0) || (i3 == 0))
-				    val = 0;
-			      else if ((i0 == 1) && (i1 == 1) &&
-				       (i2 == 1) && (i3 == 1))
-				    val = 1;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_NAND(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_NAND[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if ((i0 == 0) || (i1 == 0) ||
-				  (i2 == 0) || (i3 == 0))
-				    val = 1;
-			      else if ((i0 == 1) && (i1 == 1) &&
-				       (i2 == 1) && (i3 == 1))
-				    val = 0;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_BUF(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_BUF[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if (i0 == 1)
-				    val = 1;
-			      else if (i0 == 0)
-				    val = 0;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-
-#if 0
-static void draw_BUFZ(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_BUFZ[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val = i0;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_BUFIF0(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_BUFIF0[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if (i2 == 0)
-				    val = 3;
-			      else if (i0 == 1)
-				    val = 1;
-			      else if (i0 == 0)
-				    val = 0;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_BUFIF1(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_BUFIF1[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if (i2 == 1)
-				    val = 3;
-			      else if (i0 == 1)
-				    val = 1;
-			      else if (i0 == 0)
-				    val = 0;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_PMOS(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_PMOS[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if (i2 == 0 || i0 == 3)
-				    val = 3;
-			      else if (i0 == 1)
-				    val = 1;
-			      else if (i0 == 0)
-				    val = 0;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_NMOS(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_NMOS[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if (i2 == 1 || i0 == 3)
-				    val = 3;
-			      else if (i0 == 1)
-				    val = 1;
-			      else if (i0 == 0)
-				    val = 0;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-static void draw_MUXX(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_MUXX[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if (i3 == 0)
-				    val = 3;
-			      else if (i3 == 2)
-				    val = 2;
-			      else if (i3 == 3)
-				    val = 2;
-			      else if (i2 >= 2) {
-				    val = 2;
-			      } else if (i2 == 0)
-				    val = i0;
-			      else
-				    val = i1;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-
-#if 0
-static void draw_MUXZ(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_MUXZ[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if (i3 == 0)
-				    val = 3;
-			      else if (i3 == 2)
-				    val = 2;
-			      else if (i3 == 3)
-				    val = 2;
-			      else if (i2 >= 2) {
-				    if (i0 == i1)
-					  val = i0;
-				    else
-					  val = 2;
-			      } else if (i2 == 0)
-				    val = i0;
-			      else
-				    val = i1;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_EEQ(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_EEQ[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if (i3 != i2)
-				    val = 0;
-			      else if (i1 != i0)
-				    val = 0;
-			      else
-				    val = 1;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_NOR(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_NOR[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if ((i0 == 1) || (i1 == 1) ||
-				  (i2 == 1) || (i3 == 1))
-				    val = 0;
-			      else if ((i0 == 0) && (i1 == 0) &&
-				       (i2 == 0) && (i3 == 0))
-				    val = 1;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_NOT(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_NOT[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if (i0 == 1)
-				    val = 0;
-			      else if (i0 == 0)
-				    val = 1;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_OR(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_OR[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if ((i0 == 1) || (i1 == 1) ||
-				  (i2 == 1) || (i3 == 1))
-				    val = 1;
-			      else if ((i0 == 0) && (i1 == 0) &&
-				       (i2 == 0) && (i3 == 0))
-				    val = 0;
-			      else
-				    val = 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_XNOR(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_XNOR[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-
-			      if ((i0 > 1) || (i1 > 1)
-				  || (i2 > 1) || (i3 > 1))
-				    val = 2;
-			      else
-				    val = (i0 + i1 + i2 + i3) % 2 ^ 1;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-#if 0
-static void draw_XOR(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_XOR[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-
-			      if ((i0 > 1) || (i1 > 1)
-				  || (i2 > 1) || (i3 > 1))
-				    val = 2;
-			      else
-				    val = (i0 + i1 + i2 + i3) % 2;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-#endif
-
-static void draw_TRIAND(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_TRIAND[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if ((i0 == 0) || (i1 == 0) ||
-				  (i2 == 0) || (i3 == 0))
-				    val = 0;
-			      else if ((i0 == 2) || (i1 == 2) ||
-				       (i2 == 2) || (i3 == 2))
-				    val = 2;
-			      else if ((i0 == 3) && (i1 == 3) &&
-				       (i2 == 3) && (i3 == 3))
-				    val = 3;
-			      else
-				    val = 1;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
-
-static void draw_TRIOR(void)
-{
-      unsigned i0, i1, i2, i3;
-
-      printf("const unsigned char ft_TRIOR[64] = {");
-
-      for (i3 = 0 ;  i3 < 4 ;  i3 += 1)
-	    for (i2 = 0 ;  i2 < 4 ;  i2 += 1) {
-		  printf("\n    ");
-		  for (i1 = 0 ;  i1 < 4 ;  i1 += 1) {
-			unsigned idx = (i3 << 4) | (i2 << 2) | i1;
-			unsigned char byte = 0;
-
-			for (i0 = 0 ; i0 < 4 ;  i0 += 1) {
-			      unsigned val;
-			      if ((i0 == 1) || (i1 == 1) ||
-				  (i2 == 1) || (i3 == 1))
-				    val = 1;
-			      else if ((i0 == 2) || (i1 == 2) ||
-				       (i2 == 2) || (i3 == 2))
-				    val = 2;
-			      else if ((i0 == 3) && (i1 == 3) &&
-				       (i2 == 3) && (i3 == 3))
-				    val = 3;
-			      else
-				    val = 0;
-
-			      byte |= val << (i0*2);
-			}
-
-			printf("0x%02x, ", byte);
-		  }
-	    }
-
-      printf("};\n");
-}
 
 /*
  * The hex_digits table is not a functor truth table per say, but a
@@ -749,85 +119,7 @@ static void draw_oct_table()
 
 main()
 {
-      printf("# include  \"logic.h\"\n");
-      draw_MUXX();
-      draw_TRIAND();
-      draw_TRIOR();
       draw_hex_table();
       draw_oct_table();
       return 0;
 }
-
-/*
- * $Log: draw_tt.c,v $
- * Revision 1.23  2006/11/28 05:57:20  steve
- *  Use new vvp_fun_XXX in place of old functor table for NAND/NOR/XNOR/EEQ.
- *
- * Revision 1.22  2005/09/19 21:45:09  steve
- *  Use lazy eval of BUF/NOT/OR/XOR gates.
- *
- * Revision 1.21  2005/06/12 21:56:16  steve
- *  Remove unused ft_MOS truth tables.
- *
- * Revision 1.20  2005/02/12 23:05:25  steve
- *  Cleanup unused truth tables.
- *
- * Revision 1.19  2005/02/12 22:50:52  steve
- *  Implement the vvp_fun_muxz functor.
- *
- * Revision 1.18  2005/01/29 17:52:06  steve
- *  move AND to buitin instead of table.
- *
- * Revision 1.17  2004/12/31 05:57:25  steve
- *  No need to draw BUF or BUFZ tables.
- *
- * Revision 1.16  2004/10/04 01:10:59  steve
- *  Clean up spurious trailing white space.
- *
- * Revision 1.15  2003/07/30 01:13:29  steve
- *  Add support for triand and trior.
- *
- * Revision 1.14  2002/08/29 03:04:01  steve
- *  Generate x out for x select on wide muxes.
- *
- * Revision 1.13  2002/08/12 01:35:08  steve
- *  conditional ident string using autoconfig.
- *
- * Revision 1.12  2002/01/12 04:02:16  steve
- *  Support the BUFZ logic device.
- *
- * Revision 1.11  2001/11/06 03:07:22  steve
- *  Code rearrange. (Stephan Boettcher)
- *
- * Revision 1.10  2001/10/09 02:28:17  steve
- *  Add the PMOS and NMOS functor types.
- *
- * Revision 1.9  2001/06/19 03:01:10  steve
- *  Add structural EEQ gates (Stephan Boettcher)
- *
- * Revision 1.8  2001/04/29 23:13:34  steve
- *  Add bufif0 and bufif1 functors.
- *
- * Revision 1.7  2001/04/26 05:12:02  steve
- *  Implement simple MUXZ for ?: operators.
- *
- * Revision 1.6  2001/04/21 02:04:01  steve
- *  Add NAND and XNOR functors.
- *
- * Revision 1.5  2001/04/15 16:37:48  steve
- *  add XOR support.
- *
- * Revision 1.4  2001/04/01 21:31:46  steve
- *  Add the buf functor type.
- *
- * Revision 1.3  2001/03/25 20:45:09  steve
- *  Add vpiOctStrVal access to signals.
- *
- * Revision 1.2  2001/03/25 19:37:26  steve
- *  Calculate NOR and NOT tables, and also the hex_digits table.
- *
- * Revision 1.1  2001/03/11 22:42:11  steve
- *  Functor values and propagation.
- *
- */
-
diff --git a/vvp/logic.cc b/vvp/logic.cc
index e01f92b68..ecfc947d7 100644
--- a/vvp/logic.cc
+++ b/vvp/logic.cc
@@ -31,52 +31,6 @@
 # include  <malloc.h>
 #endif
 
-
-/*
- *   Implementation of the table functor, which provides logic with up
- *   to 4 inputs.
- */
-
-table_functor_s::table_functor_s(truth_t t)
-: table(t)
-{
-      count_functors_logic += 1;
-}
-
-table_functor_s::~table_functor_s()
-{
-}
-
-/*
- * WARNING: This function assumes that the table generator encodes the
- * values 0/1/x/z the same as the vvp_bit4_t enumeration values.
- */
-void table_functor_s::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&val)
-{
-      input_[ptr.port()] = val;
-
-      vvp_vector4_t result (val.size());
-
-      for (unsigned idx = 0 ;  idx < val.size() ;  idx += 1) {
-
-	    unsigned lookup = 0;
-	    for (unsigned pdx = 4 ;  pdx > 0 ;  pdx -= 1) {
-		  lookup <<= 2;
-		  if (idx < input_[pdx-1].size())
-			lookup |= input_[pdx-1].value(idx);
-	    }
-
-	    unsigned off = lookup / 4;
-	    unsigned shift = lookup % 4 * 2;
-
-	    unsigned bit_val = table[off] >> shift;
-	    bit_val &= 3;
-	    result.set_bit(idx, (vvp_bit4_t)bit_val);
-      }
-
-      vvp_send_vec4(ptr.ptr()->out, result);
-}
-
 vvp_fun_boolean_::vvp_fun_boolean_(unsigned wid)
 {
       net_ = 0;
@@ -604,9 +558,6 @@ void compile_functor(char*label, char*type, unsigned width,
       } else if (strcmp(type, "MUXR") == 0) {
 	    obj = new vvp_fun_muxr;
 
-      } else if (strcmp(type, "MUXX") == 0) {
-	    obj = new table_functor_s(ft_MUXX);
-
       } else if (strcmp(type, "MUXZ") == 0) {
 	    obj = new vvp_fun_muxz(width);
 
diff --git a/vvp/logic.h b/vvp/logic.h
index 4793b8bb3..24df457d2 100644
--- a/vvp/logic.h
+++ b/vvp/logic.h
@@ -23,26 +23,6 @@
 # include  "schedule.h"
 # include  <stddef.h>
 
-/*
- * Table driven functor. This kind of node takes 4 inputs and
- * generates a single output. The logic is bitwise, and implemented
- * with a lookup table.
- */
-
-class table_functor_s: public vvp_net_fun_t {
-
-    public:
-      typedef const unsigned char *truth_t;
-      explicit table_functor_s(truth_t t);
-      virtual ~table_functor_s();
-
-      void recv_vec4(vvp_net_ptr_t p, const vvp_vector4_t&bit);
-
-    private:
-      truth_t table;
-      vvp_vector4_t input_[4];
-};
-
 /*
  * vvp_fun_boolean_ is just a common hook for holding operands.
  */
@@ -210,11 +190,4 @@ class vvp_fun_xor  : public vvp_fun_boolean_ {
       bool invert_;
 };
 
-// table functor types
-
-extern const unsigned char ft_MUXX[];
-extern const unsigned char ft_EEQ[];
-extern const unsigned char ft_TRIAND[];
-extern const unsigned char ft_TRIOR[];
-
 #endif // __logic_H
diff --git a/vvp/resolv.cc b/vvp/resolv.cc
index e375b5c41..faa0abf41 100644
--- a/vvp/resolv.cc
+++ b/vvp/resolv.cc
@@ -93,3 +93,84 @@ void resolv_functor::recv_vec8(vvp_net_ptr_t port, vvp_vector8_t bit)
 
       vvp_send_vec8(ptr->out, out);
 }
+
+resolv_wired_logic::resolv_wired_logic()
+{
+}
+
+resolv_wired_logic::~resolv_wired_logic()
+{
+}
+
+void resolv_wired_logic::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit)
+{
+      unsigned pdx = port.port();
+      vvp_net_t*ptr = port.ptr();
+
+      if (val_[pdx].eeq(bit))
+	    return;
+
+      val_[pdx] = bit;
+
+      vvp_vector4_t out (bit);
+      for (unsigned idx = 0 ; idx < 4 ; idx += 1) {
+	    if (idx == pdx)
+		  continue;
+	    if (val_[idx].size() == 0)
+		  continue;
+
+	    out = wired_logic_math_(out, val_[idx]);
+      }
+
+      vvp_send_vec4(ptr->out, out);
+}
+
+vvp_vector4_t resolv_triand::wired_logic_math_(vvp_vector4_t&a, vvp_vector4_t&b)
+{
+      assert(a.size() == b.size());
+
+      vvp_vector4_t out (a.size());
+
+      for (unsigned idx = 0 ; idx < out.size() ; idx += 1) {
+	    vvp_bit4_t abit = a.value(idx);
+	    vvp_bit4_t bbit = b.value(idx);
+	    if (abit == BIT4_Z) {
+		  out.set_bit(idx, bbit);
+	    } else if (bbit == BIT4_Z) {
+		  out.set_bit(idx, abit);
+	    } else if (abit == BIT4_0 || bbit == BIT4_0) {
+		  out.set_bit(idx, BIT4_0);
+	    } else if (abit == BIT4_X || bbit == BIT4_X) {
+		  out.set_bit(idx, BIT4_X);
+	    } else {
+		  out.set_bit(idx, BIT4_1);
+	    }
+      }
+
+      return out;
+}
+
+vvp_vector4_t resolv_trior::wired_logic_math_(vvp_vector4_t&a, vvp_vector4_t&b)
+{
+      assert(a.size() == b.size());
+
+      vvp_vector4_t out (a.size());
+
+      for (unsigned idx = 0 ; idx < out.size() ; idx += 1) {
+	    vvp_bit4_t abit = a.value(idx);
+	    vvp_bit4_t bbit = b.value(idx);
+	    if (abit == BIT4_Z) {
+		  out.set_bit(idx, bbit);
+	    } else if (bbit == BIT4_Z) {
+		  out.set_bit(idx, abit);
+	    } else if (abit == BIT4_1 || bbit == BIT4_1) {
+		  out.set_bit(idx, BIT4_1);
+	    } else if (abit == BIT4_X || bbit == BIT4_X) {
+		  out.set_bit(idx, BIT4_X);
+	    } else {
+		  out.set_bit(idx, BIT4_0);
+	    }
+      }
+
+      return out;
+}
diff --git a/vvp/resolv.h b/vvp/resolv.h
index bd12605bf..2efca12e8 100644
--- a/vvp/resolv.h
+++ b/vvp/resolv.h
@@ -1,7 +1,7 @@
 #ifndef __resolv_H
 #define __resolv_H
 /*
- * Copyright (c) 2001 Stephen Williams (steve@icarus.com)
+ * Copyright (c) 2001-2008 Stephen Williams (steve@icarus.com)
  *
  *    This source code is free software; you can redistribute it
  *    and/or modify it in source code form under the terms of the GNU
@@ -18,9 +18,6 @@
  *    along with this program; if not, write to the Free Software
  *    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  */
-#ifdef HAVE_CVS_IDENT
-#ident "$Id: resolv.h,v 1.15 2005/06/22 18:30:12 steve Exp $"
-#endif
 
 # include  "config.h"
 # include  "vvp_net.h"
@@ -57,32 +54,39 @@ class resolv_functor : public vvp_net_fun_t {
       const char* debug_label_;
 };
 
-/*
- * $Log: resolv.h,v $
- * Revision 1.15  2005/06/22 18:30:12  steve
- *  Inline more simple stuff, and more vector4_t by const reference for performance.
- *
- * Revision 1.14  2005/06/22 00:04:49  steve
- *  Reduce vvp_vector4 copies by using const references.
- *
- * Revision 1.13  2005/03/12 04:27:43  steve
- *  Implement VPI access to signal strengths,
- *  Fix resolution of ambiguous drive pairs,
- *  Fix spelling of scalar.
- *
- * Revision 1.12  2005/01/09 20:11:16  steve
- *  Add the .part/pv node and related functionality.
- *
- * Revision 1.11  2005/01/01 02:12:34  steve
- *  vvp_fun_signal propagates vvp_vector8_t vectors when appropriate.
- *
- * Revision 1.10  2004/12/31 06:00:06  steve
- *  Implement .resolv functors, and stub signals recv_vec8 method.
- *
- * Revision 1.9  2004/12/11 02:31:30  steve
- *  Rework of internals to carry vectors through nexus instead
- *  of single bits. Make the ivl, tgt-vvp and vvp initial changes
- *  down this path.
- *
- */
+class resolv_wired_logic : public vvp_net_fun_t {
+
+    public:
+      explicit resolv_wired_logic(void);
+      ~resolv_wired_logic();
+
+      void recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit);
+
+    protected:
+      virtual vvp_vector4_t wired_logic_math_(vvp_vector4_t&a, vvp_vector4_t&b) =0;
+
+    private:
+      vvp_vector4_t val_[4];
+};
+
+class resolv_triand : public resolv_wired_logic {
+
+    public:
+      explicit resolv_triand(void) { }
+      ~resolv_triand() { }
+
+    private:
+      virtual vvp_vector4_t wired_logic_math_(vvp_vector4_t&a, vvp_vector4_t&b);
+};
+
+class resolv_trior : public resolv_wired_logic {
+
+    public:
+      explicit resolv_trior(void) { }
+      ~resolv_trior() { }
+
+    private:
+      virtual vvp_vector4_t wired_logic_math_(vvp_vector4_t&a, vvp_vector4_t&b);
+};
+
 #endif
diff --git a/vvp/vpi_priv.cc b/vvp/vpi_priv.cc
index 73b080bd2..2b834cb83 100644
--- a/vvp/vpi_priv.cc
+++ b/vvp/vpi_priv.cc
@@ -444,7 +444,20 @@ void vpip_vec4_get_value(const vvp_vector4_t&word_val, unsigned width,
 	    rbuf = need_result_buf(width+1, RBUF_VAL);
 	    for (unsigned idx = 0 ;  idx < width ;  idx += 1) {
 		  vvp_bit4_t bit = word_val.value(idx);
-		  rbuf[width-idx-1] = "01xz"[bit];
+		  switch (bit) {
+		      case BIT4_0:
+			rbuf[width-idx-1] = '0';
+			break;
+		      case BIT4_1:
+			rbuf[width-idx-1] = '1';
+			break;
+		      case BIT4_Z:
+			rbuf[width-idx-1] = 'z';
+			break;
+		      case BIT4_X:
+			rbuf[width-idx-1] = 'x';
+			break;
+		  }
 	    }
 	    rbuf[width] = 0;
 	    vp->value.str = rbuf;
diff --git a/vvp/vpi_signal.cc b/vvp/vpi_signal.cc
index c466550e3..b4e3f7fed 100644
--- a/vvp/vpi_signal.cc
+++ b/vvp/vpi_signal.cc
@@ -250,7 +250,20 @@ static char *signal_vpiDecStrVal(struct __vpiSignal*rfp, s_vpi_value*vp)
       unsigned char* bits = new unsigned char[wid];
 
       for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-	    bits[idx] = vsig->value(idx);
+	    switch (vsig->value(idx)) {
+		case BIT4_0:
+		  bits[idx] = 0;
+		  break;
+		case BIT4_1:
+		  bits[idx] = 1;
+		  break;
+		case BIT4_Z:
+		  bits[idx] = 3;
+		  break;
+		case BIT4_X:
+		  bits[idx] = 2;
+		  break;
+	    }
       }
 
       unsigned hwid = (wid+2) / 3 + 1;
@@ -445,7 +458,20 @@ static void signal_get_value(vpiHandle ref, s_vpi_value*vp)
 	    rbuf = need_result_buf(wid+1, RBUF_VAL);
 
 	    for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-		  rbuf[wid-idx-1] = "01xz"[vsig->value(idx)];
+		  switch (vsig->value(idx)) {
+		      case BIT4_0:
+			rbuf[wid-idx-1] = '0';
+			break;
+		      case BIT4_1:
+			rbuf[wid-idx-1] = '1';
+			break;
+		      case BIT4_Z:
+			rbuf[wid-idx-1] = 'z';
+			break;
+		      case BIT4_X:
+			rbuf[wid-idx-1] = 'x';
+			break;
+		  }
 	    }
 	    rbuf[wid] = 0;
 	    vp->value.str = rbuf;
@@ -470,7 +496,22 @@ static void signal_get_value(vpiHandle ref, s_vpi_value*vp)
 		rbuf[hwid] = 0;
 		hval = 0;
 		for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-		      hval = hval | (vsig->value(idx) << 2*(idx % 3));
+		      unsigned tmp = 0;
+		      switch (vsig->value(idx)) {
+			  case BIT4_0:
+			    tmp = 0;
+			    break;
+			  case BIT4_1:
+			    tmp = 1;
+			    break;
+			  case BIT4_Z:
+			    tmp = 3;
+			    break;
+			  case BIT4_X:
+			    tmp = 2;
+			    break;
+		      }
+		      hval = hval | (tmp << 2*(idx % 3));
 
 		      if (idx%3 == 2) {
 			    hwid -= 1;
diff --git a/vvp/vpi_vthr_vector.cc b/vvp/vpi_vthr_vector.cc
index d4feea42e..063dff62c 100644
--- a/vvp/vpi_vthr_vector.cc
+++ b/vvp/vpi_vthr_vector.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001 Stephen Williams (steve@icarus.com)
+ * Copyright (c) 2001-2008 Stephen Williams (steve@icarus.com)
  * Copyright (c) 2001 Stephan Boettcher <stephan@nevis.columbia.edu>
  *
  *    This source code is free software; you can redistribute it
@@ -17,9 +17,6 @@
  *    along with this program; if not, write to the Free Software
  *    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  */
-#ifdef HAVE_CVS_IDENT
-#ident "$Id: vpi_vthr_vector.cc,v 1.24 2007/02/19 01:45:56 steve Exp $"
-#endif
 
 /*
  * vpiReg handles are handled here. These objects represent vectors of
@@ -118,7 +115,20 @@ static void vthr_vec_DecStrVal(struct __vpiVThrVec*rfp, s_vpi_value*vp)
       char *rbuf = need_result_buf((rfp->wid+2)/3 + 1, RBUF_VAL);
 
       for (unsigned idx = 0 ;  idx < rfp->wid ;  idx += 1)
-	    bits[idx] = get_bit(rfp, idx);
+	    switch (get_bit(rfp, idx)) {
+		case BIT4_0:
+		  bits[idx] = 0;
+		  break;
+		case BIT4_1:
+		  bits[idx] = 1;
+		  break;
+		case BIT4_X:
+		  bits[idx] = 2;
+		  break;
+		case BIT4_Z:
+		  bits[idx] = 3;
+		  break;
+	    }
 
       vpip_bits_to_dec_str(bits, rfp->wid, rbuf, rfp->wid+1, rfp->signed_flag);
       vp->value.str = rbuf;
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index 4e6c0cd47..b823f618d 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -119,6 +119,9 @@ struct vthread_s {
       struct vthread_s*scope_next, *scope_prev;
 };
 
+// this table maps the thread special index bit addresses to
+// vvp_bit4_t bit values.
+static vvp_bit4_t thr_index_to_bit4[4] = { BIT4_0, BIT4_1, BIT4_X, BIT4_Z };
 
 static inline void thr_check_addr(struct vthread_s*thr, unsigned addr)
 {
@@ -203,7 +206,7 @@ static vvp_vector4_t vthread_bits_to_vector(struct vthread_s*thr,
 
       } else {
 	    vvp_vector4_t value(wid);
-	    vvp_bit4_t bit_val = (vvp_bit4_t)bit;
+	    vvp_bit4_t bit_val = thr_index_to_bit4[bit];
 	    for (unsigned idx = 0; idx < wid; idx +=1) {
 		  value.set_bit(idx, bit_val);
 	    }
@@ -2799,7 +2802,7 @@ bool of_MOD_WR(vthread_t thr, vvp_code_t cp)
 static bool of_MOV1XZ_(vthread_t thr, vvp_code_t cp)
 {
       thr_check_addr(thr, cp->bit_idx[0]+cp->number-1);
-      vvp_vector4_t tmp (cp->number, (vvp_bit4_t)cp->bit_idx[1]);
+      vvp_vector4_t tmp (cp->number, thr_index_to_bit4[cp->bit_idx[1]]);
       thr->bits4.set_vec(cp->bit_idx[0], tmp);
       return true;
 }
@@ -3528,9 +3531,8 @@ bool of_SET_VEC(vthread_t thr, vvp_code_t cp)
 
       } else {
 	      /* Make a vector of the desired width. */
-	    vvp_bit4_t bit_val = (vvp_bit4_t)bit;
+	    vvp_bit4_t bit_val = thr_index_to_bit4[bit];
 	    vvp_vector4_t value(wid, bit_val);
-
 	    vvp_send_vec4(ptr, value);
       }
 

From 6d2ef15951bb2368cd03f4943a51d0f21c26f27e Mon Sep 17 00:00:00 2001
From: Stephen Williams <steve@icarus.com>
Date: Sun, 20 Apr 2008 20:43:53 -0700
Subject: [PATCH 3/4] Remove dead code related to bit arrays/vectors

In previous incarnations of the vvp runtime, bit vectors were passed
around as arrays of unsigned char that carried bit4 vectors. That
is no longer used. Remove the last vestiges of that dead code.
---
 vvp/vpi_priv.h         |  11 +---
 vvp/vpi_signal.cc      |  30 +----------
 vvp/vpi_vthr_vector.cc |  24 +++------
 vvp/vpip_oct.cc        |  31 -----------
 vvp/vpip_to_dec.cc     | 118 +----------------------------------------
 5 files changed, 11 insertions(+), 203 deletions(-)

diff --git a/vvp/vpi_priv.h b/vvp/vpi_priv.h
index 232293bd0..307adb06b 100644
--- a/vvp/vpi_priv.h
+++ b/vvp/vpi_priv.h
@@ -503,14 +503,9 @@ extern const char* vpip_name_string(const char*str);
 
 /*
  * This function is used to make decimal string versions of various
- * vectors. The input format is an array of bit values (0, 1, 2, 3)
- * lsb first, and the result is written into buf, without overflowing
- * nbuf.
+ * vectors. The input format is a vvp_vector4_t, and the result is
+ * written into buf, without overflowing nbuf.
  */
-extern unsigned vpip_bits_to_dec_str(const unsigned char *bits,
-				     unsigned int nbits,
-				     char *buf, unsigned int nbuf,
-				     int signed_flag);
 extern unsigned vpip_vec4_to_dec_str(const vvp_vector4_t&vec4,
 				     char *buf, unsigned int nbuf,
 				     int signed_flag);
@@ -521,8 +516,6 @@ extern void vpip_bin_str_to_vec4(vvp_vector4_t&val,
 extern void vpip_vec4_to_hex_str(const vvp_vector4_t&bits, char*buf,
 				 unsigned nbuf, bool signed_flag);
 
-extern void vpip_bits_to_oct_str(const unsigned char*bits, unsigned nbits,
-				 char*buf, unsigned nbuf, bool signed_flag);
 extern void vpip_vec4_to_oct_str(const vvp_vector4_t&bits, char*buf,
 				 unsigned nbuf, bool signed_flag);
 
diff --git a/vvp/vpi_signal.cc b/vvp/vpi_signal.cc
index b4e3f7fed..c6a845b0f 100644
--- a/vvp/vpi_signal.cc
+++ b/vvp/vpi_signal.cc
@@ -239,39 +239,13 @@ static vpiHandle signal_iterate(int code, vpiHandle ref)
 
 static char *signal_vpiDecStrVal(struct __vpiSignal*rfp, s_vpi_value*vp)
 {
-      unsigned wid = (rfp->msb >= rfp->lsb)
-	    ? (rfp->msb - rfp->lsb + 1)
-	    : (rfp->lsb - rfp->msb + 1);
-
       vvp_fun_signal_vec*vsig = dynamic_cast<vvp_fun_signal_vec*>(rfp->node->fun);
       assert(vsig);
 
-	/* FIXME: bits should be an array of vvp_bit4_t. */
-      unsigned char* bits = new unsigned char[wid];
-
-      for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
-	    switch (vsig->value(idx)) {
-		case BIT4_0:
-		  bits[idx] = 0;
-		  break;
-		case BIT4_1:
-		  bits[idx] = 1;
-		  break;
-		case BIT4_Z:
-		  bits[idx] = 3;
-		  break;
-		case BIT4_X:
-		  bits[idx] = 2;
-		  break;
-	    }
-      }
-
-      unsigned hwid = (wid+2) / 3 + 1;
+      unsigned hwid = (vsig->size()+2) / 3 + 1;
       char *rbuf = need_result_buf(hwid, RBUF_VAL);
 
-      vpip_bits_to_dec_str(bits, wid, rbuf, hwid, rfp->signed_flag);
-
-      delete[]bits;
+      vpip_vec4_to_dec_str(vsig->vec4_value(), rbuf, hwid, rfp->signed_flag);
 
       return rbuf;
 }
diff --git a/vvp/vpi_vthr_vector.cc b/vvp/vpi_vthr_vector.cc
index 063dff62c..025b474d1 100644
--- a/vvp/vpi_vthr_vector.cc
+++ b/vvp/vpi_vthr_vector.cc
@@ -41,7 +41,7 @@ struct __vpiVThrVec {
 };
 
 inline static
-unsigned get_bit(struct __vpiVThrVec *rfp, unsigned idx)
+vvp_bit4_t get_bit(struct __vpiVThrVec *rfp, unsigned idx)
 {
       return vthread_get_bit(vpip_current_vthread, rfp->bas+idx);
 }
@@ -111,26 +111,14 @@ static char* vthr_vec_get_str(int code, vpiHandle ref)
 
 static void vthr_vec_DecStrVal(struct __vpiVThrVec*rfp, s_vpi_value*vp)
 {
-      unsigned char*bits = new unsigned char[rfp->wid];
-      char *rbuf = need_result_buf((rfp->wid+2)/3 + 1, RBUF_VAL);
+      int nbuf = (rfp->wid+2)/3 + 1;
+      char *rbuf = need_result_buf(nbuf, RBUF_VAL);
 
+      vvp_vector4_t tmp (rfp->wid);
       for (unsigned idx = 0 ;  idx < rfp->wid ;  idx += 1)
-	    switch (get_bit(rfp, idx)) {
-		case BIT4_0:
-		  bits[idx] = 0;
-		  break;
-		case BIT4_1:
-		  bits[idx] = 1;
-		  break;
-		case BIT4_X:
-		  bits[idx] = 2;
-		  break;
-		case BIT4_Z:
-		  bits[idx] = 3;
-		  break;
-	    }
+	    tmp.set_bit(idx, get_bit(rfp, idx));
 
-      vpip_bits_to_dec_str(bits, rfp->wid, rbuf, rfp->wid+1, rfp->signed_flag);
+      vpip_vec4_to_dec_str(tmp, rbuf, nbuf, rfp->signed_flag);
       vp->value.str = rbuf;
 
       return;
diff --git a/vvp/vpip_oct.cc b/vvp/vpip_oct.cc
index 9e0a77bcf..e8a1ac057 100644
--- a/vvp/vpip_oct.cc
+++ b/vvp/vpip_oct.cc
@@ -88,37 +88,6 @@ void vpip_oct_str_to_vec4(vvp_vector4_t&val, const char*str)
       }
 }
 
-
-void vpip_bits_to_oct_str(const unsigned char*bits, unsigned nbits,
-			  char*buf, unsigned nbuf, bool signed_flag)
-{
-      unsigned slen = (nbits + 2) / 3;
-      unsigned val = 0;
-      assert(slen < nbuf);
-
-      buf[slen] = 0;
-
-      for (unsigned idx = 0 ;  idx < nbits ;  idx += 1) {
-	    unsigned bi = idx/4;
-	    unsigned bs = (idx%4) * 2;
-	    unsigned bit = (bits[bi] >> bs) & 3;
-
-	    unsigned vs = (idx%3) * 2;
-	    val |= bit << vs;
-
-	    if (vs == 4) {
-		  slen -= 1;
-		  buf[slen] = oct_digits[val];
-		  val = 0;
-	    }
-      }
-
-      if (slen > 0) {
-	    slen -= 1;
-	    buf[slen] = oct_digits[val];
-      }
-}
-
 void vpip_vec4_to_oct_str(const vvp_vector4_t&bits, char*buf, unsigned nbuf,
 			  bool signed_flag)
 {
diff --git a/vvp/vpip_to_dec.cc b/vvp/vpip_to_dec.cc
index 027a46202..782c31c0c 100644
--- a/vvp/vpip_to_dec.cc
+++ b/vvp/vpip_to_dec.cc
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2008 Stephen Williams <steve@icarus.com>
  * Copyright (c) 2002 Larry Doolittle (larry@doolittle.boa.org)
  *
  *    This source code is free software; you can redistribute it
@@ -16,9 +17,6 @@
  *    along with this program; if not, write to the Free Software
  *    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  */
-#ifdef HAVE_CVS_IDENT
-#ident "$Id: vpip_to_dec.cc,v 1.8 2006/02/21 02:39:27 steve Exp $"
-#endif
 
 # include  "config.h"
 # include  "vpi_priv.h"
@@ -222,100 +220,6 @@ unsigned vpip_vec4_to_dec_str(const vvp_vector4_t&vec4,
 	return 0;
 }
 
-unsigned vpip_bits_to_dec_str(const unsigned char *bits, unsigned int nbits,
-			      char *buf, unsigned int nbuf, int signed_flag)
-{
-	unsigned int idx, len, vlen;
-	unsigned int mbits=nbits;   /* number of non-sign bits */
-	unsigned count_x = 0, count_z = 0;
-	/* Jump through some hoops so we don't have to malloc/free valv
-	 * on every call, and implement an optional malloc-less version. */
-	static unsigned long *valv=NULL;
-	static unsigned int vlen_alloc=0;
-
-	unsigned long val=0;
-	int comp=0;
-	if (signed_flag) {
-		     if (B_ISZ(bits[nbits-1])) count_z++;
-		else if (B_ISX(bits[nbits-1])) count_x++;
-		else if (B_IS1(bits[nbits-1])) comp=1;
-		--mbits;
-	}
-	assert(mbits<(UINT_MAX-92)/28);
-	vlen = ((mbits*28+92)/93+BDIGITS-1)/BDIGITS;
-	/* printf("vlen=%d\n",vlen); */
-
-#define ALLOC_MARGIN 4
-	if (!valv || vlen > vlen_alloc) {
-		if (valv) free(valv);
-		valv = (unsigned long*)
-		      calloc( vlen+ALLOC_MARGIN, sizeof (*valv));
-		if (!valv) {perror("malloc"); return 0; }
-		vlen_alloc=vlen+ALLOC_MARGIN;
-	} else {
-		memset(valv,0,vlen*sizeof(valv[0]));
-	}
-
-	for (idx = 0; idx < mbits; idx += 1) {
-		/* printf("%c ",bits[mbits-idx-1]); */
-		     if (B_ISZ(bits[mbits-idx-1])) count_z++;
-		else if (B_ISX(bits[mbits-idx-1])) count_x++;
-		else if (!comp && B_IS1(bits[mbits-idx-1])) ++val;
-		else if ( comp && B_IS0(bits[mbits-idx-1])) ++val;
-		if ((mbits-idx-1)%BBITS==0) {
-  			/* make negative 2's complement, not 1's complement */
-			if (comp && idx==mbits-1) ++val;
-			shift_in(valv,vlen,val);
-			val=0;
-		} else {
-			val=val+val;
-		}
-	}
-
-	if (count_x == nbits) {
-		len = 1;
-		buf[0] = 'x';
-		buf[1] = 0;
-	} else if (count_x > 0) {
-		len = 1;
-		buf[0] = 'X';
-		buf[1] = 0;
-	} else if (count_z == nbits) {
-		len = 1;
-		buf[0] = 'z';
-		buf[1] = 0;
-	} else if (count_z > 0) {
-		len = 1;
-		buf[0] = 'Z';
-		buf[1] = 0;
-	} else {
-		int i;
-		int zero_suppress=1;
-		if (comp) {
-			*buf++='-';
-			nbuf--;
-			/* printf("-"); */
-		}
-		for (i=vlen-1; i>=0; i--) {
-			zero_suppress = write_digits(valv[i],
-				&buf,&nbuf,zero_suppress);
-			/* printf(",%.4u",valv[i]); */
-		}
-		/* Awkward special case, since we don't want to
-		 * zero suppress down to nothing at all.  The only
-		 * way we can still have zero_suppress on in the
-		 * comp=1 case is if mbits==0, and therefore vlen==0.
-		 * We represent 1'sb1 as "-1". */
-		if (zero_suppress) *buf++='0'+comp;
-		/* printf("\n"); */
-		*buf='\0';
-	}
-	/* hold on to the memory, since we expect to be called again. */
-	/* free(valv); */
-	return 0;
-}
-
-
 void vpip_dec_str_to_vec4(vvp_vector4_t&vec,
 			  const char*buf, bool signed_flag)
 {
@@ -364,23 +268,3 @@ void vpip_dec_str_to_vec4(vvp_vector4_t&vec,
 
       delete[]str;
 }
-
-
-/*
- * $Log: vpip_to_dec.cc,v $
- * Revision 1.8  2006/02/21 02:39:27  steve
- *  Support string values for memory words.
- *
- * Revision 1.7  2004/10/04 01:11:00  steve
- *  Clean up spurious trailing white space.
- *
- * Revision 1.6  2002/08/12 01:35:09  steve
- *  conditional ident string using autoconfig.
- *
- * Revision 1.5  2002/05/17 04:05:38  steve
- *  null terminate the reversed decimal string
- *
- * Revision 1.4  2002/05/11 04:39:36  steve
- *  Set and get memory words by string value.
- *
- */

From 935c3dc02ded3ef67dbd5934efa4a35f22367d35 Mon Sep 17 00:00:00 2001
From: Stephen Williams <steve@icarus.com>
Date: Sun, 20 Apr 2008 21:36:53 -0700
Subject: [PATCH 4/4] Optimize some common vvp_bit4_t operators.

By slightly altering the vvp_bit4_t encoding, a few simple
optimizations become possible. By making Z==2 and X==3, the
conversion from X/Z to X is a simple shift-or, and this can
be used to reduce the size of some of the bit4 operators.
---
 vvp/vvp_net.cc | 19 ++-----------------
 vvp/vvp_net.h  | 32 +++++++++++++++++++++++---------
 2 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc
index f9e6498fe..4e6159854 100644
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@@ -44,6 +44,7 @@ vvp_bit4_t add_with_carry(vvp_bit4_t a, vvp_bit4_t b, vvp_bit4_t&c)
 
       switch (sum) {
 	  case 0:
+	      // c must already be 0.
 	    return BIT4_0;
 	  case 1:
 	    c = BIT4_0;
@@ -79,11 +80,7 @@ vvp_bit4_t operator | (vvp_bit4_t a, vvp_bit4_t b)
 	    return BIT4_1;
       if (b == BIT4_1)
 	    return BIT4_1;
-      if (bit4_is_xz(a))
-	    return BIT4_X;
-      if (bit4_is_xz(b))
-	    return BIT4_X;
-      return BIT4_0;
+      return bit4_z2x( (vvp_bit4_t) ((int)a | (int)b) );
 }
 
 vvp_bit4_t operator ^ (vvp_bit4_t a, vvp_bit4_t b)
@@ -99,18 +96,6 @@ vvp_bit4_t operator ^ (vvp_bit4_t a, vvp_bit4_t b)
       return BIT4_0;
 }
 
-vvp_bit4_t operator ~ (vvp_bit4_t a)
-{
-      switch (a) {
-	  case BIT4_0:
-	    return BIT4_1;
-	  case BIT4_1:
-	    return BIT4_0;
-	  default:
-	    return  BIT4_X;
-      }
-}
-
 ostream& operator<<(ostream&out, vvp_bit4_t bit)
 {
       switch (bit) {
diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h
index c4c98d5dd..70ac50edd 100644
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@@ -57,8 +57,8 @@ class  vvp_delay_t;
 enum vvp_bit4_t {
       BIT4_0 = 0,
       BIT4_1 = 1,
-      BIT4_X = 2,
-      BIT4_Z = 3
+      BIT4_X = 3,
+      BIT4_Z = 2
 };
 
 extern vvp_bit4_t add_with_carry(vvp_bit4_t a, vvp_bit4_t b, vvp_bit4_t&c);
@@ -67,11 +67,25 @@ extern vvp_bit4_t add_with_carry(vvp_bit4_t a, vvp_bit4_t b, vvp_bit4_t&c);
      implementation here relies on the encoding of vvp_bit4_t values. */
 inline bool bit4_is_xz(vvp_bit4_t a) { return a >= 2; }
 
+  /* This function converts BIT4_Z to BIT4_X, but passes other values
+     unchanged. This fast implementation relies on the encoding of the
+     vvp_bit4_t values; in particular, BIT4_X==3 and BIT4_Z==2. */
+inline vvp_bit4_t bit4_z2x(vvp_bit4_t a)
+{ return (vvp_bit4_t) ( (int)a | ((int)a >> 1) ); }
+
   /* Some common boolean operators. These implement the Verilog rules
-     for 4-value bit operations. */
-extern vvp_bit4_t operator ~ (vvp_bit4_t a);
-extern vvp_bit4_t operator & (vvp_bit4_t a, vvp_bit4_t b);
+     for 4-value bit operations. The fast implementations here rely
+     on the encoding of vvp_bit4_t values. */
+
+  // ~BIT4_0 --> BIT4_1
+  // ~BIT4_1 --> BIT4_0
+  // ~BIT4_X --> BIT4_X
+  // ~BIT4_Z --> BIT4_X
+inline vvp_bit4_t operator ~ (vvp_bit4_t a)
+{ return bit4_z2x((vvp_bit4_t) (((int)a) ^ 1)); }
+
 extern vvp_bit4_t operator | (vvp_bit4_t a, vvp_bit4_t b);
+extern vvp_bit4_t operator & (vvp_bit4_t a, vvp_bit4_t b);
 extern vvp_bit4_t operator ^ (vvp_bit4_t a, vvp_bit4_t b);
 extern ostream& operator<< (ostream&o, vvp_bit4_t a);
 
@@ -195,13 +209,13 @@ inline vvp_vector4_t::vvp_vector4_t(unsigned size, vvp_bit4_t val)
       const static unsigned long init_atable[4] = {
 	    WORD_0_ABITS,
 	    WORD_1_ABITS,
-	    WORD_X_ABITS,
-	    WORD_Z_ABITS };
+	    WORD_Z_ABITS,
+	    WORD_X_ABITS };
       const static unsigned long init_btable[4] = {
 	    WORD_0_BBITS,
 	    WORD_1_BBITS,
-	    WORD_X_BBITS,
-	    WORD_Z_BBITS };
+	    WORD_Z_BBITS,
+	    WORD_X_BBITS };
 
       allocate_words_(size, init_atable[val], init_btable[val]);
 }