Optimize handling of invert gates / Tweak vvp_vector8_t performance

The process of inverting and copying a vector can be collapsed into a
single operation that should run a little faster, and the word-at-a-time
invert is easy for the compiler to vectorize. This should also cut out
some useless vvp_fun_not evaluations, since input events are now
filtered with eq_xz instead of eeq.

Also, include some minor tweaks to the vvp_vector8_t handling of
vector copies. This is mostly to clean up code, although it should
slightly improve performance as well.
Stephen Williams 2010-01-11 11:42:25 -08:00
parent 85e0f8a328
commit d0b1936fb5
4 changed files with 138 additions and 59 deletions
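
As background for the diffs below, here is a minimal standalone sketch
(not the vvp source) of the word-level identity that lets the invert
fold into the copy. It assumes the vvp 4-state encoding where each bit
is an (a,b) pair: 0=(0,0), 1=(1,0), z=(0,1), x=(1,1); the exact bit
assignments are an assumption of the sketch, but they match the
abits' = bbits | ~abits, bbits' = bbits computation in the new
copy_inverted_from_ method.

    // Standalone sketch (not the vvp source) of the word-level identity
    // behind copy_inverted_from_.  Encoding assumed: each bit is an (a,b)
    // pair with 0=(0,0), 1=(1,0), z=(0,1), x=(1,1).
    #include <cassert>

    int main()
    {
          // Pack the four values {0, 1, z, x} into bit positions 0..3.
          unsigned long a = 0xAUL;     // a-bits: 0->0, 1->1, z->0, x->1
          unsigned long b = 0xCUL;     // b-bits: 0->0, 1->0, z->1, x->1
          unsigned long mask = 0xFUL;  // only four bit positions in use

          // Verilog ~ maps 0->1, 1->0, z->x, x->x.  At word granularity
          // that is one OR and one NOT per word -- no per-bit loop.
          unsigned long a_inv = mask & (b | ~a);
          unsigned long b_inv = b;

          assert(a_inv == 0xDUL);      // ~{0,1,z,x} = {1,0,x,x}: a-bits 1101
          assert(b_inv == 0xCUL);      //                         b-bits 1100
          return 0;
    }

Because the transform is plain bitwise logic on unsigned long words, the
per-word loop in copy_inverted_from_ is straightforward for a compiler
to auto-vectorize, which is what the commit message alludes to.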


@@ -399,7 +399,7 @@ void vvp_fun_not::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit,
       if (ptr.port() != 0)
             return;
 
-      if (input_ .eeq( bit ))
+      if (input_ .eq_xz( bit ))
             return;
 
       input_ = bit;
@@ -414,13 +414,7 @@ void vvp_fun_not::run_run()
       vvp_net_t*ptr = net_;
       net_ = 0;
 
-      vvp_vector4_t result (input_);
-      for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
-            vvp_bit4_t bitbit = ~ result.value(idx);
-            result.set_bit(idx, bitbit);
-      }
+      vvp_vector4_t result (input_, true /* invert */);
 
       ptr->send_vec4(result, 0);
 }
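
The eeq-to-eq_xz switch above is safe for a NOT gate because Verilog ~
maps both unknown states to x: an input event that only flips a bit
between x and z cannot change the output, so it can be dropped before a
new evaluation is scheduled. A small sketch of that reasoning (the enum
and helper are hypothetical, not vvp code):

    // Sketch only: under Verilog ~, both unknown states come out x, so
    // an x<->z change on a NOT gate input cannot change its output.
    #include <cassert>

    enum bit4 { B0, B1, BZ, BX };

    static bit4 not4(bit4 v)
    {
          switch (v) {
              case B0: return B1;
              case B1: return B0;
              default: return BX;   // ~x and ~z are both x
          }
    }

    int main()
    {
          assert(not4(BX) == not4(BZ));   // outputs identical...
          assert(not4(B0) != not4(B1));   // ...while 0/1 changes still matter
          return 0;
    }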


@@ -173,7 +173,12 @@ vvp_island_port::~vvp_island_port()
 void vvp_island_port::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
                                 vvp_context_t)
 {
-      recv_vec8(port, vvp_vector8_t(bit, 6, 6));
+      vvp_vector8_t tmp (bit, 6, 6);
+      if (invalue .eeq(tmp))
+            return;
+
+      invalue = tmp;
+      island_->flag_island();
 }
 
 void vvp_island_port::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
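
The island port now latches the strength-extended input in invalue and
only calls island_->flag_island() when the value actually changes,
instead of unconditionally forwarding through recv_vec8. A minimal
sketch of this latch-and-compare pattern (types simplified; names
loosely follow the diff):

    #include <cstring>

    // Stand-in for vvp_vector8_t: just enough to compare for equality.
    struct value8 {
          unsigned char bytes[8];
          bool eeq(const value8&that) const
          { return memcmp(bytes, that.bytes, sizeof bytes) == 0; }
    };

    struct island_port {
          value8 invalue;     // last resolved value seen on this port
          bool flagged;

          island_port() : invalue(), flagged(false) { }

          void recv(const value8&tmp)
          {
                if (invalue.eeq(tmp))
                      return;       // unchanged: nothing to re-evaluate
                invalue = tmp;
                flagged = true;     // stands in for island_->flag_island()
          }
    };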


@@ -583,6 +583,41 @@ void vvp_vector4_t::copy_from_(const vvp_vector4_t&that)
       }
 }
 
+/*
+ * The copy_inverted_from_ method is just like the copy_from_ method,
+ * except that we combine that with an invert. This allows the ~ and
+ * the assignment to be blended in many common cases.
+ */
+void vvp_vector4_t::copy_inverted_from_(const vvp_vector4_t&that)
+{
+      size_ = that.size_;
+      if (size_ > BITS_PER_WORD) {
+            unsigned words = (size_+BITS_PER_WORD-1) / BITS_PER_WORD;
+            abits_ptr_ = new unsigned long[2*words];
+            bbits_ptr_ = abits_ptr_ + words;
+
+            unsigned remaining = size_;
+            unsigned idx = 0;
+            while (remaining >= BITS_PER_WORD) {
+                  abits_ptr_[idx] = that.bbits_ptr_[idx] | ~that.abits_ptr_[idx];
+                  idx += 1;
+                  remaining -= BITS_PER_WORD;
+            }
+            if (remaining > 0) {
+                  unsigned long mask = (1UL<<remaining) - 1UL;
+                  abits_ptr_[idx] = mask & (that.bbits_ptr_[idx] | ~that.abits_ptr_[idx]);
+            }
+
+            for (unsigned idx = 0 ; idx < words ; idx += 1)
+                  bbits_ptr_[idx] = that.bbits_ptr_[idx];
+
+      } else {
+            unsigned long mask = (size_<BITS_PER_WORD)? (1UL<<size_)-1UL : -1UL;
+            abits_val_ = mask & (that.bbits_val_ | ~that.abits_val_);
+            bbits_val_ = that.bbits_val_;
+      }
+}
+
 void vvp_vector4_t::allocate_words_(unsigned wid, unsigned long inita, unsigned long initb)
 {
       if (size_ > BITS_PER_WORD) {
@@ -1238,6 +1273,40 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
       return true;
 }
 
+bool vvp_vector4_t::eq_xz(const vvp_vector4_t&that) const
+{
+      if (size_ != that.size_)
+            return false;
+
+      if (size_ < BITS_PER_WORD) {
+            unsigned long mask = (1UL << size_) - 1;
+            return ((abits_val_|bbits_val_)&mask) == ((that.abits_val_|that.bbits_val_)&mask)
+                   && (bbits_val_&mask) == (that.bbits_val_&mask);
+      }
+
+      if (size_ == BITS_PER_WORD) {
+            return ((abits_val_|bbits_val_) == (that.abits_val_|that.bbits_val_))
+                   && (bbits_val_ == that.bbits_val_);
+      }
+
+      unsigned words = size_ / BITS_PER_WORD;
+      for (unsigned idx = 0 ; idx < words ; idx += 1) {
+            if ((abits_ptr_[idx]|bbits_ptr_[idx]) != (that.abits_ptr_[idx]|that.bbits_ptr_[idx]))
+                  return false;
+            if (bbits_ptr_[idx] != that.bbits_ptr_[idx])
+                  return false;
+      }
+
+      unsigned long mask = size_%BITS_PER_WORD;
+      if (mask > 0) {
+            mask = (1UL << mask) - 1;
+            return ((abits_ptr_[words]|bbits_ptr_[words])&mask) == ((that.abits_ptr_[words]|that.bbits_ptr_[words])&mask)
+                   && (bbits_ptr_[words]&mask) == (that.bbits_ptr_[words]&mask);
+      }
+
+      return true;
+}
+
 bool vvp_vector4_t::has_xz() const
 {
       if (size_ < BITS_PER_WORD) {
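
The per-word test in eq_xz above collapses x and z because both carry a
set b-bit in the assumed (a,b) encoding, so OR-ing the a-word into the
b-word erases the only difference between them, while 0 and 1 remain
distinct. A self-contained check of that claim (helper name
hypothetical):

    #include <cassert>

    // One (a,b)-encoded bit per argument pair; applies the per-word
    // test eq_xz uses: (a1|b1) == (a2|b2) and b1 == b2.
    static bool eq_xz_word(unsigned long a1, unsigned long b1,
                           unsigned long a2, unsigned long b2)
    {
          return ((a1|b1) == (a2|b2)) && (b1 == b2);
    }

    int main()
    {
          // Encoding assumed: 0=(0,0), 1=(1,0), z=(0,1), x=(1,1).
          assert( eq_xz_word(1,1, 0,1));   // x vs z: equal under eq_xz
          assert(!eq_xz_word(1,0, 1,1));   // 1 vs x: still different
          assert(!eq_xz_word(0,0, 0,1));   // 0 vs z: still different
          assert( eq_xz_word(1,0, 1,0));   // 1 vs 1: equal, as expected
          return 0;
    }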
@@ -2557,12 +2626,11 @@ ostream& operator<< (ostream&out, const vvp_vector2_t&that)
 
 vvp_vector8_t::vvp_vector8_t(const vvp_vector8_t&that)
 {
       size_ = that.size_;
-      if (size_ <= PTR_THRESH) {
-            memcpy(val_, that.val_, sizeof(val_));
+      if (size_ <= sizeof val_) {
+            ptr_ = that.ptr_;
       } else {
-            ptr_ = new vvp_scalar_t[size_];
-            for (unsigned idx = 0 ; idx < size_ ; idx += 1)
-                  ptr_[idx] = that.ptr_[idx];
+            ptr_ = new unsigned char[size_];
+            memcpy(ptr_, that.ptr_, size_);
       }
 }
@@ -2573,15 +2641,14 @@ vvp_vector8_t::vvp_vector8_t(const vvp_vector4_t&that,
       if (size_ == 0)
             return;
 
-      vvp_scalar_t*tmp;
-      if (size_ <= PTR_THRESH)
-            tmp = new (val_) vvp_scalar_t[PTR_THRESH];
-      else
-            tmp = ptr_ = new vvp_scalar_t[size_];
-      for (unsigned idx = 0 ; idx < size_ ; idx += 1)
-            tmp[idx] = vvp_scalar_t (that.value(idx), str0, str1);
+      if (size_ <= sizeof val_) {
+            for (unsigned idx = 0 ; idx < size_ ; idx += 1)
+                  val_[idx] = vvp_scalar_t(that.value(idx), str0, str1).raw();
+      } else {
+            ptr_ = new unsigned char[size_];
+            for (unsigned idx = 0 ; idx < size_ ; idx += 1)
+                  ptr_[idx] = vvp_scalar_t(that.value(idx), str0, str1).raw();
+      }
 }
 
 const vvp_vector8_t vvp_vector8_t::nil;
@@ -2589,12 +2656,11 @@ const vvp_vector8_t vvp_vector8_t::nil;
 
 vvp_vector8_t& vvp_vector8_t::operator= (const vvp_vector8_t&that)
 {
         // Assign to self.
-      if (this == &that || (size_ > PTR_THRESH && that.size_ > PTR_THRESH &&
-                            ptr_ == that.ptr_))
+      if (this == &that)
             return *this;
 
       if (size_ != that.size_) {
-            if (size_ > PTR_THRESH)
+            if (size_ > sizeof val_)
                   delete[]ptr_;
             size_ = 0;
       }
@@ -2604,7 +2670,7 @@ vvp_vector8_t& vvp_vector8_t::operator= (const vvp_vector8_t&that)
             return *this;
       }
 
-      if (that.size_ <= PTR_THRESH) {
+      if (that.size_ <= sizeof val_) {
             size_ = that.size_;
             memcpy(val_, that.val_, sizeof(val_));
             return *this;
@@ -2612,11 +2678,10 @@ vvp_vector8_t& vvp_vector8_t::operator= (const vvp_vector8_t&that)
 
       if (size_ == 0) {
             size_ = that.size_;
-            ptr_ = new vvp_scalar_t[size_];
+            ptr_ = new unsigned char[size_];
       }
 
-      for (unsigned idx = 0 ; idx < size_ ; idx += 1)
-            ptr_[idx] = that.ptr_[idx];
+      memcpy(ptr_, that.ptr_, size_);
 
       return *this;
 }
@@ -2625,12 +2690,12 @@ vvp_vector8_t vvp_vector8_t::subvalue(unsigned base, unsigned wid) const
 {
       vvp_vector8_t tmp (wid);
-      vvp_scalar_t* tmp_ptr = tmp.size_<=PTR_THRESH? reinterpret_cast<vvp_scalar_t*>(tmp.val_) : tmp.ptr_;
-      const vvp_scalar_t* ptr = size_<=PTR_THRESH? reinterpret_cast<const vvp_scalar_t*>(val_) : ptr_;
+      unsigned char*tmp_ptr = tmp.size_ <= sizeof val_? tmp.val_ : tmp.ptr_;
+      const unsigned char*use_ptr = size_ <= sizeof val_? val_ : ptr_;
 
       unsigned idx = 0;
       while ((idx < wid) && (base+idx < size_)) {
-            tmp_ptr[idx] = ptr[base+idx];
+            tmp_ptr[idx] = use_ptr[base+idx];
             idx += 1;
       }
@@ -2649,8 +2714,8 @@ vvp_vector8_t part_expand(const vvp_vector8_t&that, unsigned wid, unsigned off)
       assert(off < wid);
 
       vvp_vector8_t tmp (wid);
-      vvp_scalar_t* tmp_ptr = tmp.size_<=vvp_vector8_t::PTR_THRESH? reinterpret_cast<vvp_scalar_t*>(tmp.val_) : tmp.ptr_;
-      const vvp_scalar_t* that_ptr = that.size_<=vvp_vector8_t::PTR_THRESH? reinterpret_cast<const vvp_scalar_t*>(that.val_) : that.ptr_;
+      unsigned char* tmp_ptr = tmp.size_<= sizeof tmp.val_? tmp.val_ : tmp.ptr_;
+      const unsigned char* that_ptr = that.size_<= sizeof that.val_? that.val_ : that.ptr_;
 
       unsigned idx = off;
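
Taken together, the vvp_vector8_t hunks above replace an array of
vvp_scalar_t objects with a plain byte array: each vvp_scalar_t is a
single encoded unsigned char, so the bytes can live inline in a
pointer-sized buffer for small vectors, or on the heap for large ones,
overlaid in a union. Copies then become a memcpy, or a single
pointer-sized assignment in the inline case. A standalone sketch of the
layout with hypothetical names (assignment operator omitted for
brevity):

    #include <cstring>

    class byte_vec {
        public:
          explicit byte_vec(unsigned size) : size_(size)
          {
                if (size_ <= sizeof val_)
                      ptr_ = 0;    // writing the pointer zeroes val_ too
                else {
                      ptr_ = new unsigned char[size_];
                      memset(ptr_, 0, size_);
                }
          }

          byte_vec(const byte_vec&that) : size_(that.size_)
          {
                if (size_ <= sizeof val_)
                      ptr_ = that.ptr_;   // one word copies all inline bytes
                else {
                      ptr_ = new unsigned char[size_];
                      memcpy(ptr_, that.ptr_, size_);
                }
          }

          ~byte_vec()
          {
                if (size_ > sizeof val_)
                      delete[] ptr_;
          }

        private:
          unsigned size_;
          union {
                unsigned char*ptr_;                // heap buffer, large vectors
                unsigned char val_[sizeof(void*)]; // inline bytes, small ones
          };
    };

The size_ <= sizeof val_ test replaces the old PTR_THRESH constant, so
the inline capacity is exactly the space the pointer already occupies
and the union wastes nothing.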


@@ -211,6 +211,7 @@ class vvp_vector4_t {
                     unsigned adr, unsigned wid);
       vvp_vector4_t(const vvp_vector4_t&that);
+      vvp_vector4_t(const vvp_vector4_t&that, bool invert_flag);
       vvp_vector4_t& operator= (const vvp_vector4_t&that);
 
       ~vvp_vector4_t();
@@ -240,6 +241,9 @@ class vvp_vector4_t {
         // Test that the vectors are exactly equal
       bool eeq(const vvp_vector4_t&that) const;
 
+        // Test that the vectors are equal, with xz comparing as equal.
+      bool eq_xz(const vvp_vector4_t&that) const;
+
         // Return true if there is an X or Z anywhere in the vector.
       bool has_xz() const;
@@ -283,6 +287,7 @@ class vvp_vector4_t {
         // Initialize and operator= use this private method to copy
         // the data from that object into this object.
       void copy_from_(const vvp_vector4_t&that);
+      void copy_inverted_from_(const vvp_vector4_t&that);
 
       void allocate_words_(unsigned size, unsigned long inita, unsigned long initb);
@@ -313,6 +318,14 @@ inline vvp_vector4_t::vvp_vector4_t(const vvp_vector4_t&that)
       copy_from_(that);
 }
 
+inline vvp_vector4_t::vvp_vector4_t(const vvp_vector4_t&that, bool invert_flag)
+{
+      if (invert_flag)
+            copy_inverted_from_(that);
+      else
+            copy_from_(that);
+}
+
 inline vvp_vector4_t::vvp_vector4_t(unsigned size__, vvp_bit4_t val)
 : size_(size__)
 {
@@ -442,8 +455,7 @@ inline void vvp_vector4_t::set_bit(unsigned idx, vvp_bit4_t val)
 
 inline vvp_vector4_t operator ~ (const vvp_vector4_t&that)
 {
-      vvp_vector4_t res = that;
-      res.invert();
+      vvp_vector4_t res (that, true);
       return res;
 }
@@ -700,6 +712,14 @@ class vvp_scalar_t {
       bool eeq(vvp_scalar_t that) const { return value_ == that.value_; }
       bool is_hiz() const { return value_ == 0; }
 
+    private:
+        // This class and the vvp_vector8_t class are closely related,
+        // so allow vvp_vector8_t access to the raw encoding so that
+        // it can do compact vectoring of vvp_scalar_t objects.
+      friend class vvp_vector8_t;
+      explicit vvp_scalar_t(unsigned char raw) : value_(raw) { }
+      unsigned char raw() const { return value_; }
+
     private:
       unsigned char value_;
 };
@@ -813,11 +833,10 @@ class vvp_vector8_t {
 
         // This is the number of vvp_scalar_t objects we can keep in
         // the val_ buffer. If the vector8 is bigger then this, then
         // resort to allocations to get a larger buffer.
-      enum { PTR_THRESH = 8 };
       unsigned size_;
       union {
-            vvp_scalar_t*ptr_;
-            char val_[PTR_THRESH * sizeof(vvp_scalar_t)];
+            unsigned char*ptr_;
+            unsigned char val_[sizeof(void*)];
       };
 };
@@ -853,35 +872,36 @@ extern ostream& operator<< (ostream&, const vvp_vector8_t&);
 inline vvp_vector8_t::vvp_vector8_t(unsigned size__)
 : size_(size__)
 {
-      if (size_ <= PTR_THRESH) {
-            new (val_) vvp_scalar_t[PTR_THRESH];
+      if (size_ <= sizeof val_) {
+            ptr_ = 0;
       } else {
-            ptr_ = new vvp_scalar_t[size_];
+            ptr_ = new unsigned char[size_];
+            memset(ptr_, 0, size_);
       }
 }
 
 inline vvp_vector8_t::~vvp_vector8_t()
 {
-      if (size_ > PTR_THRESH)
+      if (size_ > sizeof val_)
             delete[]ptr_;
 }
 
 inline vvp_scalar_t vvp_vector8_t::value(unsigned idx) const
 {
       assert(idx < size_);
-      if (size_ <= PTR_THRESH)
-            return reinterpret_cast<const vvp_scalar_t*>(val_) [idx];
+      if (size_ <= sizeof val_)
+            return vvp_scalar_t(val_[idx]);
       else
-            return ptr_[idx];
+            return vvp_scalar_t(ptr_[idx]);
 }
 
 inline void vvp_vector8_t::set_bit(unsigned idx, vvp_scalar_t val)
 {
       assert(idx < size_);
-      if (size_ <= PTR_THRESH)
-            reinterpret_cast<vvp_scalar_t*>(val_) [idx] = val;
+      if (size_ <= sizeof val_)
+            val_[idx] = val.raw();
       else
-            ptr_[idx] = val;
+            ptr_[idx] = val.raw();
 }
 
 // Exactly-equal for vvp_vector8_t is common and should be as tight
@@ -893,15 +913,10 @@ inline bool vvp_vector8_t::eeq(const vvp_vector8_t&that) const
       if (size_ == 0)
             return true;
 
-      if (size_ <= PTR_THRESH)
-            return 0 == memcmp(val_, that.val_, sizeof(val_));
-
-      for (unsigned idx = 0 ; idx < size_ ; idx += 1) {
-            if (! ptr_[idx] .eeq( that.ptr_[idx] ))
-                  return false;
-      }
-
-      return true;
+      if (size_ <= sizeof val_)
+            return ptr_ == that.ptr_;
+      else
+            return memcmp(ptr_, that.ptr_, size_) == 0;
 }
 
 /*
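
The rewritten eeq above exploits the same union overlay: when the bytes
fit the inline buffer, comparing the pointer member compares every
inline byte in one word-sized operation. This relies on the unused tail
bytes being consistent between operands, which is why the constructor
writes ptr_ = 0 (and memsets the heap buffer) before any bits are set.
A sketch of the trick, using the same type punning the diff relies on
(names hypothetical):

    #include <cstring>

    union rep {
          unsigned char*ptr;
          unsigned char val[sizeof(void*)];
    };

    // Exact equality for a small vector: the inline bytes occupy exactly
    // sizeof(void*) bytes, so one pointer compare stands in for memcmp.
    static bool eeq_small(const rep&a, const rep&b)
    {
          return a.ptr == b.ptr;
    }

    // Large vectors still compare byte-for-byte.
    static bool eeq_large(const rep&a, const rep&b, unsigned size)
    {
          return memcmp(a.ptr, b.ptr, size) == 0;
    }

    int main()
    {
          rep a, b;
          a.ptr = 0;
          b.ptr = 0;                  // zero every inline byte of both
          a.val[0] = 1;
          b.val[0] = 1;               // same payload in the used byte
          return eeq_small(a, b) ? 0 : 1;
          (void)eeq_large;
    }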