Optimize handling of invert gates / Tweak vvp_vector8_t performance
Inverting and copying a vector can be collapsed into a single operation, which should run a little faster; the inversion also vectorizes readily. This may also reduce useless vvp_fun_not iterations. In addition, this includes some minor tweaks to how vvp_vector8_t handles vector copies; these are mainly code cleanup, although they should slightly improve performance as well.
This commit is contained in:
parent
85e0f8a328
commit
d0b1936fb5
10
vvp/logic.cc
10
vvp/logic.cc
|
|
@ -399,7 +399,7 @@ void vvp_fun_not::recv_vec4(vvp_net_ptr_t ptr, const vvp_vector4_t&bit,
|
|||
if (ptr.port() != 0)
|
||||
return;
|
||||
|
||||
if (input_ .eeq( bit ))
|
||||
if (input_ .eq_xz( bit ))
|
||||
return;
|
||||
|
||||
input_ = bit;
|
||||
|
|
@ -414,13 +414,7 @@ void vvp_fun_not::run_run()
|
|||
vvp_net_t*ptr = net_;
|
||||
net_ = 0;
|
||||
|
||||
vvp_vector4_t result (input_);
|
||||
|
||||
for (unsigned idx = 0 ; idx < result.size() ; idx += 1) {
|
||||
vvp_bit4_t bitbit = ~ result.value(idx);
|
||||
result.set_bit(idx, bitbit);
|
||||
}
|
||||
|
||||
vvp_vector4_t result (input_, true /* invert */);
|
||||
ptr->send_vec4(result, 0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -173,7 +173,12 @@ vvp_island_port::~vvp_island_port()
|
|||
void vvp_island_port::recv_vec4(vvp_net_ptr_t port, const vvp_vector4_t&bit,
|
||||
vvp_context_t)
|
||||
{
|
||||
recv_vec8(port, vvp_vector8_t(bit, 6, 6));
|
||||
vvp_vector8_t tmp (bit, 6, 6);
|
||||
if (invalue .eeq(tmp))
|
||||
return;
|
||||
|
||||
invalue = tmp;
|
||||
island_->flag_island();
|
||||
}
|
||||
|
||||
void vvp_island_port::recv_vec4_pv(vvp_net_ptr_t port, const vvp_vector4_t&bit,
|
||||
|
|
|
|||
117
vvp/vvp_net.cc
117
vvp/vvp_net.cc
|
|
@ -583,6 +583,41 @@ void vvp_vector4_t::copy_from_(const vvp_vector4_t&that)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The copy_inverted_from_ method is just like the copy_from_ method,
|
||||
* except that we combine that with an invert. This allows the ~ and
|
||||
* the assignment to be blended in many common cases.
|
||||
*/
|
||||
void vvp_vector4_t::copy_inverted_from_(const vvp_vector4_t&that)
|
||||
{
|
||||
size_ = that.size_;
|
||||
if (size_ > BITS_PER_WORD) {
|
||||
unsigned words = (size_+BITS_PER_WORD-1) / BITS_PER_WORD;
|
||||
abits_ptr_ = new unsigned long[2*words];
|
||||
bbits_ptr_ = abits_ptr_ + words;
|
||||
|
||||
unsigned remaining = size_;
|
||||
unsigned idx = 0;
|
||||
while (remaining >= BITS_PER_WORD) {
|
||||
abits_ptr_[idx] = that.bbits_ptr_[idx] | ~that.abits_ptr_[idx];
|
||||
idx += 1;
|
||||
remaining -= BITS_PER_WORD;
|
||||
}
|
||||
if (remaining > 0) {
|
||||
unsigned long mask = (1UL<<remaining) - 1UL;
|
||||
abits_ptr_[idx] = mask & (that.bbits_ptr_[idx] | ~that.abits_ptr_[idx]);
|
||||
}
|
||||
|
||||
for (unsigned idx = 0 ; idx < words ; idx += 1)
|
||||
bbits_ptr_[idx] = that.bbits_ptr_[idx];
|
||||
|
||||
} else {
|
||||
unsigned long mask = (size_<BITS_PER_WORD)? (1UL<<size_)-1UL : -1UL;
|
||||
abits_val_ = mask & (that.bbits_val_ | ~that.abits_val_);
|
||||
bbits_val_ = that.bbits_val_;
|
||||
}
|
||||
}
|
||||
|
||||
void vvp_vector4_t::allocate_words_(unsigned wid, unsigned long inita, unsigned long initb)
|
||||
{
|
||||
if (size_ > BITS_PER_WORD) {
|
||||
|
|
@ -1238,6 +1273,40 @@ bool vvp_vector4_t::eeq(const vvp_vector4_t&that) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool vvp_vector4_t::eq_xz(const vvp_vector4_t&that) const
|
||||
{
|
||||
if (size_ != that.size_)
|
||||
return false;
|
||||
|
||||
if (size_ < BITS_PER_WORD) {
|
||||
unsigned long mask = (1UL << size_) - 1;
|
||||
return ((abits_val_|bbits_val_)&mask) == ((that.abits_val_|that.bbits_val_)&mask)
|
||||
&& (bbits_val_&mask) == (that.bbits_val_&mask);
|
||||
}
|
||||
|
||||
if (size_ == BITS_PER_WORD) {
|
||||
return ((abits_val_|bbits_val_) == (that.abits_val_|that.bbits_val_))
|
||||
&& (bbits_val_ == that.bbits_val_);
|
||||
}
|
||||
|
||||
unsigned words = size_ / BITS_PER_WORD;
|
||||
for (unsigned idx = 0 ; idx < words ; idx += 1) {
|
||||
if ((abits_ptr_[idx]|bbits_ptr_[idx]) != (that.abits_ptr_[idx]|that.bbits_ptr_[idx]))
|
||||
return false;
|
||||
if (bbits_ptr_[idx] != that.bbits_ptr_[idx])
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned long mask = size_%BITS_PER_WORD;
|
||||
if (mask > 0) {
|
||||
mask = (1UL << mask) - 1;
|
||||
return ((abits_ptr_[words]|bbits_ptr_[words])&mask) == ((that.abits_ptr_[words]|that.bbits_ptr_[words])&mask)
|
||||
&& (bbits_ptr_[words]&mask) == (that.bbits_ptr_[words]&mask);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool vvp_vector4_t::has_xz() const
|
||||
{
|
||||
if (size_ < BITS_PER_WORD) {
|
||||
|
|
@ -2557,12 +2626,11 @@ ostream& operator<< (ostream&out, const vvp_vector2_t&that)
|
|||
vvp_vector8_t::vvp_vector8_t(const vvp_vector8_t&that)
|
||||
{
|
||||
size_ = that.size_;
|
||||
if (size_ <= PTR_THRESH) {
|
||||
memcpy(val_, that.val_, sizeof(val_));
|
||||
if (size_ <= sizeof val_) {
|
||||
ptr_ = that.ptr_;
|
||||
} else {
|
||||
ptr_ = new vvp_scalar_t[size_];
|
||||
for (unsigned idx = 0 ; idx < size_ ; idx += 1)
|
||||
ptr_[idx] = that.ptr_[idx];
|
||||
ptr_ = new unsigned char[size_];
|
||||
memcpy(ptr_, that.ptr_, size_);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2573,15 +2641,14 @@ vvp_vector8_t::vvp_vector8_t(const vvp_vector4_t&that,
|
|||
if (size_ == 0)
|
||||
return;
|
||||
|
||||
vvp_scalar_t*tmp;
|
||||
if (size_ <= PTR_THRESH)
|
||||
tmp = new (val_) vvp_scalar_t[PTR_THRESH];
|
||||
else
|
||||
tmp = ptr_ = new vvp_scalar_t[size_];
|
||||
|
||||
for (unsigned idx = 0 ; idx < size_ ; idx += 1)
|
||||
tmp[idx] = vvp_scalar_t (that.value(idx), str0, str1);
|
||||
|
||||
if (size_ <= sizeof val_) {
|
||||
for (unsigned idx = 0 ; idx < size_ ; idx += 1)
|
||||
val_[idx] = vvp_scalar_t(that.value(idx),str0, str1).raw();
|
||||
} else {
|
||||
ptr_ = new unsigned char[size_];
|
||||
for (unsigned idx = 0 ; idx < size_ ; idx += 1)
|
||||
ptr_[idx] = vvp_scalar_t(that.value(idx), str0, str1).raw();
|
||||
}
|
||||
}
|
||||
|
||||
const vvp_vector8_t vvp_vector8_t::nil;
|
||||
|
|
@ -2589,12 +2656,11 @@ const vvp_vector8_t vvp_vector8_t::nil;
|
|||
vvp_vector8_t& vvp_vector8_t::operator= (const vvp_vector8_t&that)
|
||||
{
|
||||
// Assign to self.
|
||||
if (this == &that || (size_ > PTR_THRESH && that.size_ > PTR_THRESH &&
|
||||
ptr_ == that.ptr_))
|
||||
if (this == &that)
|
||||
return *this;
|
||||
|
||||
if (size_ != that.size_) {
|
||||
if (size_ > PTR_THRESH)
|
||||
if (size_ > sizeof val_)
|
||||
delete[]ptr_;
|
||||
size_ = 0;
|
||||
}
|
||||
|
|
@ -2604,7 +2670,7 @@ vvp_vector8_t& vvp_vector8_t::operator= (const vvp_vector8_t&that)
|
|||
return *this;
|
||||
}
|
||||
|
||||
if (that.size_ <= PTR_THRESH) {
|
||||
if (that.size_ <= sizeof val_) {
|
||||
size_ = that.size_;
|
||||
memcpy(val_, that.val_, sizeof(val_));
|
||||
return *this;
|
||||
|
|
@ -2612,11 +2678,10 @@ vvp_vector8_t& vvp_vector8_t::operator= (const vvp_vector8_t&that)
|
|||
|
||||
if (size_ == 0) {
|
||||
size_ = that.size_;
|
||||
ptr_ = new vvp_scalar_t[size_];
|
||||
ptr_ = new unsigned char[size_];
|
||||
}
|
||||
|
||||
for (unsigned idx = 0 ; idx < size_ ; idx += 1)
|
||||
ptr_[idx] = that.ptr_[idx];
|
||||
memcpy(ptr_, that.ptr_, size_);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
|
@ -2625,12 +2690,12 @@ vvp_vector8_t vvp_vector8_t::subvalue(unsigned base, unsigned wid) const
|
|||
{
|
||||
vvp_vector8_t tmp (wid);
|
||||
|
||||
vvp_scalar_t* tmp_ptr = tmp.size_<=PTR_THRESH? reinterpret_cast<vvp_scalar_t*>(tmp.val_) : tmp.ptr_;
|
||||
const vvp_scalar_t* ptr = size_<=PTR_THRESH? reinterpret_cast<const vvp_scalar_t*>(val_) : ptr_;
|
||||
unsigned char*tmp_ptr = tmp.size_ <= sizeof val_? tmp.val_ : tmp.ptr_;
|
||||
const unsigned char*use_ptr = size_ <= sizeof val_? val_ : ptr_;
|
||||
|
||||
unsigned idx = 0;
|
||||
while ((idx < wid) && (base+idx < size_)) {
|
||||
tmp_ptr[idx] = ptr[base+idx];
|
||||
tmp_ptr[idx] = use_ptr[base+idx];
|
||||
idx += 1;
|
||||
}
|
||||
|
||||
|
|
@ -2649,8 +2714,8 @@ vvp_vector8_t part_expand(const vvp_vector8_t&that, unsigned wid, unsigned off)
|
|||
assert(off < wid);
|
||||
vvp_vector8_t tmp (wid);
|
||||
|
||||
vvp_scalar_t* tmp_ptr = tmp.size_<=vvp_vector8_t::PTR_THRESH? reinterpret_cast<vvp_scalar_t*>(tmp.val_) : tmp.ptr_;
|
||||
const vvp_scalar_t* that_ptr = that.size_<=vvp_vector8_t::PTR_THRESH? reinterpret_cast<const vvp_scalar_t*>(that.val_) : that.ptr_;
|
||||
unsigned char* tmp_ptr = tmp.size_<= sizeof tmp.val_? tmp.val_ : tmp.ptr_;
|
||||
const unsigned char* that_ptr = that.size_<= sizeof that.val_? that.val_ : that.ptr_;
|
||||
|
||||
unsigned idx = off;
|
||||
|
||||
|
|
|
|||
|
|
@ -211,6 +211,7 @@ class vvp_vector4_t {
|
|||
unsigned adr, unsigned wid);
|
||||
|
||||
vvp_vector4_t(const vvp_vector4_t&that);
|
||||
vvp_vector4_t(const vvp_vector4_t&that, bool invert_flag);
|
||||
vvp_vector4_t& operator= (const vvp_vector4_t&that);
|
||||
|
||||
~vvp_vector4_t();
|
||||
|
|
@ -240,6 +241,9 @@ class vvp_vector4_t {
|
|||
// Test that the vectors are exactly equal
|
||||
bool eeq(const vvp_vector4_t&that) const;
|
||||
|
||||
// Test that the vectors are equal, with xz comparing as equal.
|
||||
bool eq_xz(const vvp_vector4_t&that) const;
|
||||
|
||||
// Return true if there is an X or Z anywhere in the vector.
|
||||
bool has_xz() const;
|
||||
|
||||
|
|
@ -283,6 +287,7 @@ class vvp_vector4_t {
|
|||
// Initialize and operator= use this private method to copy
|
||||
// the data from that object into this object.
|
||||
void copy_from_(const vvp_vector4_t&that);
|
||||
void copy_inverted_from_(const vvp_vector4_t&that);
|
||||
|
||||
void allocate_words_(unsigned size, unsigned long inita, unsigned long initb);
|
||||
|
||||
|
|
@ -313,6 +318,14 @@ inline vvp_vector4_t::vvp_vector4_t(const vvp_vector4_t&that)
|
|||
copy_from_(that);
|
||||
}
|
||||
|
||||
inline vvp_vector4_t::vvp_vector4_t(const vvp_vector4_t&that, bool invert_flag)
{
      // Copy-construct from that. A false invert_flag gives a plain
      // copy; a true one copies and inverts in a single pass.
      if (!invert_flag) {
            copy_from_(that);
            return;
      }

      copy_inverted_from_(that);
}
|
||||
|
||||
inline vvp_vector4_t::vvp_vector4_t(unsigned size__, vvp_bit4_t val)
|
||||
: size_(size__)
|
||||
{
|
||||
|
|
@ -442,8 +455,7 @@ inline void vvp_vector4_t::set_bit(unsigned idx, vvp_bit4_t val)
|
|||
|
||||
inline vvp_vector4_t operator ~ (const vvp_vector4_t&that)
|
||||
{
|
||||
vvp_vector4_t res = that;
|
||||
res.invert();
|
||||
vvp_vector4_t res (that, true);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
@ -700,6 +712,14 @@ class vvp_scalar_t {
|
|||
bool eeq(vvp_scalar_t that) const { return value_ == that.value_; }
|
||||
bool is_hiz() const { return value_ == 0; }
|
||||
|
||||
private:
|
||||
// This class and the vvp_vector8_t class are closely related,
|
||||
// so allow vvp_vector8_t access to the raw encoding so that
|
||||
// it can do compact vectoring of vvp_scalar_t objects.
|
||||
friend class vvp_vector8_t;
|
||||
explicit vvp_scalar_t(unsigned char raw) : value_(raw) { }
|
||||
unsigned char raw() const { return value_; }
|
||||
|
||||
private:
|
||||
unsigned char value_;
|
||||
};
|
||||
|
|
@ -813,11 +833,10 @@ class vvp_vector8_t {
|
|||
// This is the number of vvp_scalar_t objects we can keep in
|
||||
// the val_ buffer. If the vector8 is bigger then this, then
|
||||
// resort to allocations to get a larger buffer.
|
||||
enum { PTR_THRESH = 8 };
|
||||
unsigned size_;
|
||||
union {
|
||||
vvp_scalar_t*ptr_;
|
||||
char val_[PTR_THRESH * sizeof(vvp_scalar_t)];
|
||||
unsigned char*ptr_;
|
||||
unsigned char val_[sizeof(void*)];
|
||||
};
|
||||
};
|
||||
|
||||
|
|
@ -853,35 +872,36 @@ extern ostream& operator<< (ostream&, const vvp_vector8_t&);
|
|||
inline vvp_vector8_t::vvp_vector8_t(unsigned size__)
|
||||
: size_(size__)
|
||||
{
|
||||
if (size_ <= PTR_THRESH) {
|
||||
new (val_) vvp_scalar_t[PTR_THRESH];
|
||||
if (size_ <= sizeof val_) {
|
||||
ptr_ = 0;
|
||||
} else {
|
||||
ptr_ = new vvp_scalar_t[size_];
|
||||
ptr_ = new unsigned char[size_];
|
||||
memset(ptr_, 0, size_);
|
||||
}
|
||||
}
|
||||
|
||||
inline vvp_vector8_t::~vvp_vector8_t()
|
||||
{
|
||||
if (size_ > PTR_THRESH)
|
||||
if (size_ > sizeof val_)
|
||||
delete[]ptr_;
|
||||
}
|
||||
|
||||
inline vvp_scalar_t vvp_vector8_t::value(unsigned idx) const
|
||||
{
|
||||
assert(idx < size_);
|
||||
if (size_ <= PTR_THRESH)
|
||||
return reinterpret_cast<const vvp_scalar_t*>(val_) [idx];
|
||||
if (size_ <= sizeof val_)
|
||||
return vvp_scalar_t(val_[idx]);
|
||||
else
|
||||
return ptr_[idx];
|
||||
return vvp_scalar_t(ptr_[idx]);
|
||||
}
|
||||
|
||||
inline void vvp_vector8_t::set_bit(unsigned idx, vvp_scalar_t val)
|
||||
{
|
||||
assert(idx < size_);
|
||||
if (size_ <= PTR_THRESH)
|
||||
reinterpret_cast<vvp_scalar_t*>(val_) [idx] = val;
|
||||
if (size_ <= sizeof val_)
|
||||
val_[idx] = val.raw();
|
||||
else
|
||||
ptr_[idx] = val;
|
||||
ptr_[idx] = val.raw();
|
||||
}
|
||||
|
||||
// Exactly-equal for vvp_vector8_t is common and should be as tight
|
||||
|
|
@ -893,15 +913,10 @@ inline bool vvp_vector8_t::eeq(const vvp_vector8_t&that) const
|
|||
if (size_ == 0)
|
||||
return true;
|
||||
|
||||
if (size_ <= PTR_THRESH)
|
||||
return 0 == memcmp(val_, that.val_, sizeof(val_));
|
||||
|
||||
for (unsigned idx = 0 ; idx < size_ ; idx += 1) {
|
||||
if (! ptr_[idx] .eeq( that.ptr_[idx] ))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
if (size_ <= sizeof val_)
|
||||
return ptr_ == that.ptr_;
|
||||
else
|
||||
return memcmp(ptr_, that.ptr_, size_) == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
Loading…
Reference in New Issue