diff --git a/vvp/reduce.cc b/vvp/reduce.cc
index 37afc1ea5..39362e5fb 100644
--- a/vvp/reduce.cc
+++ b/vvp/reduce.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005-2025 Stephen Williams (steve@icarus.com)
+ * Copyright (c) 2005-2026 Stephen Williams (steve@icarus.com)
  *
  * This source code is free software; you can redistribute it
  * and/or modify it in source code form under the terms of the GNU
@@ -97,12 +97,7 @@ vvp_reduce_and::~vvp_reduce_and()
 
 vvp_bit4_t vvp_reduce_and::calculate_result() const
 {
-      vvp_bit4_t res = BIT4_1;
-
-      for (unsigned idx = 0 ; idx < bits_.size() ; idx += 1)
-        res = res & bits_.value(idx);
-
-      return res;
+      return bits_.reduce_and();
 }
 
 class vvp_reduce_or : public vvp_reduce_base {
@@ -123,12 +118,7 @@ vvp_reduce_or::~vvp_reduce_or()
 
 vvp_bit4_t vvp_reduce_or::calculate_result() const
 {
-      vvp_bit4_t res = BIT4_0;
-
-      for (unsigned idx = 0 ; idx < bits_.size() ; idx += 1)
-        res = res | bits_.value(idx);
-
-      return res;
+      return bits_.reduce_or();
 }
 
 class vvp_reduce_xor : public vvp_reduce_base {
@@ -149,12 +139,7 @@ vvp_reduce_xor::~vvp_reduce_xor()
 
 vvp_bit4_t vvp_reduce_xor::calculate_result() const
 {
-      vvp_bit4_t res = BIT4_0;
-
-      for (unsigned idx = 0 ; idx < bits_.size() ; idx += 1)
-        res = res ^ bits_.value(idx);
-
-      return res;
+      return bits_.reduce_xor();
 }
 
 class vvp_reduce_nand : public vvp_reduce_base {
@@ -175,12 +160,7 @@ vvp_reduce_nand::~vvp_reduce_nand()
 
 vvp_bit4_t vvp_reduce_nand::calculate_result() const
 {
-      vvp_bit4_t res = BIT4_1;
-
-      for (unsigned idx = 0 ; idx < bits_.size() ; idx += 1)
-        res = res & bits_.value(idx);
-
-      return ~res;
+      return ~bits_.reduce_and();
 }
 
 class vvp_reduce_nor : public vvp_reduce_base {
@@ -201,12 +181,7 @@ vvp_reduce_nor::~vvp_reduce_nor()
 
 vvp_bit4_t vvp_reduce_nor::calculate_result() const
 {
-      vvp_bit4_t res = BIT4_0;
-
-      for (unsigned idx = 0 ; idx < bits_.size() ; idx += 1)
-        res = res | bits_.value(idx);
-
-      return ~res;
+      return ~bits_.reduce_or();
 }
 
 class vvp_reduce_xnor : public vvp_reduce_base {
@@ -227,12 +202,7 @@ vvp_reduce_xnor::~vvp_reduce_xnor()
 
 vvp_bit4_t vvp_reduce_xnor::calculate_result() const
 {
-      vvp_bit4_t res = BIT4_0;
-
-      for (unsigned idx = 0 ; idx < bits_.size() ; idx += 1)
-        res = res ^ bits_.value(idx);
-
-      return ~res;
+      return ~bits_.reduce_xor();
 }
 
 static void make_reduce(char*label, vvp_net_fun_t*red, const struct symb_s&arg)
diff --git a/vvp/vthread.cc b/vvp/vthread.cc
index 1ef7aaf76..cd3b679fe 100644
--- a/vvp/vthread.cc
+++ b/vvp/vthread.cc
@@ -4576,24 +4576,8 @@ bool of_NOOP(vthread_t, vvp_code_t)
  */
 bool of_NORR(vthread_t thr, vvp_code_t)
 {
-      vvp_vector4_t val = thr->pop_vec4();
-
-      vvp_bit4_t lb = BIT4_1;
-
-      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
-
-        vvp_bit4_t rb = val.value(idx);
-        if (rb == BIT4_1) {
-              lb = BIT4_0;
-              break;
-        }
-
-        if (rb != BIT4_0)
-              lb = BIT4_X;
-      }
-
-      vvp_vector4_t res (1, lb);
-      thr->push_vec4(res);
+      vvp_vector4_t&val = thr->peek_vec4();
+      val = vvp_vector4_t(1, ~val.reduce_or());
 
       return true;
 }
@@ -4613,23 +4597,8 @@ bool of_NULL(vthread_t thr, vvp_code_t)
  */
 bool of_ANDR(vthread_t thr, vvp_code_t)
 {
-      vvp_vector4_t val = thr->pop_vec4();
-
-      vvp_bit4_t lb = BIT4_1;
-
-      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
-        vvp_bit4_t rb = val.value(idx);
-        if (rb == BIT4_0) {
-              lb = BIT4_0;
-              break;
-        }
-
-        if (rb != 1)
-              lb = BIT4_X;
-      }
-
-      vvp_vector4_t res (1, lb);
-      thr->push_vec4(res);
+      vvp_vector4_t&val = thr->peek_vec4();
+      val = vvp_vector4_t(1, val.reduce_and());
 
       return true;
 }
@@ -4639,23 +4608,8 @@ bool of_ANDR(vthread_t thr, vvp_code_t)
  */
 bool of_NANDR(vthread_t thr, vvp_code_t)
 {
-      vvp_vector4_t val = thr->pop_vec4();
-
-      vvp_bit4_t lb = BIT4_0;
-      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
-
-        vvp_bit4_t rb = val.value(idx);
-        if (rb == BIT4_0) {
-              lb = BIT4_1;
-              break;
-        }
-
-        if (rb != BIT4_1)
-              lb = BIT4_X;
-      }
-
-      vvp_vector4_t res (1, lb);
-      thr->push_vec4(res);
+      vvp_vector4_t&val = thr->peek_vec4();
+      val = vvp_vector4_t(1, ~val.reduce_and());
 
       return true;
 }
@@ -4665,22 +4619,9 @@ bool of_NANDR(vthread_t thr, vvp_code_t)
  */
 bool of_ORR(vthread_t thr, vvp_code_t)
 {
-      vvp_vector4_t val = thr->pop_vec4();
+      vvp_vector4_t&val = thr->peek_vec4();
+      val = vvp_vector4_t(1, val.reduce_or());
 
-      vvp_bit4_t lb = BIT4_0;
-      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
-        vvp_bit4_t rb = val.value(idx);
-        if (rb == BIT4_1) {
-              lb = BIT4_1;
-              break;
-        }
-
-        if (rb != BIT4_0)
-              lb = BIT4_X;
-      }
-
-      vvp_vector4_t res (1, lb);
-      thr->push_vec4(res);
       return true;
 }
 
@@ -4689,22 +4630,9 @@ bool of_ORR(vthread_t thr, vvp_code_t)
  */
 bool of_XORR(vthread_t thr, vvp_code_t)
 {
-      vvp_vector4_t val = thr->pop_vec4();
+      vvp_vector4_t&val = thr->peek_vec4();
+      val = vvp_vector4_t(1, val.reduce_xor());
 
-      vvp_bit4_t lb = BIT4_0;
-      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
-
-        vvp_bit4_t rb = val.value(idx);
-        if (rb == BIT4_1)
-              lb = ~lb;
-        else if (rb != BIT4_0) {
-              lb = BIT4_X;
-              break;
-        }
-      }
-
-      vvp_vector4_t res (1, lb);
-      thr->push_vec4(res);
       return true;
 }
 
@@ -4713,22 +4641,9 @@ bool of_XORR(vthread_t thr, vvp_code_t)
  */
 bool of_XNORR(vthread_t thr, vvp_code_t)
 {
-      vvp_vector4_t val = thr->pop_vec4();
+      vvp_vector4_t&val = thr->peek_vec4();
+      val = vvp_vector4_t(1, ~val.reduce_xor());
 
-      vvp_bit4_t lb = BIT4_1;
-      for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
-
-        vvp_bit4_t rb = val.value(idx);
-        if (rb == BIT4_1)
-              lb = ~lb;
-        else if (rb != BIT4_0) {
-              lb = BIT4_X;
-              break;
-        }
-      }
-
-      vvp_vector4_t res (1, lb);
-      thr->push_vec4(res);
       return true;
 }
 
diff --git a/vvp/vvp_net.cc b/vvp/vvp_net.cc
index 23f9de4ef..30efa58cc 100644
--- a/vvp/vvp_net.cc
+++ b/vvp/vvp_net.cc
@@ -1905,6 +1905,137 @@ void vvp_vector4_t::invert()
       }
 }
 
+/*
+ * Mask selecting the low n bits of a word. n == 0 selects the whole
+ * word, which is what the last word of a vector needs when the width
+ * is a non-zero multiple of BITS_PER_WORD.
+ */
+#define BIT_MASK(n) ((n) ? ((~0UL) >> (BITS_PER_WORD - (n))) : ~0UL)
+
+/*
+ * OR-reduce the vector. The result is BIT4_1 if any bit has its a-bit
+ * set and its b-bit clear, otherwise BIT4_X if any bit has its b-bit
+ * set, otherwise BIT4_0. A zero-width vector reduces to BIT4_0.
+ */
+vvp_bit4_t vvp_vector4_t::reduce_or() const
+{
+      // BIT_MASK(0) selects a whole word, so a zero-width vector needs
+      // an explicit empty mask to keep the unused word out of the result.
+      unsigned long mask = size_ ? BIT_MASK(size_ % BITS_PER_WORD) : 0UL;
+      vvp_bit4_t res = BIT4_0;
+
+      if (size_ <= BITS_PER_WORD) {
+        if ((abits_val_ & ~bbits_val_ & mask) != 0UL)
+              return BIT4_1;
+        if ((bbits_val_ & mask) != 0UL)
+              return BIT4_X;
+      } else {
+        unsigned words = (size_ + BITS_PER_WORD - 1) / BITS_PER_WORD;
+        unsigned idx;
+        // All words but the last are fully used, so need no mask.
+        for (idx = 0; idx < words - 1; idx += 1) {
+              if ((abits_ptr_[idx] & ~bbits_ptr_[idx]) != 0UL)
+                return BIT4_1;
+              if (bbits_ptr_[idx] != 0UL)
+                res = BIT4_X;
+        }
+        if ((abits_ptr_[idx] & ~bbits_ptr_[idx] & mask) != 0UL)
+              return BIT4_1;
+        if ((bbits_ptr_[idx] & mask) != 0UL)
+              res = BIT4_X;
+      }
+
+      return res;
+}
+
+/*
+ * AND-reduce the vector. The result is BIT4_0 if any bit has both its
+ * a-bit and its b-bit clear, otherwise BIT4_X if any bit has its b-bit
+ * set, otherwise BIT4_1. A zero-width vector reduces to BIT4_1.
+ */
+vvp_bit4_t vvp_vector4_t::reduce_and() const
+{
+      // BIT_MASK(0) selects a whole word, so a zero-width vector needs
+      // an explicit empty mask to keep the unused word out of the result.
+      unsigned long mask = size_ ? BIT_MASK(size_ % BITS_PER_WORD) : 0UL;
+      vvp_bit4_t res = BIT4_1;
+
+      if (size_ <= BITS_PER_WORD) {
+        if ((abits_val_ | bbits_val_ | ~mask) != ~0UL)
+              return BIT4_0;
+        if ((bbits_val_ & mask) != 0UL)
+              return BIT4_X;
+      } else {
+        unsigned words = (size_ + BITS_PER_WORD - 1) / BITS_PER_WORD;
+        unsigned idx;
+        // All words but the last are fully used, so need no mask.
+        for (idx = 0; idx < words - 1; idx += 1) {
+              if ((abits_ptr_[idx] | bbits_ptr_[idx]) != ~0UL)
+                return BIT4_0;
+              if (bbits_ptr_[idx] != 0UL)
+                res = BIT4_X;
+        }
+        if ((abits_ptr_[idx] | bbits_ptr_[idx] | ~mask) != ~0UL)
+              return BIT4_0;
+        if ((bbits_ptr_[idx] & mask) != 0UL)
+              res = BIT4_X;
+      }
+
+      return res;
+}
+
+/*
+ * Return 1 if val has an odd number of set bits, 0 otherwise.
+ */
+static unsigned long parity(unsigned long val)
+{
+#if defined(__GNUC__)
+      // The compiler builtin can use target-specific CPU instructions.
+      return __builtin_parityl(val);
+#else
+      // Fold the word onto its low nibble. Deriving the first shift from
+      // sizeof keeps this correct for any width of unsigned long without
+      // depending on ULONG_MAX (and hence <climits>) being visible.
+      for (unsigned shift = 4*sizeof(val); shift >= 4; shift /= 2)
+        val ^= val >> shift;
+
+      // 0x6996 is the parity lookup table for a 4-bit value.
+      return (0x6996 >> (val & 0xf)) & 1;
+#endif
+}
+
+/*
+ * XOR-reduce the vector. The result is BIT4_X if any bit has its b-bit
+ * set, otherwise BIT4_1 if an odd number of a-bits are set, otherwise
+ * BIT4_0. A zero-width vector reduces to BIT4_0.
+ */
+vvp_bit4_t vvp_vector4_t::reduce_xor() const
+{
+      // BIT_MASK(0) selects a whole word, so a zero-width vector needs
+      // an explicit empty mask to keep the unused word out of the result.
+      unsigned long mask = size_ ? BIT_MASK(size_ % BITS_PER_WORD) : 0UL;
+
+      if (size_ <= BITS_PER_WORD) {
+        if ((bbits_val_ & mask) != 0UL)
+              return BIT4_X;
+        return parity(abits_val_ & mask) ? BIT4_1 : BIT4_0;
+      } else {
+        unsigned words = (size_ + BITS_PER_WORD - 1) / BITS_PER_WORD;
+        unsigned long val_a = 0UL;
+        unsigned idx;
+        // XOR the words together first; one parity call then covers all.
+        for (idx = 0; idx < words - 1; idx += 1) {
+              if (bbits_ptr_[idx] != 0UL)
+                return BIT4_X;
+              val_a ^= abits_ptr_[idx];
+        }
+        if ((bbits_ptr_[idx] & mask) != 0UL)
+              return BIT4_X;
+        val_a ^= abits_ptr_[idx] & mask;
+        return parity(val_a) ? BIT4_1 : BIT4_0;
+      }
+}
+
 vvp_vector4_t& vvp_vector4_t::operator &= (const vvp_vector4_t&that)
 {
       // The truth table is:
diff --git a/vvp/vvp_net.h b/vvp/vvp_net.h
index 197406bb7..f6beeb095 100644
--- a/vvp/vvp_net.h
+++ b/vvp/vvp_net.h
@@ -326,6 +326,13 @@ class vvp_vector4_t {
       vvp_vector4_t& operator ^= (const vvp_vector4_t&that);
       vvp_vector4_t& operator += (int64_t);
 
+      // Reduce all the bits of the vector to a single bit with the
+      // named operator. A zero-width vector gives the operator's
+      // identity: BIT4_0 for or/xor, BIT4_1 for and.
+      vvp_bit4_t reduce_or() const;
+      vvp_bit4_t reduce_and() const;
+      vvp_bit4_t reduce_xor() const;
+
     private:
 	// Number of vvp_bit4_t bits that can be shoved into a word.
       enum { BITS_PER_WORD = 8*sizeof(unsigned long) };