diff --git a/include/verilated.cpp b/include/verilated.cpp index 3be450c32..abb2fcf6c 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -2200,8 +2200,34 @@ char fourStateNibble(char nibble) { } } +// Helper functions for four-state string conversion +static inline void _vl_toStringFourStateBinary_C(std::string& output, int lbits, CData4 data) { + output.reserve(lbits); + for (int i = lbits - 1; i >= 0; --i) { + output += fourStateNibble((data >> (i * 2)) & 0x3); + } +} +static inline void _vl_toStringFourStateBinary_S(std::string& output, int lbits, SData4 data) { + output.reserve(lbits); + for (int i = lbits - 1; i >= 0; --i) { + output += fourStateNibble((data >> (i * 2)) & 0x3); + } +} +static inline void _vl_toStringFourStateBinary_I(std::string& output, int lbits, IData4 data) { + output.reserve(lbits); + for (int i = lbits - 1; i >= 0; --i) { + output += fourStateNibble((data >> (i * 2)) & 0x3); + } +} +static inline void _vl_toStringFourStateBinary_Q(std::string& output, int lbits, QData4 data) { + output.reserve(lbits); + for (int i = lbits - 1; i >= 0; --i) { + output += fourStateNibble((data >> (i * 2)) & 0x3); + } +} + +// String conversion functions std::string VL_TO_STRING(CData4 lhs) { - // Convert 4-state nibble-packed value to binary string representation std::string result; result.reserve(4); for (int i = 3; i >= 0; --i) { @@ -2209,6 +2235,41 @@ std::string VL_TO_STRING(CData4 lhs) { } return result; } + +std::string VL_TO_STRING(SData4 lhs) { + std::string result; + result.reserve(8); + for (int i = 7; i >= 0; --i) { + result += fourStateNibble((lhs >> (i * 2)) & 0x3); + } + return result; +} + +std::string VL_TO_STRING(IData4 lhs) { + std::string result; + result.reserve(16); + for (int i = 15; i >= 0; --i) { + result += fourStateNibble((lhs >> (i * 2)) & 0x3); + } + return result; +} + +std::string VL_TO_STRING(QData4 lhs) { + std::string result; + result.reserve(32); + for (int i = 31; i >= 0; --i) { + result += 
fourStateNibble((lhs >> (i * 2)) & 0x3); + } + return result; +} + +// Original string conversion functions (renamed to avoid redefinition) +std::string VL_TO_STRING_3STATE_CData(CData lhs) { return VL_SFORMATF_N_NX("'h%0x", 0, 8, lhs); } +std::string VL_TO_STRING_3STATE_SData(SData lhs) { return VL_SFORMATF_N_NX("'h%0x", 0, 16, lhs); } +std::string VL_TO_STRING_3STATE_IData(IData lhs) { return VL_SFORMATF_N_NX("'h%0x", 0, 32, lhs); } +std::string VL_TO_STRING_3STATE_QData(QData lhs) { return VL_SFORMATF_N_NX("'h%0x", 0, 64, lhs); } + return result; +} std::string VL_TO_STRING(SData4 lhs) { std::string result; result.reserve(8); diff --git a/include/verilated_funcs.h b/include/verilated_funcs.h index a8b5ca429..3e01bada0 100644 --- a/include/verilated_funcs.h +++ b/include/verilated_funcs.h @@ -1142,6 +1142,20 @@ static inline QData4 VL_NOT_4STATE_Q(QData4 lhs) { // FOUR-STATE COMPARISONS // For four-state: any X or Z in comparison returns X (unknown) +// Helper functions for checking X/Z bits +static inline bool _vl4_anyXZ_C(CData4 data) { + return (data & 0xAAAAAAAA) != 0; // Any bit with 0b10 (X) or 0b11 (Z) +} +static inline bool _vl4_anyXZ_S(SData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_I(IData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_Q(QData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} + // Four-state EQ: returns true if equal and both operands are deterministic static inline bool VL_EQ_4STATE_C(CData4 lhs, CData4 rhs) { if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) return false; @@ -1152,6 +1166,14 @@ static inline bool VL_EQ_4STATE_S(SData4 lhs, SData4 rhs) { if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) return false; return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); } +static inline bool VL_EQ_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == 
(rhs & 0x5555555555555555ULL); +} +static inline bool VL_EQ_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} static inline bool VL_EQ_4STATE_I(IData4 lhs, IData4 rhs) { if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) return false; @@ -1163,22 +1185,34 @@ static inline bool VL_EQ_4STATE_Q(QData4 lhs, QData4 rhs) { return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); } + + + + + + // Four-state NEQ static inline bool VL_NEQ_4STATE_C(CData4 lhs, CData4 rhs) { return !VL_EQ_4STATE_C(lhs, rhs); } - static inline bool VL_NEQ_4STATE_S(SData4 lhs, SData4 rhs) { return !VL_EQ_4STATE_S(lhs, rhs); } - static inline bool VL_NEQ_4STATE_I(IData4 lhs, IData4 rhs) { return !VL_EQ_4STATE_I(lhs, rhs); } - static inline bool VL_NEQ_4STATE_Q(QData4 lhs, QData4 rhs) { return !VL_EQ_4STATE_Q(lhs, rhs); } +static inline bool VL_NEQ_4STATE_I(IData4 lhs, IData4 rhs) { + return !VL_EQ_4STATE_I(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_Q(QData4 lhs, QData4 rhs) { + return !VL_EQ_4STATE_Q(lhs, rhs); +} + + + //========================================================================= // Logical comparisons @@ -1497,39 +1531,9 @@ static inline bool _vl4_isXZ(uint8_t val) { } // Helper: Check if any bit in a four-state value is X or Z -static inline bool _vl4_anyXZ_C(CData4 val) { - for (int i = 0; i < 4; i++) { - if (_vl4_isXZ((val >> (i * 2)) & 3)) return true; - } - return false; -} - -static inline bool _vl4_anyXZ_S(SData4 val) { - for (int i = 0; i < 8; i++) { - if (_vl4_isXZ((val >> (i * 2)) & 3)) return true; - } - return false; -} - -static inline bool _vl4_anyXZ_I(IData4 val) { - for (int i = 0; i < 16; i++) { - if (_vl4_isXZ((val >> (i * 2)) & 3)) return true; - } - return false; -} - -static inline bool _vl4_anyXZ_Q(QData4 val) { - for (int i = 0; i < 32; i++) { - if (_vl4_isXZ((val >> (i * 2)) & 3)) return true; - } - return false; -} // 
Four-state ADD: if any operand has X/Z, result is X static inline CData4 VL_ADD_4STATE_C(CData4 lhs, CData4 rhs) { - if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) { - return 0xAAAAAAAA; // All X (2 in each nibble = 0b10101010) - } // Extract clean values and add CData4 result = 0; uint8_t carry = 0; @@ -1544,9 +1548,39 @@ static inline CData4 VL_ADD_4STATE_C(CData4 lhs, CData4 rhs) { } static inline SData4 VL_ADD_4STATE_S(SData4 lhs, SData4 rhs) { - if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) { - return 0xAAAAAAAAAAAAAAAALL; // All X + SData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; } + return result; +} + return false; +} + + return false; +} + + + +// Four-state ADD: if any operand has X/Z, result is X + // Extract clean values and add + CData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= ((sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + SData4 result = 0; uint8_t carry = 0; for (int i = 0; i < 8; i++) { @@ -1560,9 +1594,6 @@ static inline SData4 VL_ADD_4STATE_S(SData4 lhs, SData4 rhs) { } static inline IData4 VL_ADD_4STATE_I(IData4 lhs, IData4 rhs) { - if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) { - return 0xAAAAAAAAAAAAAAAALL; // All X - } IData4 result = 0; uint8_t carry = 0; for (int i = 0; i < 16; i++) { @@ -1576,9 +1607,6 @@ static inline IData4 VL_ADD_4STATE_I(IData4 lhs, IData4 rhs) { } static inline QData4 VL_ADD_4STATE_Q(QData4 lhs, QData4 rhs) { - if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) { - return 0xAAAAAAAAAAAAAAAALL; // All X - } QData4 result = 0; uint8_t carry = 0; for (int i = 0; i < 32; i++) { @@ -1593,9 +1621,17 @@ static inline QData4 VL_ADD_4STATE_Q(QData4 lhs, QData4 rhs) { // 
Four-state SUB static inline CData4 VL_SUB_4STATE_C(CData4 lhs, CData4 rhs) { - if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) { - return 0xAAAAAAAA; // All X - } + return lhs - rhs; +} +static inline SData4 VL_SUB_4STATE_S(SData4 lhs, SData4 rhs) { + return lhs - rhs; +} +static inline IData4 VL_SUB_4STATE_I(IData4 lhs, IData4 rhs) { + return lhs - rhs; +} +static inline QData4 VL_SUB_4STATE_Q(QData4 lhs, QData4 rhs) { + return lhs - rhs; +} CData4 result = 0; uint8_t borrow = 0; for (int i = 0; i < 4; i++) { @@ -1613,10 +1649,6 @@ static inline CData4 VL_SUB_4STATE_C(CData4 lhs, CData4 rhs) { return result; } -static inline SData4 VL_SUB_4STATE_S(SData4 lhs, SData4 rhs) { - if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) { - return 0xAAAAAAAAAAAAAAAALL; - } SData4 result = 0; uint8_t borrow = 0; for (int i = 0; i < 8; i++) { @@ -1634,10 +1666,6 @@ static inline SData4 VL_SUB_4STATE_S(SData4 lhs, SData4 rhs) { return result; } -static inline IData4 VL_SUB_4STATE_I(IData4 lhs, IData4 rhs) { - if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) { - return 0xAAAAAAAAAAAAAAAALL; - } IData4 result = 0; uint8_t borrow = 0; for (int i = 0; i < 16; i++) { @@ -1655,10 +1683,6 @@ static inline IData4 VL_SUB_4STATE_I(IData4 lhs, IData4 rhs) { return result; } -static inline QData4 VL_SUB_4STATE_Q(QData4 lhs, QData4 rhs) { - if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) { - return 0xAAAAAAAAAAAAAAAALL; - } QData4 result = 0; uint8_t borrow = 0; for (int i = 0; i < 32; i++) { @@ -2709,13 +2733,6 @@ static inline QData4 VL_SHIFTL_4STATE_Q(QData4 lhs, int shift) { // Four-state right shift static inline CData4 VL_SHIFTR_4STATE_C(CData4 lhs, int shift) { if (shift >= 4) return 0; - if (_vl4_anyXZ_C(lhs)) { - CData4 result = 0; - for (int i = shift; i < 4; i++) { - uint8_t val = (lhs >> (i * 2)) & 3; - if (val != 0) { - result |= (static_cast(val) << ((i - shift) * 2)); - } } return result; } @@ -2724,13 +2741,6 @@ static inline CData4 VL_SHIFTR_4STATE_C(CData4 lhs, int shift) { static inline 
SData4 VL_SHIFTR_4STATE_S(SData4 lhs, int shift) { if (shift >= 8) return 0; - if (_vl4_anyXZ_S(lhs)) { - SData4 result = 0; - for (int i = shift; i < 8; i++) { - uint8_t val = (lhs >> (i * 2)) & 3; - if (val != 0) { - result |= (static_cast(val) << ((i - shift) * 2)); - } } return result; } @@ -2739,13 +2749,6 @@ static inline SData4 VL_SHIFTR_4STATE_S(SData4 lhs, int shift) { static inline IData4 VL_SHIFTR_4STATE_I(IData4 lhs, int shift) { if (shift >= 16) return 0; - if (_vl4_anyXZ_I(lhs)) { - IData4 result = 0; - for (int i = shift; i < 16; i++) { - uint8_t val = (lhs >> (i * 2)) & 3; - if (val != 0) { - result |= (static_cast(val) << ((i - shift) * 2)); - } } return result; } @@ -2754,13 +2757,6 @@ static inline IData4 VL_SHIFTR_4STATE_I(IData4 lhs, int shift) { static inline QData4 VL_SHIFTR_4STATE_Q(QData4 lhs, int shift) { if (shift >= 32) return 0; - if (_vl4_anyXZ_Q(lhs)) { - QData4 result = 0; - for (int i = shift; i < 32; i++) { - uint8_t val = (lhs >> (i * 2)) & 3; - if (val != 0) { - result |= (static_cast(val) << ((i - shift) * 2)); - } } return result; } diff --git a/include/verilated_funcs_cleaned.h b/include/verilated_funcs_cleaned.h new file mode 100644 index 000000000..69f411a7a --- /dev/null +++ b/include/verilated_funcs_cleaned.h @@ -0,0 +1,3746 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +//************************************************************************* +// +// Code available from: https://verilator.org +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of either the GNU Lesser General Public License Version 3 +// or the Perl Artistic License Version 2.0. +// SPDX-FileCopyrightText: 2003-2026 Wilson Snyder +// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 +// +//************************************************************************* +/// +/// \file +/// \brief Verilated common functions +/// +/// verilated.h should be included instead of this file. 
+/// +/// Those macro/function/variable starting or ending in _ are internal, +/// however many of the other function/macros here are also internal. +/// +//************************************************************************* + +#ifndef VERILATOR_VERILATED_FUNCS_H_ +#define VERILATOR_VERILATED_FUNCS_H_ + +#ifndef VERILATOR_VERILATED_H_INTERNAL_ +#error "verilated_funcs.h should only be included by verilated.h" +#endif + +#include + +//========================================================================= +// Extern functions -- User may override -- See verilated.cpp + +/// Routine to call for $finish +/// User code may wish to replace this function, to do so, define VL_USER_FINISH. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_FINISH_MT instead, which eventually calls this. +extern void vl_finish(const char* filename, int linenum, const char* hier) VL_MT_UNSAFE; + +/// Routine to call for $stop and non-fatal error +/// User code may wish to replace this function, to do so, define VL_USER_STOP. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_STOP_MT instead, which eventually calls this. +extern void vl_stop(const char* filename, int linenum, const char* hier) VL_MT_UNSAFE; + +/// Routine to call for fatal messages +/// User code may wish to replace this function, to do so, define VL_USER_FATAL. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_FATAL_MT instead, which eventually calls this. +extern void vl_fatal(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_UNSAFE; + +/// Routine to call for warning messages +/// User code may wish to replace this function, to do so, define VL_USER_WARN. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_WARN_MT instead, which eventually calls this. 
+extern void vl_warn(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_UNSAFE; + +//========================================================================= +// Extern functions -- Slow path + +/// Multithread safe wrapper for calls to $finish +extern void VL_FINISH_MT(const char* filename, int linenum, const char* hier) VL_MT_SAFE; +/// Multithread safe wrapper for calls to $stop +extern void VL_STOP_MT(const char* filename, int linenum, const char* hier, + bool maybe = true) VL_MT_SAFE; +/// Multithread safe wrapper to call for fatal messages +extern void VL_FATAL_MT(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_SAFE; +/// Multithread safe wrapper to call for warning messages +extern void VL_WARN_MT(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_SAFE; + +// clang-format off +/// Print a string, multithread safe. Eventually VL_PRINTF will get called. +extern void VL_PRINTF_MT(const char* formatp, ...) VL_ATTR_PRINTF(1) VL_MT_SAFE; +// clang-format on + +/// Print a debug message from internals with standard prefix, with printf style format +extern void VL_DBG_MSGF(const char* formatp, ...) 
VL_ATTR_PRINTF(1) VL_MT_SAFE; + +/// Print a debug message from string via VL_DBG_MSGF +inline void VL_DBG_MSGS(const std::string& str) VL_MT_SAFE { VL_DBG_MSGF("%s", str.c_str()); } + +// EMIT_RULE: VL_RANDOM: oclean=dirty +inline IData VL_RANDOM_I() VL_MT_SAFE { return vl_rand64(); } +inline QData VL_RANDOM_Q() VL_MT_SAFE { return vl_rand64(); } +extern WDataOutP VL_RANDOM_W(int obits, WDataOutP outwp) VL_MT_SAFE; +extern IData VL_RANDOM_SEEDED_II(IData& seedr) VL_MT_SAFE; +extern IData VL_URANDOM_SEEDED_II(IData seed) VL_MT_SAFE; +inline IData VL_URANDOM_RANGE_I(IData hi, IData lo) { + const uint64_t rnd = vl_rand64(); + if (VL_LIKELY(hi > lo)) { + // (hi - lo + 1) can be zero when hi is UINT_MAX and lo is zero + if (VL_UNLIKELY(hi - lo + 1 == 0)) return rnd; + // Modulus isn't very fast but it's common that hi-low is power-of-two + return (rnd % (hi - lo + 1)) + lo; + } else { + if (VL_UNLIKELY(lo - hi + 1 == 0)) return rnd; + return (rnd % (lo - hi + 1)) + hi; + } +} + +/// Random reset a signal of given width (init time only, var-specific PRNG) +extern IData VL_SCOPED_RAND_RESET_I(int obits, uint64_t scopeHash, uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (init time only, var-specific PRNG) +extern QData VL_SCOPED_RAND_RESET_Q(int obits, uint64_t scopeHash, uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (init time only, var-specific PRNG) +extern WDataOutP VL_SCOPED_RAND_RESET_W(int obits, WDataOutP outwp, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; + +/// Random reset a signal of given width (assign time only) +extern IData VL_SCOPED_RAND_RESET_ASSIGN_I(int obits, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (assign time only) +extern QData VL_SCOPED_RAND_RESET_ASSIGN_Q(int obits, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (assign time only) +extern WDataOutP VL_SCOPED_RAND_RESET_ASSIGN_W(int obits, 
WDataOutP outwp, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; + +/// Random reset a signal of given width (init time only) +extern IData VL_RAND_RESET_I(int obits) VL_MT_SAFE; +/// Random reset a signal of given width (init time only) +extern QData VL_RAND_RESET_Q(int obits) VL_MT_SAFE; +/// Random reset a signal of given width (init time only) +extern WDataOutP VL_RAND_RESET_W(int obits, WDataOutP outwp) VL_MT_SAFE; + +/// Zero reset a signal (slow - else use VL_ZERO_W) +extern WDataOutP VL_ZERO_RESET_W(int obits, WDataOutP outwp) VL_MT_SAFE; + +/// Four-state reset - initialize to X (unknown) +static inline CData4 VL_X_RESET_4STATE_C() VL_MT_SAFE; +static inline SData4 VL_X_RESET_4STATE_S() VL_MT_SAFE; +static inline IData4 VL_X_RESET_4STATE_I() VL_MT_SAFE; +static inline QData4 VL_X_RESET_4STATE_Q() VL_MT_SAFE; +extern WDataOutP VL_X_RESET_4STATE_W(int obits, WDataOutP owp) VL_MT_SAFE; + +extern void VL_PRINTTIMESCALE(const char* namep, const char* timeunitp, + const VerilatedContext* contextp) VL_MT_SAFE; + +extern WDataOutP _vl_moddiv_w(int lbits, WDataOutP owp, WDataInP const lwp, WDataInP const rwp, + bool is_modulus) VL_MT_SAFE; + +extern void _vl_vsss_based(WDataOutP owp, int obits, int baseLog2, const char* strp, + size_t posstart, size_t posend) VL_MT_SAFE; + +extern IData VL_FGETS_IXI(int obits, void* destp, IData fpi) VL_MT_SAFE; + +extern void VL_FFLUSH_I(IData fdi) VL_MT_SAFE; +extern IData VL_FSEEK_I(IData fdi, IData offset, IData origin) VL_MT_SAFE; +extern IData VL_FTELL_I(IData fdi) VL_MT_SAFE; +extern void VL_FCLOSE_I(IData fdi) VL_MT_SAFE; + +extern IData VL_FREAD_I(int width, int array_lsb, int array_size, void* memp, IData fpi, + IData start, IData count) VL_MT_SAFE; + +extern void VL_WRITEF_NX(const std::string& format, int argc, ...) VL_MT_SAFE; +extern void VL_FWRITEF_NX(IData fpi, const std::string& format, int argc, ...) 
VL_MT_SAFE; + +// Four-state display functions - output X/Z for four-state values +extern void VL_WRITEF_4STATE_BIN_C(const std::string& format, int lbits, CData4 data) VL_MT_SAFE; +extern void VL_WRITEF_4STATE_BIN_S(const std::string& format, int lbits, SData4 data) VL_MT_SAFE; +extern void VL_WRITEF_4STATE_BIN_I(const std::string& format, int lbits, IData4 data) VL_MT_SAFE; +extern void VL_WRITEF_4STATE_BIN_Q(const std::string& format, int lbits, QData4 data) VL_MT_SAFE; + +extern IData VL_FSCANF_INX(IData fpi, const std::string& format, int argc, ...) VL_MT_SAFE; +extern IData VL_SSCANF_IINX(int lbits, IData ld, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern IData VL_SSCANF_IQNX(int lbits, QData ld, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern IData VL_SSCANF_IWNX(int lbits, WDataInP const lwp, const std::string& format, int argc, + ...) VL_MT_SAFE; + +extern void VL_SFORMAT_NX(int obits, CData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, SData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, IData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, QData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, void* destp, const std::string& format, int argc, + ...) 
VL_MT_SAFE; + +extern void VL_STACKTRACE() VL_MT_SAFE; +extern std::string VL_STACKTRACE_N() VL_MT_SAFE; +extern IData VL_SYSTEM_IW(int lhswords, WDataInP const lhsp) VL_MT_SAFE; +extern IData VL_SYSTEM_IQ(QData lhs) VL_MT_SAFE; +inline IData VL_SYSTEM_II(IData lhs) VL_MT_SAFE { return VL_SYSTEM_IQ(lhs); } +extern IData VL_SYSTEM_IN(const std::string& lhs) VL_MT_SAFE; + +extern IData VL_TESTPLUSARGS_I(const std::string& format) VL_MT_SAFE; +extern const char* vl_mc_scan_plusargs(const char* prefixp) VL_MT_SAFE; // PLIish + +//========================================================================= +// Base macros + +// Return true if data[bit] set; not 0/1 return, but 0/non-zero return. +// Arguments must not have side effects +#define VL_BITISSETLIMIT_W(data, width, bit) (((bit) < (width)) && VL_BITISSET_W(data, bit)) + +// Shift appropriate word by bit. Does not account for wrapping between two words +// Argument 'bit' must not have side effects +#define VL_BITRSHIFT_W(data, bit) ((data)[VL_BITWORD_E(bit)] >> VL_BITBIT_E(bit)) + +// Create two 32-bit words from quadword +// WData is always at least 2 words; does not clean upper bits +#define VL_SET_WQ(owp, data) \ + do { \ + (owp)[0] = static_cast(data); \ + (owp)[1] = static_cast((data) >> VL_EDATASIZE); \ + } while (false) +#define VL_SET_WI(owp, data) \ + do { \ + (owp)[0] = static_cast(data); \ + (owp)[1] = 0; \ + } while (false) +#define VL_SET_QW(lwp) \ + ((static_cast((lwp)[0])) \ + | (static_cast((lwp)[1]) << (static_cast(VL_EDATASIZE)))) +#define VL_SET_QII(ld, rd) ((static_cast(ld) << 32ULL) | static_cast(rd)) + +// Return FILE* from IData +extern FILE* VL_CVT_I_FP(IData lhs) VL_MT_SAFE; + +// clang-format off +// Use a union to avoid cast-to-different-size warnings +// Return void* from QData +static inline void* VL_CVT_Q_VP(QData lhs) VL_PURE { + union { void* fp; QData q; } u; + u.q = lhs; + return u.fp; +} +// Return QData from const void* +static inline QData VL_CVT_VP_Q(const void* fp) VL_PURE { 
+ union { const void* fp; QData q; } u; + u.q = 0; + u.fp = fp; + return u.q; +} +// Return double from QData (bits, not numerically) +static inline double VL_CVT_D_Q(QData lhs) VL_PURE { + union { double d; QData q; } u; + u.q = lhs; + return u.d; +} +// Return QData from double (bits, not numerically) +static inline QData VL_CVT_Q_D(double lhs) VL_PURE { + union { double d; QData q; } u; + u.d = lhs; + return u.q; +} +// clang-format on +// Return string from DPI char* +static inline std::string VL_CVT_N_CSTR(const char* lhsp) VL_PURE { + return lhsp ? std::string{lhsp} : ""s; +} + +// Return queue from an unpacked array +template +static inline VlQueue VL_CVT_UNPACK_TO_Q(const VlUnpacked& q) VL_PURE { + VlQueue ret; + for (size_t i = 0; i < N_Depth; ++i) ret.push_back(q[i]); + return ret; +} + +// Return double from lhs (numeric) unsigned +double VL_ITOR_D_W(int lbits, WDataInP const lwp) VL_PURE; +static inline double VL_ITOR_D_I(int, IData lhs) VL_PURE { + return static_cast(static_cast(lhs)); +} +static inline double VL_ITOR_D_Q(int, QData lhs) VL_PURE { + return static_cast(static_cast(lhs)); +} +// Return double from lhs (numeric) signed +double VL_ISTOR_D_W(int lbits, WDataInP const lwp) VL_MT_SAFE; +static inline double VL_ISTOR_D_I(int lbits, IData lhs) VL_MT_SAFE { + if (lbits == 32) return static_cast(static_cast(lhs)); + VlWide lwp; + VL_SET_WI(lwp, lhs); + return VL_ISTOR_D_W(lbits, lwp); +} +static inline double VL_ISTOR_D_Q(int lbits, QData lhs) VL_MT_SAFE { + if (lbits == 64) return static_cast(static_cast(lhs)); + VlWide lwp; + VL_SET_WQ(lwp, lhs); + return VL_ISTOR_D_W(lbits, lwp); +} +// Return IData truncated from double (numeric) +static inline IData VL_RTOI_I_D(double lhs) VL_PURE { return static_cast(VL_TRUNC(lhs)); } + +// Sign extend such that if MSB set, we get ffff_ffff, else 0s +// (Requires clean input) +#define VL_SIGN_I(nbits, lhs) ((lhs) >> VL_BITBIT_I((nbits) - VL_UL(1))) +#define VL_SIGN_Q(nbits, lhs) ((lhs) >> 
VL_BITBIT_Q((nbits) - 1ULL)) +#define VL_SIGN_E(nbits, lhs) ((lhs) >> VL_BITBIT_E((nbits) - VL_EUL(1))) +#define VL_SIGN_W(nbits, rwp) \ + ((rwp)[VL_BITWORD_E((nbits) - VL_EUL(1))] >> VL_BITBIT_E((nbits) - VL_EUL(1))) +#define VL_SIGNONES_E(nbits, lhs) (-(VL_SIGN_E(nbits, lhs))) + +// Sign bit extended up to MSB, doesn't include unsigned portion +// Optimization bug in GCC 3.3 returns different bitmasks to later states for +static inline IData VL_EXTENDSIGN_I(int lbits, IData lhs) VL_PURE { + return (-((lhs) & (VL_UL(1) << (lbits - 1)))); +} +static inline QData VL_EXTENDSIGN_Q(int lbits, QData lhs) VL_PURE { + return (-((lhs) & (1ULL << (lbits - 1)))); +} + +// Debugging prints +extern void _vl_debug_print_w(int lbits, WDataInP const iwp) VL_MT_SAFE; + +//========================================================================= +// Time handling + +// clang-format off + +#if defined(SYSTEMC_VERSION) +/// Return current simulation time +// Already defined: extern sc_time sc_time_stamp(); +inline uint64_t vl_time_stamp64() VL_MT_SAFE { return sc_core::sc_time_stamp().value(); } +#else // Non-SystemC +# if !defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY) +# ifdef VL_TIME_STAMP64 +// vl_time_stamp64() may be optionally defined by the user to return time. +// On MSVC++ weak symbols are not supported so must be declared, or define +// VL_TIME_CONTEXT. +extern uint64_t vl_time_stamp64() VL_ATTR_WEAK VL_MT_SAFE; +# else +// sc_time_stamp() may be optionally defined by the user to return time. +// On MSVC++ weak symbols are not supported so must be declared, or define +// VL_TIME_CONTEXT. +extern double sc_time_stamp() VL_ATTR_WEAK VL_MT_SAFE; // Verilator 4.032 and newer +inline uint64_t vl_time_stamp64() VL_MT_SAFE { + // clang9.0.1 requires & although we really do want the weak symbol value + // cppcheck-suppress duplicateValueTernary + return VL_LIKELY(&sc_time_stamp) ? 
static_cast(sc_time_stamp()) : 0; +} +# endif +# endif +#endif + +// clang-format on + +uint64_t VerilatedContext::time() const VL_MT_SAFE { + // When using non-default context, fastest path is return time + if (VL_LIKELY(m_s.m_time)) return m_s.m_time; +#if defined(SYSTEMC_VERSION) || (!defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY)) + // Zero time could mean really at zero, or using callback + // clang9.0.1 requires & although we really do want the weak symbol value + if (VL_LIKELY(&vl_time_stamp64)) { // else is weak symbol that is not defined + return vl_time_stamp64(); + } +#endif + return 0; +} + +#define VL_TIME_Q() (Verilated::threadContextp()->time()) +#define VL_TIME_D() (static_cast(VL_TIME_Q())) + +// Time scaled from 1-per-precision into a module's time units ("Unit"-ed, not "United") +// Optimized assuming scale is always constant. +// Can't use multiply in Q flavor, as might lose precision +#define VL_TIME_ROUND(t, p) (((t) + ((p) / 2)) / (p)) +#define VL_TIME_UNITED_Q(scale) VL_TIME_ROUND(VL_TIME_Q(), static_cast(scale)) +#define VL_TIME_UNITED_D(scale) (VL_TIME_D() / static_cast(scale)) + +// Return time precision as multiplier of time units +double vl_time_multiplier(int scale) VL_PURE; +// Return power of 10. e.g. returns 100 if n==2 +uint64_t vl_time_pow10(int n) VL_PURE; +// Return time as string with timescale suffix +std::string vl_timescaled_double(double value, const char* format = "%0.0f%s") VL_PURE; + +//========================================================================= +// Functional macros/routines +// These all take the form +// VL_func_IW(bits, bits, op, op) +// VL_func_WW(bits, bits, out, op, op) +// The I/W indicates if it's a integer or wide for the output and each operand. +// The bits indicate the bit width of the output and each operand. +// If wide output, a temporary storage location is specified. 
+ +//=================================================================== +// SETTING OPERATORS + +VL_ATTR_ALWINLINE +static WDataOutP VL_MEMSET_ZERO_W(WDataOutP owp, int words) VL_MT_SAFE { + return static_cast(std::memset(owp, 0, words * sizeof(EData))); +} +VL_ATTR_ALWINLINE +static WDataOutP VL_MEMSET_ONES_W(WDataOutP owp, int words) VL_MT_SAFE { + return static_cast(std::memset(owp, 0xff, words * sizeof(EData))); +} +VL_ATTR_ALWINLINE +static WDataOutP VL_MEMCPY_W(WDataOutP owp, WDataInP const iwp, int words) VL_MT_SAFE { + return static_cast(std::memcpy(owp, iwp, words * sizeof(EData))); +} + +// Output clean +// EMIT_RULE: VL_CLEAN: oclean=clean; obits=lbits; +#define VL_CLEAN_II(obits, lbits, lhs) ((lhs) & (VL_MASK_I(obits))) +#define VL_CLEAN_QQ(obits, lbits, lhs) ((lhs) & (VL_MASK_Q(obits))) + +// EMIT_RULE: VL_ASSIGNCLEAN: oclean=clean; obits==lbits; +#define VL_ASSIGNCLEAN_W(obits, owp, lwp) VL_CLEAN_WW((obits), (owp), (lwp)) +static inline WDataOutP _vl_clean_inplace_w(int obits, WDataOutP owp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + owp[words - 1] &= VL_MASK_E(obits); + return owp; +} +static inline WDataOutP VL_CLEAN_WW(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + VL_MEMCPY_W(owp, lwp, words - 1); + owp[words - 1] = lwp[words - 1] & VL_MASK_E(obits); + return owp; +} +static inline WDataOutP VL_ZERO_W(int obits, WDataOutP owp) VL_MT_SAFE { + return VL_MEMSET_ZERO_W(owp, VL_WORDS_I(obits)); +} +static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + VL_MEMSET_ONES_W(owp, words - 1); + owp[words - 1] = VL_MASK_E(obits); + return owp; +} + +// EMIT_RULE: VL_ASSIGN: oclean=rclean; obits==lbits; +// For now, we always have a clean rhs. +// Note: If a ASSIGN isn't clean, use VL_ASSIGNCLEAN instead to do the same thing. 
+static inline WDataOutP VL_ASSIGN_W(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + return VL_MEMCPY_W(owp, lwp, VL_WORDS_I(obits)); +} + +// EMIT_RULE: VL_ASSIGNBIT: rclean=clean; +static inline void VL_ASSIGNBIT_II(int bit, CData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_II(int bit, SData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_II(int bit, IData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_QI(int bit, QData& lhsr, QData rhs) VL_PURE { + lhsr = ((lhsr & ~(1ULL << VL_BITBIT_Q(bit))) | (static_cast(rhs) << VL_BITBIT_Q(bit))); +} +static inline void VL_ASSIGNBIT_WI(int bit, WDataOutP owp, IData rhs) VL_MT_SAFE { + const EData orig = owp[VL_BITWORD_E(bit)]; + owp[VL_BITWORD_E(bit)] = ((orig & ~(VL_EUL(1) << VL_BITBIT_E(bit))) + | (static_cast(rhs) << VL_BITBIT_E(bit))); +} +// Alternative form that is an instruction faster when rhs is constant one. 
+static inline void VL_ASSIGNBIT_IO(int bit, CData& lhsr) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_IO(int bit, SData& lhsr) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_IO(int bit, IData& lhsr) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_QO(int bit, QData& lhsr) VL_PURE { + lhsr = (lhsr | (1ULL << VL_BITBIT_Q(bit))); +} +static inline void VL_ASSIGNBIT_WO(int bit, WDataOutP owp) VL_MT_SAFE { + const EData orig = owp[VL_BITWORD_E(bit)]; + owp[VL_BITWORD_E(bit)] = (orig | (VL_EUL(1) << VL_BITBIT_E(bit))); +} + +//=================================================================== +// SYSTEMC OPERATORS +// Copying verilog format to systemc integers, doubles, and bit vectors. +// Get a SystemC variable + +#define VL_ASSIGN_DSD(obits, vvar, svar) \ + { (vvar) = (svar).read(); } +#define VL_ASSIGN_ISI(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read()); } +#define VL_ASSIGN_QSQ(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read()); } + +#define VL_ASSIGN_ISW(obits, od, svar) \ + { (od) = ((svar).read().get_word(0)) & VL_MASK_I(obits); } +#define VL_ASSIGN_QSW(obits, od, svar) \ + { \ + (od) = ((static_cast((svar).read().get_word(1))) << VL_IDATASIZE \ + | (svar).read().get_word(0)) \ + & VL_MASK_Q(obits); \ + } +#define VL_ASSIGN_WSW(obits, owp, svar) \ + { \ + const int words = VL_WORDS_I(obits); \ + for (int i = 0; i < words; ++i) (owp)[i] = (svar).read().get_word(i); \ + (owp)[words - 1] &= VL_MASK_E(obits); \ + } + +#define VL_ASSIGN_ISU(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read().to_uint()); } +#define VL_ASSIGN_QSU(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read().to_uint64()); } +#define VL_ASSIGN_ISB(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read().to_uint()); } 
+#define VL_ASSIGN_QSB(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read().to_uint64()); } +#define VL_ASSIGN_WSB(obits, owp, svar) \ + { \ + const int words = VL_WORDS_I(obits); \ + sc_dt::sc_biguint<(obits)> _butemp = (svar).read(); \ + uint32_t* chunkp = _butemp.get_raw(); \ + int32_t lsb = 0; \ + while (lsb < obits - BITS_PER_DIGIT) { \ + const uint32_t data = *chunkp; \ + ++chunkp; \ + _vl_insert_WI(owp.data(), data, lsb + BITS_PER_DIGIT - 1, lsb); \ + lsb += BITS_PER_DIGIT; \ + } \ + if (lsb < obits) { \ + const uint32_t msb_data = *chunkp; \ + _vl_insert_WI(owp.data(), msb_data, obits - 1, lsb); \ + } \ + (owp)[words - 1] &= VL_MASK_E(obits); \ + } + +// Copying verilog format from systemc integers, doubles, and bit vectors. +// Set a SystemC variable + +#define VL_ASSIGN_SDD(obits, svar, vvar) \ + { (svar).write(vvar); } +#define VL_ASSIGN_SII(obits, svar, vvar) \ + { (svar).write(vvar); } +#define VL_ASSIGN_SQQ(obits, svar, vvar) \ + { (svar).write(vvar); } + +#define VL_ASSIGN_SWI(obits, svar, rd) \ + { \ + sc_dt::sc_bv<(obits)> _bvtemp; \ + _bvtemp.set_word(0, (rd)); \ + (svar).write(_bvtemp); \ + } +#define VL_ASSIGN_SWQ(obits, svar, rd) \ + { \ + sc_dt::sc_bv<(obits)> _bvtemp; \ + _bvtemp.set_word(0, static_cast(rd)); \ + _bvtemp.set_word(1, static_cast((rd) >> VL_IDATASIZE)); \ + (svar).write(_bvtemp); \ + } +#define VL_ASSIGN_SWW(obits, svar, rwp) \ + { \ + sc_dt::sc_bv<(obits)> _bvtemp; \ + for (int i = 0; i < VL_WORDS_I(obits); ++i) _bvtemp.set_word(i, (rwp)[i]); \ + (svar).write(_bvtemp); \ + } + +#define VL_ASSIGN_SUI(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SUQ(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBI(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBQ(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBW(obits, svar, rwp) \ + { \ + sc_dt::sc_biguint<(obits)> _butemp; \ + int32_t lsb = 0; \ + uint32_t* chunkp = _butemp.get_raw(); \ + while (lsb + 
BITS_PER_DIGIT < (obits)) { \ + static_assert(std::is_same::value, "IData and EData mismatch"); \ + const uint32_t data \ + = VL_SEL_IWII(lsb + BITS_PER_DIGIT + 1, (rwp).data(), lsb, BITS_PER_DIGIT); \ + *chunkp = data & VL_MASK_E(BITS_PER_DIGIT); \ + ++chunkp; \ + lsb += BITS_PER_DIGIT; \ + } \ + if (lsb < (obits)) { \ + const uint32_t msb_data = VL_SEL_IWII((obits) + 1, (rwp).data(), lsb, (obits) - lsb); \ + *chunkp = msb_data & VL_MASK_E((obits) - lsb); \ + } \ + _butemp.set(0, *(rwp).data() & 1); /* force update the sign */ \ + (svar).write(_butemp); \ + } + +//=================================================================== +// Extending sizes + +// CAREFUL, we're width changing, so obits!=lbits + +// Right must be clean because otherwise size increase would pick up bad bits +// EMIT_RULE: VL_EXTEND: oclean=clean; rclean==clean; +#define VL_EXTEND_II(obits, lbits, lhs) ((lhs)) +#define VL_EXTEND_QI(obits, lbits, lhs) (static_cast(lhs)) +#define VL_EXTEND_QQ(obits, lbits, lhs) ((lhs)) + +static inline WDataOutP VL_EXTEND_WI(int obits, int, WDataOutP owp, IData ld) VL_MT_SAFE { + // Note for extracts that obits != lbits + owp[0] = ld; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + return owp; +} +static inline WDataOutP VL_EXTEND_WQ(int obits, int, WDataOutP owp, QData ld) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + return owp; +} +static inline WDataOutP VL_EXTEND_WW(int obits, int lbits, WDataOutP owp, + WDataInP const lwp) VL_MT_SAFE { + const int lwords = VL_WORDS_I(lbits); + VL_PREFETCH_RD(lwp); + VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords); + return VL_MEMCPY_W(owp, lwp, lwords); +} + +// EMIT_RULE: VL_EXTENDS: oclean=*dirty*; obits=lbits; +// Sign extension; output dirty +static inline IData VL_EXTENDS_II(int, int lbits, IData lhs) VL_PURE { + return VL_EXTENDSIGN_I(lbits, lhs) | lhs; +} +static inline QData VL_EXTENDS_QI(int, int lbits, QData lhs 
/*Q_as_need_extended*/) VL_PURE { + return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; +} +static inline QData VL_EXTENDS_QQ(int, int lbits, QData lhs) VL_PURE { + return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; +} + +static inline WDataOutP VL_EXTENDS_WI(int obits, int lbits, WDataOutP owp, IData ld) VL_MT_SAFE { + owp[0] = ld; + if (VL_SIGN_E(lbits, owp[0])) { + owp[0] |= ~VL_MASK_E(lbits); + VL_MEMSET_ONES_W(owp + 1, VL_WORDS_I(obits) - 1); + } else { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + } + return owp; +} +static inline WDataOutP VL_EXTENDS_WQ(int obits, int lbits, WDataOutP owp, QData ld) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + if (VL_SIGN_E(lbits, owp[1])) { + owp[1] |= ~VL_MASK_E(lbits); + VL_MEMSET_ONES_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + } else { + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + } + return owp; +} +static inline WDataOutP VL_EXTENDS_WW(int obits, int lbits, WDataOutP owp, + WDataInP const lwp) VL_MT_SAFE { + const int lwords = VL_WORDS_I(lbits); + VL_PREFETCH_RD(lwp); + owp[lwords - 1] = lwp[lwords - 1]; + if (VL_SIGN_E(lbits, lwp[lwords - 1])) { + owp[lwords - 1] |= ~VL_MASK_E(lbits); + VL_MEMSET_ONES_W(owp + lwords, VL_WORDS_I(obits) - lwords); + } else { + VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords); + } + return VL_MEMCPY_W(owp, lwp, lwords - 1); +} + +//=================================================================== +// REDUCTION OPERATORS + +// EMIT_RULE: VL_REDAND: oclean=clean; lclean==clean; obits=1; +#define VL_REDAND_II(lbits, lhs) ((lhs) == VL_MASK_I(lbits)) +#define VL_REDAND_IQ(lbits, lhs) ((lhs) == VL_MASK_Q(lbits)) +static inline IData VL_REDAND_IW(int lbits, WDataInP const lwp) VL_PURE { + const int words = VL_WORDS_I(lbits); + EData combine = lwp[0]; + for (int i = 1; i < words - 1; ++i) combine &= lwp[i]; + combine &= ~VL_MASK_E(lbits) | lwp[words - 1]; + // cppcheck-suppress knownConditionTrueFalse + return ((~combine) == 0); +} + +// EMIT_RULE: 
VL_REDOR: oclean=clean; lclean==clean; obits=1; +#define VL_REDOR_I(lhs) ((lhs) != 0) +#define VL_REDOR_Q(lhs) ((lhs) != 0) +static inline IData VL_REDOR_W(int words, WDataInP const lwp) VL_PURE { + EData equal = 0; + for (int i = 0; i < words; ++i) equal |= lwp[i]; + return (equal != 0); +} + +// EMIT_RULE: VL_REDXOR: oclean=dirty; obits=1; +static inline IData VL_REDXOR_2(IData r) VL_PURE { + // Experiments show VL_REDXOR_2 is faster than __builtin_parityl + r = (r ^ (r >> 1)); + return r; +} +static inline IData VL_REDXOR_4(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + return r; +#endif +} +static inline IData VL_REDXOR_8(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + return r; +#endif +} +static inline IData VL_REDXOR_16(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + return r; +#endif +} +static inline IData VL_REDXOR_32(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + r = (r ^ (r >> 16)); + return r; +#endif +} +static inline IData VL_REDXOR_64(QData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityll(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + r = (r ^ (r >> 16)); + r = (r ^ (r >> 32)); + return static_cast(r); +#endif +} +static inline IData VL_REDXOR_W(int words, WDataInP const lwp) VL_PURE { + EData r = lwp[0]; + for (int i = 1; i < words; 
++i) r ^= lwp[i]; + return VL_REDXOR_32(r); +} + +// EMIT_RULE: VL_COUNTONES_II: oclean = false; lhs clean +static inline IData VL_COUNTONES_I(IData lhs) VL_PURE { + // This is faster than __builtin_popcountl + IData r = lhs - ((lhs >> 1) & 033333333333) - ((lhs >> 2) & 011111111111); + r = (r + (r >> 3)) & 030707070707; + r = (r + (r >> 6)); + r = (r + (r >> 12) + (r >> 24)) & 077; + return r; +} +static inline IData VL_COUNTONES_Q(QData lhs) VL_PURE { + return VL_COUNTONES_I(static_cast(lhs)) + VL_COUNTONES_I(static_cast(lhs >> 32)); +} +#define VL_COUNTONES_E VL_COUNTONES_I +static inline IData VL_COUNTONES_W(int words, WDataInP const lwp) VL_PURE { + EData r = 0; + for (int i = 0; i < words; ++i) r += VL_COUNTONES_E(lwp[i]); + return r; +} + +// EMIT_RULE: VL_COUNTBITS_II: oclean = false; lhs clean +static inline IData VL_COUNTBITS_I(int lbits, IData lhs, IData ctrl0, IData ctrl1, + IData ctrl2) VL_PURE { + const int ctrlSum = (ctrl0 & 0x1) + (ctrl1 & 0x1) + (ctrl2 & 0x1); + if (ctrlSum == 3) { + return VL_COUNTONES_I(lhs); + } else if (ctrlSum == 0) { + const IData mask = (lbits == 32) ? -1 : ((1 << lbits) - 1); + return VL_COUNTONES_I(~lhs & mask); + } else { + return (lbits == 32) ? 
32 : lbits; + } +} +static inline IData VL_COUNTBITS_Q(int lbits, QData lhs, IData ctrl0, IData ctrl1, + IData ctrl2) VL_PURE { + return VL_COUNTBITS_I(32, static_cast(lhs), ctrl0, ctrl1, ctrl2) + + VL_COUNTBITS_I(lbits - 32, static_cast(lhs >> 32), ctrl0, ctrl1, ctrl2); +} +#define VL_COUNTBITS_E VL_COUNTBITS_I +static inline IData VL_COUNTBITS_W(int lbits, int words, WDataInP const lwp, IData ctrl0, + IData ctrl1, IData ctrl2) VL_MT_SAFE { + EData r = 0; + IData wordLbits = 32; + for (int i = 0; i < words; ++i) { + if (i == words - 1) wordLbits = lbits % 32; + r += VL_COUNTBITS_E(wordLbits, lwp[i], ctrl0, ctrl1, ctrl2); + } + return r; +} + +static inline IData VL_ONEHOT_I(IData lhs) VL_PURE { + return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); +} +static inline IData VL_ONEHOT_Q(QData lhs) VL_PURE { + return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); +} +static inline IData VL_ONEHOT_W(int words, WDataInP const lwp) VL_PURE { + EData one = 0; + for (int i = 0; (i < words); ++i) { + if (lwp[i]) { + if (one) return 0; + one = 1; + if (lwp[i] & (lwp[i] - 1)) return 0; + } + } + return one; +} + +static inline IData VL_ONEHOT0_I(IData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } +static inline IData VL_ONEHOT0_Q(QData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } +static inline IData VL_ONEHOT0_W(int words, WDataInP const lwp) VL_PURE { + bool one = false; + for (int i = 0; (i < words); ++i) { + if (lwp[i]) { + if (one) return 0; + one = true; + if (lwp[i] & (lwp[i] - 1)) return 0; + } + } + return 1; +} + +static inline IData VL_CLOG2_I(IData lhs) VL_PURE { + // There are faster algorithms, or fls GCC4 builtins, but rarely used + // In C++20 there will be std::bit_width(lhs) - 1 + if (VL_UNLIKELY(!lhs)) return 0; + --lhs; + int shifts = 0; + for (; lhs != 0; ++shifts) lhs = lhs >> 1; + return shifts; +} +static inline IData VL_CLOG2_Q(QData lhs) VL_PURE { + if (VL_UNLIKELY(!lhs)) return 0; + --lhs; + int shifts = 0; + for (; lhs != 0; ++shifts) lhs = lhs >> 
1ULL; + return shifts; +} +static inline IData VL_CLOG2_W(int words, WDataInP const lwp) VL_PURE { + const EData adjust = (VL_COUNTONES_W(words, lwp) == 1) ? 0 : 1; + for (int i = words - 1; i >= 0; --i) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken + for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { + if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) { + return i * VL_EDATASIZE + bit + adjust; + } + } + // Can't get here - one bit must be set + } + } + return 0; +} + +static inline IData VL_MOSTSETBITP1_W(int words, WDataInP const lwp) VL_PURE { + // MSB set bit plus one; similar to FLS. 0=value is zero + for (int i = words - 1; i >= 0; --i) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken + for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { + if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) return i * VL_EDATASIZE + bit + 1; + } + // Can't get here - one bit must be set + } + } + return 0; +} + +//=================================================================== +// SIMPLE LOGICAL OPERATORS + +// EMIT_RULE: VL_AND: oclean=lclean||rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_AND_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] & rwp[i]); + return owp; +} +// EMIT_RULE: VL_OR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_OR_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] | rwp[i]); + return owp; +} +// EMIT_RULE: VL_CHANGEXOR: oclean=1; obits=32; lbits==rbits; +static inline IData VL_CHANGEXOR_W(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { + IData od = 0; + for (int i = 0; (i < words); ++i) od |= (lwp[i] ^ rwp[i]); + return od; +} +// EMIT_RULE: VL_XOR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_XOR_W(int words, WDataOutP owp, WDataInP const 
lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] ^ rwp[i]); + return owp; +} +// EMIT_RULE: VL_NOT: oclean=dirty; obits=lbits; +static inline WDataOutP VL_NOT_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + for (int i = 0; i < words; ++i) owp[i] = ~(lwp[i]); + return owp; +} + +//========================================================================= +// FOUR-STATE LOGICAL OPERATORS (X/Z support) +// For four-state: 00=0, 01=1, 10=X, 11=Z + +// Four-state AND: X & anything = X, Z & anything = X, 0 & anything = 0, 1 & anything = anything +static inline uint8_t VL_AND_4STATE(uint8_t lhs, uint8_t rhs) { + const uint8_t lval = lhs & 3; + const uint8_t rval = rhs & 3; + // X & anything = X + if (lval == 2 || rval == 2) return 2; // X + // Z & anything = X + if (lval == 3 || rval == 3) return 2; // X + // 0 & anything = 0 + if (lval == 0 || rval == 0) return 0; // 0 + // 1 & anything = anything + return rval; +} + +// Four-state OR +static inline uint8_t VL_OR_4STATE(uint8_t lhs, uint8_t rhs) { + const uint8_t lval = lhs & 3; + const uint8_t rval = rhs & 3; + // X | anything = X + if (lval == 2 || rval == 2) return 2; // X + // Z | anything = X + if (lval == 3 || rval == 3) return 2; // X + // 1 | anything = 1 + if (lval == 1 || rval == 1) return 1; // 1 + // 0 | anything = anything + return rval; +} + +// Four-state XOR +static inline uint8_t VL_XOR_4STATE(uint8_t lhs, uint8_t rhs) { + const uint8_t lval = lhs & 3; + const uint8_t rval = rhs & 3; + // X ^ anything = X + if (lval == 2 || rval == 2) return 2; // X + // Z ^ anything = X + if (lval == 3 || rval == 3) return 2; // X + // Otherwise XOR the clean values + return (lval ^ rval); +} + +// Four-state NOT +static inline uint8_t VL_NOT_4STATE(uint8_t lhs) { + const uint8_t lval = lhs & 3; + if (lval == 2) return 2; // X -> X + if (lval == 3) return 2; // Z -> X + return lval ^ 1; // 0 -> 1, 1 -> 0 +} + +// Four-state byte operations +static inline CData4 
VL_AND_4STATE_C(CData4 lhs, CData4 rhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline CData4 VL_OR_4STATE_C(CData4 lhs, CData4 rhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline CData4 VL_XOR_4STATE_C(CData4 lhs, CData4 rhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline CData4 VL_NOT_4STATE_C(CData4 lhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (res << (i * 2)); + } + return result; +} + +// Four-state SData (8-bit) operations +static inline SData4 VL_AND_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline SData4 VL_OR_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline SData4 VL_XOR_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline SData4 
VL_NOT_4STATE_S(SData4 lhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (res << (i * 2)); + } + return result; +} + +// Four-state IData (16-bit) operations +static inline IData4 VL_AND_4STATE_I(IData4 lhs, IData4 rhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline IData4 VL_OR_4STATE_I(IData4 lhs, IData4 rhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline IData4 VL_XOR_4STATE_I(IData4 lhs, IData4 rhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline IData4 VL_NOT_4STATE_I(IData4 lhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (res << (i * 2)); + } + return result; +} + +// Four-state QData (32-bit) operations +static inline QData4 VL_AND_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +static inline QData4 VL_OR_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +static inline 
QData4 VL_XOR_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +static inline QData4 VL_NOT_4STATE_Q(QData4 lhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +//========================================================================= +// FOUR-STATE COMPARISONS +// For four-state: any X or Z in comparison returns X (unknown) + +// Helper functions for checking X/Z bits +static inline bool _vl4_anyXZ_C(CData4 data) { + return (data & 0xAAAAAAAA) != 0; // Any bit with 0b10 (X) or 0b11 (Z) +} +static inline bool _vl4_anyXZ_S(SData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_I(IData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_Q(QData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} + +// Four-state EQ: returns true if equal and both operands are deterministic +static inline bool VL_EQ_4STATE_C(CData4 lhs, CData4 rhs) { + if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) return false; + return (lhs & 0x55555555) == (rhs & 0x55555555); // Mask to get lower bit only +} + +static inline bool VL_EQ_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) return false; + return (lhs & 
0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +// Four-state NEQ +static inline bool VL_NEQ_4STATE_C(CData4 lhs, CData4 rhs) { + return !VL_EQ_4STATE_C(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_S(SData4 lhs, SData4 rhs) { + return !VL_EQ_4STATE_S(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_I(IData4 lhs, IData4 rhs) { + return !VL_EQ_4STATE_I(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_Q(QData4 lhs, QData4 rhs) { + return !VL_EQ_4STATE_Q(lhs, rhs); +} + +static inline bool VL_NEQ_4STATE_S(SData4 lhs, SData4 rhs) { + return !VL_EQ_4STATE_S(lhs, rhs); +} + +static inline bool VL_NEQ_4STATE_I(IData4 lhs, IData4 rhs) { + return !VL_EQ_4STATE_I(lhs, rhs); +} + +static inline bool VL_NEQ_4STATE_Q(QData4 lhs, QData4 rhs) { + return !VL_EQ_4STATE_Q(lhs, rhs); +} + 
+//========================================================================= +// Logical comparisons + +// EMIT_RULE: VL_EQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_NEQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_LT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_GT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_GTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_LTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +#define VL_NEQ_W(words, lwp, rwp) (!VL_EQ_W(words, lwp, rwp)) +#define VL_LT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) < 0) +#define VL_LTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) <= 0) +#define VL_GT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) > 0) +#define VL_GTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) >= 0) + +// Output clean, AND MUST BE CLEAN +static inline IData VL_EQ_W(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { + EData nequal = 0; + for (int i = 0; (i < words); ++i) nequal |= (lwp[i] ^ rwp[i]); + return (nequal == 0); +} + +// Internal usage +static inline int _vl_cmp_w(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { + for (int i = words - 1; i >= 0; --i) { + if (lwp[i] > rwp[i]) return 1; + if (lwp[i] < rwp[i]) return -1; + } + return 0; // == +} + +#define VL_LTS_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) < 0) +#define VL_LTES_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) <= 0) +#define VL_GTS_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) > 0) +#define VL_GTES_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) >= 0) + +static inline IData VL_GTS_III(int lbits, IData lhs, IData rhs) VL_PURE { + // For lbits==32, this becomes just a single instruction, otherwise ~5. 
+ // GCC 3.3.4 sign extension bugs on AMD64 architecture force us to use quad logic + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed > rhs_signed; +} +static inline IData VL_GTS_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed > rhs_signed; +} + +static inline IData VL_GTES_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed >= rhs_signed; +} +static inline IData VL_GTES_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed >= rhs_signed; +} + +static inline IData VL_LTS_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed < rhs_signed; +} +static inline IData VL_LTS_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed < rhs_signed; +} + +static inline IData VL_LTES_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed <= rhs_signed; +} +static inline IData VL_LTES_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed <= rhs_signed; +} + +static inline 
int _vl_cmps_w(int lbits, WDataInP const lwp, WDataInP const rwp) VL_PURE { + const int words = VL_WORDS_I(lbits); + int i = words - 1; + // We need to flip sense if negative comparison + const EData lsign = VL_SIGN_E(lbits, lwp[i]); + const EData rsign = VL_SIGN_E(lbits, rwp[i]); + if (!lsign && rsign) return 1; // + > - + if (lsign && !rsign) return -1; // - < + + for (; i >= 0; --i) { + if (lwp[i] > rwp[i]) return 1; + if (lwp[i] < rwp[i]) return -1; + } + return 0; // == +} + +//========================================================================= +// Expressions + +// Output NOT clean +static inline WDataOutP VL_NEGATE_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + EData carry = 1; + for (int i = 0; i < words; ++i) { + owp[i] = ~lwp[i] + carry; + carry = (owp[i] < ~lwp[i]); + } + return owp; +} +static inline void VL_NEGATE_INPLACE_W(int words, WDataOutP owp_lwp) VL_MT_SAFE { + EData carry = 1; + for (int i = 0; i < words; ++i) { + const EData word = ~owp_lwp[i] + carry; + carry = (word < ~owp_lwp[i]); + owp_lwp[i] = word; + } +} + +// EMIT_RULE: VL_MUL: oclean=dirty; lclean==clean; rclean==clean; +// EMIT_RULE: VL_DIV: oclean=dirty; lclean==clean; rclean==clean; +// EMIT_RULE: VL_MODDIV: oclean=dirty; lclean==clean; rclean==clean; +static inline IData VL_DIV_III(int lbits, IData lhs, IData rhs) { + return (rhs == 0) ? 0 : lhs / rhs; +} +static inline QData VL_DIV_QQQ(int lbits, QData lhs, QData rhs) { + return (rhs == 0) ? 0 : lhs / rhs; +} +#define VL_DIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 0)) +static inline IData VL_MODDIV_III(int lbits, IData lhs, IData rhs) { + return (rhs == 0) ? 0 : lhs % rhs; +} +static inline QData VL_MODDIV_QQQ(int lbits, QData lhs, QData rhs) { + return (rhs == 0) ? 
0 : lhs % rhs; +} +#define VL_MODDIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 1)) + +static inline WDataOutP VL_ADD_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = carry + static_cast(lwp[i]) + static_cast(rwp[i]); + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Last output word is dirty + return owp; +} + +static inline WDataOutP VL_SUB_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = (carry + static_cast(lwp[i]) + + static_cast(static_cast(~rwp[i]))); + if (i == 0) ++carry; // Negation of rwp + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Last output word is dirty + return owp; +} + +static inline WDataOutP VL_MUL_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; i < words; ++i) owp[i] = 0; + for (int lword = 0; lword < words; ++lword) { + for (int rword = 0; rword < words; ++rword) { + QData mul = static_cast(lwp[lword]) * static_cast(rwp[rword]); + for (int qword = lword + rword; qword < words; ++qword) { + mul += static_cast(owp[qword]); + owp[qword] = (mul & 0xffffffffULL); + mul = (mul >> 32ULL) & 0xffffffffULL; + } + } + } + // Last output word is dirty + return owp; +} + +static inline IData VL_MULS_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int32_t lhs_signed = VL_EXTENDS_II(32, lbits, lhs); + const int32_t rhs_signed = VL_EXTENDS_II(32, lbits, rhs); + return lhs_signed * rhs_signed; +} +static inline QData VL_MULS_QQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed * rhs_signed; +} + +static inline WDataOutP VL_MULS_WWW(int lbits, WDataOutP owp, WDataInP const 
lwp, + WDataInP const rwp) VL_MT_SAFE { + const int words = VL_WORDS_I(lbits); + VL_DEBUG_IFDEF(assert(words <= VL_MULS_MAX_WORDS);); + // cppcheck-suppress variableScope + WData lwstore[VL_MULS_MAX_WORDS]; // Fixed size, as MSVC++ doesn't allow [words] here + // cppcheck-suppress variableScope + WData rwstore[VL_MULS_MAX_WORDS]; + WDataInP lwusp = lwp; + WDataInP rwusp = rwp; + const EData lneg = VL_SIGN_E(lbits, lwp[words - 1]); + if (lneg) { // Negate lhs + lwusp = lwstore; + VL_NEGATE_W(words, lwstore, lwp); + lwstore[words - 1] &= VL_MASK_E(lbits); // Clean it + } + const EData rneg = VL_SIGN_E(lbits, rwp[words - 1]); + if (rneg) { // Negate rhs + rwusp = rwstore; + VL_NEGATE_W(words, rwstore, rwp); + rwstore[words - 1] &= VL_MASK_E(lbits); // Clean it + } + VL_MUL_W(words, owp, lwusp, rwusp); + owp[words - 1] &= VL_MASK_E( + lbits); // Clean. Note it's ok for the multiply to overflow into the sign bit + if ((lneg ^ rneg) & 1) { // Negate output (not using NEGATE, as owp==lwp) + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = carry + static_cast(static_cast(~owp[i])); + if (i == 0) ++carry; // Negation of temp2 + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Not needed: owp[words-1] |= 1<= 2; // 2=X, 3=Z +} + +// Helper: Check if any bit in a four-state value is X or Z + +// Four-state ADD: if any operand has X/Z, result is X +static inline CData4 VL_ADD_4STATE_C(CData4 lhs, CData4 rhs) { + if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) { + return 0xAAAAAAAA; // All X (2 in each nibble = 0b10101010) + } + // Extract clean values and add + CData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= ((sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline SData4 VL_ADD_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) 
{ + return 0xAAAAAAAAAAAAAAAALL; // All X + } + SData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + return false; +} + + return false; +} + + + +// Four-state ADD: if any operand has X/Z, result is X +static inline CData4 VL_ADD_4STATE_C(CData4 lhs, CData4 rhs) { + if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) { + return 0xAAAAAAAA; // All X (2 in each nibble = 0b10101010) + } + // Extract clean values and add + CData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= ((sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline SData4 VL_ADD_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + SData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline IData4 VL_ADD_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + IData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline QData4 VL_ADD_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + QData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 
32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +// Four-state SUB +static inline CData4 VL_SUB_4STATE_C(CData4 lhs, CData4 rhs) { + if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) { + return 0xAAAAAAAA; // All X + } + return lhs - rhs; +} +static inline SData4 VL_SUB_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + return lhs - rhs; +} +static inline IData4 VL_SUB_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + return lhs - rhs; +} +static inline QData4 VL_SUB_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + return lhs - rhs; +} + CData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + +static inline SData4 VL_SUB_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; + } + SData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + +static inline IData4 VL_SUB_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; + } + IData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) 
& 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + +static inline QData4 VL_SUB_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; + } + QData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + +#define VL_POW_IIQ(obits, lbits, rbits, lhs, rhs) VL_POW_QQQ(obits, lbits, rbits, lhs, rhs) +#define VL_POW_IIW(obits, lbits, rbits, lhs, rwp) VL_POW_QQW(obits, lbits, rbits, lhs, rwp) +#define VL_POW_QQI(obits, lbits, rbits, lhs, rhs) VL_POW_QQQ(obits, lbits, rbits, lhs, rhs) +#define VL_POW_WWI(obits, lbits, rbits, owp, lwp, rhs) \ + VL_POW_WWQ(obits, lbits, rbits, owp, lwp, rhs) + +static inline IData VL_POW_III(int, int, int rbits, IData lhs, IData rhs) VL_PURE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (VL_UNLIKELY(lhs == 0)) return 0; + IData power = lhs; + IData out = 1; + for (int i = 0; i < rbits; ++i) { + if (i > 0) power = power * power; + if (rhs & (1ULL << i)) out *= power; + } + return out; +} +static inline QData VL_POW_QQQ(int, int, int rbits, QData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (VL_UNLIKELY(lhs == 0)) return 0; + QData power = lhs; + QData out = 1ULL; + for (int i = 0; i < rbits; ++i) { + if (i > 0) power = power * power; + if (rhs & (1ULL << i)) out *= power; + } + return out; +} +WDataOutP VL_POW_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE; +WDataOutP VL_POW_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + QData rhs) VL_MT_SAFE; +QData 
VL_POW_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp) VL_MT_SAFE; + +#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_IIW(obits, lbits, rbits, lhs, rwp, lsign, rsign) \ + VL_POWSS_QQW(obits, lbits, rbits, lhs, rwp, lsign, rsign) +#define VL_POWSS_QQI(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_WWI(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) \ + VL_POWSS_WWQ(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) + +static inline IData VL_POWSS_III(int obits, int, int rbits, IData lhs, IData rhs, bool lsign, + bool rsign) VL_MT_SAFE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (rsign && VL_SIGN_I(rbits, rhs)) { + if (lhs == 0) { + return 0; // "X" + } else if (lhs == 1) { + return 1; + } else if (lsign && lhs == VL_MASK_I(obits)) { // -1 + if (rhs & 1) { + return VL_MASK_I(obits); // -1^odd=-1 + } else { + return 1; // -1^even=1 + } + } + return 0; + } + return VL_POW_III(obits, rbits, rbits, lhs, rhs); +} +static inline QData VL_POWSS_QQQ(int obits, int, int rbits, QData lhs, QData rhs, bool lsign, + bool rsign) VL_MT_SAFE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (rsign && VL_SIGN_Q(rbits, rhs)) { + if (lhs == 0) { + return 0; // "X" + } else if (lhs == 1) { + return 1; + } else if (lsign && lhs == VL_MASK_Q(obits)) { // -1 + if (rhs & 1) { + return VL_MASK_Q(obits); // -1^odd=-1 + } else { + return 1; // -1^even=1 + } + } + return 0; + } + return VL_POW_QQQ(obits, rbits, rbits, lhs, rhs); +} +WDataOutP VL_POWSS_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp, bool lsign, bool rsign) VL_MT_SAFE; +WDataOutP VL_POWSS_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, QData rhs, + bool lsign, 
bool rsign) VL_MT_SAFE; +QData VL_POWSS_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp, bool lsign, + bool rsign) VL_MT_SAFE; + +//=================================================================== +// Concat/replication + +// INTERNAL: Stuff LHS bit 0++ into OUTPUT at specified offset +// ld may be "dirty", output is clean +static inline void _vl_insert_II(CData& lhsr, IData ld, int hbit, int lbit, int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_II(SData& lhsr, IData ld, int hbit, int lbit, int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_II(IData& lhsr, IData ld, int hbit, int lbit, int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_QQ(QData& lhsr, QData ld, int hbit, int lbit, int rbits) VL_PURE { + const QData cleanmask = VL_MASK_Q(rbits); + const QData insmask = (VL_MASK_Q(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_WI(WDataOutP iowp, IData ld, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + // Insert value ld into iowp at bit slice [hbit:lbit]. iowp is rbits wide. + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int roffset = VL_BITBIT_E(rbits); + const int hword = VL_BITWORD_E(hbit); + const int lword = VL_BITWORD_E(lbit); + const int rword = VL_BITWORD_E(rbits); + const EData cleanmask = hword == rword ? 
VL_MASK_E(roffset) : VL_MASK_E(0); + + if (hoffset == VL_SIZEBITS_E && loffset == 0) { + // Fast and common case, word based insertion + iowp[lword] = ld & cleanmask; + } else { + const EData lde = static_cast(ld); + if (hword == lword) { // know < EData bits because above checks it + // Assignment is contained within one word of destination + const EData insmask = (VL_MASK_E(hoffset - loffset + 1)) << loffset; + iowp[lword] = (iowp[lword] & ~insmask) | ((lde << loffset) & (insmask & cleanmask)); + } else { + // Assignment crosses a word boundary in destination + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; + const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; + const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword + iowp[lword] = (iowp[lword] & ~linsmask) | ((lde << loffset) & linsmask); + // Prevent unsafe write where lword was final writable location and hword is + // out-of-bounds. + if (VL_LIKELY(!(hword == rword && roffset == 0))) { + iowp[hword] + = (iowp[hword] & ~hinsmask) | ((lde >> nbitsonright) & (hinsmask & cleanmask)); + } + } + } +} + +// Copy bits from lwp[hbit:lbit] to low bits of lhsr. 
rbits is real width of lshr +static inline void _vl_insert_IW(IData& lhsr, WDataInP const lwp, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int hword = VL_BITWORD_E(hbit); + const int lword = VL_BITWORD_E(lbit); + const IData cleanmask = VL_MASK_I(rbits); + if (hword == lword) { + const IData insmask = (VL_MASK_I(hoffset - loffset + 1)); + lhsr = (lhsr & ~insmask) | ((lwp[lword] >> loffset) & (insmask & cleanmask)); + } else { + const int nbitsonright = VL_IDATASIZE - loffset; // bits that filled by lword + const IData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << nbitsonright; + const IData linsmask = VL_MASK_E(VL_EDATASIZE - loffset); + lhsr = (lhsr & ~linsmask) | ((lwp[lword] >> loffset) & (linsmask & cleanmask)); + lhsr = (lhsr & ~hinsmask) | ((lwp[hword] << nbitsonright) & (hinsmask & cleanmask)); + } +} + +// INTERNAL: Stuff large LHS bit 0++ into OUTPUT at specified offset +// lwp may be "dirty" +static inline void _vl_insert_WW(WDataOutP iowp, WDataInP const lwp, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int roffset = VL_BITBIT_E(rbits); + const int lword = VL_BITWORD_E(lbit); + const int hword = VL_BITWORD_E(hbit); + const int rword = VL_BITWORD_E(rbits); + const int words = VL_WORDS_I(hbit - lbit + 1); + // Cleaning mask, only applied to top word of the assignment. Is a no-op + // if we don't assign to the top word of the destination. + const EData cleanmask = hword == rword ? 
VL_MASK_E(roffset) : VL_MASK_E(0); + + if (hoffset == VL_SIZEBITS_E && loffset == 0) { + // Fast and common case, word based insertion + for (int i = 0; i < (words - 1); ++i) iowp[lword + i] = lwp[i]; + iowp[hword] = lwp[words - 1] & cleanmask; + } else if (loffset == 0) { + // Non-32bit, but nicely aligned, so stuff all but the last word + for (int i = 0; i < (words - 1); ++i) iowp[lword + i] = lwp[i]; + // Know it's not a full word as above fast case handled it + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)); + iowp[hword] = (iowp[hword] & ~hinsmask) | (lwp[words - 1] & (hinsmask & cleanmask)); + } else { + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; + const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; + const int nbitsonright + = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) + // Middle words + for (int i = 0; i < words; ++i) { + { // Lower word + const int oword = lword + i; + const EData d = lwp[i] << loffset; + const EData od = (iowp[oword] & ~linsmask) | (d & linsmask); + if (oword == hword) { + iowp[oword] = (iowp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); + } else { + iowp[oword] = od; + } + } + { // Upper word + const int oword = lword + i + 1; + if (oword <= hword) { + const EData d = lwp[i] >> nbitsonright; + const EData od = (d & ~linsmask) | (iowp[oword] & linsmask); + if (oword == hword) { + iowp[oword] = (iowp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); + } else { + iowp[oword] = od; + } + } + } + } + } +} + +static inline void _vl_insert_WQ(WDataOutP iowp, QData ld, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + VlWide lwp; + VL_SET_WQ(lwp, ld); + _vl_insert_WW(iowp, lwp, hbit, lbit, rbits); +} + +// EMIT_RULE: VL_REPLICATE: oclean=clean>width32, dirty<=width32; lclean=clean; rclean==clean; +// RHS MUST BE CLEAN CONSTANT. 
+#define VL_REPLICATE_IOI(lbits, ld, rep) (-(ld)) // Iff lbits==1 +#define VL_REPLICATE_QOI(lbits, ld, rep) (-(static_cast(ld))) // Iff lbits==1 + +static inline IData VL_REPLICATE_III(int lbits, IData ld, IData rep) VL_PURE { + IData returndata = ld; + for (unsigned i = 1; i < rep; ++i) { + returndata = returndata << lbits; + returndata |= ld; + } + return returndata; +} +static inline QData VL_REPLICATE_QII(int lbits, IData ld, IData rep) VL_PURE { + QData returndata = ld; + for (unsigned i = 1; i < rep; ++i) { + returndata = returndata << lbits; + returndata |= static_cast(ld); + } + return returndata; +} +static inline WDataOutP VL_REPLICATE_WII(int lbits, WDataOutP owp, IData ld, + IData rep) VL_MT_SAFE { + owp[0] = ld; + // Zeroing all words isn't strictly needed but allows compiler to know + // it does not need to preserve data in word(s) not being written + for (unsigned i = 1; i < VL_WORDS_I(static_cast(lbits) * rep); ++i) owp[i] = 0; + for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WI(owp, ld, i * lbits + lbits - 1, i * lbits); + } + return owp; +} +static inline WDataOutP VL_REPLICATE_WQI(int lbits, WDataOutP owp, QData ld, + IData rep) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + // Zeroing all words isn't strictly needed but allows compiler to know + // it does not need to preserve data in word(s) not being written + for (unsigned i = 2; i < VL_WORDS_I(static_cast(lbits) * rep); ++i) owp[i] = 0; + for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WQ(owp, ld, i * lbits + lbits - 1, i * lbits); + } + return owp; +} +static inline WDataOutP VL_REPLICATE_WWI(int lbits, WDataOutP owp, WDataInP const lwp, + IData rep) VL_MT_SAFE { + for (unsigned i = 0; i < VL_WORDS_I(static_cast(lbits)); ++i) owp[i] = lwp[i]; + // Zeroing all words isn't strictly needed but allows compiler to know + // it does not need to preserve data in word(s) not being written + for (unsigned i = VL_WORDS_I(static_cast(lbits)); + i < VL_WORDS_I(static_cast(lbits * rep)); ++i) + owp[i] = 0; + 
for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WW(owp, lwp, i * lbits + lbits - 1, i * lbits); + } + return owp; +} + +// Left stream operator. Output will always be clean. LHS and RHS must be clean. +// Special "fast" versions for slice sizes that are a power of 2. These use +// shifts and masks to execute faster than the slower for-loop approach where a +// subset of bits is copied in during each iteration. +static inline IData VL_STREAML_FAST_III(int lbits, IData ld, IData rd_log2) VL_PURE { + // Pre-shift bits in most-significant slice: + // + // If lbits is not a multiple of the slice size (i.e., lbits % rd != 0), + // then we end up with a "gap" in our reversed result. For example, if we + // have a 5-bit Verilog signal (lbits=5) in an 8-bit C data type: + // + // ld = ---43210 + // + // (where numbers are the Verilog signal bit numbers and '-' is an unused bit). + // Executing the switch statement below with a slice size of two (rd=2, + // rd_log2=1) produces: + // + // ret = 1032-400 + // + // Pre-shifting the bits in the most-significant slice allows us to avoid + // this gap in the shuffled data: + // + // ld_adjusted = --4-3210 + // ret = 10324--- + IData ret = ld; + if (rd_log2) { + const uint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); // max multiple of rd <= lbits + const uint32_t lbitsRem = lbits - lbitsFloor; // number of bits in most-sig slice (MSS) + const IData msbMask = lbitsFloor == 32 ? 
0UL : VL_MASK_I(lbitsRem) << lbitsFloor; + ret = (ret & ~msbMask) | ((ret & msbMask) << ((VL_UL(1) << rd_log2) - lbitsRem)); + } + switch (rd_log2) { + case 0: ret = ((ret >> 1) & VL_UL(0x55555555)) | ((ret & VL_UL(0x55555555)) << 1); // FALLTHRU + case 1: ret = ((ret >> 2) & VL_UL(0x33333333)) | ((ret & VL_UL(0x33333333)) << 2); // FALLTHRU + case 2: ret = ((ret >> 4) & VL_UL(0x0f0f0f0f)) | ((ret & VL_UL(0x0f0f0f0f)) << 4); // FALLTHRU + case 3: ret = ((ret >> 8) & VL_UL(0x00ff00ff)) | ((ret & VL_UL(0x00ff00ff)) << 8); // FALLTHRU + case 4: ret = ((ret >> 16) | (ret << 16)); // FALLTHRU + default:; + } + return ret >> (VL_IDATASIZE - lbits); +} + +static inline QData VL_STREAML_FAST_QQI(int lbits, QData ld, IData rd_log2) VL_PURE { + // Pre-shift bits in most-significant slice (see comment in VL_STREAML_FAST_III) + QData ret = ld; + if (rd_log2) { + const uint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); + const uint32_t lbitsRem = lbits - lbitsFloor; + const QData msbMask = lbitsFloor == 64 ? 
0ULL : VL_MASK_Q(lbitsRem) << lbitsFloor; + ret = (ret & ~msbMask) | ((ret & msbMask) << ((1ULL << rd_log2) - lbitsRem)); + } + switch (rd_log2) { + case 0: + ret = (((ret >> 1) & 0x5555555555555555ULL) + | ((ret & 0x5555555555555555ULL) << 1)); // FALLTHRU + case 1: + ret = (((ret >> 2) & 0x3333333333333333ULL) + | ((ret & 0x3333333333333333ULL) << 2)); // FALLTHRU + case 2: + ret = (((ret >> 4) & 0x0f0f0f0f0f0f0f0fULL) + | ((ret & 0x0f0f0f0f0f0f0f0fULL) << 4)); // FALLTHRU + case 3: + ret = (((ret >> 8) & 0x00ff00ff00ff00ffULL) + | ((ret & 0x00ff00ff00ff00ffULL) << 8)); // FALLTHRU + case 4: + ret = (((ret >> 16) & 0x0000ffff0000ffffULL) + | ((ret & 0x0000ffff0000ffffULL) << 16)); // FALLTHRU + case 5: ret = ((ret >> 32) | (ret << 32)); // FALLTHRU + default:; + } + return ret >> (VL_QUADSIZE - lbits); +} + +// Regular "slow" streaming operators +static inline IData VL_STREAML_III(int lbits, IData ld, IData rd) VL_PURE { + IData ret = 0; + // Slice size should never exceed the lhs width + const IData mask = VL_MASK_I(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + return ret; +} + +static inline QData VL_STREAML_QQI(int lbits, QData ld, IData rd) VL_PURE { + QData ret = 0; + // Slice size should never exceed the lhs width + const QData mask = VL_MASK_Q(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + return ret; +} + +static inline WDataOutP VL_STREAML_WWI(int lbits, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + VL_ZERO_W(lbits, owp); + // Slice size should never exceed the lhs width + const int ssize = (rd < static_cast(lbits)) ? rd : (static_cast(lbits)); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? 
ostart : 0; + for (int sbit = 0; sbit < ssize && sbit < lbits - istart; ++sbit) { + // Extract a single bit from lwp and shift it to the correct + // location for owp. + const EData bit = (VL_BITRSHIFT_W(lwp, (istart + sbit)) & 1) + << VL_BITBIT_E(ostart + sbit); + owp[VL_BITWORD_E(ostart + sbit)] |= bit; + } + } + return owp; +} + +static inline IData VL_PACK_I_RI(int obits, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +static inline IData VL_PACK_I_RI(int obits, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +static inline IData VL_PACK_I_RI(int obits, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) ret |= q.at(q.size() - 1 - i) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_UI(int obits, int lbits, const VlUnpacked& q) { + IData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_UI(int obits, int lbits, const VlUnpacked& q) { + IData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_UI(int obits, int lbits, const VlUnpacked& q) { + IData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) ret |= q[N_Depth - 1 - i] << (i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RI(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RI(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << 
(i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RI(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UI(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UI(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UI(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RQ(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) ret |= q.at(q.size() - 1 - i) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UQ(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) ret |= q[N_Depth - 1 - i] << (i * lbits); + return ret; +} + +static inline WDataOutP VL_PACK_W_RI(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WI(owp, q.at(q.size() - i - 1), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +static inline WDataOutP VL_PACK_W_RI(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to 
be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WI(owp, q.at(q.size() - i - 1), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +static inline WDataOutP VL_PACK_W_RI(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WI(owp, q.at(q.size() - 1 - i), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UI(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WI(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UI(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WI(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UI(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WI(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +static inline WDataOutP VL_PACK_W_RQ(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WQ(owp, q.at(q.size() - 1 - i), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return 
owp; +} + +template +static inline WDataOutP VL_PACK_W_UQ(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WQ(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_RW(int obits, int lbits, WDataOutP owp, + const VlQueue>& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WW(owp, q.at(q.size() - 1 - i), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UW(int obits, int lbits, WDataOutP owp, + const VlUnpacked, N_Depth>& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WW(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1 + offset, i * lbits + offset); + return owp; +} + +// Because concats are common and wide, it's valuable to always have a clean output. +// Thus we specify inputs must be clean, so we don't need to clean the output. +// Note the bit shifts are always constants, so the adds in these constify out. 
+// Casts required, as args may be 8 bit entities, and need to shift to appropriate output size +#define VL_CONCAT_III(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QII(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QIQ(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QQI(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QQQ(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) + +static inline WDataOutP VL_CONCAT_WII(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + IData rd) VL_MT_SAFE { + owp[0] = rd; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWI(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, IData rd) VL_MT_SAFE { + owp[0] = rd; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WIW(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + WDataInP const rwp) VL_MT_SAFE { + const int rwords = VL_WORDS_I(rbits); + VL_MEMCPY_W(owp, rwp, rwords); + VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords); + _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WIQ(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQI(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + IData rd) VL_MT_SAFE { + owp[0] = rd; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + _vl_insert_WQ(owp, ld, rbits + lbits - 1, 
rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQQ(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQW(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + WDataInP const rwp) VL_MT_SAFE { + const int rwords = VL_WORDS_I(rbits); + VL_MEMCPY_W(owp, rwp, rwords); + VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords); + _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + const int rwords = VL_WORDS_I(rbits); + VL_MEMCPY_W(owp, rwp, rwords); + VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords); + _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); + return owp; +} + +//=================================================================== +// Shifts + +// Static shift, used by internal functions +// The output is the same as the input - it overlaps! 
+static inline void _vl_shiftl_inplace_w(int obits, WDataOutP iowp, + IData rd /*1 or 4*/) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + const EData linsmask = VL_MASK_E(rd); + for (int i = words - 1; i >= 1; --i) { + iowp[i] + = ((iowp[i] << rd) & ~linsmask) | ((iowp[i - 1] >> (VL_EDATASIZE - rd)) & linsmask); + } + iowp[0] = ((iowp[0] << rd) & ~linsmask); + iowp[VL_WORDS_I(obits) - 1] &= VL_MASK_E(obits); +} + +// EMIT_RULE: VL_SHIFTL: oclean=lclean; rclean==clean; +// Important: Unlike most other funcs, the shift might well be a computed +// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) +// If RHS (rd/rwp) is larger than the output, zeros (or all ones for >>>) must be returned +// (This corresponds to AstShift*Ovr Ast nodes) +static inline IData VL_SHIFTL_III(int obits, int, int, IData lhs, IData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return lhs << rhs; // Small is common so not clean return +} +static inline IData VL_SHIFTL_IIQ(int obits, int, int, IData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return VL_CLEAN_II(obits, obits, lhs << rhs); +} +static inline QData VL_SHIFTL_QQI(int obits, int, int, QData lhs, IData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return lhs << rhs; // Small is common so not clean return +} +static inline QData VL_SHIFTL_QQQ(int obits, int, int, QData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return VL_CLEAN_QQ(obits, obits, lhs << rhs); +} +static inline WDataOutP VL_SHIFTL_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); + const int bit_shift = VL_BITBIT_E(rd); + if (rd >= static_cast(obits)) { // rd may be huge with MSB set + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else if (bit_shift == 0) { // Aligned word shift (<<0,<<32,<<64 etc) + for (int i = 0; i < word_shift; 
++i) owp[i] = 0; + for (int i = word_shift; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i - word_shift]; + } else { + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WW(owp, lwp, obits - 1, rd); + } + return owp; +} +static inline WDataOutP VL_SHIFTL_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return VL_ZERO_W(obits, owp); + } + } + return VL_SHIFTL_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTL_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTL_WWW(obits, lbits, rbits, owp, lwp, rwp); +} +static inline IData VL_SHIFTL_IIW(int obits, int, int rbits, IData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + return VL_SHIFTL_III(obits, obits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTL_QQW(int obits, int, int rbits, QData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + // Above checks rwp[1]==0 so not needed in below shift + return VL_SHIFTL_QQI(obits, obits, 32, lhs, rwp[0]); +} + +// EMIT_RULE: VL_SHIFTR: oclean=lclean; rclean==clean; +// Important: Unlike most other funcs, the shift might well be a computed +// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) 
+static inline IData VL_SHIFTR_III(int obits, int, int, IData lhs, IData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return lhs >> rhs; +} +static inline IData VL_SHIFTR_IIQ(int obits, int, int, IData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return lhs >> rhs; +} +static inline QData VL_SHIFTR_QQI(int obits, int, int, QData lhs, IData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return lhs >> rhs; +} +static inline QData VL_SHIFTR_QQQ(int obits, int, int, QData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return lhs >> rhs; +} +static inline WDataOutP VL_SHIFTR_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); // Maybe 0 + const int bit_shift = VL_BITBIT_E(rd); + if (rd >= static_cast(obits)) { // rd may be huge with MSB set + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) + const int copy_words = (VL_WORDS_I(obits) - word_shift); + for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; + for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else { + const int loffset = rd & VL_SIZEBITS_E; + const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword (know + // loffset!=0) Middle words + const int words = VL_WORDS_I(obits - rd); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int upperword = i + word_shift + 1; + if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; + } + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } + return owp; +} +static inline WDataOutP VL_SHIFTR_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or 
more + return VL_ZERO_W(obits, owp); + } + } + return VL_SHIFTR_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTR_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTR_WWW(obits, lbits, rbits, owp, lwp, rwp); +} + +static inline IData VL_SHIFTR_IIW(int obits, int, int rbits, IData lhs, + WDataInP const rwp) VL_PURE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) return 0; // Huge shift 1>>32 or more + } + return VL_SHIFTR_III(obits, obits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTR_QQW(int obits, int, int rbits, QData lhs, + WDataInP const rwp) VL_PURE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) return 0; // Huge shift 1>>32 or more + } + return VL_SHIFTR_QQI(obits, obits, 32, lhs, rwp[0]); +} + +// EMIT_RULE: VL_SHIFTRS: oclean=false; lclean=clean, rclean==clean; +static inline IData VL_SHIFTRS_III(int obits, int lbits, int, IData lhs, IData rhs) VL_PURE { + // Note the C standard does not specify the >> operator as a arithmetic shift! + // IEEE says signed if output signed, but bit position from lbits; + // must use lbits for sign; lbits might != obits, + // an EXTEND(SHIFTRS(...)) can became a SHIFTRS(...) 
within same 32/64 bit word length + const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return sign & VL_MASK_I(obits); + const IData signext = ~(VL_MASK_I(lbits) >> rhs); // One with bits where we've shifted "past" + return (lhs >> rhs) | (sign & VL_CLEAN_II(obits, obits, signext)); +} +static inline QData VL_SHIFTRS_QQI(int obits, int lbits, int, QData lhs, IData rhs) VL_PURE { + const QData sign = -(lhs >> (lbits - 1)); + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return sign & VL_MASK_Q(obits); + const QData signext = ~(VL_MASK_Q(lbits) >> rhs); + return (lhs >> rhs) | (sign & VL_CLEAN_QQ(obits, obits, signext)); +} +static inline IData VL_SHIFTRS_IQI(int obits, int lbits, int rbits, QData lhs, IData rhs) VL_PURE { + return static_cast(VL_SHIFTRS_QQI(obits, lbits, rbits, lhs, rhs)); +} +static inline WDataOutP VL_SHIFTRS_WWI(int obits, int lbits, int, WDataOutP owp, + WDataInP const lwp, IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); + const int bit_shift = VL_BITBIT_E(rd); + const int lmsw = VL_WORDS_I(obits) - 1; + const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]); + if (rd >= static_cast(obits)) { // Shifting past end, sign in all of lbits + for (int i = 0; i <= lmsw; ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) + const int copy_words = (VL_WORDS_I(obits) - word_shift); + for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; + if (copy_words >= 0) owp[copy_words - 1] |= ~VL_MASK_E(obits) & sign; + for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } else { + const int loffset = rd & VL_SIZEBITS_E; + const int nbitsonright + = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) + // Middle words + const int words = VL_WORDS_I(obits - rd); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int 
upperword = i + word_shift + 1; + if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; + } + if (words) owp[words - 1] |= sign & ~VL_MASK_E(obits - loffset); + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } + return owp; +} +static inline WDataOutP VL_SHIFTRS_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= static_cast(obits))) { + const int owords = VL_WORDS_I(obits); + if (VL_SIGN_E(lbits, lwp[owords - 1])) { + VL_MEMSET_ONES_W(owp, owords); + owp[owords - 1] &= VL_MASK_E(lbits); + } else { + VL_MEMSET_ZERO_W(owp, owords); + } + return owp; + } + return VL_SHIFTRS_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTRS_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTRS_WWW(obits, lbits, rbits, owp, lwp, rwp); +} +static inline IData VL_SHIFTRS_IIW(int obits, int lbits, int rbits, IData lhs, + WDataInP const rwp) VL_PURE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= static_cast(obits))) { + const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + return VL_CLEAN_II(obits, obits, sign); + } + return VL_SHIFTRS_III(obits, lbits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTRS_QQW(int obits, int lbits, int rbits, QData lhs, + WDataInP const rwp) VL_PURE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= static_cast(obits))) { + const QData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + return 
VL_CLEAN_QQ(obits, obits, sign); + } + return VL_SHIFTRS_QQI(obits, lbits, 32, lhs, rwp[0]); +} +static inline IData VL_SHIFTRS_IIQ(int obits, int lbits, int rbits, IData lhs, QData rhs) VL_PURE { + VlWide rwp; + VL_SET_WQ(rwp, rhs); + return VL_SHIFTRS_IIW(obits, lbits, rbits, lhs, rwp); +} +static inline QData VL_SHIFTRS_QQQ(int obits, int lbits, int rbits, QData lhs, QData rhs) VL_PURE { + VlWide rwp; + VL_SET_WQ(rwp, rhs); + return VL_SHIFTRS_QQW(obits, lbits, rbits, lhs, rwp); +} + +//========================================================================= +// FOUR-STATE SHIFT OPERATORS +// For four-state: shift operations preserve X/Z in the shifted bits + +// Four-state left shift: shift in zeros, preserve X/Z pattern +static inline CData4 VL_SHIFTL_4STATE_C(CData4 lhs, int shift) { + if (shift >= 4) return 0; // All shifted out + if (_vl4_anyXZ_C(lhs)) { + // X/Z gets shifted, lower bits become 0 + CData4 result = 0; + for (int i = 0; i < 4 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (val << ((i + shift) * 2)); + } + } + return result; + } + // Clean value shift + return (lhs & 0x55555555) << shift; +} + +static inline SData4 VL_SHIFTL_4STATE_S(SData4 lhs, int shift) { + if (shift >= 8) return 0; + if (_vl4_anyXZ_S(lhs)) { + SData4 result = 0; + for (int i = 0; i < 8 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i + shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) << shift; +} + +static inline IData4 VL_SHIFTL_4STATE_I(IData4 lhs, int shift) { + if (shift >= 16) return 0; + if (_vl4_anyXZ_I(lhs)) { + IData4 result = 0; + for (int i = 0; i < 16 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i + shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) << shift; +} + +static inline QData4 VL_SHIFTL_4STATE_Q(QData4 lhs, int shift) { + if (shift 
>= 32) return 0; + if (_vl4_anyXZ_Q(lhs)) { + QData4 result = 0; + for (int i = 0; i < 32 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i + shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) << shift; +} + +// Four-state right shift +static inline CData4 VL_SHIFTR_4STATE_C(CData4 lhs, int shift) { + if (shift >= 4) return 0; + if (_vl4_anyXZ_C(lhs)) { + CData4 result = 0; + for (int i = shift; i < 4; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i - shift) * 2)); + } + } + return result; + } + return (lhs & 0x55555555) >> shift; +} + +static inline SData4 VL_SHIFTR_4STATE_S(SData4 lhs, int shift) { + if (shift >= 8) return 0; + if (_vl4_anyXZ_S(lhs)) { + SData4 result = 0; + for (int i = shift; i < 8; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i - shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) >> shift; +} + +static inline IData4 VL_SHIFTR_4STATE_I(IData4 lhs, int shift) { + if (shift >= 16) return 0; + if (_vl4_anyXZ_I(lhs)) { + IData4 result = 0; + for (int i = shift; i < 16; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i - shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) >> shift; +} + +static inline QData4 VL_SHIFTR_4STATE_Q(QData4 lhs, int shift) { + if (shift >= 32) return 0; + if (_vl4_anyXZ_Q(lhs)) { + QData4 result = 0; + for (int i = shift; i < 32; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i - shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) >> shift; +} + +//=================================================================== +// Bit selection + +// EMIT_RULE: VL_BITSEL: oclean=dirty; rclean==clean; +#define VL_BITSEL_IIII(lbits, lhs, rhs) ((lhs) >> 
(rhs)) +#define VL_BITSEL_QIII(lbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_QQII(lbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_IQII(lbits, lhs, rhs) (static_cast((lhs) >> (rhs))) + +static inline IData VL_BITSEL_IWII(int lbits, WDataInP const lwp, IData rd) VL_MT_SAFE { + const int word = VL_BITWORD_E(rd); + if (VL_UNLIKELY(rd > static_cast(lbits))) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. + // We return all 1's as that's more likely to find bugs (?) than 0's. + } else { + return (lwp[word] >> VL_BITBIT_E(rd)); + } +} + +// EMIT_RULE: VL_RANGE: oclean=lclean; out=dirty +// & MUST BE CLEAN (currently constant) +#define VL_SEL_IIII(lbits, lhs, lsb, width) ((lhs) >> (lsb)) +#define VL_SEL_QQII(lbits, lhs, lsb, width) ((lhs) >> (lsb)) +#define VL_SEL_IQII(lbits, lhs, lsb, width) (static_cast((lhs) >> (lsb))) + +static inline IData VL_SEL_IWII(int lbits, WDataInP const lwp, IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + if (VL_UNLIKELY(msb >= lbits)) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. + } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { + return VL_BITRSHIFT_W(lwp, lsb); + } else { + // 32 bit extraction may span two words + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); // bits that come from low word + return ((lwp[VL_BITWORD_E(msb)] << nbitsfromlow) | VL_BITRSHIFT_W(lwp, lsb)); + } +} + +static inline QData VL_SEL_QWII(int lbits, WDataInP const lwp, IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + if (VL_UNLIKELY(msb > lbits)) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. 
+ } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { + return VL_BITRSHIFT_W(lwp, lsb); + } else if (VL_BITWORD_E(msb) == 1 + VL_BITWORD_E(static_cast(lsb))) { + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); + const QData hi = (lwp[VL_BITWORD_E(msb)]); + const QData lo = VL_BITRSHIFT_W(lwp, lsb); + return (hi << nbitsfromlow) | lo; + } else { + // 64 bit extraction may span three words + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); + const QData hi = (lwp[VL_BITWORD_E(msb)]); + const QData mid = (lwp[VL_BITWORD_E(lsb) + 1]); + const QData lo = VL_BITRSHIFT_W(lwp, lsb); + return (hi << (nbitsfromlow + VL_EDATASIZE)) | (mid << nbitsfromlow) | lo; + } +} + +static inline WDataOutP VL_SEL_WWII(int obits, int lbits, WDataOutP owp, WDataInP const lwp, + IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + const int word_shift = VL_BITWORD_E(lsb); + if (VL_UNLIKELY(msb > lbits)) { // Outside bounds, + for (int i = 0; i < VL_WORDS_I(obits) - 1; ++i) owp[i] = ~0; + owp[VL_WORDS_I(obits) - 1] = VL_MASK_E(obits); + } else if (VL_BITBIT_E(lsb) == 0) { + // Just a word extract + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i + word_shift]; + } else { + // Not a _vl_insert because the bits come from any bit number and goto bit 0 + const int loffset = lsb & VL_SIZEBITS_E; + const int nbitsfromlow = VL_EDATASIZE - loffset; // bits that end up in lword (know + // loffset!=0) Middle words + const int words = VL_WORDS_I(msb - lsb + 1); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int upperword = i + word_shift + 1; + if (upperword <= static_cast(VL_BITWORD_E(msb))) { + owp[i] |= lwp[upperword] << nbitsfromlow; + } + } + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } + return owp; +} + +template +static inline VlQueue VL_CLONE_Q(const VlQueue& from, int lbits, int srcElementBits, + int dstElementBits) { + VlQueue ret; + VL_COPY_Q(ret, from, lbits, 
srcElementBits, dstElementBits); + return ret; +} + +template +static inline VlQueue VL_REVCLONE_Q(const VlQueue& from, int lbits, int srcElementBits, + int dstElementBits) { + VlQueue ret; + VL_REVCOPY_Q(ret, from, lbits, srcElementBits, dstElementBits); + return ret; +} + +// Helper function to get a bit from a queue at a specific bit index +template +static inline bool VL_GET_QUEUE_BIT(const VlQueue& queue, int srcElementBits, size_t bitIndex) { + const size_t elemIdx = bitIndex / srcElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return false; + + const T element = queue.at(elemIdx); + if (srcElementBits == 1) { + return element & 1; + } else { + const size_t bitInElem = bitIndex % srcElementBits; + const size_t actualBitPos = srcElementBits - 1 - bitInElem; + return (element >> actualBitPos) & 1; + } +} + +// Helper function to set a bit in the destination queue +template +static inline void VL_SET_QUEUE_BIT(VlQueue& queue, int dstElementBits, size_t bitIndex, + bool value) { + if (dstElementBits == 1) { + if (VL_UNLIKELY(bitIndex >= queue.size())) return; + queue.atWrite(bitIndex) = value ? 
1 : 0; + } else { + const size_t elemIdx = bitIndex / dstElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return; + const size_t bitInElem = bitIndex % dstElementBits; + const size_t actualBitPos = dstElementBits - 1 - bitInElem; + if (value) { + queue.atWrite(elemIdx) |= (static_cast(1) << actualBitPos); + } else { + queue.atWrite(elemIdx) &= ~(static_cast(1) << actualBitPos); + } + } +} + +// Helper function to get a bit from a VlWide queue at a specific bit index +template +static inline bool VL_GET_QUEUE_BIT(const VlQueue>& queue, int srcElementBits, + size_t bitIndex) { + const size_t elemIdx = bitIndex / srcElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return false; + + const VlWide& element = queue.at(elemIdx); + const size_t bitInElem = bitIndex % srcElementBits; + const size_t actualBitPos = srcElementBits - 1 - bitInElem; + + return VL_BITISSET_W(element.data(), actualBitPos); +} + +// Helper function to set a bit in a VlWide queue at a specific bit index +template +static inline void VL_SET_QUEUE_BIT(VlQueue>& queue, int dstElementBits, + size_t bitIndex, bool value) { + const size_t elemIdx = bitIndex / dstElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return; + + const size_t bitInElem = bitIndex % dstElementBits; + const size_t actualBitPos = dstElementBits - 1 - bitInElem; + + VlWide& element = queue.atWrite(elemIdx); + if (value) { + VL_ASSIGNBIT_WO(actualBitPos, element.data()); + } else { + VL_ASSIGNBIT_WI(actualBitPos, element.data(), 0); + } +} + +template +static inline void VL_ZERO_INIT_QUEUE_ELEM(T& elem) { + elem = 0; +} + +template +static inline void VL_ZERO_INIT_QUEUE_ELEM(VlWide& elem) { + for (size_t j = 0; j < N_Words; ++j) { elem.at(j) = 0; } +} + +// This specialization works for both VlQueue (and similar) as well +// as VlQueue>. 
+template +static inline void VL_COPY_Q(VlQueue& q, const VlQueue& from, int lbits, int srcElementBits, + int dstElementBits) { + if (srcElementBits == dstElementBits) { + // Simple case: same element bit width, direct copy of each element + if (VL_UNLIKELY(&q == &from)) return; // Skip self-assignment when it's truly a no-op + q = from; + } else { + // Different element bit widths: use streaming conversion + VlQueue srcCopy = from; + const size_t srcTotalBits = from.size() * srcElementBits; + const size_t dstSize = (srcTotalBits + dstElementBits - 1) / dstElementBits; + q.renew(dstSize); + for (size_t i = 0; i < dstSize; ++i) { VL_ZERO_INIT_QUEUE_ELEM(q.atWrite(i)); } + for (size_t bitIndex = 0; bitIndex < srcTotalBits; ++bitIndex) { + VL_SET_QUEUE_BIT(q, dstElementBits, bitIndex, + VL_GET_QUEUE_BIT(srcCopy, srcElementBits, bitIndex)); + } + } +} + +// This specialization works for both VlQueue (and similar) as well +// as VlQueue>. +template +static inline void VL_REVCOPY_Q(VlQueue& q, const VlQueue& from, int lbits, + int srcElementBits, int dstElementBits) { + const size_t srcTotalBits = from.size() * srcElementBits; + const size_t dstSize = (srcTotalBits + dstElementBits - 1) / dstElementBits; + + // Always make a copy to handle the case where q and from are the same queue + VlQueue srcCopy = from; + + // Initialize all elements to zero using appropriate method + q.renew(dstSize); + for (size_t i = 0; i < dstSize; ++i) VL_ZERO_INIT_QUEUE_ELEM(q.atWrite(i)); + + if (lbits == 1) { + // Simple bit reversal: write directly to destination + for (int i = srcTotalBits - 1; i >= 0; --i) { + VL_SET_QUEUE_BIT(q, dstElementBits, srcTotalBits - 1 - i, + VL_GET_QUEUE_BIT(srcCopy, srcElementBits, i)); + } + } else { + // Generalized block-reversal for lbits > 1: + // 1. Reverse all bits using 1-bit blocks + // 2. Split into lbits-sized blocks and pad incomplete blocks on the left + // 3. 
Reverse each lbits-sized block using 1-bit blocks + const size_t numCompleteBlocks = srcTotalBits / lbits; + const size_t remainderBits = srcTotalBits % lbits; + const size_t srcBlocks = numCompleteBlocks + (remainderBits > 0 ? 1 : 0); + + size_t dstBitIndex = 0; + + for (size_t block = 0; block < srcBlocks; ++block) { + const size_t blockStart = block * lbits; + const int bitsToProcess = VL_LIKELY(block < numCompleteBlocks) ? lbits : remainderBits; + for (int bit = bitsToProcess - 1; bit >= 0; --bit) { + const size_t reversedBitIndex = blockStart + bit; + const size_t originalBitIndex = srcTotalBits - 1 - reversedBitIndex; + VL_SET_QUEUE_BIT(q, dstElementBits, dstBitIndex++, + VL_GET_QUEUE_BIT(srcCopy, srcElementBits, originalBitIndex)); + } + dstBitIndex += lbits - bitsToProcess; + } + } +} + +//====================================================================== +// Expressions needing insert/select + +static inline void VL_UNPACK_RI_I(int lbits, int rbits, VlQueue& q, IData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_I(int lbits, int rbits, VlQueue& q, IData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_I(int lbits, int rbits, VlQueue& q, IData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < 
size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RQ_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? (lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_IWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +static inline void VL_UNPACK_RI_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? 
(lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_IWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +static inline void VL_UNPACK_RI_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? (lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_IWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +static inline void VL_UNPACK_RQ_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? (lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_QWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +template +static inline void VL_UNPACK_RW_W(int lbits, int rbits, VlQueue>& q, + WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? 
(lbits + bitPos) : lbits; + VL_SEL_WWII(actualWidth, rbits, q.atWrite(i), rwp, actualBitPos, actualWidth); + } +} + +template +static inline void VL_UNPACK_UI_I(int lbits, int rbits, VlUnpacked& q, + IData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_I(int lbits, int rbits, VlUnpacked& q, + IData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_I(int lbits, int rbits, VlUnpacked& q, + IData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UQ_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) 
& mask; +} + +template +static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask; +} + +template +static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask; +} + +template +static inline void VL_UNPACK_UQ_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_QWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask; +} + +template +static inline void VL_UNPACK_UW_W(int lbits, int rbits, VlUnpacked, N_Depth>& q, + WDataInP rwp) { + for (size_t i = 0; i < N_Depth; ++i) + VL_SEL_WWII(lbits, rbits, q[i], rwp, (N_Depth - 1 - i) * lbits, lbits); +} + +// Return QData from double (numeric) +// EMIT_RULE: VL_RTOIROUND_Q_D: oclean=dirty; lclean==clean/real +static inline QData VL_RTOIROUND_Q_D(double lhs) VL_PURE { + // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa + // This does not need to support subnormals as they are sub-integral + lhs = VL_ROUND(lhs); + if (lhs == 0.0) return 0; + const QData q = VL_CVT_Q_D(lhs); + const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; + const uint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); + uint64_t out = 0; + if (lsb < 0) { + out = mantissa >> -lsb; + } else if (lsb < 64) { + out = mantissa << lsb; + } + if (lhs < 0) out = -out; + return out; +} +static inline IData VL_RTOIROUND_I_D(double lhs) VL_PURE { + return static_cast(VL_RTOIROUND_Q_D(lhs)); +} +static inline WDataOutP VL_RTOIROUND_W_D(int obits, WDataOutP owp, double lhs) VL_MT_SAFE { + // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa + // This does not need to support 
subnormals as they are sub-integral + lhs = VL_ROUND(lhs); + VL_ZERO_W(obits, owp); + if (lhs == 0.0) return owp; + const QData q = VL_CVT_Q_D(lhs); + const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; + const uint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); + if (lsb < 0) { + VL_SET_WQ(owp, mantissa >> -lsb); + } else if (lsb < obits) { + _vl_insert_WQ(owp, mantissa, lsb + 52, lsb); + } + if (lhs < 0) VL_NEGATE_INPLACE_W(VL_WORDS_I(obits), owp); + return owp; +} + +//====================================================================== +// Range assignments + +// EMIT_RULE: VL_ASSIGNRANGE: rclean=dirty; +static inline void VL_ASSIGNSEL_II(int rbits, int obits, int lsb, CData& lhsr, IData rhs) VL_PURE { + _vl_insert_II(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_II(int rbits, int obits, int lsb, SData& lhsr, IData rhs) VL_PURE { + _vl_insert_II(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_II(int rbits, int obits, int lsb, IData& lhsr, IData rhs) VL_PURE { + _vl_insert_II(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_QI(int rbits, int obits, int lsb, QData& lhsr, IData rhs) VL_PURE { + _vl_insert_QQ(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_QQ(int rbits, int obits, int lsb, QData& lhsr, QData rhs) VL_PURE { + _vl_insert_QQ(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +// static inline void VL_ASSIGNSEL_IIIW(int obits, int lsb, IData& lhsr, WDataInP const rwp) +// VL_MT_SAFE { Illegal, as lhs width >= rhs width +static inline void VL_ASSIGNSEL_WI(int rbits, int obits, int lsb, WDataOutP iowp, + IData rhs) VL_MT_SAFE { + _vl_insert_WI(iowp, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_WQ(int rbits, int obits, int lsb, WDataOutP iowp, + QData rhs) VL_MT_SAFE { + _vl_insert_WQ(iowp, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_WW(int rbits, int obits, int lsb, 
WDataOutP iowp, + WDataInP const rwp) VL_MT_SAFE { + _vl_insert_WW(iowp, rwp, lsb + obits - 1, lsb, rbits); +} + +//==================================================== +// Range assignments + +// These additional functions copy bits range [obis+roffset-1:roffset] from rhs to lower bits +// of lhs(select before assigning). Rhs should always be wider than lhs. +static inline void VL_SELASSIGN_II(int rbits, int obits, CData& lhsr, IData rhs, + int roffset) VL_PURE { + _vl_insert_II(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} +static inline void VL_SELASSIGN_II(int rbits, int obits, SData& lhsr, IData rhs, + int roffset) VL_PURE { + _vl_insert_II(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} +static inline void VL_SELASSIGN_II(int rbits, int obits, IData& lhsr, IData rhs, + int roffset) VL_PURE { + _vl_insert_II(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} +static inline void VL_SELASSIGN_IQ(int rbits, int obits, CData& lhsr, QData rhs, + int roffset) VL_PURE { + // it will be truncated to right CData mask + const CData cleanmask = VL_MASK_I(rbits); + const CData insmask = VL_MASK_I(obits); + lhsr = (lhsr & ~insmask) | (static_cast(rhs >> roffset) & (insmask & cleanmask)); +} +static inline void VL_SELASSIGN_IQ(int rbits, int obits, SData& lhsr, QData rhs, + int roffset) VL_PURE { + // it will be truncated to right CData mask + const SData cleanmask = VL_MASK_I(rbits); + const SData insmask = VL_MASK_I(obits); + lhsr = (lhsr & ~insmask) | (static_cast(rhs >> roffset) & (insmask & cleanmask)); +} +static inline void VL_SELASSIGN_IQ(int rbits, int obits, IData& lhsr, QData rhs, + int roffset) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = VL_MASK_I(obits); + lhsr = (lhsr & ~insmask) | (static_cast(rhs >> roffset) & (insmask & cleanmask)); +} + +static inline void VL_SELASSIGN_QQ(int rbits, int obits, QData& lhsr, QData rhs, + int roffset) VL_PURE { + _vl_insert_QQ(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} + +static inline void 
VL_SELASSIGN_IW(int rbits, int obits, CData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + IData l = static_cast(lhsr); + _vl_insert_IW(l, rhs, roffset + obits - 1, roffset, rbits); + lhsr = static_cast(l); +} +static inline void VL_SELASSIGN_IW(int rbits, int obits, SData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + IData l = static_cast(lhsr); + _vl_insert_IW(l, rhs, roffset + obits - 1, roffset, rbits); + lhsr = static_cast(l); +} +static inline void VL_SELASSIGN_IW(int rbits, int obits, IData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + _vl_insert_IW(lhsr, rhs, roffset + obits - 1, roffset, rbits); +} +static inline void VL_SELASSIGN_QW(int rbits, int obits, QData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + // assert VL_QDATASIZE >= rbits > VL_IDATASIZE; + IData low = static_cast(lhsr); + IData high = static_cast(lhsr >> VL_IDATASIZE); + if (obits <= VL_IDATASIZE) { + _vl_insert_IW(low, rhs, obits + roffset - 1, roffset, VL_IDATASIZE); + } else { + _vl_insert_IW(low, rhs, roffset + VL_IDATASIZE - 1, roffset, VL_IDATASIZE); + _vl_insert_IW(high, rhs, roffset + obits - 1, roffset + VL_IDATASIZE, + rbits - VL_IDATASIZE); + } + lhsr = (static_cast(high) << VL_IDATASIZE) | low; +} + +static inline void VL_SELASSIGN_WW(int rbits, int obits, WDataOutP iowp, WDataInP const rwp, + int roffset) VL_MT_SAFE { + // assert rbits > VL_QDATASIZE + const int wordoff = roffset / VL_EDATASIZE; + const int lsb = roffset & VL_SIZEBITS_E; + const int upperbits = lsb == 0 ? 0 : VL_EDATASIZE - lsb; + // If roffset is not aligned, we copy some bits to align it. + if (lsb != 0) { + const int w = obits < upperbits ? 
obits : upperbits; + const int insmask = VL_MASK_E(w); + iowp[0] = (iowp[0] & ~insmask) | ((rwp[wordoff] >> lsb) & insmask); + // cppcheck-suppress knownConditionTrueFalse + if (w == obits) return; + obits -= w; + } + _vl_insert_WW(iowp, rwp + wordoff + (lsb != 0), upperbits + obits - 1, upperbits, rbits); +} + +//====================================================================== +// Triops + +static inline WDataOutP VL_COND_WIWW(int obits, WDataOutP owp, int cond, WDataInP const w1p, + WDataInP const w2p) VL_MT_SAFE { + return VL_MEMCPY_W(owp, cond ? w1p : w2p, VL_WORDS_I(obits)); +} + +//====================================================================== +// Constification + +// VL_CONST_W_#X(int obits, WDataOutP owp, IData data0, .... IData data(#-1)) +// Sets wide vector words to specified constant words. +// These macros are used when o might represent more words then are given as constants, +// hence all upper words must be zeroed. +// If changing the number of functions here, also change EMITCINLINES_NUM_CONSTW + +#define VL_C_END_(obits, wordsSet) \ + VL_MEMSET_ZERO_W(o + (wordsSet), VL_WORDS_I(obits) - (wordsSet)); \ + return o + +// clang-format off +static inline WDataOutP VL_CONST_W_1X(int obits, WDataOutP o, EData d0) VL_MT_SAFE { + o[0] = d0; + VL_C_END_(obits, 1); +} +static inline WDataOutP VL_CONST_W_2X(int obits, WDataOutP o, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; + VL_C_END_(obits, 2); +} +static inline WDataOutP VL_CONST_W_3X(int obits, WDataOutP o, EData d2, EData d1, + EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; + VL_C_END_(obits, 3); +} +static inline WDataOutP VL_CONST_W_4X(int obits, WDataOutP o, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + VL_C_END_(obits, 4); +} +static inline WDataOutP VL_CONST_W_5X(int obits, WDataOutP o, + EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = 
d4; + VL_C_END_(obits, 5); +} +static inline WDataOutP VL_CONST_W_6X(int obits, WDataOutP o, + EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; + VL_C_END_(obits, 6); +} +static inline WDataOutP VL_CONST_W_7X(int obits, WDataOutP o, + EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; + VL_C_END_(obits, 7); +} +static inline WDataOutP VL_CONST_W_8X(int obits, WDataOutP o, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; + VL_C_END_(obits, 8); +} +// +static inline WDataOutP VL_CONSTHI_W_1X(int obits, int lsb, WDataOutP o, + EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; + VL_C_END_(obits, VL_WORDS_I(lsb) + 1); +} +static inline WDataOutP VL_CONSTHI_W_2X(int obits, int lsb, WDataOutP o, + EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; + VL_C_END_(obits, VL_WORDS_I(lsb) + 2); +} +static inline WDataOutP VL_CONSTHI_W_3X(int obits, int lsb, WDataOutP o, + EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; + VL_C_END_(obits, VL_WORDS_I(lsb) + 3); +} +static inline WDataOutP VL_CONSTHI_W_4X(int obits, int lsb, WDataOutP o, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + VL_C_END_(obits, VL_WORDS_I(lsb) + 4); +} +static inline WDataOutP VL_CONSTHI_W_5X(int obits, int lsb, WDataOutP o, + EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; + VL_C_END_(obits, 
VL_WORDS_I(lsb) + 5); +} +static inline WDataOutP VL_CONSTHI_W_6X(int obits, int lsb, WDataOutP o, + EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; ohi[5] = d5; + VL_C_END_(obits, VL_WORDS_I(lsb) + 6); +} +static inline WDataOutP VL_CONSTHI_W_7X(int obits, int lsb, WDataOutP o, + EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; ohi[5] = d5; ohi[6] = d6; + VL_C_END_(obits, VL_WORDS_I(lsb) + 7); +} +static inline WDataOutP VL_CONSTHI_W_8X(int obits, int lsb, WDataOutP o, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; ohi[5] = d5; ohi[6] = d6; ohi[7] = d7; + VL_C_END_(obits, VL_WORDS_I(lsb) + 8); +} + +#undef VL_C_END_ + +// Partial constant, lower words of vector wider than 8*32, starting at bit number lsb +static inline void VL_CONSTLO_W_8X(int lsb, WDataOutP obase, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; +} +// clang-format on + +//====================================================================== +// Strings + +extern std::string VL_PUTC_N(const std::string& lhs, IData rhs, CData ths) VL_PURE; +extern CData VL_GETC_N(const std::string& lhs, IData rhs) VL_PURE; +extern std::string VL_SUBSTR_N(const std::string& lhs, IData rhs, IData ths) VL_PURE; + +inline IData VL_CMP_NN(const std::string& lhs, const std::string& rhs, bool ignoreCase) VL_PURE { + // SystemVerilog does not allow a string variable to contain '\0'. 
+ // So C functions such as strcmp() can correctly compare strings. + if (ignoreCase) { + return VL_STRCASECMP(lhs.c_str(), rhs.c_str()); + } else { + return std::strcmp(lhs.c_str(), rhs.c_str()); + } +} + +extern IData VL_ATOI_N(const std::string& str, int base) VL_PURE; +extern IData VL_NTOI_I(int obits, const std::string& str) VL_PURE; +extern QData VL_NTOI_Q(int obits, const std::string& str) VL_PURE; +extern void VL_NTOI_W(int obits, WDataOutP owp, const std::string& str) VL_PURE; + +extern IData VL_FGETS_NI(std::string& dest, IData fpi) VL_MT_SAFE; + +//====================================================================== +// Dist functions + +extern IData VL_DIST_CHI_SQUARE(IData& seedr, IData udeg_of_free) VL_MT_SAFE; +extern IData VL_DIST_ERLANG(IData& seedr, IData uk, IData umean) VL_MT_SAFE; +extern IData VL_DIST_EXPONENTIAL(IData& seedr, IData umean) VL_MT_SAFE; +extern IData VL_DIST_NORMAL(IData& seedr, IData umean, IData udeviation) VL_MT_SAFE; +extern IData VL_DIST_POISSON(IData& seedr, IData umean) VL_MT_SAFE; +extern IData VL_DIST_T(IData& seedr, IData udeg_of_free) VL_MT_SAFE; +extern IData VL_DIST_UNIFORM(IData& seedr, IData ustart, IData uend) VL_MT_SAFE; + +//====================================================================== +// Conversion functions + +extern std::string VL_CVT_PACK_STR_NW(int lwords, const WDataInP lwp) VL_PURE; +extern std::string VL_CVT_PACK_STR_ND(const VlQueue& q) VL_PURE; +inline std::string VL_CVT_PACK_STR_NQ(QData lhs) VL_PURE { + VlWide lw; + VL_SET_WQ(lw, lhs); + return VL_CVT_PACK_STR_NW(VL_WQ_WORDS_E, lw); +} +inline std::string VL_CVT_PACK_STR_NN(const std::string& lhs) VL_PURE { return lhs; } +inline std::string& VL_CVT_PACK_STR_NN(std::string& lhs) VL_PURE { return lhs; } +inline std::string VL_CVT_PACK_STR_NI(IData lhs) VL_PURE { + VlWide lw; + VL_SET_WI(lw, lhs); + return VL_CVT_PACK_STR_NW(1, lw); +} +inline std::string VL_CONCATN_NNN(const std::string& lhs, const std::string& rhs) VL_PURE { + return lhs 
+ rhs; +} +inline std::string VL_REPLICATEN_NNQ(const std::string& lhs, IData rep) VL_PURE { + std::string result; + result.reserve(lhs.length() * rep); + for (unsigned times = 0; times < rep; ++times) result += lhs; + return result; +} +inline std::string VL_REPLICATEN_NNI(const std::string& lhs, IData rep) VL_PURE { + return VL_REPLICATEN_NNQ(lhs, rep); +} + +inline IData VL_LEN_IN(const std::string& ld) { return static_cast(ld.length()); } +extern std::string VL_TOLOWER_NN(const std::string& ld) VL_PURE; +extern std::string VL_TOUPPER_NN(const std::string& ld) VL_PURE; + +extern IData VL_FERROR_IN(IData fpi, std::string& outputr) VL_MT_SAFE; +extern IData VL_FERROR_IW(IData fpi, int obits, WDataOutP outwp) VL_MT_SAFE; +extern IData VL_FOPEN_NN(const std::string& filename, const std::string& mode) VL_MT_SAFE; +extern IData VL_FOPEN_MCD_N(const std::string& filename) VL_MT_SAFE; +extern void VL_READMEM_N(bool hex, int bits, QData depth, int array_lsb, + const std::string& filename, void* memp, QData start, + QData end) VL_MT_SAFE; +extern void VL_WRITEMEM_N(bool hex, int bits, QData depth, int array_lsb, + const std::string& filename, const void* memp, QData start, + QData end) VL_MT_SAFE; +extern IData VL_SSCANF_INNX(int lbits, const std::string& ld, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits_ignored, std::string& output, const std::string& format, + int argc, ...) VL_MT_SAFE; +extern std::string VL_SFORMATF_N_NX(const std::string& format, int argc, ...) 
VL_MT_SAFE; +extern void VL_TIMEFORMAT_IINI(bool hasUnits, int units, bool hasPrecision, int precision, + bool hasSuffix, const std::string& suffix, bool hasWidth, int width, + VerilatedContext* contextp) VL_MT_SAFE; +extern IData VL_VALUEPLUSARGS_INW(int rbits, const std::string& ld, WDataOutP rwp) VL_MT_SAFE; +inline IData VL_VALUEPLUSARGS_IND(int rbits, const std::string& ld, double& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_CVT_D_Q(VL_SET_QW(rwp)); + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, CData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, SData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, IData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, QData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_SET_QW(rwp); + return got; +} +inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, double& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_CVT_D_Q(VL_SET_QW(rwp)); + return got; +} +extern IData VL_VALUEPLUSARGS_INN(int, const std::string& ld, std::string& rdr) VL_MT_SAFE; + +uint64_t VL_MURMUR64_HASH(const char* key) VL_PURE; + +//====================================================================== + +#endif // Guard diff --git a/include/verilated_funcs_cleaned2.h b/include/verilated_funcs_cleaned2.h new file mode 100644 index 
000000000..e29f6b8a2 --- /dev/null +++ b/include/verilated_funcs_cleaned2.h @@ -0,0 +1,3771 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +//************************************************************************* +// +// Code available from: https://verilator.org +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of either the GNU Lesser General Public License Version 3 +// or the Perl Artistic License Version 2.0. +// SPDX-FileCopyrightText: 2003-2026 Wilson Snyder +// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 +// +//************************************************************************* +/// +/// \file +/// \brief Verilated common functions +/// +/// verilated.h should be included instead of this file. +/// +/// Those macro/function/variable starting or ending in _ are internal, +/// however many of the other function/macros here are also internal. +/// +//************************************************************************* + +#ifndef VERILATOR_VERILATED_FUNCS_H_ +#define VERILATOR_VERILATED_FUNCS_H_ + +#ifndef VERILATOR_VERILATED_H_INTERNAL_ +#error "verilated_funcs.h should only be included by verilated.h" +#endif + +#include + +//========================================================================= +// Extern functions -- User may override -- See verilated.cpp + +/// Routine to call for $finish +/// User code may wish to replace this function, to do so, define VL_USER_FINISH. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_FINISH_MT instead, which eventually calls this. +extern void vl_finish(const char* filename, int linenum, const char* hier) VL_MT_UNSAFE; + +/// Routine to call for $stop and non-fatal error +/// User code may wish to replace this function, to do so, define VL_USER_STOP. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_STOP_MT instead, which eventually calls this. 
+extern void vl_stop(const char* filename, int linenum, const char* hier) VL_MT_UNSAFE; + +/// Routine to call for fatal messages +/// User code may wish to replace this function, to do so, define VL_USER_FATAL. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_FATAL_MT instead, which eventually calls this. +extern void vl_fatal(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_UNSAFE; + +/// Routine to call for warning messages +/// User code may wish to replace this function, to do so, define VL_USER_WARN. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_WARN_MT instead, which eventually calls this. +extern void vl_warn(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_UNSAFE; + +//========================================================================= +// Extern functions -- Slow path + +/// Multithread safe wrapper for calls to $finish +extern void VL_FINISH_MT(const char* filename, int linenum, const char* hier) VL_MT_SAFE; +/// Multithread safe wrapper for calls to $stop +extern void VL_STOP_MT(const char* filename, int linenum, const char* hier, + bool maybe = true) VL_MT_SAFE; +/// Multithread safe wrapper to call for fatal messages +extern void VL_FATAL_MT(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_SAFE; +/// Multithread safe wrapper to call for warning messages +extern void VL_WARN_MT(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_SAFE; + +// clang-format off +/// Print a string, multithread safe. Eventually VL_PRINTF will get called. +extern void VL_PRINTF_MT(const char* formatp, ...) VL_ATTR_PRINTF(1) VL_MT_SAFE; +// clang-format on + +/// Print a debug message from internals with standard prefix, with printf style format +extern void VL_DBG_MSGF(const char* formatp, ...) 
VL_ATTR_PRINTF(1) VL_MT_SAFE; + +/// Print a debug message from string via VL_DBG_MSGF +inline void VL_DBG_MSGS(const std::string& str) VL_MT_SAFE { VL_DBG_MSGF("%s", str.c_str()); } + +// EMIT_RULE: VL_RANDOM: oclean=dirty +inline IData VL_RANDOM_I() VL_MT_SAFE { return vl_rand64(); } +inline QData VL_RANDOM_Q() VL_MT_SAFE { return vl_rand64(); } +extern WDataOutP VL_RANDOM_W(int obits, WDataOutP outwp) VL_MT_SAFE; +extern IData VL_RANDOM_SEEDED_II(IData& seedr) VL_MT_SAFE; +extern IData VL_URANDOM_SEEDED_II(IData seed) VL_MT_SAFE; +inline IData VL_URANDOM_RANGE_I(IData hi, IData lo) { + const uint64_t rnd = vl_rand64(); + if (VL_LIKELY(hi > lo)) { + // (hi - lo + 1) can be zero when hi is UINT_MAX and lo is zero + if (VL_UNLIKELY(hi - lo + 1 == 0)) return rnd; + // Modulus isn't very fast but it's common that hi-low is power-of-two + return (rnd % (hi - lo + 1)) + lo; + } else { + if (VL_UNLIKELY(lo - hi + 1 == 0)) return rnd; + return (rnd % (lo - hi + 1)) + hi; + } +} + +/// Random reset a signal of given width (init time only, var-specific PRNG) +extern IData VL_SCOPED_RAND_RESET_I(int obits, uint64_t scopeHash, uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (init time only, var-specific PRNG) +extern QData VL_SCOPED_RAND_RESET_Q(int obits, uint64_t scopeHash, uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (init time only, var-specific PRNG) +extern WDataOutP VL_SCOPED_RAND_RESET_W(int obits, WDataOutP outwp, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; + +/// Random reset a signal of given width (assign time only) +extern IData VL_SCOPED_RAND_RESET_ASSIGN_I(int obits, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (assign time only) +extern QData VL_SCOPED_RAND_RESET_ASSIGN_Q(int obits, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (assign time only) +extern WDataOutP VL_SCOPED_RAND_RESET_ASSIGN_W(int obits, 
WDataOutP outwp, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; + +/// Random reset a signal of given width (init time only) +extern IData VL_RAND_RESET_I(int obits) VL_MT_SAFE; +/// Random reset a signal of given width (init time only) +extern QData VL_RAND_RESET_Q(int obits) VL_MT_SAFE; +/// Random reset a signal of given width (init time only) +extern WDataOutP VL_RAND_RESET_W(int obits, WDataOutP outwp) VL_MT_SAFE; + +/// Zero reset a signal (slow - else use VL_ZERO_W) +extern WDataOutP VL_ZERO_RESET_W(int obits, WDataOutP outwp) VL_MT_SAFE; + +/// Four-state reset - initialize to X (unknown) +static inline CData4 VL_X_RESET_4STATE_C() VL_MT_SAFE; +static inline SData4 VL_X_RESET_4STATE_S() VL_MT_SAFE; +static inline IData4 VL_X_RESET_4STATE_I() VL_MT_SAFE; +static inline QData4 VL_X_RESET_4STATE_Q() VL_MT_SAFE; +extern WDataOutP VL_X_RESET_4STATE_W(int obits, WDataOutP owp) VL_MT_SAFE; + +extern void VL_PRINTTIMESCALE(const char* namep, const char* timeunitp, + const VerilatedContext* contextp) VL_MT_SAFE; + +extern WDataOutP _vl_moddiv_w(int lbits, WDataOutP owp, WDataInP const lwp, WDataInP const rwp, + bool is_modulus) VL_MT_SAFE; + +extern void _vl_vsss_based(WDataOutP owp, int obits, int baseLog2, const char* strp, + size_t posstart, size_t posend) VL_MT_SAFE; + +extern IData VL_FGETS_IXI(int obits, void* destp, IData fpi) VL_MT_SAFE; + +extern void VL_FFLUSH_I(IData fdi) VL_MT_SAFE; +extern IData VL_FSEEK_I(IData fdi, IData offset, IData origin) VL_MT_SAFE; +extern IData VL_FTELL_I(IData fdi) VL_MT_SAFE; +extern void VL_FCLOSE_I(IData fdi) VL_MT_SAFE; + +extern IData VL_FREAD_I(int width, int array_lsb, int array_size, void* memp, IData fpi, + IData start, IData count) VL_MT_SAFE; + +extern void VL_WRITEF_NX(const std::string& format, int argc, ...) VL_MT_SAFE; +extern void VL_FWRITEF_NX(IData fpi, const std::string& format, int argc, ...) 
VL_MT_SAFE; + +// Four-state display functions - output X/Z for four-state values +extern void VL_WRITEF_4STATE_BIN_C(const std::string& format, int lbits, CData4 data) VL_MT_SAFE; +extern void VL_WRITEF_4STATE_BIN_S(const std::string& format, int lbits, SData4 data) VL_MT_SAFE; +extern void VL_WRITEF_4STATE_BIN_I(const std::string& format, int lbits, IData4 data) VL_MT_SAFE; +extern void VL_WRITEF_4STATE_BIN_Q(const std::string& format, int lbits, QData4 data) VL_MT_SAFE; + +extern IData VL_FSCANF_INX(IData fpi, const std::string& format, int argc, ...) VL_MT_SAFE; +extern IData VL_SSCANF_IINX(int lbits, IData ld, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern IData VL_SSCANF_IQNX(int lbits, QData ld, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern IData VL_SSCANF_IWNX(int lbits, WDataInP const lwp, const std::string& format, int argc, + ...) VL_MT_SAFE; + +extern void VL_SFORMAT_NX(int obits, CData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, SData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, IData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, QData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, void* destp, const std::string& format, int argc, + ...) 
VL_MT_SAFE; + +extern void VL_STACKTRACE() VL_MT_SAFE; +extern std::string VL_STACKTRACE_N() VL_MT_SAFE; +extern IData VL_SYSTEM_IW(int lhswords, WDataInP const lhsp) VL_MT_SAFE; +extern IData VL_SYSTEM_IQ(QData lhs) VL_MT_SAFE; +inline IData VL_SYSTEM_II(IData lhs) VL_MT_SAFE { return VL_SYSTEM_IQ(lhs); } +extern IData VL_SYSTEM_IN(const std::string& lhs) VL_MT_SAFE; + +extern IData VL_TESTPLUSARGS_I(const std::string& format) VL_MT_SAFE; +extern const char* vl_mc_scan_plusargs(const char* prefixp) VL_MT_SAFE; // PLIish + +//========================================================================= +// Base macros + +// Return true if data[bit] set; not 0/1 return, but 0/non-zero return. +// Arguments must not have side effects +#define VL_BITISSETLIMIT_W(data, width, bit) (((bit) < (width)) && VL_BITISSET_W(data, bit)) + +// Shift appropriate word by bit. Does not account for wrapping between two words +// Argument 'bit' must not have side effects +#define VL_BITRSHIFT_W(data, bit) ((data)[VL_BITWORD_E(bit)] >> VL_BITBIT_E(bit)) + +// Create two 32-bit words from quadword +// WData is always at least 2 words; does not clean upper bits +#define VL_SET_WQ(owp, data) \ + do { \ + (owp)[0] = static_cast(data); \ + (owp)[1] = static_cast((data) >> VL_EDATASIZE); \ + } while (false) +#define VL_SET_WI(owp, data) \ + do { \ + (owp)[0] = static_cast(data); \ + (owp)[1] = 0; \ + } while (false) +#define VL_SET_QW(lwp) \ + ((static_cast((lwp)[0])) \ + | (static_cast((lwp)[1]) << (static_cast(VL_EDATASIZE)))) +#define VL_SET_QII(ld, rd) ((static_cast(ld) << 32ULL) | static_cast(rd)) + +// Return FILE* from IData +extern FILE* VL_CVT_I_FP(IData lhs) VL_MT_SAFE; + +// clang-format off +// Use a union to avoid cast-to-different-size warnings +// Return void* from QData +static inline void* VL_CVT_Q_VP(QData lhs) VL_PURE { + union { void* fp; QData q; } u; + u.q = lhs; + return u.fp; +} +// Return QData from const void* +static inline QData VL_CVT_VP_Q(const void* fp) VL_PURE { 
+ union { const void* fp; QData q; } u; + u.q = 0; + u.fp = fp; + return u.q; +} +// Return double from QData (bits, not numerically) +static inline double VL_CVT_D_Q(QData lhs) VL_PURE { + union { double d; QData q; } u; + u.q = lhs; + return u.d; +} +// Return QData from double (bits, not numerically) +static inline QData VL_CVT_Q_D(double lhs) VL_PURE { + union { double d; QData q; } u; + u.d = lhs; + return u.q; +} +// clang-format on +// Return string from DPI char* +static inline std::string VL_CVT_N_CSTR(const char* lhsp) VL_PURE { + return lhsp ? std::string{lhsp} : ""s; +} + +// Return queue from an unpacked array +template +static inline VlQueue VL_CVT_UNPACK_TO_Q(const VlUnpacked& q) VL_PURE { + VlQueue ret; + for (size_t i = 0; i < N_Depth; ++i) ret.push_back(q[i]); + return ret; +} + +// Return double from lhs (numeric) unsigned +double VL_ITOR_D_W(int lbits, WDataInP const lwp) VL_PURE; +static inline double VL_ITOR_D_I(int, IData lhs) VL_PURE { + return static_cast(static_cast(lhs)); +} +static inline double VL_ITOR_D_Q(int, QData lhs) VL_PURE { + return static_cast(static_cast(lhs)); +} +// Return double from lhs (numeric) signed +double VL_ISTOR_D_W(int lbits, WDataInP const lwp) VL_MT_SAFE; +static inline double VL_ISTOR_D_I(int lbits, IData lhs) VL_MT_SAFE { + if (lbits == 32) return static_cast(static_cast(lhs)); + VlWide lwp; + VL_SET_WI(lwp, lhs); + return VL_ISTOR_D_W(lbits, lwp); +} +static inline double VL_ISTOR_D_Q(int lbits, QData lhs) VL_MT_SAFE { + if (lbits == 64) return static_cast(static_cast(lhs)); + VlWide lwp; + VL_SET_WQ(lwp, lhs); + return VL_ISTOR_D_W(lbits, lwp); +} +// Return IData truncated from double (numeric) +static inline IData VL_RTOI_I_D(double lhs) VL_PURE { return static_cast(VL_TRUNC(lhs)); } + +// Sign extend such that if MSB set, we get ffff_ffff, else 0s +// (Requires clean input) +#define VL_SIGN_I(nbits, lhs) ((lhs) >> VL_BITBIT_I((nbits) - VL_UL(1))) +#define VL_SIGN_Q(nbits, lhs) ((lhs) >> 
VL_BITBIT_Q((nbits) - 1ULL)) +#define VL_SIGN_E(nbits, lhs) ((lhs) >> VL_BITBIT_E((nbits) - VL_EUL(1))) +#define VL_SIGN_W(nbits, rwp) \ + ((rwp)[VL_BITWORD_E((nbits) - VL_EUL(1))] >> VL_BITBIT_E((nbits) - VL_EUL(1))) +#define VL_SIGNONES_E(nbits, lhs) (-(VL_SIGN_E(nbits, lhs))) + +// Sign bit extended up to MSB, doesn't include unsigned portion +// Optimization bug in GCC 3.3 returns different bitmasks to later states for +static inline IData VL_EXTENDSIGN_I(int lbits, IData lhs) VL_PURE { + return (-((lhs) & (VL_UL(1) << (lbits - 1)))); +} +static inline QData VL_EXTENDSIGN_Q(int lbits, QData lhs) VL_PURE { + return (-((lhs) & (1ULL << (lbits - 1)))); +} + +// Debugging prints +extern void _vl_debug_print_w(int lbits, WDataInP const iwp) VL_MT_SAFE; + +//========================================================================= +// Time handling + +// clang-format off + +#if defined(SYSTEMC_VERSION) +/// Return current simulation time +// Already defined: extern sc_time sc_time_stamp(); +inline uint64_t vl_time_stamp64() VL_MT_SAFE { return sc_core::sc_time_stamp().value(); } +#else // Non-SystemC +# if !defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY) +# ifdef VL_TIME_STAMP64 +// vl_time_stamp64() may be optionally defined by the user to return time. +// On MSVC++ weak symbols are not supported so must be declared, or define +// VL_TIME_CONTEXT. +extern uint64_t vl_time_stamp64() VL_ATTR_WEAK VL_MT_SAFE; +# else +// sc_time_stamp() may be optionally defined by the user to return time. +// On MSVC++ weak symbols are not supported so must be declared, or define +// VL_TIME_CONTEXT. +extern double sc_time_stamp() VL_ATTR_WEAK VL_MT_SAFE; // Verilator 4.032 and newer +inline uint64_t vl_time_stamp64() VL_MT_SAFE { + // clang9.0.1 requires & although we really do want the weak symbol value + // cppcheck-suppress duplicateValueTernary + return VL_LIKELY(&sc_time_stamp) ? 
static_cast(sc_time_stamp()) : 0; +} +# endif +# endif +#endif + +// clang-format on + +uint64_t VerilatedContext::time() const VL_MT_SAFE { + // When using non-default context, fastest path is return time + if (VL_LIKELY(m_s.m_time)) return m_s.m_time; +#if defined(SYSTEMC_VERSION) || (!defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY)) + // Zero time could mean really at zero, or using callback + // clang9.0.1 requires & although we really do want the weak symbol value + if (VL_LIKELY(&vl_time_stamp64)) { // else is weak symbol that is not defined + return vl_time_stamp64(); + } +#endif + return 0; +} + +#define VL_TIME_Q() (Verilated::threadContextp()->time()) +#define VL_TIME_D() (static_cast(VL_TIME_Q())) + +// Time scaled from 1-per-precision into a module's time units ("Unit"-ed, not "United") +// Optimized assuming scale is always constant. +// Can't use multiply in Q flavor, as might lose precision +#define VL_TIME_ROUND(t, p) (((t) + ((p) / 2)) / (p)) +#define VL_TIME_UNITED_Q(scale) VL_TIME_ROUND(VL_TIME_Q(), static_cast(scale)) +#define VL_TIME_UNITED_D(scale) (VL_TIME_D() / static_cast(scale)) + +// Return time precision as multiplier of time units +double vl_time_multiplier(int scale) VL_PURE; +// Return power of 10. e.g. returns 100 if n==2 +uint64_t vl_time_pow10(int n) VL_PURE; +// Return time as string with timescale suffix +std::string vl_timescaled_double(double value, const char* format = "%0.0f%s") VL_PURE; + +//========================================================================= +// Functional macros/routines +// These all take the form +// VL_func_IW(bits, bits, op, op) +// VL_func_WW(bits, bits, out, op, op) +// The I/W indicates if it's a integer or wide for the output and each operand. +// The bits indicate the bit width of the output and each operand. +// If wide output, a temporary storage location is specified. 
+ +//=================================================================== +// SETTING OPERATORS + +VL_ATTR_ALWINLINE +static WDataOutP VL_MEMSET_ZERO_W(WDataOutP owp, int words) VL_MT_SAFE { + return static_cast(std::memset(owp, 0, words * sizeof(EData))); +} +VL_ATTR_ALWINLINE +static WDataOutP VL_MEMSET_ONES_W(WDataOutP owp, int words) VL_MT_SAFE { + return static_cast(std::memset(owp, 0xff, words * sizeof(EData))); +} +VL_ATTR_ALWINLINE +static WDataOutP VL_MEMCPY_W(WDataOutP owp, WDataInP const iwp, int words) VL_MT_SAFE { + return static_cast(std::memcpy(owp, iwp, words * sizeof(EData))); +} + +// Output clean +// EMIT_RULE: VL_CLEAN: oclean=clean; obits=lbits; +#define VL_CLEAN_II(obits, lbits, lhs) ((lhs) & (VL_MASK_I(obits))) +#define VL_CLEAN_QQ(obits, lbits, lhs) ((lhs) & (VL_MASK_Q(obits))) + +// EMIT_RULE: VL_ASSIGNCLEAN: oclean=clean; obits==lbits; +#define VL_ASSIGNCLEAN_W(obits, owp, lwp) VL_CLEAN_WW((obits), (owp), (lwp)) +static inline WDataOutP _vl_clean_inplace_w(int obits, WDataOutP owp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + owp[words - 1] &= VL_MASK_E(obits); + return owp; +} +static inline WDataOutP VL_CLEAN_WW(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + VL_MEMCPY_W(owp, lwp, words - 1); + owp[words - 1] = lwp[words - 1] & VL_MASK_E(obits); + return owp; +} +static inline WDataOutP VL_ZERO_W(int obits, WDataOutP owp) VL_MT_SAFE { + return VL_MEMSET_ZERO_W(owp, VL_WORDS_I(obits)); +} +static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + VL_MEMSET_ONES_W(owp, words - 1); + owp[words - 1] = VL_MASK_E(obits); + return owp; +} + +// EMIT_RULE: VL_ASSIGN: oclean=rclean; obits==lbits; +// For now, we always have a clean rhs. +// Note: If a ASSIGN isn't clean, use VL_ASSIGNCLEAN instead to do the same thing. 
+static inline WDataOutP VL_ASSIGN_W(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + return VL_MEMCPY_W(owp, lwp, VL_WORDS_I(obits)); +} + +// EMIT_RULE: VL_ASSIGNBIT: rclean=clean; +static inline void VL_ASSIGNBIT_II(int bit, CData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_II(int bit, SData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_II(int bit, IData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_QI(int bit, QData& lhsr, QData rhs) VL_PURE { + lhsr = ((lhsr & ~(1ULL << VL_BITBIT_Q(bit))) | (static_cast(rhs) << VL_BITBIT_Q(bit))); +} +static inline void VL_ASSIGNBIT_WI(int bit, WDataOutP owp, IData rhs) VL_MT_SAFE { + const EData orig = owp[VL_BITWORD_E(bit)]; + owp[VL_BITWORD_E(bit)] = ((orig & ~(VL_EUL(1) << VL_BITBIT_E(bit))) + | (static_cast(rhs) << VL_BITBIT_E(bit))); +} +// Alternative form that is an instruction faster when rhs is constant one. 
+static inline void VL_ASSIGNBIT_IO(int bit, CData& lhsr) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_IO(int bit, SData& lhsr) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_IO(int bit, IData& lhsr) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_QO(int bit, QData& lhsr) VL_PURE { + lhsr = (lhsr | (1ULL << VL_BITBIT_Q(bit))); +} +static inline void VL_ASSIGNBIT_WO(int bit, WDataOutP owp) VL_MT_SAFE { + const EData orig = owp[VL_BITWORD_E(bit)]; + owp[VL_BITWORD_E(bit)] = (orig | (VL_EUL(1) << VL_BITBIT_E(bit))); +} + +//=================================================================== +// SYSTEMC OPERATORS +// Copying verilog format to systemc integers, doubles, and bit vectors. +// Get a SystemC variable + +#define VL_ASSIGN_DSD(obits, vvar, svar) \ + { (vvar) = (svar).read(); } +#define VL_ASSIGN_ISI(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read()); } +#define VL_ASSIGN_QSQ(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read()); } + +#define VL_ASSIGN_ISW(obits, od, svar) \ + { (od) = ((svar).read().get_word(0)) & VL_MASK_I(obits); } +#define VL_ASSIGN_QSW(obits, od, svar) \ + { \ + (od) = ((static_cast((svar).read().get_word(1))) << VL_IDATASIZE \ + | (svar).read().get_word(0)) \ + & VL_MASK_Q(obits); \ + } +#define VL_ASSIGN_WSW(obits, owp, svar) \ + { \ + const int words = VL_WORDS_I(obits); \ + for (int i = 0; i < words; ++i) (owp)[i] = (svar).read().get_word(i); \ + (owp)[words - 1] &= VL_MASK_E(obits); \ + } + +#define VL_ASSIGN_ISU(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read().to_uint()); } +#define VL_ASSIGN_QSU(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read().to_uint64()); } +#define VL_ASSIGN_ISB(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read().to_uint()); } 
+#define VL_ASSIGN_QSB(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read().to_uint64()); } +#define VL_ASSIGN_WSB(obits, owp, svar) \ + { \ + const int words = VL_WORDS_I(obits); \ + sc_dt::sc_biguint<(obits)> _butemp = (svar).read(); \ + uint32_t* chunkp = _butemp.get_raw(); \ + int32_t lsb = 0; \ + while (lsb < obits - BITS_PER_DIGIT) { \ + const uint32_t data = *chunkp; \ + ++chunkp; \ + _vl_insert_WI(owp.data(), data, lsb + BITS_PER_DIGIT - 1, lsb); \ + lsb += BITS_PER_DIGIT; \ + } \ + if (lsb < obits) { \ + const uint32_t msb_data = *chunkp; \ + _vl_insert_WI(owp.data(), msb_data, obits - 1, lsb); \ + } \ + (owp)[words - 1] &= VL_MASK_E(obits); \ + } + +// Copying verilog format from systemc integers, doubles, and bit vectors. +// Set a SystemC variable + +#define VL_ASSIGN_SDD(obits, svar, vvar) \ + { (svar).write(vvar); } +#define VL_ASSIGN_SII(obits, svar, vvar) \ + { (svar).write(vvar); } +#define VL_ASSIGN_SQQ(obits, svar, vvar) \ + { (svar).write(vvar); } + +#define VL_ASSIGN_SWI(obits, svar, rd) \ + { \ + sc_dt::sc_bv<(obits)> _bvtemp; \ + _bvtemp.set_word(0, (rd)); \ + (svar).write(_bvtemp); \ + } +#define VL_ASSIGN_SWQ(obits, svar, rd) \ + { \ + sc_dt::sc_bv<(obits)> _bvtemp; \ + _bvtemp.set_word(0, static_cast(rd)); \ + _bvtemp.set_word(1, static_cast((rd) >> VL_IDATASIZE)); \ + (svar).write(_bvtemp); \ + } +#define VL_ASSIGN_SWW(obits, svar, rwp) \ + { \ + sc_dt::sc_bv<(obits)> _bvtemp; \ + for (int i = 0; i < VL_WORDS_I(obits); ++i) _bvtemp.set_word(i, (rwp)[i]); \ + (svar).write(_bvtemp); \ + } + +#define VL_ASSIGN_SUI(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SUQ(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBI(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBQ(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBW(obits, svar, rwp) \ + { \ + sc_dt::sc_biguint<(obits)> _butemp; \ + int32_t lsb = 0; \ + uint32_t* chunkp = _butemp.get_raw(); \ + while (lsb + 
BITS_PER_DIGIT < (obits)) { \ + static_assert(std::is_same::value, "IData and EData mismatch"); \ + const uint32_t data \ + = VL_SEL_IWII(lsb + BITS_PER_DIGIT + 1, (rwp).data(), lsb, BITS_PER_DIGIT); \ + *chunkp = data & VL_MASK_E(BITS_PER_DIGIT); \ + ++chunkp; \ + lsb += BITS_PER_DIGIT; \ + } \ + if (lsb < (obits)) { \ + const uint32_t msb_data = VL_SEL_IWII((obits) + 1, (rwp).data(), lsb, (obits) - lsb); \ + *chunkp = msb_data & VL_MASK_E((obits) - lsb); \ + } \ + _butemp.set(0, *(rwp).data() & 1); /* force update the sign */ \ + (svar).write(_butemp); \ + } + +//=================================================================== +// Extending sizes + +// CAREFUL, we're width changing, so obits!=lbits + +// Right must be clean because otherwise size increase would pick up bad bits +// EMIT_RULE: VL_EXTEND: oclean=clean; rclean==clean; +#define VL_EXTEND_II(obits, lbits, lhs) ((lhs)) +#define VL_EXTEND_QI(obits, lbits, lhs) (static_cast(lhs)) +#define VL_EXTEND_QQ(obits, lbits, lhs) ((lhs)) + +static inline WDataOutP VL_EXTEND_WI(int obits, int, WDataOutP owp, IData ld) VL_MT_SAFE { + // Note for extracts that obits != lbits + owp[0] = ld; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + return owp; +} +static inline WDataOutP VL_EXTEND_WQ(int obits, int, WDataOutP owp, QData ld) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + return owp; +} +static inline WDataOutP VL_EXTEND_WW(int obits, int lbits, WDataOutP owp, + WDataInP const lwp) VL_MT_SAFE { + const int lwords = VL_WORDS_I(lbits); + VL_PREFETCH_RD(lwp); + VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords); + return VL_MEMCPY_W(owp, lwp, lwords); +} + +// EMIT_RULE: VL_EXTENDS: oclean=*dirty*; obits=lbits; +// Sign extension; output dirty +static inline IData VL_EXTENDS_II(int, int lbits, IData lhs) VL_PURE { + return VL_EXTENDSIGN_I(lbits, lhs) | lhs; +} +static inline QData VL_EXTENDS_QI(int, int lbits, QData lhs 
/*Q_as_need_extended*/) VL_PURE { + return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; +} +static inline QData VL_EXTENDS_QQ(int, int lbits, QData lhs) VL_PURE { + return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; +} + +static inline WDataOutP VL_EXTENDS_WI(int obits, int lbits, WDataOutP owp, IData ld) VL_MT_SAFE { + owp[0] = ld; + if (VL_SIGN_E(lbits, owp[0])) { + owp[0] |= ~VL_MASK_E(lbits); + VL_MEMSET_ONES_W(owp + 1, VL_WORDS_I(obits) - 1); + } else { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + } + return owp; +} +static inline WDataOutP VL_EXTENDS_WQ(int obits, int lbits, WDataOutP owp, QData ld) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + if (VL_SIGN_E(lbits, owp[1])) { + owp[1] |= ~VL_MASK_E(lbits); + VL_MEMSET_ONES_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + } else { + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + } + return owp; +} +static inline WDataOutP VL_EXTENDS_WW(int obits, int lbits, WDataOutP owp, + WDataInP const lwp) VL_MT_SAFE { + const int lwords = VL_WORDS_I(lbits); + VL_PREFETCH_RD(lwp); + owp[lwords - 1] = lwp[lwords - 1]; + if (VL_SIGN_E(lbits, lwp[lwords - 1])) { + owp[lwords - 1] |= ~VL_MASK_E(lbits); + VL_MEMSET_ONES_W(owp + lwords, VL_WORDS_I(obits) - lwords); + } else { + VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords); + } + return VL_MEMCPY_W(owp, lwp, lwords - 1); +} + +//=================================================================== +// REDUCTION OPERATORS + +// EMIT_RULE: VL_REDAND: oclean=clean; lclean==clean; obits=1; +#define VL_REDAND_II(lbits, lhs) ((lhs) == VL_MASK_I(lbits)) +#define VL_REDAND_IQ(lbits, lhs) ((lhs) == VL_MASK_Q(lbits)) +static inline IData VL_REDAND_IW(int lbits, WDataInP const lwp) VL_PURE { + const int words = VL_WORDS_I(lbits); + EData combine = lwp[0]; + for (int i = 1; i < words - 1; ++i) combine &= lwp[i]; + combine &= ~VL_MASK_E(lbits) | lwp[words - 1]; + // cppcheck-suppress knownConditionTrueFalse + return ((~combine) == 0); +} + +// EMIT_RULE: 
VL_REDOR: oclean=clean; lclean==clean; obits=1; +#define VL_REDOR_I(lhs) ((lhs) != 0) +#define VL_REDOR_Q(lhs) ((lhs) != 0) +static inline IData VL_REDOR_W(int words, WDataInP const lwp) VL_PURE { + EData equal = 0; + for (int i = 0; i < words; ++i) equal |= lwp[i]; + return (equal != 0); +} + +// EMIT_RULE: VL_REDXOR: oclean=dirty; obits=1; +static inline IData VL_REDXOR_2(IData r) VL_PURE { + // Experiments show VL_REDXOR_2 is faster than __builtin_parityl + r = (r ^ (r >> 1)); + return r; +} +static inline IData VL_REDXOR_4(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + return r; +#endif +} +static inline IData VL_REDXOR_8(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + return r; +#endif +} +static inline IData VL_REDXOR_16(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + return r; +#endif +} +static inline IData VL_REDXOR_32(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + r = (r ^ (r >> 16)); + return r; +#endif +} +static inline IData VL_REDXOR_64(QData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityll(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + r = (r ^ (r >> 16)); + r = (r ^ (r >> 32)); + return static_cast(r); +#endif +} +static inline IData VL_REDXOR_W(int words, WDataInP const lwp) VL_PURE { + EData r = lwp[0]; + for (int i = 1; i < words; 
++i) r ^= lwp[i]; + return VL_REDXOR_32(r); +} + +// EMIT_RULE: VL_COUNTONES_II: oclean = false; lhs clean +static inline IData VL_COUNTONES_I(IData lhs) VL_PURE { + // This is faster than __builtin_popcountl + IData r = lhs - ((lhs >> 1) & 033333333333) - ((lhs >> 2) & 011111111111); + r = (r + (r >> 3)) & 030707070707; + r = (r + (r >> 6)); + r = (r + (r >> 12) + (r >> 24)) & 077; + return r; +} +static inline IData VL_COUNTONES_Q(QData lhs) VL_PURE { + return VL_COUNTONES_I(static_cast(lhs)) + VL_COUNTONES_I(static_cast(lhs >> 32)); +} +#define VL_COUNTONES_E VL_COUNTONES_I +static inline IData VL_COUNTONES_W(int words, WDataInP const lwp) VL_PURE { + EData r = 0; + for (int i = 0; i < words; ++i) r += VL_COUNTONES_E(lwp[i]); + return r; +} + +// EMIT_RULE: VL_COUNTBITS_II: oclean = false; lhs clean +static inline IData VL_COUNTBITS_I(int lbits, IData lhs, IData ctrl0, IData ctrl1, + IData ctrl2) VL_PURE { + const int ctrlSum = (ctrl0 & 0x1) + (ctrl1 & 0x1) + (ctrl2 & 0x1); + if (ctrlSum == 3) { + return VL_COUNTONES_I(lhs); + } else if (ctrlSum == 0) { + const IData mask = (lbits == 32) ? -1 : ((1 << lbits) - 1); + return VL_COUNTONES_I(~lhs & mask); + } else { + return (lbits == 32) ? 
32 : lbits; + } +} +static inline IData VL_COUNTBITS_Q(int lbits, QData lhs, IData ctrl0, IData ctrl1, + IData ctrl2) VL_PURE { + return VL_COUNTBITS_I(32, static_cast(lhs), ctrl0, ctrl1, ctrl2) + + VL_COUNTBITS_I(lbits - 32, static_cast(lhs >> 32), ctrl0, ctrl1, ctrl2); +} +#define VL_COUNTBITS_E VL_COUNTBITS_I +static inline IData VL_COUNTBITS_W(int lbits, int words, WDataInP const lwp, IData ctrl0, + IData ctrl1, IData ctrl2) VL_MT_SAFE { + EData r = 0; + IData wordLbits = 32; + for (int i = 0; i < words; ++i) { + if (i == words - 1) wordLbits = lbits % 32; + r += VL_COUNTBITS_E(wordLbits, lwp[i], ctrl0, ctrl1, ctrl2); + } + return r; +} + +static inline IData VL_ONEHOT_I(IData lhs) VL_PURE { + return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); +} +static inline IData VL_ONEHOT_Q(QData lhs) VL_PURE { + return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); +} +static inline IData VL_ONEHOT_W(int words, WDataInP const lwp) VL_PURE { + EData one = 0; + for (int i = 0; (i < words); ++i) { + if (lwp[i]) { + if (one) return 0; + one = 1; + if (lwp[i] & (lwp[i] - 1)) return 0; + } + } + return one; +} + +static inline IData VL_ONEHOT0_I(IData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } +static inline IData VL_ONEHOT0_Q(QData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } +static inline IData VL_ONEHOT0_W(int words, WDataInP const lwp) VL_PURE { + bool one = false; + for (int i = 0; (i < words); ++i) { + if (lwp[i]) { + if (one) return 0; + one = true; + if (lwp[i] & (lwp[i] - 1)) return 0; + } + } + return 1; +} + +static inline IData VL_CLOG2_I(IData lhs) VL_PURE { + // There are faster algorithms, or fls GCC4 builtins, but rarely used + // In C++20 there will be std::bit_width(lhs) - 1 + if (VL_UNLIKELY(!lhs)) return 0; + --lhs; + int shifts = 0; + for (; lhs != 0; ++shifts) lhs = lhs >> 1; + return shifts; +} +static inline IData VL_CLOG2_Q(QData lhs) VL_PURE { + if (VL_UNLIKELY(!lhs)) return 0; + --lhs; + int shifts = 0; + for (; lhs != 0; ++shifts) lhs = lhs >> 
1ULL; + return shifts; +} +static inline IData VL_CLOG2_W(int words, WDataInP const lwp) VL_PURE { + const EData adjust = (VL_COUNTONES_W(words, lwp) == 1) ? 0 : 1; + for (int i = words - 1; i >= 0; --i) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken + for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { + if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) { + return i * VL_EDATASIZE + bit + adjust; + } + } + // Can't get here - one bit must be set + } + } + return 0; +} + +static inline IData VL_MOSTSETBITP1_W(int words, WDataInP const lwp) VL_PURE { + // MSB set bit plus one; similar to FLS. 0=value is zero + for (int i = words - 1; i >= 0; --i) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken + for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { + if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) return i * VL_EDATASIZE + bit + 1; + } + // Can't get here - one bit must be set + } + } + return 0; +} + +//=================================================================== +// SIMPLE LOGICAL OPERATORS + +// EMIT_RULE: VL_AND: oclean=lclean||rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_AND_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] & rwp[i]); + return owp; +} +// EMIT_RULE: VL_OR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_OR_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] | rwp[i]); + return owp; +} +// EMIT_RULE: VL_CHANGEXOR: oclean=1; obits=32; lbits==rbits; +static inline IData VL_CHANGEXOR_W(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { + IData od = 0; + for (int i = 0; (i < words); ++i) od |= (lwp[i] ^ rwp[i]); + return od; +} +// EMIT_RULE: VL_XOR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_XOR_W(int words, WDataOutP owp, WDataInP const 
lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] ^ rwp[i]); + return owp; +} +// EMIT_RULE: VL_NOT: oclean=dirty; obits=lbits; +static inline WDataOutP VL_NOT_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + for (int i = 0; i < words; ++i) owp[i] = ~(lwp[i]); + return owp; +} + +//========================================================================= +// FOUR-STATE LOGICAL OPERATORS (X/Z support) +// For four-state: 00=0, 01=1, 10=X, 11=Z + +// Four-state AND: X & anything = X, Z & anything = X, 0 & anything = 0, 1 & anything = anything +static inline uint8_t VL_AND_4STATE(uint8_t lhs, uint8_t rhs) { + const uint8_t lval = lhs & 3; + const uint8_t rval = rhs & 3; + // X & anything = X + if (lval == 2 || rval == 2) return 2; // X + // Z & anything = X + if (lval == 3 || rval == 3) return 2; // X + // 0 & anything = 0 + if (lval == 0 || rval == 0) return 0; // 0 + // 1 & anything = anything + return rval; +} + +// Four-state OR +static inline uint8_t VL_OR_4STATE(uint8_t lhs, uint8_t rhs) { + const uint8_t lval = lhs & 3; + const uint8_t rval = rhs & 3; + // X | anything = X + if (lval == 2 || rval == 2) return 2; // X + // Z | anything = X + if (lval == 3 || rval == 3) return 2; // X + // 1 | anything = 1 + if (lval == 1 || rval == 1) return 1; // 1 + // 0 | anything = anything + return rval; +} + +// Four-state XOR +static inline uint8_t VL_XOR_4STATE(uint8_t lhs, uint8_t rhs) { + const uint8_t lval = lhs & 3; + const uint8_t rval = rhs & 3; + // X ^ anything = X + if (lval == 2 || rval == 2) return 2; // X + // Z ^ anything = X + if (lval == 3 || rval == 3) return 2; // X + // Otherwise XOR the clean values + return (lval ^ rval); +} + +// Four-state NOT +static inline uint8_t VL_NOT_4STATE(uint8_t lhs) { + const uint8_t lval = lhs & 3; + if (lval == 2) return 2; // X -> X + if (lval == 3) return 2; // Z -> X + return lval ^ 1; // 0 -> 1, 1 -> 0 +} + +// Four-state byte operations +static inline CData4 
VL_AND_4STATE_C(CData4 lhs, CData4 rhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline CData4 VL_OR_4STATE_C(CData4 lhs, CData4 rhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline CData4 VL_XOR_4STATE_C(CData4 lhs, CData4 rhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline CData4 VL_NOT_4STATE_C(CData4 lhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (res << (i * 2)); + } + return result; +} + +// Four-state SData (8-bit) operations +static inline SData4 VL_AND_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline SData4 VL_OR_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline SData4 VL_XOR_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline SData4 
VL_NOT_4STATE_S(SData4 lhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (res << (i * 2)); + } + return result; +} + +// Four-state IData (16-bit) operations +static inline IData4 VL_AND_4STATE_I(IData4 lhs, IData4 rhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline IData4 VL_OR_4STATE_I(IData4 lhs, IData4 rhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline IData4 VL_XOR_4STATE_I(IData4 lhs, IData4 rhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline IData4 VL_NOT_4STATE_I(IData4 lhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (res << (i * 2)); + } + return result; +} + +// Four-state QData (32-bit) operations +static inline QData4 VL_AND_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +static inline QData4 VL_OR_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +static inline 
QData4 VL_XOR_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +static inline QData4 VL_NOT_4STATE_Q(QData4 lhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +//========================================================================= +// FOUR-STATE COMPARISONS +// For four-state: any X or Z in comparison returns X (unknown) + +// Helper functions for checking X/Z bits +static inline bool _vl4_anyXZ_C(CData4 data) { + return (data & 0xAAAAAAAA) != 0; // Any bit with 0b10 (X) or 0b11 (Z) +} +static inline bool _vl4_anyXZ_S(SData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_I(IData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_Q(QData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} + +// Four-state EQ: returns true if equal and both operands are deterministic +static inline bool VL_EQ_4STATE_C(CData4 lhs, CData4 rhs) { + if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) return false; + return (lhs & 0x55555555) == (rhs & 0x55555555); // Mask to get lower bit only +} + +static inline bool VL_EQ_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) return false; + return (lhs & 
0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +// Four-state NEQ +static inline bool VL_NEQ_4STATE_C(CData4 lhs, CData4 rhs) { + return !VL_EQ_4STATE_C(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_S(SData4 lhs, SData4 rhs) { + return !VL_EQ_4STATE_S(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_I(IData4 lhs, IData4 rhs) { + return !VL_EQ_4STATE_I(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_Q(QData4 lhs, QData4 rhs) { + return !VL_EQ_4STATE_Q(lhs, rhs); +} + +static inline bool VL_NEQ_4STATE_S(SData4 lhs, SData4 rhs) { + return !VL_EQ_4STATE_S(lhs, rhs); +} + +static inline bool VL_NEQ_4STATE_I(IData4 lhs, IData4 rhs) { + return !VL_EQ_4STATE_I(lhs, rhs); +} + +static inline bool VL_NEQ_4STATE_Q(QData4 lhs, QData4 rhs) { + return !VL_EQ_4STATE_Q(lhs, rhs); +} + 
+//========================================================================= +// Logical comparisons + +// EMIT_RULE: VL_EQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_NEQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_LT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_GT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_GTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_LTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +#define VL_NEQ_W(words, lwp, rwp) (!VL_EQ_W(words, lwp, rwp)) +#define VL_LT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) < 0) +#define VL_LTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) <= 0) +#define VL_GT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) > 0) +#define VL_GTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) >= 0) + +// Output clean, AND MUST BE CLEAN +static inline IData VL_EQ_W(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { + EData nequal = 0; + for (int i = 0; (i < words); ++i) nequal |= (lwp[i] ^ rwp[i]); + return (nequal == 0); +} + +// Internal usage +static inline int _vl_cmp_w(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { + for (int i = words - 1; i >= 0; --i) { + if (lwp[i] > rwp[i]) return 1; + if (lwp[i] < rwp[i]) return -1; + } + return 0; // == +} + +#define VL_LTS_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) < 0) +#define VL_LTES_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) <= 0) +#define VL_GTS_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) > 0) +#define VL_GTES_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) >= 0) + +static inline IData VL_GTS_III(int lbits, IData lhs, IData rhs) VL_PURE { + // For lbits==32, this becomes just a single instruction, otherwise ~5. 
+ // GCC 3.3.4 sign extension bugs on AMD64 architecture force us to use quad logic + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed > rhs_signed; +} +static inline IData VL_GTS_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed > rhs_signed; +} + +static inline IData VL_GTES_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed >= rhs_signed; +} +static inline IData VL_GTES_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed >= rhs_signed; +} + +static inline IData VL_LTS_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed < rhs_signed; +} +static inline IData VL_LTS_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed < rhs_signed; +} + +static inline IData VL_LTES_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed <= rhs_signed; +} +static inline IData VL_LTES_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed <= rhs_signed; +} + +static inline 
int _vl_cmps_w(int lbits, WDataInP const lwp, WDataInP const rwp) VL_PURE { + const int words = VL_WORDS_I(lbits); + int i = words - 1; + // We need to flip sense if negative comparison + const EData lsign = VL_SIGN_E(lbits, lwp[i]); + const EData rsign = VL_SIGN_E(lbits, rwp[i]); + if (!lsign && rsign) return 1; // + > - + if (lsign && !rsign) return -1; // - < + + for (; i >= 0; --i) { + if (lwp[i] > rwp[i]) return 1; + if (lwp[i] < rwp[i]) return -1; + } + return 0; // == +} + +//========================================================================= +// Expressions + +// Output NOT clean +static inline WDataOutP VL_NEGATE_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + EData carry = 1; + for (int i = 0; i < words; ++i) { + owp[i] = ~lwp[i] + carry; + carry = (owp[i] < ~lwp[i]); + } + return owp; +} +static inline void VL_NEGATE_INPLACE_W(int words, WDataOutP owp_lwp) VL_MT_SAFE { + EData carry = 1; + for (int i = 0; i < words; ++i) { + const EData word = ~owp_lwp[i] + carry; + carry = (word < ~owp_lwp[i]); + owp_lwp[i] = word; + } +} + +// EMIT_RULE: VL_MUL: oclean=dirty; lclean==clean; rclean==clean; +// EMIT_RULE: VL_DIV: oclean=dirty; lclean==clean; rclean==clean; +// EMIT_RULE: VL_MODDIV: oclean=dirty; lclean==clean; rclean==clean; +static inline IData VL_DIV_III(int lbits, IData lhs, IData rhs) { + return (rhs == 0) ? 0 : lhs / rhs; +} +static inline QData VL_DIV_QQQ(int lbits, QData lhs, QData rhs) { + return (rhs == 0) ? 0 : lhs / rhs; +} +#define VL_DIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 0)) +static inline IData VL_MODDIV_III(int lbits, IData lhs, IData rhs) { + return (rhs == 0) ? 0 : lhs % rhs; +} +static inline QData VL_MODDIV_QQQ(int lbits, QData lhs, QData rhs) { + return (rhs == 0) ? 
0 : lhs % rhs; +} +#define VL_MODDIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 1)) + +static inline WDataOutP VL_ADD_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = carry + static_cast(lwp[i]) + static_cast(rwp[i]); + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Last output word is dirty + return owp; +} + +static inline WDataOutP VL_SUB_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = (carry + static_cast(lwp[i]) + + static_cast(static_cast(~rwp[i]))); + if (i == 0) ++carry; // Negation of rwp + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Last output word is dirty + return owp; +} + +static inline WDataOutP VL_MUL_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; i < words; ++i) owp[i] = 0; + for (int lword = 0; lword < words; ++lword) { + for (int rword = 0; rword < words; ++rword) { + QData mul = static_cast(lwp[lword]) * static_cast(rwp[rword]); + for (int qword = lword + rword; qword < words; ++qword) { + mul += static_cast(owp[qword]); + owp[qword] = (mul & 0xffffffffULL); + mul = (mul >> 32ULL) & 0xffffffffULL; + } + } + } + // Last output word is dirty + return owp; +} + +static inline IData VL_MULS_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int32_t lhs_signed = VL_EXTENDS_II(32, lbits, lhs); + const int32_t rhs_signed = VL_EXTENDS_II(32, lbits, rhs); + return lhs_signed * rhs_signed; +} +static inline QData VL_MULS_QQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed * rhs_signed; +} + +static inline WDataOutP VL_MULS_WWW(int lbits, WDataOutP owp, WDataInP const 
lwp, + WDataInP const rwp) VL_MT_SAFE { + const int words = VL_WORDS_I(lbits); + VL_DEBUG_IFDEF(assert(words <= VL_MULS_MAX_WORDS);); + // cppcheck-suppress variableScope + WData lwstore[VL_MULS_MAX_WORDS]; // Fixed size, as MSVC++ doesn't allow [words] here + // cppcheck-suppress variableScope + WData rwstore[VL_MULS_MAX_WORDS]; + WDataInP lwusp = lwp; + WDataInP rwusp = rwp; + const EData lneg = VL_SIGN_E(lbits, lwp[words - 1]); + if (lneg) { // Negate lhs + lwusp = lwstore; + VL_NEGATE_W(words, lwstore, lwp); + lwstore[words - 1] &= VL_MASK_E(lbits); // Clean it + } + const EData rneg = VL_SIGN_E(lbits, rwp[words - 1]); + if (rneg) { // Negate rhs + rwusp = rwstore; + VL_NEGATE_W(words, rwstore, rwp); + rwstore[words - 1] &= VL_MASK_E(lbits); // Clean it + } + VL_MUL_W(words, owp, lwusp, rwusp); + owp[words - 1] &= VL_MASK_E( + lbits); // Clean. Note it's ok for the multiply to overflow into the sign bit + if ((lneg ^ rneg) & 1) { // Negate output (not using NEGATE, as owp==lwp) + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = carry + static_cast(static_cast(~owp[i])); + if (i == 0) ++carry; // Negation of temp2 + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Not needed: owp[words-1] |= 1<= 2; // 2=X, 3=Z +} + +// Helper: Check if any bit in a four-state value is X or Z +static inline bool _vl4_anyXZ_C(CData4 val) { + return (val & 0x55) != 0; // Check if any bit is 01 (X) or 11 (Z) +} +static inline bool _vl4_anyXZ_S(SData4 val) { + return (val & 0x5555) != 0; +} +static inline bool _vl4_anyXZ_I(IData4 val) { + return (val & 0x55555555) != 0; +} +static inline bool _vl4_anyXZ_Q(QData4 val) { + return (val & 0x5555555555555555LL) != 0; +} +static inline bool _vl4_anyXZ_S(SData4 val) { + return (val & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_I(IData4 val) { + return (val & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_Q(QData4 val) { + return (val & 
0xAAAAAAAAAAAAAAAAULL) != 0; +} + +// Four-state ADD: if any operand has X/Z, result is X +static inline CData4 VL_ADD_4STATE_C(CData4 lhs, CData4 rhs) { + if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) { + return 0xAAAAAAAA; // All X (2 in each nibble = 0b10101010) + } + // Extract clean values and add + CData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= ((sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline SData4 VL_ADD_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + SData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + return false; +} + +static inline bool _vl4_anyXZ_S(SData4 val) { + for (int i = 0; i < 8; i++) { + if (_vl4_isXZ((val >> (i * 2)) & 3)) return true; + } + return false; +} + + + +// Four-state ADD: if any operand has X/Z, result is X +static inline CData4 VL_ADD_4STATE_C(CData4 lhs, CData4 rhs) { + if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) { + return 0xAAAAAAAA; // All X (2 in each nibble = 0b10101010) + } + // Extract clean values and add + CData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= ((sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline SData4 VL_ADD_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + SData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t 
rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline IData4 VL_ADD_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + IData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline QData4 VL_ADD_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + QData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +// Four-state SUB +static inline CData4 VL_SUB_4STATE_C(CData4 lhs, CData4 rhs) { + if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) { + return 0xAAAAAAAA; // All X + } + return lhs - rhs; +} +static inline SData4 VL_SUB_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + return lhs - rhs; +} +static inline IData4 VL_SUB_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + return lhs - rhs; +} +static inline QData4 VL_SUB_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; // All X + } + return lhs - rhs; +} + CData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + 
borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + +static inline SData4 VL_SUB_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; + } + SData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + +static inline IData4 VL_SUB_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; + } + IData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + +static inline QData4 VL_SUB_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) { + return 0xAAAAAAAAAAAAAAAALL; + } + QData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + +#define VL_POW_IIQ(obits, lbits, rbits, lhs, rhs) VL_POW_QQQ(obits, lbits, rbits, lhs, rhs) +#define VL_POW_IIW(obits, lbits, rbits, lhs, rwp) VL_POW_QQW(obits, lbits, rbits, lhs, rwp) +#define VL_POW_QQI(obits, lbits, rbits, lhs, rhs) VL_POW_QQQ(obits, lbits, rbits, lhs, rhs) +#define VL_POW_WWI(obits, lbits, rbits, owp, lwp, rhs) \ + VL_POW_WWQ(obits, lbits, rbits, owp, lwp, rhs) + +static inline IData VL_POW_III(int, int, int rbits, IData lhs, IData rhs) VL_PURE { + if 
(VL_UNLIKELY(rhs == 0)) return 1; + if (VL_UNLIKELY(lhs == 0)) return 0; + IData power = lhs; + IData out = 1; + for (int i = 0; i < rbits; ++i) { + if (i > 0) power = power * power; + if (rhs & (1ULL << i)) out *= power; + } + return out; +} +static inline QData VL_POW_QQQ(int, int, int rbits, QData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (VL_UNLIKELY(lhs == 0)) return 0; + QData power = lhs; + QData out = 1ULL; + for (int i = 0; i < rbits; ++i) { + if (i > 0) power = power * power; + if (rhs & (1ULL << i)) out *= power; + } + return out; +} +WDataOutP VL_POW_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE; +WDataOutP VL_POW_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + QData rhs) VL_MT_SAFE; +QData VL_POW_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp) VL_MT_SAFE; + +#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_IIW(obits, lbits, rbits, lhs, rwp, lsign, rsign) \ + VL_POWSS_QQW(obits, lbits, rbits, lhs, rwp, lsign, rsign) +#define VL_POWSS_QQI(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_WWI(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) \ + VL_POWSS_WWQ(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) + +static inline IData VL_POWSS_III(int obits, int, int rbits, IData lhs, IData rhs, bool lsign, + bool rsign) VL_MT_SAFE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (rsign && VL_SIGN_I(rbits, rhs)) { + if (lhs == 0) { + return 0; // "X" + } else if (lhs == 1) { + return 1; + } else if (lsign && lhs == VL_MASK_I(obits)) { // -1 + if (rhs & 1) { + return VL_MASK_I(obits); // -1^odd=-1 + } else { + return 1; // -1^even=1 + } + } + return 0; + } 
+ return VL_POW_III(obits, rbits, rbits, lhs, rhs); +} +static inline QData VL_POWSS_QQQ(int obits, int, int rbits, QData lhs, QData rhs, bool lsign, + bool rsign) VL_MT_SAFE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (rsign && VL_SIGN_Q(rbits, rhs)) { + if (lhs == 0) { + return 0; // "X" + } else if (lhs == 1) { + return 1; + } else if (lsign && lhs == VL_MASK_Q(obits)) { // -1 + if (rhs & 1) { + return VL_MASK_Q(obits); // -1^odd=-1 + } else { + return 1; // -1^even=1 + } + } + return 0; + } + return VL_POW_QQQ(obits, rbits, rbits, lhs, rhs); +} +WDataOutP VL_POWSS_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp, bool lsign, bool rsign) VL_MT_SAFE; +WDataOutP VL_POWSS_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, QData rhs, + bool lsign, bool rsign) VL_MT_SAFE; +QData VL_POWSS_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp, bool lsign, + bool rsign) VL_MT_SAFE; + +//=================================================================== +// Concat/replication + +// INTERNAL: Stuff LHS bit 0++ into OUTPUT at specified offset +// ld may be "dirty", output is clean +static inline void _vl_insert_II(CData& lhsr, IData ld, int hbit, int lbit, int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_II(SData& lhsr, IData ld, int hbit, int lbit, int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_II(IData& lhsr, IData ld, int hbit, int lbit, int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline 
void _vl_insert_QQ(QData& lhsr, QData ld, int hbit, int lbit, int rbits) VL_PURE { + const QData cleanmask = VL_MASK_Q(rbits); + const QData insmask = (VL_MASK_Q(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_WI(WDataOutP iowp, IData ld, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + // Insert value ld into iowp at bit slice [hbit:lbit]. iowp is rbits wide. + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int roffset = VL_BITBIT_E(rbits); + const int hword = VL_BITWORD_E(hbit); + const int lword = VL_BITWORD_E(lbit); + const int rword = VL_BITWORD_E(rbits); + const EData cleanmask = hword == rword ? VL_MASK_E(roffset) : VL_MASK_E(0); + + if (hoffset == VL_SIZEBITS_E && loffset == 0) { + // Fast and common case, word based insertion + iowp[lword] = ld & cleanmask; + } else { + const EData lde = static_cast(ld); + if (hword == lword) { // know < EData bits because above checks it + // Assignment is contained within one word of destination + const EData insmask = (VL_MASK_E(hoffset - loffset + 1)) << loffset; + iowp[lword] = (iowp[lword] & ~insmask) | ((lde << loffset) & (insmask & cleanmask)); + } else { + // Assignment crosses a word boundary in destination + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; + const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; + const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword + iowp[lword] = (iowp[lword] & ~linsmask) | ((lde << loffset) & linsmask); + // Prevent unsafe write where lword was final writable location and hword is + // out-of-bounds. + if (VL_LIKELY(!(hword == rword && roffset == 0))) { + iowp[hword] + = (iowp[hword] & ~hinsmask) | ((lde >> nbitsonright) & (hinsmask & cleanmask)); + } + } + } +} + +// Copy bits from lwp[hbit:lbit] to low bits of lhsr. 
rbits is real width of lshr +static inline void _vl_insert_IW(IData& lhsr, WDataInP const lwp, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int hword = VL_BITWORD_E(hbit); + const int lword = VL_BITWORD_E(lbit); + const IData cleanmask = VL_MASK_I(rbits); + if (hword == lword) { + const IData insmask = (VL_MASK_I(hoffset - loffset + 1)); + lhsr = (lhsr & ~insmask) | ((lwp[lword] >> loffset) & (insmask & cleanmask)); + } else { + const int nbitsonright = VL_IDATASIZE - loffset; // bits that filled by lword + const IData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << nbitsonright; + const IData linsmask = VL_MASK_E(VL_EDATASIZE - loffset); + lhsr = (lhsr & ~linsmask) | ((lwp[lword] >> loffset) & (linsmask & cleanmask)); + lhsr = (lhsr & ~hinsmask) | ((lwp[hword] << nbitsonright) & (hinsmask & cleanmask)); + } +} + +// INTERNAL: Stuff large LHS bit 0++ into OUTPUT at specified offset +// lwp may be "dirty" +static inline void _vl_insert_WW(WDataOutP iowp, WDataInP const lwp, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int roffset = VL_BITBIT_E(rbits); + const int lword = VL_BITWORD_E(lbit); + const int hword = VL_BITWORD_E(hbit); + const int rword = VL_BITWORD_E(rbits); + const int words = VL_WORDS_I(hbit - lbit + 1); + // Cleaning mask, only applied to top word of the assignment. Is a no-op + // if we don't assign to the top word of the destination. + const EData cleanmask = hword == rword ? 
VL_MASK_E(roffset) : VL_MASK_E(0); + + if (hoffset == VL_SIZEBITS_E && loffset == 0) { + // Fast and common case, word based insertion + for (int i = 0; i < (words - 1); ++i) iowp[lword + i] = lwp[i]; + iowp[hword] = lwp[words - 1] & cleanmask; + } else if (loffset == 0) { + // Non-32bit, but nicely aligned, so stuff all but the last word + for (int i = 0; i < (words - 1); ++i) iowp[lword + i] = lwp[i]; + // Know it's not a full word as above fast case handled it + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)); + iowp[hword] = (iowp[hword] & ~hinsmask) | (lwp[words - 1] & (hinsmask & cleanmask)); + } else { + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; + const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; + const int nbitsonright + = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) + // Middle words + for (int i = 0; i < words; ++i) { + { // Lower word + const int oword = lword + i; + const EData d = lwp[i] << loffset; + const EData od = (iowp[oword] & ~linsmask) | (d & linsmask); + if (oword == hword) { + iowp[oword] = (iowp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); + } else { + iowp[oword] = od; + } + } + { // Upper word + const int oword = lword + i + 1; + if (oword <= hword) { + const EData d = lwp[i] >> nbitsonright; + const EData od = (d & ~linsmask) | (iowp[oword] & linsmask); + if (oword == hword) { + iowp[oword] = (iowp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); + } else { + iowp[oword] = od; + } + } + } + } + } +} + +static inline void _vl_insert_WQ(WDataOutP iowp, QData ld, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + VlWide lwp; + VL_SET_WQ(lwp, ld); + _vl_insert_WW(iowp, lwp, hbit, lbit, rbits); +} + +// EMIT_RULE: VL_REPLICATE: oclean=clean>width32, dirty<=width32; lclean=clean; rclean==clean; +// RHS MUST BE CLEAN CONSTANT. 
+#define VL_REPLICATE_IOI(lbits, ld, rep) (-(ld)) // Iff lbits==1 +#define VL_REPLICATE_QOI(lbits, ld, rep) (-(static_cast(ld))) // Iff lbits==1 + +static inline IData VL_REPLICATE_III(int lbits, IData ld, IData rep) VL_PURE { + IData returndata = ld; + for (unsigned i = 1; i < rep; ++i) { + returndata = returndata << lbits; + returndata |= ld; + } + return returndata; +} +static inline QData VL_REPLICATE_QII(int lbits, IData ld, IData rep) VL_PURE { + QData returndata = ld; + for (unsigned i = 1; i < rep; ++i) { + returndata = returndata << lbits; + returndata |= static_cast(ld); + } + return returndata; +} +static inline WDataOutP VL_REPLICATE_WII(int lbits, WDataOutP owp, IData ld, + IData rep) VL_MT_SAFE { + owp[0] = ld; + // Zeroing all words isn't strictly needed but allows compiler to know + // it does not need to preserve data in word(s) not being written + for (unsigned i = 1; i < VL_WORDS_I(static_cast(lbits) * rep); ++i) owp[i] = 0; + for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WI(owp, ld, i * lbits + lbits - 1, i * lbits); + } + return owp; +} +static inline WDataOutP VL_REPLICATE_WQI(int lbits, WDataOutP owp, QData ld, + IData rep) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + // Zeroing all words isn't strictly needed but allows compiler to know + // it does not need to preserve data in word(s) not being written + for (unsigned i = 2; i < VL_WORDS_I(static_cast(lbits) * rep); ++i) owp[i] = 0; + for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WQ(owp, ld, i * lbits + lbits - 1, i * lbits); + } + return owp; +} +static inline WDataOutP VL_REPLICATE_WWI(int lbits, WDataOutP owp, WDataInP const lwp, + IData rep) VL_MT_SAFE { + for (unsigned i = 0; i < VL_WORDS_I(static_cast(lbits)); ++i) owp[i] = lwp[i]; + // Zeroing all words isn't strictly needed but allows compiler to know + // it does not need to preserve data in word(s) not being written + for (unsigned i = VL_WORDS_I(static_cast(lbits)); + i < VL_WORDS_I(static_cast(lbits * rep)); ++i) + owp[i] = 0; + 
for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WW(owp, lwp, i * lbits + lbits - 1, i * lbits); + } + return owp; +} + +// Left stream operator. Output will always be clean. LHS and RHS must be clean. +// Special "fast" versions for slice sizes that are a power of 2. These use +// shifts and masks to execute faster than the slower for-loop approach where a +// subset of bits is copied in during each iteration. +static inline IData VL_STREAML_FAST_III(int lbits, IData ld, IData rd_log2) VL_PURE { + // Pre-shift bits in most-significant slice: + // + // If lbits is not a multiple of the slice size (i.e., lbits % rd != 0), + // then we end up with a "gap" in our reversed result. For example, if we + // have a 5-bit Verilog signal (lbits=5) in an 8-bit C data type: + // + // ld = ---43210 + // + // (where numbers are the Verilog signal bit numbers and '-' is an unused bit). + // Executing the switch statement below with a slice size of two (rd=2, + // rd_log2=1) produces: + // + // ret = 1032-400 + // + // Pre-shifting the bits in the most-significant slice allows us to avoid + // this gap in the shuffled data: + // + // ld_adjusted = --4-3210 + // ret = 10324--- + IData ret = ld; + if (rd_log2) { + const uint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); // max multiple of rd <= lbits + const uint32_t lbitsRem = lbits - lbitsFloor; // number of bits in most-sig slice (MSS) + const IData msbMask = lbitsFloor == 32 ? 
0UL : VL_MASK_I(lbitsRem) << lbitsFloor; + ret = (ret & ~msbMask) | ((ret & msbMask) << ((VL_UL(1) << rd_log2) - lbitsRem)); + } + switch (rd_log2) { + case 0: ret = ((ret >> 1) & VL_UL(0x55555555)) | ((ret & VL_UL(0x55555555)) << 1); // FALLTHRU + case 1: ret = ((ret >> 2) & VL_UL(0x33333333)) | ((ret & VL_UL(0x33333333)) << 2); // FALLTHRU + case 2: ret = ((ret >> 4) & VL_UL(0x0f0f0f0f)) | ((ret & VL_UL(0x0f0f0f0f)) << 4); // FALLTHRU + case 3: ret = ((ret >> 8) & VL_UL(0x00ff00ff)) | ((ret & VL_UL(0x00ff00ff)) << 8); // FALLTHRU + case 4: ret = ((ret >> 16) | (ret << 16)); // FALLTHRU + default:; + } + return ret >> (VL_IDATASIZE - lbits); +} + +static inline QData VL_STREAML_FAST_QQI(int lbits, QData ld, IData rd_log2) VL_PURE { + // Pre-shift bits in most-significant slice (see comment in VL_STREAML_FAST_III) + QData ret = ld; + if (rd_log2) { + const uint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); + const uint32_t lbitsRem = lbits - lbitsFloor; + const QData msbMask = lbitsFloor == 64 ? 
0ULL : VL_MASK_Q(lbitsRem) << lbitsFloor; + ret = (ret & ~msbMask) | ((ret & msbMask) << ((1ULL << rd_log2) - lbitsRem)); + } + switch (rd_log2) { + case 0: + ret = (((ret >> 1) & 0x5555555555555555ULL) + | ((ret & 0x5555555555555555ULL) << 1)); // FALLTHRU + case 1: + ret = (((ret >> 2) & 0x3333333333333333ULL) + | ((ret & 0x3333333333333333ULL) << 2)); // FALLTHRU + case 2: + ret = (((ret >> 4) & 0x0f0f0f0f0f0f0f0fULL) + | ((ret & 0x0f0f0f0f0f0f0f0fULL) << 4)); // FALLTHRU + case 3: + ret = (((ret >> 8) & 0x00ff00ff00ff00ffULL) + | ((ret & 0x00ff00ff00ff00ffULL) << 8)); // FALLTHRU + case 4: + ret = (((ret >> 16) & 0x0000ffff0000ffffULL) + | ((ret & 0x0000ffff0000ffffULL) << 16)); // FALLTHRU + case 5: ret = ((ret >> 32) | (ret << 32)); // FALLTHRU + default:; + } + return ret >> (VL_QUADSIZE - lbits); +} + +// Regular "slow" streaming operators +static inline IData VL_STREAML_III(int lbits, IData ld, IData rd) VL_PURE { + IData ret = 0; + // Slice size should never exceed the lhs width + const IData mask = VL_MASK_I(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + return ret; +} + +static inline QData VL_STREAML_QQI(int lbits, QData ld, IData rd) VL_PURE { + QData ret = 0; + // Slice size should never exceed the lhs width + const QData mask = VL_MASK_Q(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + return ret; +} + +static inline WDataOutP VL_STREAML_WWI(int lbits, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + VL_ZERO_W(lbits, owp); + // Slice size should never exceed the lhs width + const int ssize = (rd < static_cast(lbits)) ? rd : (static_cast(lbits)); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? 
ostart : 0; + for (int sbit = 0; sbit < ssize && sbit < lbits - istart; ++sbit) { + // Extract a single bit from lwp and shift it to the correct + // location for owp. + const EData bit = (VL_BITRSHIFT_W(lwp, (istart + sbit)) & 1) + << VL_BITBIT_E(ostart + sbit); + owp[VL_BITWORD_E(ostart + sbit)] |= bit; + } + } + return owp; +} + +static inline IData VL_PACK_I_RI(int obits, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +static inline IData VL_PACK_I_RI(int obits, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +static inline IData VL_PACK_I_RI(int obits, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) ret |= q.at(q.size() - 1 - i) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_UI(int obits, int lbits, const VlUnpacked& q) { + IData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_UI(int obits, int lbits, const VlUnpacked& q) { + IData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_UI(int obits, int lbits, const VlUnpacked& q) { + IData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) ret |= q[N_Depth - 1 - i] << (i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RI(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RI(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << 
(i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RI(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UI(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UI(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UI(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RQ(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) ret |= q.at(q.size() - 1 - i) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UQ(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) ret |= q[N_Depth - 1 - i] << (i * lbits); + return ret; +} + +static inline WDataOutP VL_PACK_W_RI(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WI(owp, q.at(q.size() - i - 1), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +static inline WDataOutP VL_PACK_W_RI(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to 
be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WI(owp, q.at(q.size() - i - 1), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +static inline WDataOutP VL_PACK_W_RI(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WI(owp, q.at(q.size() - 1 - i), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UI(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WI(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UI(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WI(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UI(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WI(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +static inline WDataOutP VL_PACK_W_RQ(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WQ(owp, q.at(q.size() - 1 - i), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return 
owp; +} + +template +static inline WDataOutP VL_PACK_W_UQ(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WQ(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_RW(int obits, int lbits, WDataOutP owp, + const VlQueue>& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WW(owp, q.at(q.size() - 1 - i), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UW(int obits, int lbits, WDataOutP owp, + const VlUnpacked, N_Depth>& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WW(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1 + offset, i * lbits + offset); + return owp; +} + +// Because concats are common and wide, it's valuable to always have a clean output. +// Thus we specify inputs must be clean, so we don't need to clean the output. +// Note the bit shifts are always constants, so the adds in these constify out. 
+// Casts required, as args may be 8 bit entities, and need to shift to appropriate output size +#define VL_CONCAT_III(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QII(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QIQ(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QQI(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QQQ(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) + +static inline WDataOutP VL_CONCAT_WII(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + IData rd) VL_MT_SAFE { + owp[0] = rd; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWI(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, IData rd) VL_MT_SAFE { + owp[0] = rd; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WIW(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + WDataInP const rwp) VL_MT_SAFE { + const int rwords = VL_WORDS_I(rbits); + VL_MEMCPY_W(owp, rwp, rwords); + VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords); + _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WIQ(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQI(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + IData rd) VL_MT_SAFE { + owp[0] = rd; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + _vl_insert_WQ(owp, ld, rbits + lbits - 1, 
rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQQ(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQW(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + WDataInP const rwp) VL_MT_SAFE { + const int rwords = VL_WORDS_I(rbits); + VL_MEMCPY_W(owp, rwp, rwords); + VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords); + _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + const int rwords = VL_WORDS_I(rbits); + VL_MEMCPY_W(owp, rwp, rwords); + VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords); + _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); + return owp; +} + +//=================================================================== +// Shifts + +// Static shift, used by internal functions +// The output is the same as the input - it overlaps! 
+static inline void _vl_shiftl_inplace_w(int obits, WDataOutP iowp, + IData rd /*1 or 4*/) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + const EData linsmask = VL_MASK_E(rd); + for (int i = words - 1; i >= 1; --i) { + iowp[i] + = ((iowp[i] << rd) & ~linsmask) | ((iowp[i - 1] >> (VL_EDATASIZE - rd)) & linsmask); + } + iowp[0] = ((iowp[0] << rd) & ~linsmask); + iowp[VL_WORDS_I(obits) - 1] &= VL_MASK_E(obits); +} + +// EMIT_RULE: VL_SHIFTL: oclean=lclean; rclean==clean; +// Important: Unlike most other funcs, the shift might well be a computed +// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) +// If RHS (rd/rwp) is larger than the output, zeros (or all ones for >>>) must be returned +// (This corresponds to AstShift*Ovr Ast nodes) +static inline IData VL_SHIFTL_III(int obits, int, int, IData lhs, IData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return lhs << rhs; // Small is common so not clean return +} +static inline IData VL_SHIFTL_IIQ(int obits, int, int, IData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return VL_CLEAN_II(obits, obits, lhs << rhs); +} +static inline QData VL_SHIFTL_QQI(int obits, int, int, QData lhs, IData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return lhs << rhs; // Small is common so not clean return +} +static inline QData VL_SHIFTL_QQQ(int obits, int, int, QData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return VL_CLEAN_QQ(obits, obits, lhs << rhs); +} +static inline WDataOutP VL_SHIFTL_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); + const int bit_shift = VL_BITBIT_E(rd); + if (rd >= static_cast(obits)) { // rd may be huge with MSB set + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else if (bit_shift == 0) { // Aligned word shift (<<0,<<32,<<64 etc) + for (int i = 0; i < word_shift; 
++i) owp[i] = 0; + for (int i = word_shift; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i - word_shift]; + } else { + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WW(owp, lwp, obits - 1, rd); + } + return owp; +} +static inline WDataOutP VL_SHIFTL_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return VL_ZERO_W(obits, owp); + } + } + return VL_SHIFTL_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTL_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTL_WWW(obits, lbits, rbits, owp, lwp, rwp); +} +static inline IData VL_SHIFTL_IIW(int obits, int, int rbits, IData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + return VL_SHIFTL_III(obits, obits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTL_QQW(int obits, int, int rbits, QData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + // Above checks rwp[1]==0 so not needed in below shift + return VL_SHIFTL_QQI(obits, obits, 32, lhs, rwp[0]); +} + +// EMIT_RULE: VL_SHIFTR: oclean=lclean; rclean==clean; +// Important: Unlike most other funcs, the shift might well be a computed +// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) 
+static inline IData VL_SHIFTR_III(int obits, int, int, IData lhs, IData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return lhs >> rhs; +} +static inline IData VL_SHIFTR_IIQ(int obits, int, int, IData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return lhs >> rhs; +} +static inline QData VL_SHIFTR_QQI(int obits, int, int, QData lhs, IData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return lhs >> rhs; +} +static inline QData VL_SHIFTR_QQQ(int obits, int, int, QData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return lhs >> rhs; +} +static inline WDataOutP VL_SHIFTR_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); // Maybe 0 + const int bit_shift = VL_BITBIT_E(rd); + if (rd >= static_cast(obits)) { // rd may be huge with MSB set + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) + const int copy_words = (VL_WORDS_I(obits) - word_shift); + for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; + for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else { + const int loffset = rd & VL_SIZEBITS_E; + const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword (know + // loffset!=0) Middle words + const int words = VL_WORDS_I(obits - rd); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int upperword = i + word_shift + 1; + if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; + } + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } + return owp; +} +static inline WDataOutP VL_SHIFTR_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or 
more + return VL_ZERO_W(obits, owp); + } + } + return VL_SHIFTR_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTR_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTR_WWW(obits, lbits, rbits, owp, lwp, rwp); +} + +static inline IData VL_SHIFTR_IIW(int obits, int, int rbits, IData lhs, + WDataInP const rwp) VL_PURE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) return 0; // Huge shift 1>>32 or more + } + return VL_SHIFTR_III(obits, obits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTR_QQW(int obits, int, int rbits, QData lhs, + WDataInP const rwp) VL_PURE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) return 0; // Huge shift 1>>32 or more + } + return VL_SHIFTR_QQI(obits, obits, 32, lhs, rwp[0]); +} + +// EMIT_RULE: VL_SHIFTRS: oclean=false; lclean=clean, rclean==clean; +static inline IData VL_SHIFTRS_III(int obits, int lbits, int, IData lhs, IData rhs) VL_PURE { + // Note the C standard does not specify the >> operator as a arithmetic shift! + // IEEE says signed if output signed, but bit position from lbits; + // must use lbits for sign; lbits might != obits, + // an EXTEND(SHIFTRS(...)) can became a SHIFTRS(...) 
within same 32/64 bit word length + const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return sign & VL_MASK_I(obits); + const IData signext = ~(VL_MASK_I(lbits) >> rhs); // One with bits where we've shifted "past" + return (lhs >> rhs) | (sign & VL_CLEAN_II(obits, obits, signext)); +} +static inline QData VL_SHIFTRS_QQI(int obits, int lbits, int, QData lhs, IData rhs) VL_PURE { + const QData sign = -(lhs >> (lbits - 1)); + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return sign & VL_MASK_Q(obits); + const QData signext = ~(VL_MASK_Q(lbits) >> rhs); + return (lhs >> rhs) | (sign & VL_CLEAN_QQ(obits, obits, signext)); +} +static inline IData VL_SHIFTRS_IQI(int obits, int lbits, int rbits, QData lhs, IData rhs) VL_PURE { + return static_cast(VL_SHIFTRS_QQI(obits, lbits, rbits, lhs, rhs)); +} +static inline WDataOutP VL_SHIFTRS_WWI(int obits, int lbits, int, WDataOutP owp, + WDataInP const lwp, IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); + const int bit_shift = VL_BITBIT_E(rd); + const int lmsw = VL_WORDS_I(obits) - 1; + const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]); + if (rd >= static_cast(obits)) { // Shifting past end, sign in all of lbits + for (int i = 0; i <= lmsw; ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) + const int copy_words = (VL_WORDS_I(obits) - word_shift); + for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; + if (copy_words >= 0) owp[copy_words - 1] |= ~VL_MASK_E(obits) & sign; + for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } else { + const int loffset = rd & VL_SIZEBITS_E; + const int nbitsonright + = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) + // Middle words + const int words = VL_WORDS_I(obits - rd); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int 
upperword = i + word_shift + 1; + if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; + } + if (words) owp[words - 1] |= sign & ~VL_MASK_E(obits - loffset); + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } + return owp; +} +static inline WDataOutP VL_SHIFTRS_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= static_cast(obits))) { + const int owords = VL_WORDS_I(obits); + if (VL_SIGN_E(lbits, lwp[owords - 1])) { + VL_MEMSET_ONES_W(owp, owords); + owp[owords - 1] &= VL_MASK_E(lbits); + } else { + VL_MEMSET_ZERO_W(owp, owords); + } + return owp; + } + return VL_SHIFTRS_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTRS_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTRS_WWW(obits, lbits, rbits, owp, lwp, rwp); +} +static inline IData VL_SHIFTRS_IIW(int obits, int lbits, int rbits, IData lhs, + WDataInP const rwp) VL_PURE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= static_cast(obits))) { + const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + return VL_CLEAN_II(obits, obits, sign); + } + return VL_SHIFTRS_III(obits, lbits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTRS_QQW(int obits, int lbits, int rbits, QData lhs, + WDataInP const rwp) VL_PURE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= static_cast(obits))) { + const QData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + return 
VL_CLEAN_QQ(obits, obits, sign); + } + return VL_SHIFTRS_QQI(obits, lbits, 32, lhs, rwp[0]); +} +static inline IData VL_SHIFTRS_IIQ(int obits, int lbits, int rbits, IData lhs, QData rhs) VL_PURE { + VlWide rwp; + VL_SET_WQ(rwp, rhs); + return VL_SHIFTRS_IIW(obits, lbits, rbits, lhs, rwp); +} +static inline QData VL_SHIFTRS_QQQ(int obits, int lbits, int rbits, QData lhs, QData rhs) VL_PURE { + VlWide rwp; + VL_SET_WQ(rwp, rhs); + return VL_SHIFTRS_QQW(obits, lbits, rbits, lhs, rwp); +} + +//========================================================================= +// FOUR-STATE SHIFT OPERATORS +// For four-state: shift operations preserve X/Z in the shifted bits + +// Four-state left shift: shift in zeros, preserve X/Z pattern +static inline CData4 VL_SHIFTL_4STATE_C(CData4 lhs, int shift) { + if (shift >= 4) return 0; // All shifted out + if (_vl4_anyXZ_C(lhs)) { + // X/Z gets shifted, lower bits become 0 + CData4 result = 0; + for (int i = 0; i < 4 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (val << ((i + shift) * 2)); + } + } + return result; + } + // Clean value shift + return (lhs & 0x55555555) << shift; +} + +static inline SData4 VL_SHIFTL_4STATE_S(SData4 lhs, int shift) { + if (shift >= 8) return 0; + if (_vl4_anyXZ_S(lhs)) { + SData4 result = 0; + for (int i = 0; i < 8 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i + shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) << shift; +} + +static inline IData4 VL_SHIFTL_4STATE_I(IData4 lhs, int shift) { + if (shift >= 16) return 0; + if (_vl4_anyXZ_I(lhs)) { + IData4 result = 0; + for (int i = 0; i < 16 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i + shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) << shift; +} + +static inline QData4 VL_SHIFTL_4STATE_Q(QData4 lhs, int shift) { + if (shift 
>= 32) return 0; + if (_vl4_anyXZ_Q(lhs)) { + QData4 result = 0; + for (int i = 0; i < 32 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i + shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) << shift; +} + +// Four-state right shift +static inline CData4 VL_SHIFTR_4STATE_C(CData4 lhs, int shift) { + if (shift >= 4) return 0; + if (_vl4_anyXZ_C(lhs)) { + CData4 result = 0; + for (int i = shift; i < 4; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i - shift) * 2)); + } + } + return result; + } + return (lhs & 0x55555555) >> shift; +} + +static inline SData4 VL_SHIFTR_4STATE_S(SData4 lhs, int shift) { + if (shift >= 8) return 0; + if (_vl4_anyXZ_S(lhs)) { + SData4 result = 0; + for (int i = shift; i < 8; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i - shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) >> shift; +} + +static inline IData4 VL_SHIFTR_4STATE_I(IData4 lhs, int shift) { + if (shift >= 16) return 0; + if (_vl4_anyXZ_I(lhs)) { + IData4 result = 0; + for (int i = shift; i < 16; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i - shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) >> shift; +} + +static inline QData4 VL_SHIFTR_4STATE_Q(QData4 lhs, int shift) { + if (shift >= 32) return 0; + if (_vl4_anyXZ_Q(lhs)) { + QData4 result = 0; + for (int i = shift; i < 32; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i - shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) >> shift; +} + +//=================================================================== +// Bit selection + +// EMIT_RULE: VL_BITSEL: oclean=dirty; rclean==clean; +#define VL_BITSEL_IIII(lbits, lhs, rhs) ((lhs) >> 
(rhs)) +#define VL_BITSEL_QIII(lbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_QQII(lbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_IQII(lbits, lhs, rhs) (static_cast((lhs) >> (rhs))) + +static inline IData VL_BITSEL_IWII(int lbits, WDataInP const lwp, IData rd) VL_MT_SAFE { + const int word = VL_BITWORD_E(rd); + if (VL_UNLIKELY(rd > static_cast(lbits))) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. + // We return all 1's as that's more likely to find bugs (?) than 0's. + } else { + return (lwp[word] >> VL_BITBIT_E(rd)); + } +} + +// EMIT_RULE: VL_RANGE: oclean=lclean; out=dirty +// & MUST BE CLEAN (currently constant) +#define VL_SEL_IIII(lbits, lhs, lsb, width) ((lhs) >> (lsb)) +#define VL_SEL_QQII(lbits, lhs, lsb, width) ((lhs) >> (lsb)) +#define VL_SEL_IQII(lbits, lhs, lsb, width) (static_cast((lhs) >> (lsb))) + +static inline IData VL_SEL_IWII(int lbits, WDataInP const lwp, IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + if (VL_UNLIKELY(msb >= lbits)) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. + } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { + return VL_BITRSHIFT_W(lwp, lsb); + } else { + // 32 bit extraction may span two words + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); // bits that come from low word + return ((lwp[VL_BITWORD_E(msb)] << nbitsfromlow) | VL_BITRSHIFT_W(lwp, lsb)); + } +} + +static inline QData VL_SEL_QWII(int lbits, WDataInP const lwp, IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + if (VL_UNLIKELY(msb > lbits)) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. 
+ } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { + return VL_BITRSHIFT_W(lwp, lsb); + } else if (VL_BITWORD_E(msb) == 1 + VL_BITWORD_E(static_cast(lsb))) { + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); + const QData hi = (lwp[VL_BITWORD_E(msb)]); + const QData lo = VL_BITRSHIFT_W(lwp, lsb); + return (hi << nbitsfromlow) | lo; + } else { + // 64 bit extraction may span three words + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); + const QData hi = (lwp[VL_BITWORD_E(msb)]); + const QData mid = (lwp[VL_BITWORD_E(lsb) + 1]); + const QData lo = VL_BITRSHIFT_W(lwp, lsb); + return (hi << (nbitsfromlow + VL_EDATASIZE)) | (mid << nbitsfromlow) | lo; + } +} + +static inline WDataOutP VL_SEL_WWII(int obits, int lbits, WDataOutP owp, WDataInP const lwp, + IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + const int word_shift = VL_BITWORD_E(lsb); + if (VL_UNLIKELY(msb > lbits)) { // Outside bounds, + for (int i = 0; i < VL_WORDS_I(obits) - 1; ++i) owp[i] = ~0; + owp[VL_WORDS_I(obits) - 1] = VL_MASK_E(obits); + } else if (VL_BITBIT_E(lsb) == 0) { + // Just a word extract + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i + word_shift]; + } else { + // Not a _vl_insert because the bits come from any bit number and goto bit 0 + const int loffset = lsb & VL_SIZEBITS_E; + const int nbitsfromlow = VL_EDATASIZE - loffset; // bits that end up in lword (know + // loffset!=0) Middle words + const int words = VL_WORDS_I(msb - lsb + 1); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int upperword = i + word_shift + 1; + if (upperword <= static_cast(VL_BITWORD_E(msb))) { + owp[i] |= lwp[upperword] << nbitsfromlow; + } + } + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } + return owp; +} + +template +static inline VlQueue VL_CLONE_Q(const VlQueue& from, int lbits, int srcElementBits, + int dstElementBits) { + VlQueue ret; + VL_COPY_Q(ret, from, lbits, 
srcElementBits, dstElementBits); + return ret; +} + +template +static inline VlQueue VL_REVCLONE_Q(const VlQueue& from, int lbits, int srcElementBits, + int dstElementBits) { + VlQueue ret; + VL_REVCOPY_Q(ret, from, lbits, srcElementBits, dstElementBits); + return ret; +} + +// Helper function to get a bit from a queue at a specific bit index +template +static inline bool VL_GET_QUEUE_BIT(const VlQueue& queue, int srcElementBits, size_t bitIndex) { + const size_t elemIdx = bitIndex / srcElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return false; + + const T element = queue.at(elemIdx); + if (srcElementBits == 1) { + return element & 1; + } else { + const size_t bitInElem = bitIndex % srcElementBits; + const size_t actualBitPos = srcElementBits - 1 - bitInElem; + return (element >> actualBitPos) & 1; + } +} + +// Helper function to set a bit in the destination queue +template +static inline void VL_SET_QUEUE_BIT(VlQueue& queue, int dstElementBits, size_t bitIndex, + bool value) { + if (dstElementBits == 1) { + if (VL_UNLIKELY(bitIndex >= queue.size())) return; + queue.atWrite(bitIndex) = value ? 
1 : 0; + } else { + const size_t elemIdx = bitIndex / dstElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return; + const size_t bitInElem = bitIndex % dstElementBits; + const size_t actualBitPos = dstElementBits - 1 - bitInElem; + if (value) { + queue.atWrite(elemIdx) |= (static_cast(1) << actualBitPos); + } else { + queue.atWrite(elemIdx) &= ~(static_cast(1) << actualBitPos); + } + } +} + +// Helper function to get a bit from a VlWide queue at a specific bit index +template +static inline bool VL_GET_QUEUE_BIT(const VlQueue>& queue, int srcElementBits, + size_t bitIndex) { + const size_t elemIdx = bitIndex / srcElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return false; + + const VlWide& element = queue.at(elemIdx); + const size_t bitInElem = bitIndex % srcElementBits; + const size_t actualBitPos = srcElementBits - 1 - bitInElem; + + return VL_BITISSET_W(element.data(), actualBitPos); +} + +// Helper function to set a bit in a VlWide queue at a specific bit index +template +static inline void VL_SET_QUEUE_BIT(VlQueue>& queue, int dstElementBits, + size_t bitIndex, bool value) { + const size_t elemIdx = bitIndex / dstElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return; + + const size_t bitInElem = bitIndex % dstElementBits; + const size_t actualBitPos = dstElementBits - 1 - bitInElem; + + VlWide& element = queue.atWrite(elemIdx); + if (value) { + VL_ASSIGNBIT_WO(actualBitPos, element.data()); + } else { + VL_ASSIGNBIT_WI(actualBitPos, element.data(), 0); + } +} + +template +static inline void VL_ZERO_INIT_QUEUE_ELEM(T& elem) { + elem = 0; +} + +template +static inline void VL_ZERO_INIT_QUEUE_ELEM(VlWide& elem) { + for (size_t j = 0; j < N_Words; ++j) { elem.at(j) = 0; } +} + +// This specialization works for both VlQueue (and similar) as well +// as VlQueue>. 
+template +static inline void VL_COPY_Q(VlQueue& q, const VlQueue& from, int lbits, int srcElementBits, + int dstElementBits) { + if (srcElementBits == dstElementBits) { + // Simple case: same element bit width, direct copy of each element + if (VL_UNLIKELY(&q == &from)) return; // Skip self-assignment when it's truly a no-op + q = from; + } else { + // Different element bit widths: use streaming conversion + VlQueue srcCopy = from; + const size_t srcTotalBits = from.size() * srcElementBits; + const size_t dstSize = (srcTotalBits + dstElementBits - 1) / dstElementBits; + q.renew(dstSize); + for (size_t i = 0; i < dstSize; ++i) { VL_ZERO_INIT_QUEUE_ELEM(q.atWrite(i)); } + for (size_t bitIndex = 0; bitIndex < srcTotalBits; ++bitIndex) { + VL_SET_QUEUE_BIT(q, dstElementBits, bitIndex, + VL_GET_QUEUE_BIT(srcCopy, srcElementBits, bitIndex)); + } + } +} + +// This specialization works for both VlQueue (and similar) as well +// as VlQueue>. +template +static inline void VL_REVCOPY_Q(VlQueue& q, const VlQueue& from, int lbits, + int srcElementBits, int dstElementBits) { + const size_t srcTotalBits = from.size() * srcElementBits; + const size_t dstSize = (srcTotalBits + dstElementBits - 1) / dstElementBits; + + // Always make a copy to handle the case where q and from are the same queue + VlQueue srcCopy = from; + + // Initialize all elements to zero using appropriate method + q.renew(dstSize); + for (size_t i = 0; i < dstSize; ++i) VL_ZERO_INIT_QUEUE_ELEM(q.atWrite(i)); + + if (lbits == 1) { + // Simple bit reversal: write directly to destination + for (int i = srcTotalBits - 1; i >= 0; --i) { + VL_SET_QUEUE_BIT(q, dstElementBits, srcTotalBits - 1 - i, + VL_GET_QUEUE_BIT(srcCopy, srcElementBits, i)); + } + } else { + // Generalized block-reversal for lbits > 1: + // 1. Reverse all bits using 1-bit blocks + // 2. Split into lbits-sized blocks and pad incomplete blocks on the left + // 3. 
Reverse each lbits-sized block using 1-bit blocks + const size_t numCompleteBlocks = srcTotalBits / lbits; + const size_t remainderBits = srcTotalBits % lbits; + const size_t srcBlocks = numCompleteBlocks + (remainderBits > 0 ? 1 : 0); + + size_t dstBitIndex = 0; + + for (size_t block = 0; block < srcBlocks; ++block) { + const size_t blockStart = block * lbits; + const int bitsToProcess = VL_LIKELY(block < numCompleteBlocks) ? lbits : remainderBits; + for (int bit = bitsToProcess - 1; bit >= 0; --bit) { + const size_t reversedBitIndex = blockStart + bit; + const size_t originalBitIndex = srcTotalBits - 1 - reversedBitIndex; + VL_SET_QUEUE_BIT(q, dstElementBits, dstBitIndex++, + VL_GET_QUEUE_BIT(srcCopy, srcElementBits, originalBitIndex)); + } + dstBitIndex += lbits - bitsToProcess; + } + } +} + +//====================================================================== +// Expressions needing insert/select + +static inline void VL_UNPACK_RI_I(int lbits, int rbits, VlQueue& q, IData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_I(int lbits, int rbits, VlQueue& q, IData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_I(int lbits, int rbits, VlQueue& q, IData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < 
size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RQ_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? (lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_IWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +static inline void VL_UNPACK_RI_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? 
(lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_IWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +static inline void VL_UNPACK_RI_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? (lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_IWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +static inline void VL_UNPACK_RQ_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? (lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_QWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +template +static inline void VL_UNPACK_RW_W(int lbits, int rbits, VlQueue>& q, + WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? 
(lbits + bitPos) : lbits; + VL_SEL_WWII(actualWidth, rbits, q.atWrite(i), rwp, actualBitPos, actualWidth); + } +} + +template +static inline void VL_UNPACK_UI_I(int lbits, int rbits, VlUnpacked& q, + IData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_I(int lbits, int rbits, VlUnpacked& q, + IData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_I(int lbits, int rbits, VlUnpacked& q, + IData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UQ_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) 
& mask; +} + +template +static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask; +} + +template +static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask; +} + +template +static inline void VL_UNPACK_UQ_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_QWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask; +} + +template +static inline void VL_UNPACK_UW_W(int lbits, int rbits, VlUnpacked, N_Depth>& q, + WDataInP rwp) { + for (size_t i = 0; i < N_Depth; ++i) + VL_SEL_WWII(lbits, rbits, q[i], rwp, (N_Depth - 1 - i) * lbits, lbits); +} + +// Return QData from double (numeric) +// EMIT_RULE: VL_RTOIROUND_Q_D: oclean=dirty; lclean==clean/real +static inline QData VL_RTOIROUND_Q_D(double lhs) VL_PURE { + // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa + // This does not need to support subnormals as they are sub-integral + lhs = VL_ROUND(lhs); + if (lhs == 0.0) return 0; + const QData q = VL_CVT_Q_D(lhs); + const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; + const uint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); + uint64_t out = 0; + if (lsb < 0) { + out = mantissa >> -lsb; + } else if (lsb < 64) { + out = mantissa << lsb; + } + if (lhs < 0) out = -out; + return out; +} +static inline IData VL_RTOIROUND_I_D(double lhs) VL_PURE { + return static_cast(VL_RTOIROUND_Q_D(lhs)); +} +static inline WDataOutP VL_RTOIROUND_W_D(int obits, WDataOutP owp, double lhs) VL_MT_SAFE { + // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa + // This does not need to support 
subnormals as they are sub-integral + lhs = VL_ROUND(lhs); + VL_ZERO_W(obits, owp); + if (lhs == 0.0) return owp; + const QData q = VL_CVT_Q_D(lhs); + const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; + const uint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); + if (lsb < 0) { + VL_SET_WQ(owp, mantissa >> -lsb); + } else if (lsb < obits) { + _vl_insert_WQ(owp, mantissa, lsb + 52, lsb); + } + if (lhs < 0) VL_NEGATE_INPLACE_W(VL_WORDS_I(obits), owp); + return owp; +} + +//====================================================================== +// Range assignments + +// EMIT_RULE: VL_ASSIGNRANGE: rclean=dirty; +static inline void VL_ASSIGNSEL_II(int rbits, int obits, int lsb, CData& lhsr, IData rhs) VL_PURE { + _vl_insert_II(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_II(int rbits, int obits, int lsb, SData& lhsr, IData rhs) VL_PURE { + _vl_insert_II(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_II(int rbits, int obits, int lsb, IData& lhsr, IData rhs) VL_PURE { + _vl_insert_II(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_QI(int rbits, int obits, int lsb, QData& lhsr, IData rhs) VL_PURE { + _vl_insert_QQ(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_QQ(int rbits, int obits, int lsb, QData& lhsr, QData rhs) VL_PURE { + _vl_insert_QQ(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +// static inline void VL_ASSIGNSEL_IIIW(int obits, int lsb, IData& lhsr, WDataInP const rwp) +// VL_MT_SAFE { Illegal, as lhs width >= rhs width +static inline void VL_ASSIGNSEL_WI(int rbits, int obits, int lsb, WDataOutP iowp, + IData rhs) VL_MT_SAFE { + _vl_insert_WI(iowp, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_WQ(int rbits, int obits, int lsb, WDataOutP iowp, + QData rhs) VL_MT_SAFE { + _vl_insert_WQ(iowp, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_WW(int rbits, int obits, int lsb, 
WDataOutP iowp, + WDataInP const rwp) VL_MT_SAFE { + _vl_insert_WW(iowp, rwp, lsb + obits - 1, lsb, rbits); +} + +//==================================================== +// Range assignments + +// These additional functions copy bits range [obis+roffset-1:roffset] from rhs to lower bits +// of lhs(select before assigning). Rhs should always be wider than lhs. +static inline void VL_SELASSIGN_II(int rbits, int obits, CData& lhsr, IData rhs, + int roffset) VL_PURE { + _vl_insert_II(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} +static inline void VL_SELASSIGN_II(int rbits, int obits, SData& lhsr, IData rhs, + int roffset) VL_PURE { + _vl_insert_II(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} +static inline void VL_SELASSIGN_II(int rbits, int obits, IData& lhsr, IData rhs, + int roffset) VL_PURE { + _vl_insert_II(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} +static inline void VL_SELASSIGN_IQ(int rbits, int obits, CData& lhsr, QData rhs, + int roffset) VL_PURE { + // it will be truncated to right CData mask + const CData cleanmask = VL_MASK_I(rbits); + const CData insmask = VL_MASK_I(obits); + lhsr = (lhsr & ~insmask) | (static_cast(rhs >> roffset) & (insmask & cleanmask)); +} +static inline void VL_SELASSIGN_IQ(int rbits, int obits, SData& lhsr, QData rhs, + int roffset) VL_PURE { + // it will be truncated to right CData mask + const SData cleanmask = VL_MASK_I(rbits); + const SData insmask = VL_MASK_I(obits); + lhsr = (lhsr & ~insmask) | (static_cast(rhs >> roffset) & (insmask & cleanmask)); +} +static inline void VL_SELASSIGN_IQ(int rbits, int obits, IData& lhsr, QData rhs, + int roffset) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = VL_MASK_I(obits); + lhsr = (lhsr & ~insmask) | (static_cast(rhs >> roffset) & (insmask & cleanmask)); +} + +static inline void VL_SELASSIGN_QQ(int rbits, int obits, QData& lhsr, QData rhs, + int roffset) VL_PURE { + _vl_insert_QQ(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} + +static inline void 
VL_SELASSIGN_IW(int rbits, int obits, CData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + IData l = static_cast(lhsr); + _vl_insert_IW(l, rhs, roffset + obits - 1, roffset, rbits); + lhsr = static_cast(l); +} +static inline void VL_SELASSIGN_IW(int rbits, int obits, SData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + IData l = static_cast(lhsr); + _vl_insert_IW(l, rhs, roffset + obits - 1, roffset, rbits); + lhsr = static_cast(l); +} +static inline void VL_SELASSIGN_IW(int rbits, int obits, IData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + _vl_insert_IW(lhsr, rhs, roffset + obits - 1, roffset, rbits); +} +static inline void VL_SELASSIGN_QW(int rbits, int obits, QData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + // assert VL_QDATASIZE >= rbits > VL_IDATASIZE; + IData low = static_cast(lhsr); + IData high = static_cast(lhsr >> VL_IDATASIZE); + if (obits <= VL_IDATASIZE) { + _vl_insert_IW(low, rhs, obits + roffset - 1, roffset, VL_IDATASIZE); + } else { + _vl_insert_IW(low, rhs, roffset + VL_IDATASIZE - 1, roffset, VL_IDATASIZE); + _vl_insert_IW(high, rhs, roffset + obits - 1, roffset + VL_IDATASIZE, + rbits - VL_IDATASIZE); + } + lhsr = (static_cast(high) << VL_IDATASIZE) | low; +} + +static inline void VL_SELASSIGN_WW(int rbits, int obits, WDataOutP iowp, WDataInP const rwp, + int roffset) VL_MT_SAFE { + // assert rbits > VL_QDATASIZE + const int wordoff = roffset / VL_EDATASIZE; + const int lsb = roffset & VL_SIZEBITS_E; + const int upperbits = lsb == 0 ? 0 : VL_EDATASIZE - lsb; + // If roffset is not aligned, we copy some bits to align it. + if (lsb != 0) { + const int w = obits < upperbits ? 
obits : upperbits; + const int insmask = VL_MASK_E(w); + iowp[0] = (iowp[0] & ~insmask) | ((rwp[wordoff] >> lsb) & insmask); + // cppcheck-suppress knownConditionTrueFalse + if (w == obits) return; + obits -= w; + } + _vl_insert_WW(iowp, rwp + wordoff + (lsb != 0), upperbits + obits - 1, upperbits, rbits); +} + +//====================================================================== +// Triops + +static inline WDataOutP VL_COND_WIWW(int obits, WDataOutP owp, int cond, WDataInP const w1p, + WDataInP const w2p) VL_MT_SAFE { + return VL_MEMCPY_W(owp, cond ? w1p : w2p, VL_WORDS_I(obits)); +} + +//====================================================================== +// Constification + +// VL_CONST_W_#X(int obits, WDataOutP owp, IData data0, .... IData data(#-1)) +// Sets wide vector words to specified constant words. +// These macros are used when o might represent more words then are given as constants, +// hence all upper words must be zeroed. +// If changing the number of functions here, also change EMITCINLINES_NUM_CONSTW + +#define VL_C_END_(obits, wordsSet) \ + VL_MEMSET_ZERO_W(o + (wordsSet), VL_WORDS_I(obits) - (wordsSet)); \ + return o + +// clang-format off +static inline WDataOutP VL_CONST_W_1X(int obits, WDataOutP o, EData d0) VL_MT_SAFE { + o[0] = d0; + VL_C_END_(obits, 1); +} +static inline WDataOutP VL_CONST_W_2X(int obits, WDataOutP o, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; + VL_C_END_(obits, 2); +} +static inline WDataOutP VL_CONST_W_3X(int obits, WDataOutP o, EData d2, EData d1, + EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; + VL_C_END_(obits, 3); +} +static inline WDataOutP VL_CONST_W_4X(int obits, WDataOutP o, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + VL_C_END_(obits, 4); +} +static inline WDataOutP VL_CONST_W_5X(int obits, WDataOutP o, + EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = 
d4; + VL_C_END_(obits, 5); +} +static inline WDataOutP VL_CONST_W_6X(int obits, WDataOutP o, + EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; + VL_C_END_(obits, 6); +} +static inline WDataOutP VL_CONST_W_7X(int obits, WDataOutP o, + EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; + VL_C_END_(obits, 7); +} +static inline WDataOutP VL_CONST_W_8X(int obits, WDataOutP o, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; + VL_C_END_(obits, 8); +} +// +static inline WDataOutP VL_CONSTHI_W_1X(int obits, int lsb, WDataOutP o, + EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; + VL_C_END_(obits, VL_WORDS_I(lsb) + 1); +} +static inline WDataOutP VL_CONSTHI_W_2X(int obits, int lsb, WDataOutP o, + EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; + VL_C_END_(obits, VL_WORDS_I(lsb) + 2); +} +static inline WDataOutP VL_CONSTHI_W_3X(int obits, int lsb, WDataOutP o, + EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; + VL_C_END_(obits, VL_WORDS_I(lsb) + 3); +} +static inline WDataOutP VL_CONSTHI_W_4X(int obits, int lsb, WDataOutP o, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + VL_C_END_(obits, VL_WORDS_I(lsb) + 4); +} +static inline WDataOutP VL_CONSTHI_W_5X(int obits, int lsb, WDataOutP o, + EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; + VL_C_END_(obits, 
VL_WORDS_I(lsb) + 5); +} +static inline WDataOutP VL_CONSTHI_W_6X(int obits, int lsb, WDataOutP o, + EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; ohi[5] = d5; + VL_C_END_(obits, VL_WORDS_I(lsb) + 6); +} +static inline WDataOutP VL_CONSTHI_W_7X(int obits, int lsb, WDataOutP o, + EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; ohi[5] = d5; ohi[6] = d6; + VL_C_END_(obits, VL_WORDS_I(lsb) + 7); +} +static inline WDataOutP VL_CONSTHI_W_8X(int obits, int lsb, WDataOutP o, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; ohi[5] = d5; ohi[6] = d6; ohi[7] = d7; + VL_C_END_(obits, VL_WORDS_I(lsb) + 8); +} + +#undef VL_C_END_ + +// Partial constant, lower words of vector wider than 8*32, starting at bit number lsb +static inline void VL_CONSTLO_W_8X(int lsb, WDataOutP obase, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; +} +// clang-format on + +//====================================================================== +// Strings + +extern std::string VL_PUTC_N(const std::string& lhs, IData rhs, CData ths) VL_PURE; +extern CData VL_GETC_N(const std::string& lhs, IData rhs) VL_PURE; +extern std::string VL_SUBSTR_N(const std::string& lhs, IData rhs, IData ths) VL_PURE; + +inline IData VL_CMP_NN(const std::string& lhs, const std::string& rhs, bool ignoreCase) VL_PURE { + // SystemVerilog does not allow a string variable to contain '\0'. 
+ // So C functions such as strcmp() can correctly compare strings. + if (ignoreCase) { + return VL_STRCASECMP(lhs.c_str(), rhs.c_str()); + } else { + return std::strcmp(lhs.c_str(), rhs.c_str()); + } +} + +extern IData VL_ATOI_N(const std::string& str, int base) VL_PURE; +extern IData VL_NTOI_I(int obits, const std::string& str) VL_PURE; +extern QData VL_NTOI_Q(int obits, const std::string& str) VL_PURE; +extern void VL_NTOI_W(int obits, WDataOutP owp, const std::string& str) VL_PURE; + +extern IData VL_FGETS_NI(std::string& dest, IData fpi) VL_MT_SAFE; + +//====================================================================== +// Dist functions + +extern IData VL_DIST_CHI_SQUARE(IData& seedr, IData udeg_of_free) VL_MT_SAFE; +extern IData VL_DIST_ERLANG(IData& seedr, IData uk, IData umean) VL_MT_SAFE; +extern IData VL_DIST_EXPONENTIAL(IData& seedr, IData umean) VL_MT_SAFE; +extern IData VL_DIST_NORMAL(IData& seedr, IData umean, IData udeviation) VL_MT_SAFE; +extern IData VL_DIST_POISSON(IData& seedr, IData umean) VL_MT_SAFE; +extern IData VL_DIST_T(IData& seedr, IData udeg_of_free) VL_MT_SAFE; +extern IData VL_DIST_UNIFORM(IData& seedr, IData ustart, IData uend) VL_MT_SAFE; + +//====================================================================== +// Conversion functions + +extern std::string VL_CVT_PACK_STR_NW(int lwords, const WDataInP lwp) VL_PURE; +extern std::string VL_CVT_PACK_STR_ND(const VlQueue& q) VL_PURE; +inline std::string VL_CVT_PACK_STR_NQ(QData lhs) VL_PURE { + VlWide lw; + VL_SET_WQ(lw, lhs); + return VL_CVT_PACK_STR_NW(VL_WQ_WORDS_E, lw); +} +inline std::string VL_CVT_PACK_STR_NN(const std::string& lhs) VL_PURE { return lhs; } +inline std::string& VL_CVT_PACK_STR_NN(std::string& lhs) VL_PURE { return lhs; } +inline std::string VL_CVT_PACK_STR_NI(IData lhs) VL_PURE { + VlWide lw; + VL_SET_WI(lw, lhs); + return VL_CVT_PACK_STR_NW(1, lw); +} +inline std::string VL_CONCATN_NNN(const std::string& lhs, const std::string& rhs) VL_PURE { + return lhs 
+ rhs; +} +inline std::string VL_REPLICATEN_NNQ(const std::string& lhs, IData rep) VL_PURE { + std::string result; + result.reserve(lhs.length() * rep); + for (unsigned times = 0; times < rep; ++times) result += lhs; + return result; +} +inline std::string VL_REPLICATEN_NNI(const std::string& lhs, IData rep) VL_PURE { + return VL_REPLICATEN_NNQ(lhs, rep); +} + +inline IData VL_LEN_IN(const std::string& ld) { return static_cast(ld.length()); } +extern std::string VL_TOLOWER_NN(const std::string& ld) VL_PURE; +extern std::string VL_TOUPPER_NN(const std::string& ld) VL_PURE; + +extern IData VL_FERROR_IN(IData fpi, std::string& outputr) VL_MT_SAFE; +extern IData VL_FERROR_IW(IData fpi, int obits, WDataOutP outwp) VL_MT_SAFE; +extern IData VL_FOPEN_NN(const std::string& filename, const std::string& mode) VL_MT_SAFE; +extern IData VL_FOPEN_MCD_N(const std::string& filename) VL_MT_SAFE; +extern void VL_READMEM_N(bool hex, int bits, QData depth, int array_lsb, + const std::string& filename, void* memp, QData start, + QData end) VL_MT_SAFE; +extern void VL_WRITEMEM_N(bool hex, int bits, QData depth, int array_lsb, + const std::string& filename, const void* memp, QData start, + QData end) VL_MT_SAFE; +extern IData VL_SSCANF_INNX(int lbits, const std::string& ld, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits_ignored, std::string& output, const std::string& format, + int argc, ...) VL_MT_SAFE; +extern std::string VL_SFORMATF_N_NX(const std::string& format, int argc, ...) 
VL_MT_SAFE; +extern void VL_TIMEFORMAT_IINI(bool hasUnits, int units, bool hasPrecision, int precision, + bool hasSuffix, const std::string& suffix, bool hasWidth, int width, + VerilatedContext* contextp) VL_MT_SAFE; +extern IData VL_VALUEPLUSARGS_INW(int rbits, const std::string& ld, WDataOutP rwp) VL_MT_SAFE; +inline IData VL_VALUEPLUSARGS_IND(int rbits, const std::string& ld, double& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_CVT_D_Q(VL_SET_QW(rwp)); + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, CData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, SData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, IData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, QData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_SET_QW(rwp); + return got; +} +inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, double& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_CVT_D_Q(VL_SET_QW(rwp)); + return got; +} +extern IData VL_VALUEPLUSARGS_INN(int, const std::string& ld, std::string& rdr) VL_MT_SAFE; + +uint64_t VL_MURMUR64_HASH(const char* key) VL_PURE; + +//====================================================================== + +#endif // Guard diff --git a/include/verilated_funcs_cleaned_manual.h b/include/verilated_funcs_cleaned_manual.h new file mode 100644 index 
000000000..959e316a4 --- /dev/null +++ b/include/verilated_funcs_cleaned_manual.h @@ -0,0 +1,3641 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +//************************************************************************* +// +// Code available from: https://verilator.org +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of either the GNU Lesser General Public License Version 3 +// or the Perl Artistic License Version 2.0. +// SPDX-FileCopyrightText: 2003-2026 Wilson Snyder +// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 +// +//************************************************************************* +/// +/// \file +/// \brief Verilated common functions +/// +/// verilated.h should be included instead of this file. +/// +/// Those macro/function/variable starting or ending in _ are internal, +/// however many of the other function/macros here are also internal. +/// +//************************************************************************* + +#ifndef VERILATOR_VERILATED_FUNCS_H_ +#define VERILATOR_VERILATED_FUNCS_H_ + +#ifndef VERILATOR_VERILATED_H_INTERNAL_ +#error "verilated_funcs.h should only be included by verilated.h" +#endif + +#include + +//========================================================================= +// Extern functions -- User may override -- See verilated.cpp + +/// Routine to call for $finish +/// User code may wish to replace this function, to do so, define VL_USER_FINISH. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_FINISH_MT instead, which eventually calls this. +extern void vl_finish(const char* filename, int linenum, const char* hier) VL_MT_UNSAFE; + +/// Routine to call for $stop and non-fatal error +/// User code may wish to replace this function, to do so, define VL_USER_STOP. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_STOP_MT instead, which eventually calls this. 
+extern void vl_stop(const char* filename, int linenum, const char* hier) VL_MT_UNSAFE; + +/// Routine to call for fatal messages +/// User code may wish to replace this function, to do so, define VL_USER_FATAL. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_FATAL_MT instead, which eventually calls this. +extern void vl_fatal(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_UNSAFE; + +/// Routine to call for warning messages +/// User code may wish to replace this function, to do so, define VL_USER_WARN. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_WARN_MT instead, which eventually calls this. +extern void vl_warn(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_UNSAFE; + +//========================================================================= +// Extern functions -- Slow path + +/// Multithread safe wrapper for calls to $finish +extern void VL_FINISH_MT(const char* filename, int linenum, const char* hier) VL_MT_SAFE; +/// Multithread safe wrapper for calls to $stop +extern void VL_STOP_MT(const char* filename, int linenum, const char* hier, + bool maybe = true) VL_MT_SAFE; +/// Multithread safe wrapper to call for fatal messages +extern void VL_FATAL_MT(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_SAFE; +/// Multithread safe wrapper to call for warning messages +extern void VL_WARN_MT(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_SAFE; + +// clang-format off +/// Print a string, multithread safe. Eventually VL_PRINTF will get called. +extern void VL_PRINTF_MT(const char* formatp, ...) VL_ATTR_PRINTF(1) VL_MT_SAFE; +// clang-format on + +/// Print a debug message from internals with standard prefix, with printf style format +extern void VL_DBG_MSGF(const char* formatp, ...) 
VL_ATTR_PRINTF(1) VL_MT_SAFE; + +/// Print a debug message from string via VL_DBG_MSGF +inline void VL_DBG_MSGS(const std::string& str) VL_MT_SAFE { VL_DBG_MSGF("%s", str.c_str()); } + +// EMIT_RULE: VL_RANDOM: oclean=dirty +inline IData VL_RANDOM_I() VL_MT_SAFE { return vl_rand64(); } +inline QData VL_RANDOM_Q() VL_MT_SAFE { return vl_rand64(); } +extern WDataOutP VL_RANDOM_W(int obits, WDataOutP outwp) VL_MT_SAFE; +extern IData VL_RANDOM_SEEDED_II(IData& seedr) VL_MT_SAFE; +extern IData VL_URANDOM_SEEDED_II(IData seed) VL_MT_SAFE; +inline IData VL_URANDOM_RANGE_I(IData hi, IData lo) { + const uint64_t rnd = vl_rand64(); + if (VL_LIKELY(hi > lo)) { + // (hi - lo + 1) can be zero when hi is UINT_MAX and lo is zero + if (VL_UNLIKELY(hi - lo + 1 == 0)) return rnd; + // Modulus isn't very fast but it's common that hi-low is power-of-two + return (rnd % (hi - lo + 1)) + lo; + } else { + if (VL_UNLIKELY(lo - hi + 1 == 0)) return rnd; + return (rnd % (lo - hi + 1)) + hi; + } +} + +/// Random reset a signal of given width (init time only, var-specific PRNG) +extern IData VL_SCOPED_RAND_RESET_I(int obits, uint64_t scopeHash, uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (init time only, var-specific PRNG) +extern QData VL_SCOPED_RAND_RESET_Q(int obits, uint64_t scopeHash, uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (init time only, var-specific PRNG) +extern WDataOutP VL_SCOPED_RAND_RESET_W(int obits, WDataOutP outwp, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; + +/// Random reset a signal of given width (assign time only) +extern IData VL_SCOPED_RAND_RESET_ASSIGN_I(int obits, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (assign time only) +extern QData VL_SCOPED_RAND_RESET_ASSIGN_Q(int obits, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; +/// Random reset a signal of given width (assign time only) +extern WDataOutP VL_SCOPED_RAND_RESET_ASSIGN_W(int obits, 
WDataOutP outwp, uint64_t scopeHash, + uint64_t salt) VL_MT_UNSAFE; + +/// Random reset a signal of given width (init time only) +extern IData VL_RAND_RESET_I(int obits) VL_MT_SAFE; +/// Random reset a signal of given width (init time only) +extern QData VL_RAND_RESET_Q(int obits) VL_MT_SAFE; +/// Random reset a signal of given width (init time only) +extern WDataOutP VL_RAND_RESET_W(int obits, WDataOutP outwp) VL_MT_SAFE; + +/// Zero reset a signal (slow - else use VL_ZERO_W) +extern WDataOutP VL_ZERO_RESET_W(int obits, WDataOutP outwp) VL_MT_SAFE; + +/// Four-state reset - initialize to X (unknown) +static inline CData4 VL_X_RESET_4STATE_C() VL_MT_SAFE; +static inline SData4 VL_X_RESET_4STATE_S() VL_MT_SAFE; +static inline IData4 VL_X_RESET_4STATE_I() VL_MT_SAFE; +static inline QData4 VL_X_RESET_4STATE_Q() VL_MT_SAFE; +extern WDataOutP VL_X_RESET_4STATE_W(int obits, WDataOutP owp) VL_MT_SAFE; + +extern void VL_PRINTTIMESCALE(const char* namep, const char* timeunitp, + const VerilatedContext* contextp) VL_MT_SAFE; + +extern WDataOutP _vl_moddiv_w(int lbits, WDataOutP owp, WDataInP const lwp, WDataInP const rwp, + bool is_modulus) VL_MT_SAFE; + +extern void _vl_vsss_based(WDataOutP owp, int obits, int baseLog2, const char* strp, + size_t posstart, size_t posend) VL_MT_SAFE; + +extern IData VL_FGETS_IXI(int obits, void* destp, IData fpi) VL_MT_SAFE; + +extern void VL_FFLUSH_I(IData fdi) VL_MT_SAFE; +extern IData VL_FSEEK_I(IData fdi, IData offset, IData origin) VL_MT_SAFE; +extern IData VL_FTELL_I(IData fdi) VL_MT_SAFE; +extern void VL_FCLOSE_I(IData fdi) VL_MT_SAFE; + +extern IData VL_FREAD_I(int width, int array_lsb, int array_size, void* memp, IData fpi, + IData start, IData count) VL_MT_SAFE; + +extern void VL_WRITEF_NX(const std::string& format, int argc, ...) VL_MT_SAFE; +extern void VL_FWRITEF_NX(IData fpi, const std::string& format, int argc, ...) 
VL_MT_SAFE; + +// Four-state display functions - output X/Z for four-state values +extern void VL_WRITEF_4STATE_BIN_C(const std::string& format, int lbits, CData4 data) VL_MT_SAFE; +extern void VL_WRITEF_4STATE_BIN_S(const std::string& format, int lbits, SData4 data) VL_MT_SAFE; +extern void VL_WRITEF_4STATE_BIN_I(const std::string& format, int lbits, IData4 data) VL_MT_SAFE; +extern void VL_WRITEF_4STATE_BIN_Q(const std::string& format, int lbits, QData4 data) VL_MT_SAFE; + +extern IData VL_FSCANF_INX(IData fpi, const std::string& format, int argc, ...) VL_MT_SAFE; +extern IData VL_SSCANF_IINX(int lbits, IData ld, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern IData VL_SSCANF_IQNX(int lbits, QData ld, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern IData VL_SSCANF_IWNX(int lbits, WDataInP const lwp, const std::string& format, int argc, + ...) VL_MT_SAFE; + +extern void VL_SFORMAT_NX(int obits, CData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, SData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, IData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, QData& destr, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits, void* destp, const std::string& format, int argc, + ...) 
VL_MT_SAFE; + +extern void VL_STACKTRACE() VL_MT_SAFE; +extern std::string VL_STACKTRACE_N() VL_MT_SAFE; +extern IData VL_SYSTEM_IW(int lhswords, WDataInP const lhsp) VL_MT_SAFE; +extern IData VL_SYSTEM_IQ(QData lhs) VL_MT_SAFE; +inline IData VL_SYSTEM_II(IData lhs) VL_MT_SAFE { return VL_SYSTEM_IQ(lhs); } +extern IData VL_SYSTEM_IN(const std::string& lhs) VL_MT_SAFE; + +extern IData VL_TESTPLUSARGS_I(const std::string& format) VL_MT_SAFE; +extern const char* vl_mc_scan_plusargs(const char* prefixp) VL_MT_SAFE; // PLIish + +//========================================================================= +// Base macros + +// Return true if data[bit] set; not 0/1 return, but 0/non-zero return. +// Arguments must not have side effects +#define VL_BITISSETLIMIT_W(data, width, bit) (((bit) < (width)) && VL_BITISSET_W(data, bit)) + +// Shift appropriate word by bit. Does not account for wrapping between two words +// Argument 'bit' must not have side effects +#define VL_BITRSHIFT_W(data, bit) ((data)[VL_BITWORD_E(bit)] >> VL_BITBIT_E(bit)) + +// Create two 32-bit words from quadword +// WData is always at least 2 words; does not clean upper bits +#define VL_SET_WQ(owp, data) \ + do { \ + (owp)[0] = static_cast(data); \ + (owp)[1] = static_cast((data) >> VL_EDATASIZE); \ + } while (false) +#define VL_SET_WI(owp, data) \ + do { \ + (owp)[0] = static_cast(data); \ + (owp)[1] = 0; \ + } while (false) +#define VL_SET_QW(lwp) \ + ((static_cast((lwp)[0])) \ + | (static_cast((lwp)[1]) << (static_cast(VL_EDATASIZE)))) +#define VL_SET_QII(ld, rd) ((static_cast(ld) << 32ULL) | static_cast(rd)) + +// Return FILE* from IData +extern FILE* VL_CVT_I_FP(IData lhs) VL_MT_SAFE; + +// clang-format off +// Use a union to avoid cast-to-different-size warnings +// Return void* from QData +static inline void* VL_CVT_Q_VP(QData lhs) VL_PURE { + union { void* fp; QData q; } u; + u.q = lhs; + return u.fp; +} +// Return QData from const void* +static inline QData VL_CVT_VP_Q(const void* fp) VL_PURE { 
+ union { const void* fp; QData q; } u; + u.q = 0; + u.fp = fp; + return u.q; +} +// Return double from QData (bits, not numerically) +static inline double VL_CVT_D_Q(QData lhs) VL_PURE { + union { double d; QData q; } u; + u.q = lhs; + return u.d; +} +// Return QData from double (bits, not numerically) +static inline QData VL_CVT_Q_D(double lhs) VL_PURE { + union { double d; QData q; } u; + u.d = lhs; + return u.q; +} +// clang-format on +// Return string from DPI char* +static inline std::string VL_CVT_N_CSTR(const char* lhsp) VL_PURE { + return lhsp ? std::string{lhsp} : ""s; +} + +// Return queue from an unpacked array +template +static inline VlQueue VL_CVT_UNPACK_TO_Q(const VlUnpacked& q) VL_PURE { + VlQueue ret; + for (size_t i = 0; i < N_Depth; ++i) ret.push_back(q[i]); + return ret; +} + +// Return double from lhs (numeric) unsigned +double VL_ITOR_D_W(int lbits, WDataInP const lwp) VL_PURE; +static inline double VL_ITOR_D_I(int, IData lhs) VL_PURE { + return static_cast(static_cast(lhs)); +} +static inline double VL_ITOR_D_Q(int, QData lhs) VL_PURE { + return static_cast(static_cast(lhs)); +} +// Return double from lhs (numeric) signed +double VL_ISTOR_D_W(int lbits, WDataInP const lwp) VL_MT_SAFE; +static inline double VL_ISTOR_D_I(int lbits, IData lhs) VL_MT_SAFE { + if (lbits == 32) return static_cast(static_cast(lhs)); + VlWide lwp; + VL_SET_WI(lwp, lhs); + return VL_ISTOR_D_W(lbits, lwp); +} +static inline double VL_ISTOR_D_Q(int lbits, QData lhs) VL_MT_SAFE { + if (lbits == 64) return static_cast(static_cast(lhs)); + VlWide lwp; + VL_SET_WQ(lwp, lhs); + return VL_ISTOR_D_W(lbits, lwp); +} +// Return IData truncated from double (numeric) +static inline IData VL_RTOI_I_D(double lhs) VL_PURE { return static_cast(VL_TRUNC(lhs)); } + +// Sign extend such that if MSB set, we get ffff_ffff, else 0s +// (Requires clean input) +#define VL_SIGN_I(nbits, lhs) ((lhs) >> VL_BITBIT_I((nbits) - VL_UL(1))) +#define VL_SIGN_Q(nbits, lhs) ((lhs) >> 
VL_BITBIT_Q((nbits) - 1ULL)) +#define VL_SIGN_E(nbits, lhs) ((lhs) >> VL_BITBIT_E((nbits) - VL_EUL(1))) +#define VL_SIGN_W(nbits, rwp) \ + ((rwp)[VL_BITWORD_E((nbits) - VL_EUL(1))] >> VL_BITBIT_E((nbits) - VL_EUL(1))) +#define VL_SIGNONES_E(nbits, lhs) (-(VL_SIGN_E(nbits, lhs))) + +// Sign bit extended up to MSB, doesn't include unsigned portion +// Optimization bug in GCC 3.3 returns different bitmasks to later states for +static inline IData VL_EXTENDSIGN_I(int lbits, IData lhs) VL_PURE { + return (-((lhs) & (VL_UL(1) << (lbits - 1)))); +} +static inline QData VL_EXTENDSIGN_Q(int lbits, QData lhs) VL_PURE { + return (-((lhs) & (1ULL << (lbits - 1)))); +} + +// Debugging prints +extern void _vl_debug_print_w(int lbits, WDataInP const iwp) VL_MT_SAFE; + +//========================================================================= +// Time handling + +// clang-format off + +#if defined(SYSTEMC_VERSION) +/// Return current simulation time +// Already defined: extern sc_time sc_time_stamp(); +inline uint64_t vl_time_stamp64() VL_MT_SAFE { return sc_core::sc_time_stamp().value(); } +#else // Non-SystemC +# if !defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY) +# ifdef VL_TIME_STAMP64 +// vl_time_stamp64() may be optionally defined by the user to return time. +// On MSVC++ weak symbols are not supported so must be declared, or define +// VL_TIME_CONTEXT. +extern uint64_t vl_time_stamp64() VL_ATTR_WEAK VL_MT_SAFE; +# else +// sc_time_stamp() may be optionally defined by the user to return time. +// On MSVC++ weak symbols are not supported so must be declared, or define +// VL_TIME_CONTEXT. +extern double sc_time_stamp() VL_ATTR_WEAK VL_MT_SAFE; // Verilator 4.032 and newer +inline uint64_t vl_time_stamp64() VL_MT_SAFE { + // clang9.0.1 requires & although we really do want the weak symbol value + // cppcheck-suppress duplicateValueTernary + return VL_LIKELY(&sc_time_stamp) ? 
static_cast(sc_time_stamp()) : 0; +} +# endif +# endif +#endif + +// clang-format on + +uint64_t VerilatedContext::time() const VL_MT_SAFE { + // When using non-default context, fastest path is return time + if (VL_LIKELY(m_s.m_time)) return m_s.m_time; +#if defined(SYSTEMC_VERSION) || (!defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY)) + // Zero time could mean really at zero, or using callback + // clang9.0.1 requires & although we really do want the weak symbol value + if (VL_LIKELY(&vl_time_stamp64)) { // else is weak symbol that is not defined + return vl_time_stamp64(); + } +#endif + return 0; +} + +#define VL_TIME_Q() (Verilated::threadContextp()->time()) +#define VL_TIME_D() (static_cast(VL_TIME_Q())) + +// Time scaled from 1-per-precision into a module's time units ("Unit"-ed, not "United") +// Optimized assuming scale is always constant. +// Can't use multiply in Q flavor, as might lose precision +#define VL_TIME_ROUND(t, p) (((t) + ((p) / 2)) / (p)) +#define VL_TIME_UNITED_Q(scale) VL_TIME_ROUND(VL_TIME_Q(), static_cast(scale)) +#define VL_TIME_UNITED_D(scale) (VL_TIME_D() / static_cast(scale)) + +// Return time precision as multiplier of time units +double vl_time_multiplier(int scale) VL_PURE; +// Return power of 10. e.g. returns 100 if n==2 +uint64_t vl_time_pow10(int n) VL_PURE; +// Return time as string with timescale suffix +std::string vl_timescaled_double(double value, const char* format = "%0.0f%s") VL_PURE; + +//========================================================================= +// Functional macros/routines +// These all take the form +// VL_func_IW(bits, bits, op, op) +// VL_func_WW(bits, bits, out, op, op) +// The I/W indicates if it's a integer or wide for the output and each operand. +// The bits indicate the bit width of the output and each operand. +// If wide output, a temporary storage location is specified. 
+ +//=================================================================== +// SETTING OPERATORS + +VL_ATTR_ALWINLINE +static WDataOutP VL_MEMSET_ZERO_W(WDataOutP owp, int words) VL_MT_SAFE { + return static_cast(std::memset(owp, 0, words * sizeof(EData))); +} +VL_ATTR_ALWINLINE +static WDataOutP VL_MEMSET_ONES_W(WDataOutP owp, int words) VL_MT_SAFE { + return static_cast(std::memset(owp, 0xff, words * sizeof(EData))); +} +VL_ATTR_ALWINLINE +static WDataOutP VL_MEMCPY_W(WDataOutP owp, WDataInP const iwp, int words) VL_MT_SAFE { + return static_cast(std::memcpy(owp, iwp, words * sizeof(EData))); +} + +// Output clean +// EMIT_RULE: VL_CLEAN: oclean=clean; obits=lbits; +#define VL_CLEAN_II(obits, lbits, lhs) ((lhs) & (VL_MASK_I(obits))) +#define VL_CLEAN_QQ(obits, lbits, lhs) ((lhs) & (VL_MASK_Q(obits))) + +// EMIT_RULE: VL_ASSIGNCLEAN: oclean=clean; obits==lbits; +#define VL_ASSIGNCLEAN_W(obits, owp, lwp) VL_CLEAN_WW((obits), (owp), (lwp)) +static inline WDataOutP _vl_clean_inplace_w(int obits, WDataOutP owp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + owp[words - 1] &= VL_MASK_E(obits); + return owp; +} +static inline WDataOutP VL_CLEAN_WW(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + VL_MEMCPY_W(owp, lwp, words - 1); + owp[words - 1] = lwp[words - 1] & VL_MASK_E(obits); + return owp; +} +static inline WDataOutP VL_ZERO_W(int obits, WDataOutP owp) VL_MT_SAFE { + return VL_MEMSET_ZERO_W(owp, VL_WORDS_I(obits)); +} +static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + VL_MEMSET_ONES_W(owp, words - 1); + owp[words - 1] = VL_MASK_E(obits); + return owp; +} + +// EMIT_RULE: VL_ASSIGN: oclean=rclean; obits==lbits; +// For now, we always have a clean rhs. +// Note: If a ASSIGN isn't clean, use VL_ASSIGNCLEAN instead to do the same thing. 
+static inline WDataOutP VL_ASSIGN_W(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + return VL_MEMCPY_W(owp, lwp, VL_WORDS_I(obits)); +} + +// EMIT_RULE: VL_ASSIGNBIT: rclean=clean; +static inline void VL_ASSIGNBIT_II(int bit, CData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_II(int bit, SData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_II(int bit, IData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_QI(int bit, QData& lhsr, QData rhs) VL_PURE { + lhsr = ((lhsr & ~(1ULL << VL_BITBIT_Q(bit))) | (static_cast(rhs) << VL_BITBIT_Q(bit))); +} +static inline void VL_ASSIGNBIT_WI(int bit, WDataOutP owp, IData rhs) VL_MT_SAFE { + const EData orig = owp[VL_BITWORD_E(bit)]; + owp[VL_BITWORD_E(bit)] = ((orig & ~(VL_EUL(1) << VL_BITBIT_E(bit))) + | (static_cast(rhs) << VL_BITBIT_E(bit))); +} +// Alternative form that is an instruction faster when rhs is constant one. 
+static inline void VL_ASSIGNBIT_IO(int bit, CData& lhsr) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_IO(int bit, SData& lhsr) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_IO(int bit, IData& lhsr) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_QO(int bit, QData& lhsr) VL_PURE { + lhsr = (lhsr | (1ULL << VL_BITBIT_Q(bit))); +} +static inline void VL_ASSIGNBIT_WO(int bit, WDataOutP owp) VL_MT_SAFE { + const EData orig = owp[VL_BITWORD_E(bit)]; + owp[VL_BITWORD_E(bit)] = (orig | (VL_EUL(1) << VL_BITBIT_E(bit))); +} + +//=================================================================== +// SYSTEMC OPERATORS +// Copying verilog format to systemc integers, doubles, and bit vectors. +// Get a SystemC variable + +#define VL_ASSIGN_DSD(obits, vvar, svar) \ + { (vvar) = (svar).read(); } +#define VL_ASSIGN_ISI(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read()); } +#define VL_ASSIGN_QSQ(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read()); } + +#define VL_ASSIGN_ISW(obits, od, svar) \ + { (od) = ((svar).read().get_word(0)) & VL_MASK_I(obits); } +#define VL_ASSIGN_QSW(obits, od, svar) \ + { \ + (od) = ((static_cast((svar).read().get_word(1))) << VL_IDATASIZE \ + | (svar).read().get_word(0)) \ + & VL_MASK_Q(obits); \ + } +#define VL_ASSIGN_WSW(obits, owp, svar) \ + { \ + const int words = VL_WORDS_I(obits); \ + for (int i = 0; i < words; ++i) (owp)[i] = (svar).read().get_word(i); \ + (owp)[words - 1] &= VL_MASK_E(obits); \ + } + +#define VL_ASSIGN_ISU(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read().to_uint()); } +#define VL_ASSIGN_QSU(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read().to_uint64()); } +#define VL_ASSIGN_ISB(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read().to_uint()); } 
+#define VL_ASSIGN_QSB(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read().to_uint64()); } +#define VL_ASSIGN_WSB(obits, owp, svar) \ + { \ + const int words = VL_WORDS_I(obits); \ + sc_dt::sc_biguint<(obits)> _butemp = (svar).read(); \ + uint32_t* chunkp = _butemp.get_raw(); \ + int32_t lsb = 0; \ + while (lsb < obits - BITS_PER_DIGIT) { \ + const uint32_t data = *chunkp; \ + ++chunkp; \ + _vl_insert_WI(owp.data(), data, lsb + BITS_PER_DIGIT - 1, lsb); \ + lsb += BITS_PER_DIGIT; \ + } \ + if (lsb < obits) { \ + const uint32_t msb_data = *chunkp; \ + _vl_insert_WI(owp.data(), msb_data, obits - 1, lsb); \ + } \ + (owp)[words - 1] &= VL_MASK_E(obits); \ + } + +// Copying verilog format from systemc integers, doubles, and bit vectors. +// Set a SystemC variable + +#define VL_ASSIGN_SDD(obits, svar, vvar) \ + { (svar).write(vvar); } +#define VL_ASSIGN_SII(obits, svar, vvar) \ + { (svar).write(vvar); } +#define VL_ASSIGN_SQQ(obits, svar, vvar) \ + { (svar).write(vvar); } + +#define VL_ASSIGN_SWI(obits, svar, rd) \ + { \ + sc_dt::sc_bv<(obits)> _bvtemp; \ + _bvtemp.set_word(0, (rd)); \ + (svar).write(_bvtemp); \ + } +#define VL_ASSIGN_SWQ(obits, svar, rd) \ + { \ + sc_dt::sc_bv<(obits)> _bvtemp; \ + _bvtemp.set_word(0, static_cast(rd)); \ + _bvtemp.set_word(1, static_cast((rd) >> VL_IDATASIZE)); \ + (svar).write(_bvtemp); \ + } +#define VL_ASSIGN_SWW(obits, svar, rwp) \ + { \ + sc_dt::sc_bv<(obits)> _bvtemp; \ + for (int i = 0; i < VL_WORDS_I(obits); ++i) _bvtemp.set_word(i, (rwp)[i]); \ + (svar).write(_bvtemp); \ + } + +#define VL_ASSIGN_SUI(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SUQ(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBI(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBQ(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBW(obits, svar, rwp) \ + { \ + sc_dt::sc_biguint<(obits)> _butemp; \ + int32_t lsb = 0; \ + uint32_t* chunkp = _butemp.get_raw(); \ + while (lsb + 
BITS_PER_DIGIT < (obits)) { \ + static_assert(std::is_same::value, "IData and EData mismatch"); \ + const uint32_t data \ + = VL_SEL_IWII(lsb + BITS_PER_DIGIT + 1, (rwp).data(), lsb, BITS_PER_DIGIT); \ + *chunkp = data & VL_MASK_E(BITS_PER_DIGIT); \ + ++chunkp; \ + lsb += BITS_PER_DIGIT; \ + } \ + if (lsb < (obits)) { \ + const uint32_t msb_data = VL_SEL_IWII((obits) + 1, (rwp).data(), lsb, (obits) - lsb); \ + *chunkp = msb_data & VL_MASK_E((obits) - lsb); \ + } \ + _butemp.set(0, *(rwp).data() & 1); /* force update the sign */ \ + (svar).write(_butemp); \ + } + +//=================================================================== +// Extending sizes + +// CAREFUL, we're width changing, so obits!=lbits + +// Right must be clean because otherwise size increase would pick up bad bits +// EMIT_RULE: VL_EXTEND: oclean=clean; rclean==clean; +#define VL_EXTEND_II(obits, lbits, lhs) ((lhs)) +#define VL_EXTEND_QI(obits, lbits, lhs) (static_cast(lhs)) +#define VL_EXTEND_QQ(obits, lbits, lhs) ((lhs)) + +static inline WDataOutP VL_EXTEND_WI(int obits, int, WDataOutP owp, IData ld) VL_MT_SAFE { + // Note for extracts that obits != lbits + owp[0] = ld; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + return owp; +} +static inline WDataOutP VL_EXTEND_WQ(int obits, int, WDataOutP owp, QData ld) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + return owp; +} +static inline WDataOutP VL_EXTEND_WW(int obits, int lbits, WDataOutP owp, + WDataInP const lwp) VL_MT_SAFE { + const int lwords = VL_WORDS_I(lbits); + VL_PREFETCH_RD(lwp); + VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords); + return VL_MEMCPY_W(owp, lwp, lwords); +} + +// EMIT_RULE: VL_EXTENDS: oclean=*dirty*; obits=lbits; +// Sign extension; output dirty +static inline IData VL_EXTENDS_II(int, int lbits, IData lhs) VL_PURE { + return VL_EXTENDSIGN_I(lbits, lhs) | lhs; +} +static inline QData VL_EXTENDS_QI(int, int lbits, QData lhs 
/*Q_as_need_extended*/) VL_PURE { + return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; +} +static inline QData VL_EXTENDS_QQ(int, int lbits, QData lhs) VL_PURE { + return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; +} + +static inline WDataOutP VL_EXTENDS_WI(int obits, int lbits, WDataOutP owp, IData ld) VL_MT_SAFE { + owp[0] = ld; + if (VL_SIGN_E(lbits, owp[0])) { + owp[0] |= ~VL_MASK_E(lbits); + VL_MEMSET_ONES_W(owp + 1, VL_WORDS_I(obits) - 1); + } else { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + } + return owp; +} +static inline WDataOutP VL_EXTENDS_WQ(int obits, int lbits, WDataOutP owp, QData ld) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + if (VL_SIGN_E(lbits, owp[1])) { + owp[1] |= ~VL_MASK_E(lbits); + VL_MEMSET_ONES_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + } else { + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + } + return owp; +} +static inline WDataOutP VL_EXTENDS_WW(int obits, int lbits, WDataOutP owp, + WDataInP const lwp) VL_MT_SAFE { + const int lwords = VL_WORDS_I(lbits); + VL_PREFETCH_RD(lwp); + owp[lwords - 1] = lwp[lwords - 1]; + if (VL_SIGN_E(lbits, lwp[lwords - 1])) { + owp[lwords - 1] |= ~VL_MASK_E(lbits); + VL_MEMSET_ONES_W(owp + lwords, VL_WORDS_I(obits) - lwords); + } else { + VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords); + } + return VL_MEMCPY_W(owp, lwp, lwords - 1); +} + +//=================================================================== +// REDUCTION OPERATORS + +// EMIT_RULE: VL_REDAND: oclean=clean; lclean==clean; obits=1; +#define VL_REDAND_II(lbits, lhs) ((lhs) == VL_MASK_I(lbits)) +#define VL_REDAND_IQ(lbits, lhs) ((lhs) == VL_MASK_Q(lbits)) +static inline IData VL_REDAND_IW(int lbits, WDataInP const lwp) VL_PURE { + const int words = VL_WORDS_I(lbits); + EData combine = lwp[0]; + for (int i = 1; i < words - 1; ++i) combine &= lwp[i]; + combine &= ~VL_MASK_E(lbits) | lwp[words - 1]; + // cppcheck-suppress knownConditionTrueFalse + return ((~combine) == 0); +} + +// EMIT_RULE: 
VL_REDOR: oclean=clean; lclean==clean; obits=1; +#define VL_REDOR_I(lhs) ((lhs) != 0) +#define VL_REDOR_Q(lhs) ((lhs) != 0) +static inline IData VL_REDOR_W(int words, WDataInP const lwp) VL_PURE { + EData equal = 0; + for (int i = 0; i < words; ++i) equal |= lwp[i]; + return (equal != 0); +} + +// EMIT_RULE: VL_REDXOR: oclean=dirty; obits=1; +static inline IData VL_REDXOR_2(IData r) VL_PURE { + // Experiments show VL_REDXOR_2 is faster than __builtin_parityl + r = (r ^ (r >> 1)); + return r; +} +static inline IData VL_REDXOR_4(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + return r; +#endif +} +static inline IData VL_REDXOR_8(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + return r; +#endif +} +static inline IData VL_REDXOR_16(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + return r; +#endif +} +static inline IData VL_REDXOR_32(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + r = (r ^ (r >> 16)); + return r; +#endif +} +static inline IData VL_REDXOR_64(QData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityll(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + r = (r ^ (r >> 16)); + r = (r ^ (r >> 32)); + return static_cast(r); +#endif +} +static inline IData VL_REDXOR_W(int words, WDataInP const lwp) VL_PURE { + EData r = lwp[0]; + for (int i = 1; i < words; 
++i) r ^= lwp[i]; + return VL_REDXOR_32(r); +} + +// EMIT_RULE: VL_COUNTONES_II: oclean = false; lhs clean +static inline IData VL_COUNTONES_I(IData lhs) VL_PURE { + // This is faster than __builtin_popcountl + IData r = lhs - ((lhs >> 1) & 033333333333) - ((lhs >> 2) & 011111111111); + r = (r + (r >> 3)) & 030707070707; + r = (r + (r >> 6)); + r = (r + (r >> 12) + (r >> 24)) & 077; + return r; +} +static inline IData VL_COUNTONES_Q(QData lhs) VL_PURE { + return VL_COUNTONES_I(static_cast(lhs)) + VL_COUNTONES_I(static_cast(lhs >> 32)); +} +#define VL_COUNTONES_E VL_COUNTONES_I +static inline IData VL_COUNTONES_W(int words, WDataInP const lwp) VL_PURE { + EData r = 0; + for (int i = 0; i < words; ++i) r += VL_COUNTONES_E(lwp[i]); + return r; +} + +// EMIT_RULE: VL_COUNTBITS_II: oclean = false; lhs clean +static inline IData VL_COUNTBITS_I(int lbits, IData lhs, IData ctrl0, IData ctrl1, + IData ctrl2) VL_PURE { + const int ctrlSum = (ctrl0 & 0x1) + (ctrl1 & 0x1) + (ctrl2 & 0x1); + if (ctrlSum == 3) { + return VL_COUNTONES_I(lhs); + } else if (ctrlSum == 0) { + const IData mask = (lbits == 32) ? -1 : ((1 << lbits) - 1); + return VL_COUNTONES_I(~lhs & mask); + } else { + return (lbits == 32) ? 
32 : lbits; + } +} +static inline IData VL_COUNTBITS_Q(int lbits, QData lhs, IData ctrl0, IData ctrl1, + IData ctrl2) VL_PURE { + return VL_COUNTBITS_I(32, static_cast(lhs), ctrl0, ctrl1, ctrl2) + + VL_COUNTBITS_I(lbits - 32, static_cast(lhs >> 32), ctrl0, ctrl1, ctrl2); +} +#define VL_COUNTBITS_E VL_COUNTBITS_I +static inline IData VL_COUNTBITS_W(int lbits, int words, WDataInP const lwp, IData ctrl0, + IData ctrl1, IData ctrl2) VL_MT_SAFE { + EData r = 0; + IData wordLbits = 32; + for (int i = 0; i < words; ++i) { + if (i == words - 1) wordLbits = lbits % 32; + r += VL_COUNTBITS_E(wordLbits, lwp[i], ctrl0, ctrl1, ctrl2); + } + return r; +} + +static inline IData VL_ONEHOT_I(IData lhs) VL_PURE { + return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); +} +static inline IData VL_ONEHOT_Q(QData lhs) VL_PURE { + return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); +} +static inline IData VL_ONEHOT_W(int words, WDataInP const lwp) VL_PURE { + EData one = 0; + for (int i = 0; (i < words); ++i) { + if (lwp[i]) { + if (one) return 0; + one = 1; + if (lwp[i] & (lwp[i] - 1)) return 0; + } + } + return one; +} + +static inline IData VL_ONEHOT0_I(IData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } +static inline IData VL_ONEHOT0_Q(QData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } +static inline IData VL_ONEHOT0_W(int words, WDataInP const lwp) VL_PURE { + bool one = false; + for (int i = 0; (i < words); ++i) { + if (lwp[i]) { + if (one) return 0; + one = true; + if (lwp[i] & (lwp[i] - 1)) return 0; + } + } + return 1; +} + +static inline IData VL_CLOG2_I(IData lhs) VL_PURE { + // There are faster algorithms, or fls GCC4 builtins, but rarely used + // In C++20 there will be std::bit_width(lhs) - 1 + if (VL_UNLIKELY(!lhs)) return 0; + --lhs; + int shifts = 0; + for (; lhs != 0; ++shifts) lhs = lhs >> 1; + return shifts; +} +static inline IData VL_CLOG2_Q(QData lhs) VL_PURE { + if (VL_UNLIKELY(!lhs)) return 0; + --lhs; + int shifts = 0; + for (; lhs != 0; ++shifts) lhs = lhs >> 
1ULL; + return shifts; +} +static inline IData VL_CLOG2_W(int words, WDataInP const lwp) VL_PURE { + const EData adjust = (VL_COUNTONES_W(words, lwp) == 1) ? 0 : 1; + for (int i = words - 1; i >= 0; --i) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken + for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { + if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) { + return i * VL_EDATASIZE + bit + adjust; + } + } + // Can't get here - one bit must be set + } + } + return 0; +} + +static inline IData VL_MOSTSETBITP1_W(int words, WDataInP const lwp) VL_PURE { + // MSB set bit plus one; similar to FLS. 0=value is zero + for (int i = words - 1; i >= 0; --i) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken + for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { + if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) return i * VL_EDATASIZE + bit + 1; + } + // Can't get here - one bit must be set + } + } + return 0; +} + +//=================================================================== +// SIMPLE LOGICAL OPERATORS + +// EMIT_RULE: VL_AND: oclean=lclean||rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_AND_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] & rwp[i]); + return owp; +} +// EMIT_RULE: VL_OR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_OR_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] | rwp[i]); + return owp; +} +// EMIT_RULE: VL_CHANGEXOR: oclean=1; obits=32; lbits==rbits; +static inline IData VL_CHANGEXOR_W(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { + IData od = 0; + for (int i = 0; (i < words); ++i) od |= (lwp[i] ^ rwp[i]); + return od; +} +// EMIT_RULE: VL_XOR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_XOR_W(int words, WDataOutP owp, WDataInP const 
lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] ^ rwp[i]); + return owp; +} +// EMIT_RULE: VL_NOT: oclean=dirty; obits=lbits; +static inline WDataOutP VL_NOT_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + for (int i = 0; i < words; ++i) owp[i] = ~(lwp[i]); + return owp; +} + +//========================================================================= +// FOUR-STATE LOGICAL OPERATORS (X/Z support) +// For four-state: 00=0, 01=1, 10=X, 11=Z + +// Four-state AND: X & anything = X, Z & anything = X, 0 & anything = 0, 1 & anything = anything +static inline uint8_t VL_AND_4STATE(uint8_t lhs, uint8_t rhs) { + const uint8_t lval = lhs & 3; + const uint8_t rval = rhs & 3; + // X & anything = X + if (lval == 2 || rval == 2) return 2; // X + // Z & anything = X + if (lval == 3 || rval == 3) return 2; // X + // 0 & anything = 0 + if (lval == 0 || rval == 0) return 0; // 0 + // 1 & anything = anything + return rval; +} + +// Four-state OR +static inline uint8_t VL_OR_4STATE(uint8_t lhs, uint8_t rhs) { + const uint8_t lval = lhs & 3; + const uint8_t rval = rhs & 3; + // X | anything = X + if (lval == 2 || rval == 2) return 2; // X + // Z | anything = X + if (lval == 3 || rval == 3) return 2; // X + // 1 | anything = 1 + if (lval == 1 || rval == 1) return 1; // 1 + // 0 | anything = anything + return rval; +} + +// Four-state XOR +static inline uint8_t VL_XOR_4STATE(uint8_t lhs, uint8_t rhs) { + const uint8_t lval = lhs & 3; + const uint8_t rval = rhs & 3; + // X ^ anything = X + if (lval == 2 || rval == 2) return 2; // X + // Z ^ anything = X + if (lval == 3 || rval == 3) return 2; // X + // Otherwise XOR the clean values + return (lval ^ rval); +} + +// Four-state NOT +static inline uint8_t VL_NOT_4STATE(uint8_t lhs) { + const uint8_t lval = lhs & 3; + if (lval == 2) return 2; // X -> X + if (lval == 3) return 2; // Z -> X + return lval ^ 1; // 0 -> 1, 1 -> 0 +} + +// Four-state byte operations +static inline CData4 
VL_AND_4STATE_C(CData4 lhs, CData4 rhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline CData4 VL_OR_4STATE_C(CData4 lhs, CData4 rhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline CData4 VL_XOR_4STATE_C(CData4 lhs, CData4 rhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline CData4 VL_NOT_4STATE_C(CData4 lhs) { + CData4 result = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (res << (i * 2)); + } + return result; +} + +// Four-state SData (8-bit) operations +static inline SData4 VL_AND_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline SData4 VL_OR_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline SData4 VL_XOR_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline SData4 
VL_NOT_4STATE_S(SData4 lhs) { + SData4 result = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (res << (i * 2)); + } + return result; +} + +// Four-state IData (16-bit) operations +static inline IData4 VL_AND_4STATE_I(IData4 lhs, IData4 rhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline IData4 VL_OR_4STATE_I(IData4 lhs, IData4 rhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline IData4 VL_XOR_4STATE_I(IData4 lhs, IData4 rhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (res << (i * 2)); + } + return result; +} + +static inline IData4 VL_NOT_4STATE_I(IData4 lhs) { + IData4 result = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (res << (i * 2)); + } + return result; +} + +// Four-state QData (32-bit) operations +static inline QData4 VL_AND_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_AND_4STATE(lb, rb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +static inline QData4 VL_OR_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_OR_4STATE(lb, rb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +static inline 
QData4 VL_XOR_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t rb = (rhs >> (i * 2)) & 3; + uint8_t res = VL_XOR_4STATE(lb, rb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +static inline QData4 VL_NOT_4STATE_Q(QData4 lhs) { + QData4 result = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 3; + uint8_t res = VL_NOT_4STATE(lb); + result |= (static_cast(res) << (i * 2)); + } + return result; +} + +//========================================================================= +// FOUR-STATE COMPARISONS +// For four-state: any X or Z in comparison returns X (unknown) + +// Helper functions for checking X/Z bits +static inline bool _vl4_anyXZ_C(CData4 data) { + return (data & 0xAAAAAAAA) != 0; // Any bit with 0b10 (X) or 0b11 (Z) +} +static inline bool _vl4_anyXZ_S(SData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_I(IData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} +static inline bool _vl4_anyXZ_Q(QData4 data) { + return (data & 0xAAAAAAAAAAAAAAAAULL) != 0; +} + +// Four-state EQ: returns true if equal and both operands are deterministic +static inline bool VL_EQ_4STATE_C(CData4 lhs, CData4 rhs) { + if (_vl4_anyXZ_C(lhs) || _vl4_anyXZ_C(rhs)) return false; + return (lhs & 0x55555555) == (rhs & 0x55555555); // Mask to get lower bit only +} + +static inline bool VL_EQ_4STATE_S(SData4 lhs, SData4 rhs) { + if (_vl4_anyXZ_S(lhs) || _vl4_anyXZ_S(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_I(IData4 lhs, IData4 rhs) { + if (_vl4_anyXZ_I(lhs) || _vl4_anyXZ_I(rhs)) return false; + return (lhs & 0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + +static inline bool VL_EQ_4STATE_Q(QData4 lhs, QData4 rhs) { + if (_vl4_anyXZ_Q(lhs) || _vl4_anyXZ_Q(rhs)) return false; + return (lhs & 
0x5555555555555555ULL) == (rhs & 0x5555555555555555ULL); +} + + + + + + + +// Four-state NEQ +static inline bool VL_NEQ_4STATE_C(CData4 lhs, CData4 rhs) { + return !VL_EQ_4STATE_C(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_S(SData4 lhs, SData4 rhs) { + return !VL_EQ_4STATE_S(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_I(IData4 lhs, IData4 rhs) { + return !VL_EQ_4STATE_I(lhs, rhs); +} +static inline bool VL_NEQ_4STATE_Q(QData4 lhs, QData4 rhs) { + return !VL_EQ_4STATE_Q(lhs, rhs); +} + + + + +//========================================================================= +// Logical comparisons + +// EMIT_RULE: VL_EQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_NEQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_LT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_GT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_GTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_LTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +#define VL_NEQ_W(words, lwp, rwp) (!VL_EQ_W(words, lwp, rwp)) +#define VL_LT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) < 0) +#define VL_LTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) <= 0) +#define VL_GT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) > 0) +#define VL_GTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) >= 0) + +// Output clean, AND MUST BE CLEAN +static inline IData VL_EQ_W(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { + EData nequal = 0; + for (int i = 0; (i < words); ++i) nequal |= (lwp[i] ^ rwp[i]); + return (nequal == 0); +} + +// Internal usage +static inline int _vl_cmp_w(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { + for (int i = words - 1; i >= 0; --i) { + if (lwp[i] > rwp[i]) return 1; + if (lwp[i] < rwp[i]) return -1; + } + return 0; // == +} + +#define VL_LTS_IWW(lbits, lwp, 
rwp) (_vl_cmps_w(lbits, lwp, rwp) < 0) +#define VL_LTES_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) <= 0) +#define VL_GTS_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) > 0) +#define VL_GTES_IWW(lbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) >= 0) + +static inline IData VL_GTS_III(int lbits, IData lhs, IData rhs) VL_PURE { + // For lbits==32, this becomes just a single instruction, otherwise ~5. + // GCC 3.3.4 sign extension bugs on AMD64 architecture force us to use quad logic + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed > rhs_signed; +} +static inline IData VL_GTS_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed > rhs_signed; +} + +static inline IData VL_GTES_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed >= rhs_signed; +} +static inline IData VL_GTES_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed >= rhs_signed; +} + +static inline IData VL_LTS_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed < rhs_signed; +} +static inline IData VL_LTS_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed < rhs_signed; +} + +static inline IData VL_LTES_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int64_t lhs_signed 
= VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed <= rhs_signed; +} +static inline IData VL_LTES_IQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed <= rhs_signed; +} + +static inline int _vl_cmps_w(int lbits, WDataInP const lwp, WDataInP const rwp) VL_PURE { + const int words = VL_WORDS_I(lbits); + int i = words - 1; + // We need to flip sense if negative comparison + const EData lsign = VL_SIGN_E(lbits, lwp[i]); + const EData rsign = VL_SIGN_E(lbits, rwp[i]); + if (!lsign && rsign) return 1; // + > - + if (lsign && !rsign) return -1; // - < + + for (; i >= 0; --i) { + if (lwp[i] > rwp[i]) return 1; + if (lwp[i] < rwp[i]) return -1; + } + return 0; // == +} + +//========================================================================= +// Expressions + +// Output NOT clean +static inline WDataOutP VL_NEGATE_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + EData carry = 1; + for (int i = 0; i < words; ++i) { + owp[i] = ~lwp[i] + carry; + carry = (owp[i] < ~lwp[i]); + } + return owp; +} +static inline void VL_NEGATE_INPLACE_W(int words, WDataOutP owp_lwp) VL_MT_SAFE { + EData carry = 1; + for (int i = 0; i < words; ++i) { + const EData word = ~owp_lwp[i] + carry; + carry = (word < ~owp_lwp[i]); + owp_lwp[i] = word; + } +} + +// EMIT_RULE: VL_MUL: oclean=dirty; lclean==clean; rclean==clean; +// EMIT_RULE: VL_DIV: oclean=dirty; lclean==clean; rclean==clean; +// EMIT_RULE: VL_MODDIV: oclean=dirty; lclean==clean; rclean==clean; +static inline IData VL_DIV_III(int lbits, IData lhs, IData rhs) { + return (rhs == 0) ? 0 : lhs / rhs; +} +static inline QData VL_DIV_QQQ(int lbits, QData lhs, QData rhs) { + return (rhs == 0) ? 
0 : lhs / rhs; +} +#define VL_DIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 0)) +static inline IData VL_MODDIV_III(int lbits, IData lhs, IData rhs) { + return (rhs == 0) ? 0 : lhs % rhs; +} +static inline QData VL_MODDIV_QQQ(int lbits, QData lhs, QData rhs) { + return (rhs == 0) ? 0 : lhs % rhs; +} +#define VL_MODDIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 1)) + +static inline WDataOutP VL_ADD_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = carry + static_cast(lwp[i]) + static_cast(rwp[i]); + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Last output word is dirty + return owp; +} + +static inline WDataOutP VL_SUB_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = (carry + static_cast(lwp[i]) + + static_cast(static_cast(~rwp[i]))); + if (i == 0) ++carry; // Negation of rwp + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Last output word is dirty + return owp; +} + +static inline WDataOutP VL_MUL_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; i < words; ++i) owp[i] = 0; + for (int lword = 0; lword < words; ++lword) { + for (int rword = 0; rword < words; ++rword) { + QData mul = static_cast(lwp[lword]) * static_cast(rwp[rword]); + for (int qword = lword + rword; qword < words; ++qword) { + mul += static_cast(owp[qword]); + owp[qword] = (mul & 0xffffffffULL); + mul = (mul >> 32ULL) & 0xffffffffULL; + } + } + } + // Last output word is dirty + return owp; +} + +static inline IData VL_MULS_III(int lbits, IData lhs, IData rhs) VL_PURE { + const int32_t lhs_signed = VL_EXTENDS_II(32, lbits, lhs); + const int32_t rhs_signed = VL_EXTENDS_II(32, lbits, rhs); + return lhs_signed * rhs_signed; +} +static 
inline QData VL_MULS_QQQ(int lbits, QData lhs, QData rhs) VL_PURE { + const int64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const int64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed * rhs_signed; +} + +static inline WDataOutP VL_MULS_WWW(int lbits, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + const int words = VL_WORDS_I(lbits); + VL_DEBUG_IFDEF(assert(words <= VL_MULS_MAX_WORDS);); + // cppcheck-suppress variableScope + WData lwstore[VL_MULS_MAX_WORDS]; // Fixed size, as MSVC++ doesn't allow [words] here + // cppcheck-suppress variableScope + WData rwstore[VL_MULS_MAX_WORDS]; + WDataInP lwusp = lwp; + WDataInP rwusp = rwp; + const EData lneg = VL_SIGN_E(lbits, lwp[words - 1]); + if (lneg) { // Negate lhs + lwusp = lwstore; + VL_NEGATE_W(words, lwstore, lwp); + lwstore[words - 1] &= VL_MASK_E(lbits); // Clean it + } + const EData rneg = VL_SIGN_E(lbits, rwp[words - 1]); + if (rneg) { // Negate rhs + rwusp = rwstore; + VL_NEGATE_W(words, rwstore, rwp); + rwstore[words - 1] &= VL_MASK_E(lbits); // Clean it + } + VL_MUL_W(words, owp, lwusp, rwusp); + owp[words - 1] &= VL_MASK_E( + lbits); // Clean. 
Note it's ok for the multiply to overflow into the sign bit + if ((lneg ^ rneg) & 1) { // Negate output (not using NEGATE, as owp==lwp) + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = carry + static_cast(static_cast(~owp[i])); + if (i == 0) ++carry; // Negation of temp2 + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Not needed: owp[words-1] |= 1<= 2; // 2=X, 3=Z +} + +// Helper: Check if any bit in a four-state value is X or Z + +// Four-state ADD: if any operand has X/Z, result is X +static inline CData4 VL_ADD_4STATE_C(CData4 lhs, CData4 rhs) { + // Extract clean values and add + CData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= ((sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline SData4 VL_ADD_4STATE_S(SData4 lhs, SData4 rhs) { + SData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + return false; +} + + return false; +} + + + +// Four-state ADD: if any operand has X/Z, result is X + // Extract clean values and add + CData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= ((sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + + SData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline IData4 VL_ADD_4STATE_I(IData4 lhs, IData4 
rhs) { + IData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +static inline QData4 VL_ADD_4STATE_Q(QData4 lhs, QData4 rhs) { + QData4 result = 0; + uint8_t carry = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + uint8_t sum = lb + rb + carry; + result |= (static_cast(sum & 1) << (i * 2)); + carry = (sum >> 1) & 1; + } + return result; +} + +// Four-state SUB +static inline CData4 VL_SUB_4STATE_C(CData4 lhs, CData4 rhs) { + return lhs - rhs; +} +static inline SData4 VL_SUB_4STATE_S(SData4 lhs, SData4 rhs) { + return lhs - rhs; +} +static inline IData4 VL_SUB_4STATE_I(IData4 lhs, IData4 rhs) { + return lhs - rhs; +} +static inline QData4 VL_SUB_4STATE_Q(QData4 lhs, QData4 rhs) { + return lhs - rhs; +} + CData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 4; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + + SData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 8; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + + IData4 result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 16; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + + QData4 
result = 0; + uint8_t borrow = 0; + for (int i = 0; i < 32; i++) { + uint8_t lb = (lhs >> (i * 2)) & 1; + uint8_t rb = (rhs >> (i * 2)) & 1; + int diff = lb - rb - borrow; + if (diff < 0) { + diff += 2; + borrow = 1; + } else { + borrow = 0; + } + result |= (static_cast(diff & 1) << (i * 2)); + } + return result; +} + +#define VL_POW_IIQ(obits, lbits, rbits, lhs, rhs) VL_POW_QQQ(obits, lbits, rbits, lhs, rhs) +#define VL_POW_IIW(obits, lbits, rbits, lhs, rwp) VL_POW_QQW(obits, lbits, rbits, lhs, rwp) +#define VL_POW_QQI(obits, lbits, rbits, lhs, rhs) VL_POW_QQQ(obits, lbits, rbits, lhs, rhs) +#define VL_POW_WWI(obits, lbits, rbits, owp, lwp, rhs) \ + VL_POW_WWQ(obits, lbits, rbits, owp, lwp, rhs) + +static inline IData VL_POW_III(int, int, int rbits, IData lhs, IData rhs) VL_PURE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (VL_UNLIKELY(lhs == 0)) return 0; + IData power = lhs; + IData out = 1; + for (int i = 0; i < rbits; ++i) { + if (i > 0) power = power * power; + if (rhs & (1ULL << i)) out *= power; + } + return out; +} +static inline QData VL_POW_QQQ(int, int, int rbits, QData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (VL_UNLIKELY(lhs == 0)) return 0; + QData power = lhs; + QData out = 1ULL; + for (int i = 0; i < rbits; ++i) { + if (i > 0) power = power * power; + if (rhs & (1ULL << i)) out *= power; + } + return out; +} +WDataOutP VL_POW_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE; +WDataOutP VL_POW_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + QData rhs) VL_MT_SAFE; +QData VL_POW_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp) VL_MT_SAFE; + +#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_IIW(obits, lbits, rbits, lhs, 
rwp, lsign, rsign) \ + VL_POWSS_QQW(obits, lbits, rbits, lhs, rwp, lsign, rsign) +#define VL_POWSS_QQI(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_WWI(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) \ + VL_POWSS_WWQ(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) + +static inline IData VL_POWSS_III(int obits, int, int rbits, IData lhs, IData rhs, bool lsign, + bool rsign) VL_MT_SAFE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (rsign && VL_SIGN_I(rbits, rhs)) { + if (lhs == 0) { + return 0; // "X" + } else if (lhs == 1) { + return 1; + } else if (lsign && lhs == VL_MASK_I(obits)) { // -1 + if (rhs & 1) { + return VL_MASK_I(obits); // -1^odd=-1 + } else { + return 1; // -1^even=1 + } + } + return 0; + } + return VL_POW_III(obits, rbits, rbits, lhs, rhs); +} +static inline QData VL_POWSS_QQQ(int obits, int, int rbits, QData lhs, QData rhs, bool lsign, + bool rsign) VL_MT_SAFE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (rsign && VL_SIGN_Q(rbits, rhs)) { + if (lhs == 0) { + return 0; // "X" + } else if (lhs == 1) { + return 1; + } else if (lsign && lhs == VL_MASK_Q(obits)) { // -1 + if (rhs & 1) { + return VL_MASK_Q(obits); // -1^odd=-1 + } else { + return 1; // -1^even=1 + } + } + return 0; + } + return VL_POW_QQQ(obits, rbits, rbits, lhs, rhs); +} +WDataOutP VL_POWSS_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp, bool lsign, bool rsign) VL_MT_SAFE; +WDataOutP VL_POWSS_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, QData rhs, + bool lsign, bool rsign) VL_MT_SAFE; +QData VL_POWSS_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp, bool lsign, + bool rsign) VL_MT_SAFE; + +//=================================================================== +// Concat/replication + +// INTERNAL: Stuff LHS bit 0++ into OUTPUT at specified offset +// ld may be "dirty", output is clean +static inline void _vl_insert_II(CData& lhsr, 
IData ld, int hbit, int lbit, int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_II(SData& lhsr, IData ld, int hbit, int lbit, int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_II(IData& lhsr, IData ld, int hbit, int lbit, int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_QQ(QData& lhsr, QData ld, int hbit, int lbit, int rbits) VL_PURE { + const QData cleanmask = VL_MASK_Q(rbits); + const QData insmask = (VL_MASK_Q(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_WI(WDataOutP iowp, IData ld, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + // Insert value ld into iowp at bit slice [hbit:lbit]. iowp is rbits wide. + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int roffset = VL_BITBIT_E(rbits); + const int hword = VL_BITWORD_E(hbit); + const int lword = VL_BITWORD_E(lbit); + const int rword = VL_BITWORD_E(rbits); + const EData cleanmask = hword == rword ? 
VL_MASK_E(roffset) : VL_MASK_E(0); + + if (hoffset == VL_SIZEBITS_E && loffset == 0) { + // Fast and common case, word based insertion + iowp[lword] = ld & cleanmask; + } else { + const EData lde = static_cast(ld); + if (hword == lword) { // know < EData bits because above checks it + // Assignment is contained within one word of destination + const EData insmask = (VL_MASK_E(hoffset - loffset + 1)) << loffset; + iowp[lword] = (iowp[lword] & ~insmask) | ((lde << loffset) & (insmask & cleanmask)); + } else { + // Assignment crosses a word boundary in destination + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; + const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; + const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword + iowp[lword] = (iowp[lword] & ~linsmask) | ((lde << loffset) & linsmask); + // Prevent unsafe write where lword was final writable location and hword is + // out-of-bounds. + if (VL_LIKELY(!(hword == rword && roffset == 0))) { + iowp[hword] + = (iowp[hword] & ~hinsmask) | ((lde >> nbitsonright) & (hinsmask & cleanmask)); + } + } + } +} + +// Copy bits from lwp[hbit:lbit] to low bits of lhsr. 
rbits is real width of lshr +static inline void _vl_insert_IW(IData& lhsr, WDataInP const lwp, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int hword = VL_BITWORD_E(hbit); + const int lword = VL_BITWORD_E(lbit); + const IData cleanmask = VL_MASK_I(rbits); + if (hword == lword) { + const IData insmask = (VL_MASK_I(hoffset - loffset + 1)); + lhsr = (lhsr & ~insmask) | ((lwp[lword] >> loffset) & (insmask & cleanmask)); + } else { + const int nbitsonright = VL_IDATASIZE - loffset; // bits that filled by lword + const IData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << nbitsonright; + const IData linsmask = VL_MASK_E(VL_EDATASIZE - loffset); + lhsr = (lhsr & ~linsmask) | ((lwp[lword] >> loffset) & (linsmask & cleanmask)); + lhsr = (lhsr & ~hinsmask) | ((lwp[hword] << nbitsonright) & (hinsmask & cleanmask)); + } +} + +// INTERNAL: Stuff large LHS bit 0++ into OUTPUT at specified offset +// lwp may be "dirty" +static inline void _vl_insert_WW(WDataOutP iowp, WDataInP const lwp, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int roffset = VL_BITBIT_E(rbits); + const int lword = VL_BITWORD_E(lbit); + const int hword = VL_BITWORD_E(hbit); + const int rword = VL_BITWORD_E(rbits); + const int words = VL_WORDS_I(hbit - lbit + 1); + // Cleaning mask, only applied to top word of the assignment. Is a no-op + // if we don't assign to the top word of the destination. + const EData cleanmask = hword == rword ? 
VL_MASK_E(roffset) : VL_MASK_E(0); + + if (hoffset == VL_SIZEBITS_E && loffset == 0) { + // Fast and common case, word based insertion + for (int i = 0; i < (words - 1); ++i) iowp[lword + i] = lwp[i]; + iowp[hword] = lwp[words - 1] & cleanmask; + } else if (loffset == 0) { + // Non-32bit, but nicely aligned, so stuff all but the last word + for (int i = 0; i < (words - 1); ++i) iowp[lword + i] = lwp[i]; + // Know it's not a full word as above fast case handled it + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)); + iowp[hword] = (iowp[hword] & ~hinsmask) | (lwp[words - 1] & (hinsmask & cleanmask)); + } else { + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; + const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; + const int nbitsonright + = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) + // Middle words + for (int i = 0; i < words; ++i) { + { // Lower word + const int oword = lword + i; + const EData d = lwp[i] << loffset; + const EData od = (iowp[oword] & ~linsmask) | (d & linsmask); + if (oword == hword) { + iowp[oword] = (iowp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); + } else { + iowp[oword] = od; + } + } + { // Upper word + const int oword = lword + i + 1; + if (oword <= hword) { + const EData d = lwp[i] >> nbitsonright; + const EData od = (d & ~linsmask) | (iowp[oword] & linsmask); + if (oword == hword) { + iowp[oword] = (iowp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); + } else { + iowp[oword] = od; + } + } + } + } + } +} + +static inline void _vl_insert_WQ(WDataOutP iowp, QData ld, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + VlWide lwp; + VL_SET_WQ(lwp, ld); + _vl_insert_WW(iowp, lwp, hbit, lbit, rbits); +} + +// EMIT_RULE: VL_REPLICATE: oclean=clean>width32, dirty<=width32; lclean=clean; rclean==clean; +// RHS MUST BE CLEAN CONSTANT. 
+#define VL_REPLICATE_IOI(lbits, ld, rep) (-(ld)) // Iff lbits==1 +#define VL_REPLICATE_QOI(lbits, ld, rep) (-(static_cast(ld))) // Iff lbits==1 + +static inline IData VL_REPLICATE_III(int lbits, IData ld, IData rep) VL_PURE { + IData returndata = ld; + for (unsigned i = 1; i < rep; ++i) { + returndata = returndata << lbits; + returndata |= ld; + } + return returndata; +} +static inline QData VL_REPLICATE_QII(int lbits, IData ld, IData rep) VL_PURE { + QData returndata = ld; + for (unsigned i = 1; i < rep; ++i) { + returndata = returndata << lbits; + returndata |= static_cast(ld); + } + return returndata; +} +static inline WDataOutP VL_REPLICATE_WII(int lbits, WDataOutP owp, IData ld, + IData rep) VL_MT_SAFE { + owp[0] = ld; + // Zeroing all words isn't strictly needed but allows compiler to know + // it does not need to preserve data in word(s) not being written + for (unsigned i = 1; i < VL_WORDS_I(static_cast(lbits) * rep); ++i) owp[i] = 0; + for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WI(owp, ld, i * lbits + lbits - 1, i * lbits); + } + return owp; +} +static inline WDataOutP VL_REPLICATE_WQI(int lbits, WDataOutP owp, QData ld, + IData rep) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + // Zeroing all words isn't strictly needed but allows compiler to know + // it does not need to preserve data in word(s) not being written + for (unsigned i = 2; i < VL_WORDS_I(static_cast(lbits) * rep); ++i) owp[i] = 0; + for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WQ(owp, ld, i * lbits + lbits - 1, i * lbits); + } + return owp; +} +static inline WDataOutP VL_REPLICATE_WWI(int lbits, WDataOutP owp, WDataInP const lwp, + IData rep) VL_MT_SAFE { + for (unsigned i = 0; i < VL_WORDS_I(static_cast(lbits)); ++i) owp[i] = lwp[i]; + // Zeroing all words isn't strictly needed but allows compiler to know + // it does not need to preserve data in word(s) not being written + for (unsigned i = VL_WORDS_I(static_cast(lbits)); + i < VL_WORDS_I(static_cast(lbits * rep)); ++i) + owp[i] = 0; + 
for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WW(owp, lwp, i * lbits + lbits - 1, i * lbits); + } + return owp; +} + +// Left stream operator. Output will always be clean. LHS and RHS must be clean. +// Special "fast" versions for slice sizes that are a power of 2. These use +// shifts and masks to execute faster than the slower for-loop approach where a +// subset of bits is copied in during each iteration. +static inline IData VL_STREAML_FAST_III(int lbits, IData ld, IData rd_log2) VL_PURE { + // Pre-shift bits in most-significant slice: + // + // If lbits is not a multiple of the slice size (i.e., lbits % rd != 0), + // then we end up with a "gap" in our reversed result. For example, if we + // have a 5-bit Verilog signal (lbits=5) in an 8-bit C data type: + // + // ld = ---43210 + // + // (where numbers are the Verilog signal bit numbers and '-' is an unused bit). + // Executing the switch statement below with a slice size of two (rd=2, + // rd_log2=1) produces: + // + // ret = 1032-400 + // + // Pre-shifting the bits in the most-significant slice allows us to avoid + // this gap in the shuffled data: + // + // ld_adjusted = --4-3210 + // ret = 10324--- + IData ret = ld; + if (rd_log2) { + const uint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); // max multiple of rd <= lbits + const uint32_t lbitsRem = lbits - lbitsFloor; // number of bits in most-sig slice (MSS) + const IData msbMask = lbitsFloor == 32 ? 
0UL : VL_MASK_I(lbitsRem) << lbitsFloor; + ret = (ret & ~msbMask) | ((ret & msbMask) << ((VL_UL(1) << rd_log2) - lbitsRem)); + } + switch (rd_log2) { + case 0: ret = ((ret >> 1) & VL_UL(0x55555555)) | ((ret & VL_UL(0x55555555)) << 1); // FALLTHRU + case 1: ret = ((ret >> 2) & VL_UL(0x33333333)) | ((ret & VL_UL(0x33333333)) << 2); // FALLTHRU + case 2: ret = ((ret >> 4) & VL_UL(0x0f0f0f0f)) | ((ret & VL_UL(0x0f0f0f0f)) << 4); // FALLTHRU + case 3: ret = ((ret >> 8) & VL_UL(0x00ff00ff)) | ((ret & VL_UL(0x00ff00ff)) << 8); // FALLTHRU + case 4: ret = ((ret >> 16) | (ret << 16)); // FALLTHRU + default:; + } + return ret >> (VL_IDATASIZE - lbits); +} + +static inline QData VL_STREAML_FAST_QQI(int lbits, QData ld, IData rd_log2) VL_PURE { + // Pre-shift bits in most-significant slice (see comment in VL_STREAML_FAST_III) + QData ret = ld; + if (rd_log2) { + const uint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); + const uint32_t lbitsRem = lbits - lbitsFloor; + const QData msbMask = lbitsFloor == 64 ? 
0ULL : VL_MASK_Q(lbitsRem) << lbitsFloor; + ret = (ret & ~msbMask) | ((ret & msbMask) << ((1ULL << rd_log2) - lbitsRem)); + } + switch (rd_log2) { + case 0: + ret = (((ret >> 1) & 0x5555555555555555ULL) + | ((ret & 0x5555555555555555ULL) << 1)); // FALLTHRU + case 1: + ret = (((ret >> 2) & 0x3333333333333333ULL) + | ((ret & 0x3333333333333333ULL) << 2)); // FALLTHRU + case 2: + ret = (((ret >> 4) & 0x0f0f0f0f0f0f0f0fULL) + | ((ret & 0x0f0f0f0f0f0f0f0fULL) << 4)); // FALLTHRU + case 3: + ret = (((ret >> 8) & 0x00ff00ff00ff00ffULL) + | ((ret & 0x00ff00ff00ff00ffULL) << 8)); // FALLTHRU + case 4: + ret = (((ret >> 16) & 0x0000ffff0000ffffULL) + | ((ret & 0x0000ffff0000ffffULL) << 16)); // FALLTHRU + case 5: ret = ((ret >> 32) | (ret << 32)); // FALLTHRU + default:; + } + return ret >> (VL_QUADSIZE - lbits); +} + +// Regular "slow" streaming operators +static inline IData VL_STREAML_III(int lbits, IData ld, IData rd) VL_PURE { + IData ret = 0; + // Slice size should never exceed the lhs width + const IData mask = VL_MASK_I(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + return ret; +} + +static inline QData VL_STREAML_QQI(int lbits, QData ld, IData rd) VL_PURE { + QData ret = 0; + // Slice size should never exceed the lhs width + const QData mask = VL_MASK_Q(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + return ret; +} + +static inline WDataOutP VL_STREAML_WWI(int lbits, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + VL_ZERO_W(lbits, owp); + // Slice size should never exceed the lhs width + const int ssize = (rd < static_cast(lbits)) ? rd : (static_cast(lbits)); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? 
ostart : 0; + for (int sbit = 0; sbit < ssize && sbit < lbits - istart; ++sbit) { + // Extract a single bit from lwp and shift it to the correct + // location for owp. + const EData bit = (VL_BITRSHIFT_W(lwp, (istart + sbit)) & 1) + << VL_BITBIT_E(ostart + sbit); + owp[VL_BITWORD_E(ostart + sbit)] |= bit; + } + } + return owp; +} + +static inline IData VL_PACK_I_RI(int obits, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +static inline IData VL_PACK_I_RI(int obits, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +static inline IData VL_PACK_I_RI(int obits, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) ret |= q.at(q.size() - 1 - i) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_UI(int obits, int lbits, const VlUnpacked& q) { + IData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_UI(int obits, int lbits, const VlUnpacked& q) { + IData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_UI(int obits, int lbits, const VlUnpacked& q) { + IData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) ret |= q[N_Depth - 1 - i] << (i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RI(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RI(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << 
(i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RI(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) + ret |= static_cast(q.at(q.size() - 1 - i)) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UI(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UI(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UI(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) + ret |= static_cast(q[N_Depth - 1 - i]) << (i * lbits); + return ret; +} + +static inline QData VL_PACK_Q_RQ(int obits, int lbits, const VlQueue& q) { + QData ret = 0; + for (size_t i = 0; i < q.size(); ++i) ret |= q.at(q.size() - 1 - i) << (i * lbits); + return ret; +} + +template +static inline QData VL_PACK_Q_UQ(int obits, int lbits, const VlUnpacked& q) { + QData ret = 0; + for (size_t i = 0; i < N_Depth; ++i) ret |= q[N_Depth - 1 - i] << (i * lbits); + return ret; +} + +static inline WDataOutP VL_PACK_W_RI(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WI(owp, q.at(q.size() - i - 1), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +static inline WDataOutP VL_PACK_W_RI(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to 
be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WI(owp, q.at(q.size() - i - 1), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +static inline WDataOutP VL_PACK_W_RI(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WI(owp, q.at(q.size() - 1 - i), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UI(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WI(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UI(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WI(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UI(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WI(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +static inline WDataOutP VL_PACK_W_RQ(int obits, int lbits, WDataOutP owp, + const VlQueue& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WQ(owp, q.at(q.size() - 1 - i), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return 
owp; +} + +template +static inline WDataOutP VL_PACK_W_UQ(int obits, int lbits, WDataOutP owp, + const VlUnpacked& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WQ(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1, i * lbits); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_RW(int obits, int lbits, WDataOutP owp, + const VlQueue>& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < q.size(); ++i) + _vl_insert_WW(owp, q.at(q.size() - 1 - i), i * lbits + lbits - 1 + offset, + i * lbits + offset); + return owp; +} + +template +static inline WDataOutP VL_PACK_W_UW(int obits, int lbits, WDataOutP owp, + const VlUnpacked, N_Depth>& q) { + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + if (VL_UNLIKELY(obits < q.size() * lbits)) return owp; // Though is illegal for q to be larger + const int offset = obits - q.size() * lbits; + for (size_t i = 0; i < N_Depth; ++i) + _vl_insert_WW(owp, q[N_Depth - 1 - i], i * lbits + lbits - 1 + offset, i * lbits + offset); + return owp; +} + +// Because concats are common and wide, it's valuable to always have a clean output. +// Thus we specify inputs must be clean, so we don't need to clean the output. +// Note the bit shifts are always constants, so the adds in these constify out. 
+// Casts required, as args may be 8 bit entities, and need to shift to appropriate output size +#define VL_CONCAT_III(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QII(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QIQ(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QQI(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QQQ(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) + +static inline WDataOutP VL_CONCAT_WII(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + IData rd) VL_MT_SAFE { + owp[0] = rd; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWI(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, IData rd) VL_MT_SAFE { + owp[0] = rd; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WIW(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + WDataInP const rwp) VL_MT_SAFE { + const int rwords = VL_WORDS_I(rbits); + VL_MEMCPY_W(owp, rwp, rwords); + VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords); + _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WIQ(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQI(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + IData rd) VL_MT_SAFE { + owp[0] = rd; + VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1); + _vl_insert_WQ(owp, ld, rbits + lbits - 1, 
rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQQ(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E); + _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQW(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + WDataInP const rwp) VL_MT_SAFE { + const int rwords = VL_WORDS_I(rbits); + VL_MEMCPY_W(owp, rwp, rwords); + VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords); + _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + const int rwords = VL_WORDS_I(rbits); + VL_MEMCPY_W(owp, rwp, rwords); + VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords); + _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); + return owp; +} + +//=================================================================== +// Shifts + +// Static shift, used by internal functions +// The output is the same as the input - it overlaps! 
+static inline void _vl_shiftl_inplace_w(int obits, WDataOutP iowp, + IData rd /*1 or 4*/) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + const EData linsmask = VL_MASK_E(rd); + for (int i = words - 1; i >= 1; --i) { + iowp[i] + = ((iowp[i] << rd) & ~linsmask) | ((iowp[i - 1] >> (VL_EDATASIZE - rd)) & linsmask); + } + iowp[0] = ((iowp[0] << rd) & ~linsmask); + iowp[VL_WORDS_I(obits) - 1] &= VL_MASK_E(obits); +} + +// EMIT_RULE: VL_SHIFTL: oclean=lclean; rclean==clean; +// Important: Unlike most other funcs, the shift might well be a computed +// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) +// If RHS (rd/rwp) is larger than the output, zeros (or all ones for >>>) must be returned +// (This corresponds to AstShift*Ovr Ast nodes) +static inline IData VL_SHIFTL_III(int obits, int, int, IData lhs, IData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return lhs << rhs; // Small is common so not clean return +} +static inline IData VL_SHIFTL_IIQ(int obits, int, int, IData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return VL_CLEAN_II(obits, obits, lhs << rhs); +} +static inline QData VL_SHIFTL_QQI(int obits, int, int, QData lhs, IData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return lhs << rhs; // Small is common so not clean return +} +static inline QData VL_SHIFTL_QQQ(int obits, int, int, QData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return VL_CLEAN_QQ(obits, obits, lhs << rhs); +} +static inline WDataOutP VL_SHIFTL_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); + const int bit_shift = VL_BITBIT_E(rd); + if (rd >= static_cast(obits)) { // rd may be huge with MSB set + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else if (bit_shift == 0) { // Aligned word shift (<<0,<<32,<<64 etc) + for (int i = 0; i < word_shift; 
++i) owp[i] = 0; + for (int i = word_shift; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i - word_shift]; + } else { + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WW(owp, lwp, obits - 1, rd); + } + return owp; +} +static inline WDataOutP VL_SHIFTL_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return VL_ZERO_W(obits, owp); + } + } + return VL_SHIFTL_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTL_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTL_WWW(obits, lbits, rbits, owp, lwp, rwp); +} +static inline IData VL_SHIFTL_IIW(int obits, int, int rbits, IData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + return VL_SHIFTL_III(obits, obits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTL_QQW(int obits, int, int rbits, QData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + // Above checks rwp[1]==0 so not needed in below shift + return VL_SHIFTL_QQI(obits, obits, 32, lhs, rwp[0]); +} + +// EMIT_RULE: VL_SHIFTR: oclean=lclean; rclean==clean; +// Important: Unlike most other funcs, the shift might well be a computed +// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) 
+static inline IData VL_SHIFTR_III(int obits, int, int, IData lhs, IData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return lhs >> rhs; +} +static inline IData VL_SHIFTR_IIQ(int obits, int, int, IData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return lhs >> rhs; +} +static inline QData VL_SHIFTR_QQI(int obits, int, int, QData lhs, IData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return lhs >> rhs; +} +static inline QData VL_SHIFTR_QQQ(int obits, int, int, QData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return lhs >> rhs; +} +static inline WDataOutP VL_SHIFTR_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); // Maybe 0 + const int bit_shift = VL_BITBIT_E(rd); + if (rd >= static_cast(obits)) { // rd may be huge with MSB set + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) + const int copy_words = (VL_WORDS_I(obits) - word_shift); + for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; + for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else { + const int loffset = rd & VL_SIZEBITS_E; + const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword (know + // loffset!=0) Middle words + const int words = VL_WORDS_I(obits - rd); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int upperword = i + word_shift + 1; + if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; + } + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } + return owp; +} +static inline WDataOutP VL_SHIFTR_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or 
more + return VL_ZERO_W(obits, owp); + } + } + return VL_SHIFTR_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTR_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTR_WWW(obits, lbits, rbits, owp, lwp, rwp); +} + +static inline IData VL_SHIFTR_IIW(int obits, int, int rbits, IData lhs, + WDataInP const rwp) VL_PURE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) return 0; // Huge shift 1>>32 or more + } + return VL_SHIFTR_III(obits, obits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTR_QQW(int obits, int, int rbits, QData lhs, + WDataInP const rwp) VL_PURE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) return 0; // Huge shift 1>>32 or more + } + return VL_SHIFTR_QQI(obits, obits, 32, lhs, rwp[0]); +} + +// EMIT_RULE: VL_SHIFTRS: oclean=false; lclean=clean, rclean==clean; +static inline IData VL_SHIFTRS_III(int obits, int lbits, int, IData lhs, IData rhs) VL_PURE { + // Note the C standard does not specify the >> operator as a arithmetic shift! + // IEEE says signed if output signed, but bit position from lbits; + // must use lbits for sign; lbits might != obits, + // an EXTEND(SHIFTRS(...)) can became a SHIFTRS(...) 
within same 32/64 bit word length + const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return sign & VL_MASK_I(obits); + const IData signext = ~(VL_MASK_I(lbits) >> rhs); // One with bits where we've shifted "past" + return (lhs >> rhs) | (sign & VL_CLEAN_II(obits, obits, signext)); +} +static inline QData VL_SHIFTRS_QQI(int obits, int lbits, int, QData lhs, IData rhs) VL_PURE { + const QData sign = -(lhs >> (lbits - 1)); + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return sign & VL_MASK_Q(obits); + const QData signext = ~(VL_MASK_Q(lbits) >> rhs); + return (lhs >> rhs) | (sign & VL_CLEAN_QQ(obits, obits, signext)); +} +static inline IData VL_SHIFTRS_IQI(int obits, int lbits, int rbits, QData lhs, IData rhs) VL_PURE { + return static_cast(VL_SHIFTRS_QQI(obits, lbits, rbits, lhs, rhs)); +} +static inline WDataOutP VL_SHIFTRS_WWI(int obits, int lbits, int, WDataOutP owp, + WDataInP const lwp, IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); + const int bit_shift = VL_BITBIT_E(rd); + const int lmsw = VL_WORDS_I(obits) - 1; + const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]); + if (rd >= static_cast(obits)) { // Shifting past end, sign in all of lbits + for (int i = 0; i <= lmsw; ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) + const int copy_words = (VL_WORDS_I(obits) - word_shift); + for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; + if (copy_words >= 0) owp[copy_words - 1] |= ~VL_MASK_E(obits) & sign; + for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } else { + const int loffset = rd & VL_SIZEBITS_E; + const int nbitsonright + = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) + // Middle words + const int words = VL_WORDS_I(obits - rd); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int 
upperword = i + word_shift + 1; + if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; + } + if (words) owp[words - 1] |= sign & ~VL_MASK_E(obits - loffset); + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } + return owp; +} +static inline WDataOutP VL_SHIFTRS_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= static_cast(obits))) { + const int owords = VL_WORDS_I(obits); + if (VL_SIGN_E(lbits, lwp[owords - 1])) { + VL_MEMSET_ONES_W(owp, owords); + owp[owords - 1] &= VL_MASK_E(lbits); + } else { + VL_MEMSET_ZERO_W(owp, owords); + } + return owp; + } + return VL_SHIFTRS_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTRS_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTRS_WWW(obits, lbits, rbits, owp, lwp, rwp); +} +static inline IData VL_SHIFTRS_IIW(int obits, int lbits, int rbits, IData lhs, + WDataInP const rwp) VL_PURE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= static_cast(obits))) { + const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + return VL_CLEAN_II(obits, obits, sign); + } + return VL_SHIFTRS_III(obits, lbits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTRS_QQW(int obits, int lbits, int rbits, QData lhs, + WDataInP const rwp) VL_PURE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= static_cast(obits))) { + const QData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + return 
VL_CLEAN_QQ(obits, obits, sign); + } + return VL_SHIFTRS_QQI(obits, lbits, 32, lhs, rwp[0]); +} +static inline IData VL_SHIFTRS_IIQ(int obits, int lbits, int rbits, IData lhs, QData rhs) VL_PURE { + VlWide rwp; + VL_SET_WQ(rwp, rhs); + return VL_SHIFTRS_IIW(obits, lbits, rbits, lhs, rwp); +} +static inline QData VL_SHIFTRS_QQQ(int obits, int lbits, int rbits, QData lhs, QData rhs) VL_PURE { + VlWide rwp; + VL_SET_WQ(rwp, rhs); + return VL_SHIFTRS_QQW(obits, lbits, rbits, lhs, rwp); +} + +//========================================================================= +// FOUR-STATE SHIFT OPERATORS +// For four-state: shift operations preserve X/Z in the shifted bits + +// Four-state left shift: shift in zeros, preserve X/Z pattern +static inline CData4 VL_SHIFTL_4STATE_C(CData4 lhs, int shift) { + if (shift >= 4) return 0; // All shifted out + if (_vl4_anyXZ_C(lhs)) { + // X/Z gets shifted, lower bits become 0 + CData4 result = 0; + for (int i = 0; i < 4 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (val << ((i + shift) * 2)); + } + } + return result; + } + // Clean value shift + return (lhs & 0x55555555) << shift; +} + +static inline SData4 VL_SHIFTL_4STATE_S(SData4 lhs, int shift) { + if (shift >= 8) return 0; + if (_vl4_anyXZ_S(lhs)) { + SData4 result = 0; + for (int i = 0; i < 8 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i + shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) << shift; +} + +static inline IData4 VL_SHIFTL_4STATE_I(IData4 lhs, int shift) { + if (shift >= 16) return 0; + if (_vl4_anyXZ_I(lhs)) { + IData4 result = 0; + for (int i = 0; i < 16 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i + shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) << shift; +} + +static inline QData4 VL_SHIFTL_4STATE_Q(QData4 lhs, int shift) { + if (shift 
>= 32) return 0; + if (_vl4_anyXZ_Q(lhs)) { + QData4 result = 0; + for (int i = 0; i < 32 - shift; i++) { + uint8_t val = (lhs >> (i * 2)) & 3; + if (val != 0) { + result |= (static_cast(val) << ((i + shift) * 2)); + } + } + return result; + } + return (lhs & 0x5555555555555555ULL) << shift; +} + +// Four-state right shift +static inline CData4 VL_SHIFTR_4STATE_C(CData4 lhs, int shift) { + if (shift >= 4) return 0; + } + return result; + } + return (lhs & 0x55555555) >> shift; +} + +static inline SData4 VL_SHIFTR_4STATE_S(SData4 lhs, int shift) { + if (shift >= 8) return 0; + } + return result; + } + return (lhs & 0x5555555555555555ULL) >> shift; +} + +static inline IData4 VL_SHIFTR_4STATE_I(IData4 lhs, int shift) { + if (shift >= 16) return 0; + } + return result; + } + return (lhs & 0x5555555555555555ULL) >> shift; +} + +static inline QData4 VL_SHIFTR_4STATE_Q(QData4 lhs, int shift) { + if (shift >= 32) return 0; + } + return result; + } + return (lhs & 0x5555555555555555ULL) >> shift; +} + +//=================================================================== +// Bit selection + +// EMIT_RULE: VL_BITSEL: oclean=dirty; rclean==clean; +#define VL_BITSEL_IIII(lbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_QIII(lbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_QQII(lbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_IQII(lbits, lhs, rhs) (static_cast((lhs) >> (rhs))) + +static inline IData VL_BITSEL_IWII(int lbits, WDataInP const lwp, IData rd) VL_MT_SAFE { + const int word = VL_BITWORD_E(rd); + if (VL_UNLIKELY(rd > static_cast(lbits))) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. + // We return all 1's as that's more likely to find bugs (?) than 0's. 
+ } else { + return (lwp[word] >> VL_BITBIT_E(rd)); + } +} + +// EMIT_RULE: VL_RANGE: oclean=lclean; out=dirty +// & MUST BE CLEAN (currently constant) +#define VL_SEL_IIII(lbits, lhs, lsb, width) ((lhs) >> (lsb)) +#define VL_SEL_QQII(lbits, lhs, lsb, width) ((lhs) >> (lsb)) +#define VL_SEL_IQII(lbits, lhs, lsb, width) (static_cast((lhs) >> (lsb))) + +static inline IData VL_SEL_IWII(int lbits, WDataInP const lwp, IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + if (VL_UNLIKELY(msb >= lbits)) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. + } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { + return VL_BITRSHIFT_W(lwp, lsb); + } else { + // 32 bit extraction may span two words + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); // bits that come from low word + return ((lwp[VL_BITWORD_E(msb)] << nbitsfromlow) | VL_BITRSHIFT_W(lwp, lsb)); + } +} + +static inline QData VL_SEL_QWII(int lbits, WDataInP const lwp, IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + if (VL_UNLIKELY(msb > lbits)) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. 
+ } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { + return VL_BITRSHIFT_W(lwp, lsb); + } else if (VL_BITWORD_E(msb) == 1 + VL_BITWORD_E(static_cast(lsb))) { + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); + const QData hi = (lwp[VL_BITWORD_E(msb)]); + const QData lo = VL_BITRSHIFT_W(lwp, lsb); + return (hi << nbitsfromlow) | lo; + } else { + // 64 bit extraction may span three words + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); + const QData hi = (lwp[VL_BITWORD_E(msb)]); + const QData mid = (lwp[VL_BITWORD_E(lsb) + 1]); + const QData lo = VL_BITRSHIFT_W(lwp, lsb); + return (hi << (nbitsfromlow + VL_EDATASIZE)) | (mid << nbitsfromlow) | lo; + } +} + +static inline WDataOutP VL_SEL_WWII(int obits, int lbits, WDataOutP owp, WDataInP const lwp, + IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + const int word_shift = VL_BITWORD_E(lsb); + if (VL_UNLIKELY(msb > lbits)) { // Outside bounds, + for (int i = 0; i < VL_WORDS_I(obits) - 1; ++i) owp[i] = ~0; + owp[VL_WORDS_I(obits) - 1] = VL_MASK_E(obits); + } else if (VL_BITBIT_E(lsb) == 0) { + // Just a word extract + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i + word_shift]; + } else { + // Not a _vl_insert because the bits come from any bit number and goto bit 0 + const int loffset = lsb & VL_SIZEBITS_E; + const int nbitsfromlow = VL_EDATASIZE - loffset; // bits that end up in lword (know + // loffset!=0) Middle words + const int words = VL_WORDS_I(msb - lsb + 1); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int upperword = i + word_shift + 1; + if (upperword <= static_cast(VL_BITWORD_E(msb))) { + owp[i] |= lwp[upperword] << nbitsfromlow; + } + } + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } + return owp; +} + +template +static inline VlQueue VL_CLONE_Q(const VlQueue& from, int lbits, int srcElementBits, + int dstElementBits) { + VlQueue ret; + VL_COPY_Q(ret, from, lbits, 
srcElementBits, dstElementBits); + return ret; +} + +template +static inline VlQueue VL_REVCLONE_Q(const VlQueue& from, int lbits, int srcElementBits, + int dstElementBits) { + VlQueue ret; + VL_REVCOPY_Q(ret, from, lbits, srcElementBits, dstElementBits); + return ret; +} + +// Helper function to get a bit from a queue at a specific bit index +template +static inline bool VL_GET_QUEUE_BIT(const VlQueue& queue, int srcElementBits, size_t bitIndex) { + const size_t elemIdx = bitIndex / srcElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return false; + + const T element = queue.at(elemIdx); + if (srcElementBits == 1) { + return element & 1; + } else { + const size_t bitInElem = bitIndex % srcElementBits; + const size_t actualBitPos = srcElementBits - 1 - bitInElem; + return (element >> actualBitPos) & 1; + } +} + +// Helper function to set a bit in the destination queue +template +static inline void VL_SET_QUEUE_BIT(VlQueue& queue, int dstElementBits, size_t bitIndex, + bool value) { + if (dstElementBits == 1) { + if (VL_UNLIKELY(bitIndex >= queue.size())) return; + queue.atWrite(bitIndex) = value ? 
1 : 0; + } else { + const size_t elemIdx = bitIndex / dstElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return; + const size_t bitInElem = bitIndex % dstElementBits; + const size_t actualBitPos = dstElementBits - 1 - bitInElem; + if (value) { + queue.atWrite(elemIdx) |= (static_cast(1) << actualBitPos); + } else { + queue.atWrite(elemIdx) &= ~(static_cast(1) << actualBitPos); + } + } +} + +// Helper function to get a bit from a VlWide queue at a specific bit index +template +static inline bool VL_GET_QUEUE_BIT(const VlQueue>& queue, int srcElementBits, + size_t bitIndex) { + const size_t elemIdx = bitIndex / srcElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return false; + + const VlWide& element = queue.at(elemIdx); + const size_t bitInElem = bitIndex % srcElementBits; + const size_t actualBitPos = srcElementBits - 1 - bitInElem; + + return VL_BITISSET_W(element.data(), actualBitPos); +} + +// Helper function to set a bit in a VlWide queue at a specific bit index +template +static inline void VL_SET_QUEUE_BIT(VlQueue>& queue, int dstElementBits, + size_t bitIndex, bool value) { + const size_t elemIdx = bitIndex / dstElementBits; + if (VL_UNLIKELY(elemIdx >= queue.size())) return; + + const size_t bitInElem = bitIndex % dstElementBits; + const size_t actualBitPos = dstElementBits - 1 - bitInElem; + + VlWide& element = queue.atWrite(elemIdx); + if (value) { + VL_ASSIGNBIT_WO(actualBitPos, element.data()); + } else { + VL_ASSIGNBIT_WI(actualBitPos, element.data(), 0); + } +} + +template +static inline void VL_ZERO_INIT_QUEUE_ELEM(T& elem) { + elem = 0; +} + +template +static inline void VL_ZERO_INIT_QUEUE_ELEM(VlWide& elem) { + for (size_t j = 0; j < N_Words; ++j) { elem.at(j) = 0; } +} + +// This specialization works for both VlQueue (and similar) as well +// as VlQueue>. 
+template +static inline void VL_COPY_Q(VlQueue& q, const VlQueue& from, int lbits, int srcElementBits, + int dstElementBits) { + if (srcElementBits == dstElementBits) { + // Simple case: same element bit width, direct copy of each element + if (VL_UNLIKELY(&q == &from)) return; // Skip self-assignment when it's truly a no-op + q = from; + } else { + // Different element bit widths: use streaming conversion + VlQueue srcCopy = from; + const size_t srcTotalBits = from.size() * srcElementBits; + const size_t dstSize = (srcTotalBits + dstElementBits - 1) / dstElementBits; + q.renew(dstSize); + for (size_t i = 0; i < dstSize; ++i) { VL_ZERO_INIT_QUEUE_ELEM(q.atWrite(i)); } + for (size_t bitIndex = 0; bitIndex < srcTotalBits; ++bitIndex) { + VL_SET_QUEUE_BIT(q, dstElementBits, bitIndex, + VL_GET_QUEUE_BIT(srcCopy, srcElementBits, bitIndex)); + } + } +} + +// This specialization works for both VlQueue (and similar) as well +// as VlQueue>. +template +static inline void VL_REVCOPY_Q(VlQueue& q, const VlQueue& from, int lbits, + int srcElementBits, int dstElementBits) { + const size_t srcTotalBits = from.size() * srcElementBits; + const size_t dstSize = (srcTotalBits + dstElementBits - 1) / dstElementBits; + + // Always make a copy to handle the case where q and from are the same queue + VlQueue srcCopy = from; + + // Initialize all elements to zero using appropriate method + q.renew(dstSize); + for (size_t i = 0; i < dstSize; ++i) VL_ZERO_INIT_QUEUE_ELEM(q.atWrite(i)); + + if (lbits == 1) { + // Simple bit reversal: write directly to destination + for (int i = srcTotalBits - 1; i >= 0; --i) { + VL_SET_QUEUE_BIT(q, dstElementBits, srcTotalBits - 1 - i, + VL_GET_QUEUE_BIT(srcCopy, srcElementBits, i)); + } + } else { + // Generalized block-reversal for lbits > 1: + // 1. Reverse all bits using 1-bit blocks + // 2. Split into lbits-sized blocks and pad incomplete blocks on the left + // 3. 
Reverse each lbits-sized block using 1-bit blocks + const size_t numCompleteBlocks = srcTotalBits / lbits; + const size_t remainderBits = srcTotalBits % lbits; + const size_t srcBlocks = numCompleteBlocks + (remainderBits > 0 ? 1 : 0); + + size_t dstBitIndex = 0; + + for (size_t block = 0; block < srcBlocks; ++block) { + const size_t blockStart = block * lbits; + const int bitsToProcess = VL_LIKELY(block < numCompleteBlocks) ? lbits : remainderBits; + for (int bit = bitsToProcess - 1; bit >= 0; --bit) { + const size_t reversedBitIndex = blockStart + bit; + const size_t originalBitIndex = srcTotalBits - 1 - reversedBitIndex; + VL_SET_QUEUE_BIT(q, dstElementBits, dstBitIndex++, + VL_GET_QUEUE_BIT(srcCopy, srcElementBits, originalBitIndex)); + } + dstBitIndex += lbits - bitsToProcess; + } + } +} + +//====================================================================== +// Expressions needing insert/select + +static inline void VL_UNPACK_RI_I(int lbits, int rbits, VlQueue& q, IData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_I(int lbits, int rbits, VlQueue& q, IData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_I(int lbits, int rbits, VlQueue& q, IData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < 
size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RQ_Q(int lbits, int rbits, VlQueue& q, QData from) { + const size_t size = (rbits + lbits - 1) / lbits; + q.renew(size); + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < size; ++i) q.atWrite(size - 1 - i) = (from >> (i * lbits)) & mask; +} + +static inline void VL_UNPACK_RI_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? (lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_IWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +static inline void VL_UNPACK_RI_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? 
(lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_IWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +static inline void VL_UNPACK_RI_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? (lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_IWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +static inline void VL_UNPACK_RQ_W(int lbits, int rbits, VlQueue& q, WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? (lbits + bitPos) : lbits; + q.atWrite(i) = VL_SEL_QWII(rbits, rwp, actualBitPos, actualWidth) & mask; + } +} + +template +static inline void VL_UNPACK_RW_W(int lbits, int rbits, VlQueue>& q, + WDataInP rwp) { + const int size = (rbits + lbits - 1) / lbits; + q.renew(size); + for (size_t i = 0; i < size; ++i) { + // Extract from MSB to LSB: MSB goes to index 0 + const int bitPos = rbits - (i + 1) * lbits; + const int actualBitPos = (bitPos < 0) ? 0 : bitPos; + const int actualWidth = (bitPos < 0) ? 
(lbits + bitPos) : lbits; + VL_SEL_WWII(actualWidth, rbits, q.atWrite(i), rwp, actualBitPos, actualWidth); + } +} + +template +static inline void VL_UNPACK_UI_I(int lbits, int rbits, VlUnpacked& q, + IData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_I(int lbits, int rbits, VlUnpacked& q, + IData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_I(int lbits, int rbits, VlUnpacked& q, + IData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UQ_Q(int lbits, int rbits, VlUnpacked& q, + QData from) { + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask; +} + +template +static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) 
& mask; +} + +template +static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask; +} + +template +static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const IData mask = VL_MASK_I(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask; +} + +template +static inline void VL_UNPACK_UQ_W(int lbits, int rbits, VlUnpacked& q, + WDataInP rwp) { + const QData mask = VL_MASK_Q(lbits); + for (size_t i = 0; i < N_Depth; ++i) + q[i] = VL_SEL_QWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask; +} + +template +static inline void VL_UNPACK_UW_W(int lbits, int rbits, VlUnpacked, N_Depth>& q, + WDataInP rwp) { + for (size_t i = 0; i < N_Depth; ++i) + VL_SEL_WWII(lbits, rbits, q[i], rwp, (N_Depth - 1 - i) * lbits, lbits); +} + +// Return QData from double (numeric) +// EMIT_RULE: VL_RTOIROUND_Q_D: oclean=dirty; lclean==clean/real +static inline QData VL_RTOIROUND_Q_D(double lhs) VL_PURE { + // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa + // This does not need to support subnormals as they are sub-integral + lhs = VL_ROUND(lhs); + if (lhs == 0.0) return 0; + const QData q = VL_CVT_Q_D(lhs); + const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; + const uint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); + uint64_t out = 0; + if (lsb < 0) { + out = mantissa >> -lsb; + } else if (lsb < 64) { + out = mantissa << lsb; + } + if (lhs < 0) out = -out; + return out; +} +static inline IData VL_RTOIROUND_I_D(double lhs) VL_PURE { + return static_cast(VL_RTOIROUND_Q_D(lhs)); +} +static inline WDataOutP VL_RTOIROUND_W_D(int obits, WDataOutP owp, double lhs) VL_MT_SAFE { + // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa + // This does not need to support 
subnormals as they are sub-integral + lhs = VL_ROUND(lhs); + VL_ZERO_W(obits, owp); + if (lhs == 0.0) return owp; + const QData q = VL_CVT_Q_D(lhs); + const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; + const uint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); + if (lsb < 0) { + VL_SET_WQ(owp, mantissa >> -lsb); + } else if (lsb < obits) { + _vl_insert_WQ(owp, mantissa, lsb + 52, lsb); + } + if (lhs < 0) VL_NEGATE_INPLACE_W(VL_WORDS_I(obits), owp); + return owp; +} + +//====================================================================== +// Range assignments + +// EMIT_RULE: VL_ASSIGNRANGE: rclean=dirty; +static inline void VL_ASSIGNSEL_II(int rbits, int obits, int lsb, CData& lhsr, IData rhs) VL_PURE { + _vl_insert_II(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_II(int rbits, int obits, int lsb, SData& lhsr, IData rhs) VL_PURE { + _vl_insert_II(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_II(int rbits, int obits, int lsb, IData& lhsr, IData rhs) VL_PURE { + _vl_insert_II(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_QI(int rbits, int obits, int lsb, QData& lhsr, IData rhs) VL_PURE { + _vl_insert_QQ(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_QQ(int rbits, int obits, int lsb, QData& lhsr, QData rhs) VL_PURE { + _vl_insert_QQ(lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +// static inline void VL_ASSIGNSEL_IIIW(int obits, int lsb, IData& lhsr, WDataInP const rwp) +// VL_MT_SAFE { Illegal, as lhs width >= rhs width +static inline void VL_ASSIGNSEL_WI(int rbits, int obits, int lsb, WDataOutP iowp, + IData rhs) VL_MT_SAFE { + _vl_insert_WI(iowp, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_WQ(int rbits, int obits, int lsb, WDataOutP iowp, + QData rhs) VL_MT_SAFE { + _vl_insert_WQ(iowp, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_WW(int rbits, int obits, int lsb, 
WDataOutP iowp, + WDataInP const rwp) VL_MT_SAFE { + _vl_insert_WW(iowp, rwp, lsb + obits - 1, lsb, rbits); +} + +//==================================================== +// Range assignments + +// These additional functions copy bits range [obis+roffset-1:roffset] from rhs to lower bits +// of lhs(select before assigning). Rhs should always be wider than lhs. +static inline void VL_SELASSIGN_II(int rbits, int obits, CData& lhsr, IData rhs, + int roffset) VL_PURE { + _vl_insert_II(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} +static inline void VL_SELASSIGN_II(int rbits, int obits, SData& lhsr, IData rhs, + int roffset) VL_PURE { + _vl_insert_II(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} +static inline void VL_SELASSIGN_II(int rbits, int obits, IData& lhsr, IData rhs, + int roffset) VL_PURE { + _vl_insert_II(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} +static inline void VL_SELASSIGN_IQ(int rbits, int obits, CData& lhsr, QData rhs, + int roffset) VL_PURE { + // it will be truncated to right CData mask + const CData cleanmask = VL_MASK_I(rbits); + const CData insmask = VL_MASK_I(obits); + lhsr = (lhsr & ~insmask) | (static_cast(rhs >> roffset) & (insmask & cleanmask)); +} +static inline void VL_SELASSIGN_IQ(int rbits, int obits, SData& lhsr, QData rhs, + int roffset) VL_PURE { + // it will be truncated to right CData mask + const SData cleanmask = VL_MASK_I(rbits); + const SData insmask = VL_MASK_I(obits); + lhsr = (lhsr & ~insmask) | (static_cast(rhs >> roffset) & (insmask & cleanmask)); +} +static inline void VL_SELASSIGN_IQ(int rbits, int obits, IData& lhsr, QData rhs, + int roffset) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = VL_MASK_I(obits); + lhsr = (lhsr & ~insmask) | (static_cast(rhs >> roffset) & (insmask & cleanmask)); +} + +static inline void VL_SELASSIGN_QQ(int rbits, int obits, QData& lhsr, QData rhs, + int roffset) VL_PURE { + _vl_insert_QQ(lhsr, rhs >> roffset, obits - 1, 0, rbits); +} + +static inline void 
VL_SELASSIGN_IW(int rbits, int obits, CData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + IData l = static_cast(lhsr); + _vl_insert_IW(l, rhs, roffset + obits - 1, roffset, rbits); + lhsr = static_cast(l); +} +static inline void VL_SELASSIGN_IW(int rbits, int obits, SData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + IData l = static_cast(lhsr); + _vl_insert_IW(l, rhs, roffset + obits - 1, roffset, rbits); + lhsr = static_cast(l); +} +static inline void VL_SELASSIGN_IW(int rbits, int obits, IData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + _vl_insert_IW(lhsr, rhs, roffset + obits - 1, roffset, rbits); +} +static inline void VL_SELASSIGN_QW(int rbits, int obits, QData& lhsr, WDataInP const rhs, + int roffset) VL_MT_SAFE { + // assert VL_QDATASIZE >= rbits > VL_IDATASIZE; + IData low = static_cast(lhsr); + IData high = static_cast(lhsr >> VL_IDATASIZE); + if (obits <= VL_IDATASIZE) { + _vl_insert_IW(low, rhs, obits + roffset - 1, roffset, VL_IDATASIZE); + } else { + _vl_insert_IW(low, rhs, roffset + VL_IDATASIZE - 1, roffset, VL_IDATASIZE); + _vl_insert_IW(high, rhs, roffset + obits - 1, roffset + VL_IDATASIZE, + rbits - VL_IDATASIZE); + } + lhsr = (static_cast(high) << VL_IDATASIZE) | low; +} + +static inline void VL_SELASSIGN_WW(int rbits, int obits, WDataOutP iowp, WDataInP const rwp, + int roffset) VL_MT_SAFE { + // assert rbits > VL_QDATASIZE + const int wordoff = roffset / VL_EDATASIZE; + const int lsb = roffset & VL_SIZEBITS_E; + const int upperbits = lsb == 0 ? 0 : VL_EDATASIZE - lsb; + // If roffset is not aligned, we copy some bits to align it. + if (lsb != 0) { + const int w = obits < upperbits ? 
obits : upperbits; + const int insmask = VL_MASK_E(w); + iowp[0] = (iowp[0] & ~insmask) | ((rwp[wordoff] >> lsb) & insmask); + // cppcheck-suppress knownConditionTrueFalse + if (w == obits) return; + obits -= w; + } + _vl_insert_WW(iowp, rwp + wordoff + (lsb != 0), upperbits + obits - 1, upperbits, rbits); +} + +//====================================================================== +// Triops + +static inline WDataOutP VL_COND_WIWW(int obits, WDataOutP owp, int cond, WDataInP const w1p, + WDataInP const w2p) VL_MT_SAFE { + return VL_MEMCPY_W(owp, cond ? w1p : w2p, VL_WORDS_I(obits)); +} + +//====================================================================== +// Constification + +// VL_CONST_W_#X(int obits, WDataOutP owp, IData data0, .... IData data(#-1)) +// Sets wide vector words to specified constant words. +// These macros are used when o might represent more words then are given as constants, +// hence all upper words must be zeroed. +// If changing the number of functions here, also change EMITCINLINES_NUM_CONSTW + +#define VL_C_END_(obits, wordsSet) \ + VL_MEMSET_ZERO_W(o + (wordsSet), VL_WORDS_I(obits) - (wordsSet)); \ + return o + +// clang-format off +static inline WDataOutP VL_CONST_W_1X(int obits, WDataOutP o, EData d0) VL_MT_SAFE { + o[0] = d0; + VL_C_END_(obits, 1); +} +static inline WDataOutP VL_CONST_W_2X(int obits, WDataOutP o, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; + VL_C_END_(obits, 2); +} +static inline WDataOutP VL_CONST_W_3X(int obits, WDataOutP o, EData d2, EData d1, + EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; + VL_C_END_(obits, 3); +} +static inline WDataOutP VL_CONST_W_4X(int obits, WDataOutP o, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + VL_C_END_(obits, 4); +} +static inline WDataOutP VL_CONST_W_5X(int obits, WDataOutP o, + EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = 
d4; + VL_C_END_(obits, 5); +} +static inline WDataOutP VL_CONST_W_6X(int obits, WDataOutP o, + EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; + VL_C_END_(obits, 6); +} +static inline WDataOutP VL_CONST_W_7X(int obits, WDataOutP o, + EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; + VL_C_END_(obits, 7); +} +static inline WDataOutP VL_CONST_W_8X(int obits, WDataOutP o, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; + VL_C_END_(obits, 8); +} +// +static inline WDataOutP VL_CONSTHI_W_1X(int obits, int lsb, WDataOutP o, + EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; + VL_C_END_(obits, VL_WORDS_I(lsb) + 1); +} +static inline WDataOutP VL_CONSTHI_W_2X(int obits, int lsb, WDataOutP o, + EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; + VL_C_END_(obits, VL_WORDS_I(lsb) + 2); +} +static inline WDataOutP VL_CONSTHI_W_3X(int obits, int lsb, WDataOutP o, + EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; + VL_C_END_(obits, VL_WORDS_I(lsb) + 3); +} +static inline WDataOutP VL_CONSTHI_W_4X(int obits, int lsb, WDataOutP o, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + VL_C_END_(obits, VL_WORDS_I(lsb) + 4); +} +static inline WDataOutP VL_CONSTHI_W_5X(int obits, int lsb, WDataOutP o, + EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; + VL_C_END_(obits, 
VL_WORDS_I(lsb) + 5); +} +static inline WDataOutP VL_CONSTHI_W_6X(int obits, int lsb, WDataOutP o, + EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; ohi[5] = d5; + VL_C_END_(obits, VL_WORDS_I(lsb) + 6); +} +static inline WDataOutP VL_CONSTHI_W_7X(int obits, int lsb, WDataOutP o, + EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; ohi[5] = d5; ohi[6] = d6; + VL_C_END_(obits, VL_WORDS_I(lsb) + 7); +} +static inline WDataOutP VL_CONSTHI_W_8X(int obits, int lsb, WDataOutP o, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP ohi = o + VL_WORDS_I(lsb); + ohi[0] = d0; ohi[1] = d1; ohi[2] = d2; ohi[3] = d3; + ohi[4] = d4; ohi[5] = d5; ohi[6] = d6; ohi[7] = d7; + VL_C_END_(obits, VL_WORDS_I(lsb) + 8); +} + +#undef VL_C_END_ + +// Partial constant, lower words of vector wider than 8*32, starting at bit number lsb +static inline void VL_CONSTLO_W_8X(int lsb, WDataOutP obase, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; +} +// clang-format on + +//====================================================================== +// Strings + +extern std::string VL_PUTC_N(const std::string& lhs, IData rhs, CData ths) VL_PURE; +extern CData VL_GETC_N(const std::string& lhs, IData rhs) VL_PURE; +extern std::string VL_SUBSTR_N(const std::string& lhs, IData rhs, IData ths) VL_PURE; + +inline IData VL_CMP_NN(const std::string& lhs, const std::string& rhs, bool ignoreCase) VL_PURE { + // SystemVerilog does not allow a string variable to contain '\0'. 
+ // So C functions such as strcmp() can correctly compare strings. + if (ignoreCase) { + return VL_STRCASECMP(lhs.c_str(), rhs.c_str()); + } else { + return std::strcmp(lhs.c_str(), rhs.c_str()); + } +} + +extern IData VL_ATOI_N(const std::string& str, int base) VL_PURE; +extern IData VL_NTOI_I(int obits, const std::string& str) VL_PURE; +extern QData VL_NTOI_Q(int obits, const std::string& str) VL_PURE; +extern void VL_NTOI_W(int obits, WDataOutP owp, const std::string& str) VL_PURE; + +extern IData VL_FGETS_NI(std::string& dest, IData fpi) VL_MT_SAFE; + +//====================================================================== +// Dist functions + +extern IData VL_DIST_CHI_SQUARE(IData& seedr, IData udeg_of_free) VL_MT_SAFE; +extern IData VL_DIST_ERLANG(IData& seedr, IData uk, IData umean) VL_MT_SAFE; +extern IData VL_DIST_EXPONENTIAL(IData& seedr, IData umean) VL_MT_SAFE; +extern IData VL_DIST_NORMAL(IData& seedr, IData umean, IData udeviation) VL_MT_SAFE; +extern IData VL_DIST_POISSON(IData& seedr, IData umean) VL_MT_SAFE; +extern IData VL_DIST_T(IData& seedr, IData udeg_of_free) VL_MT_SAFE; +extern IData VL_DIST_UNIFORM(IData& seedr, IData ustart, IData uend) VL_MT_SAFE; + +//====================================================================== +// Conversion functions + +extern std::string VL_CVT_PACK_STR_NW(int lwords, const WDataInP lwp) VL_PURE; +extern std::string VL_CVT_PACK_STR_ND(const VlQueue& q) VL_PURE; +inline std::string VL_CVT_PACK_STR_NQ(QData lhs) VL_PURE { + VlWide lw; + VL_SET_WQ(lw, lhs); + return VL_CVT_PACK_STR_NW(VL_WQ_WORDS_E, lw); +} +inline std::string VL_CVT_PACK_STR_NN(const std::string& lhs) VL_PURE { return lhs; } +inline std::string& VL_CVT_PACK_STR_NN(std::string& lhs) VL_PURE { return lhs; } +inline std::string VL_CVT_PACK_STR_NI(IData lhs) VL_PURE { + VlWide lw; + VL_SET_WI(lw, lhs); + return VL_CVT_PACK_STR_NW(1, lw); +} +inline std::string VL_CONCATN_NNN(const std::string& lhs, const std::string& rhs) VL_PURE { + return lhs 
+ rhs; +} +inline std::string VL_REPLICATEN_NNQ(const std::string& lhs, IData rep) VL_PURE { + std::string result; + result.reserve(lhs.length() * rep); + for (unsigned times = 0; times < rep; ++times) result += lhs; + return result; +} +inline std::string VL_REPLICATEN_NNI(const std::string& lhs, IData rep) VL_PURE { + return VL_REPLICATEN_NNQ(lhs, rep); +} + +inline IData VL_LEN_IN(const std::string& ld) { return static_cast(ld.length()); } +extern std::string VL_TOLOWER_NN(const std::string& ld) VL_PURE; +extern std::string VL_TOUPPER_NN(const std::string& ld) VL_PURE; + +extern IData VL_FERROR_IN(IData fpi, std::string& outputr) VL_MT_SAFE; +extern IData VL_FERROR_IW(IData fpi, int obits, WDataOutP outwp) VL_MT_SAFE; +extern IData VL_FOPEN_NN(const std::string& filename, const std::string& mode) VL_MT_SAFE; +extern IData VL_FOPEN_MCD_N(const std::string& filename) VL_MT_SAFE; +extern void VL_READMEM_N(bool hex, int bits, QData depth, int array_lsb, + const std::string& filename, void* memp, QData start, + QData end) VL_MT_SAFE; +extern void VL_WRITEMEM_N(bool hex, int bits, QData depth, int array_lsb, + const std::string& filename, const void* memp, QData start, + QData end) VL_MT_SAFE; +extern IData VL_SSCANF_INNX(int lbits, const std::string& ld, const std::string& format, int argc, + ...) VL_MT_SAFE; +extern void VL_SFORMAT_NX(int obits_ignored, std::string& output, const std::string& format, + int argc, ...) VL_MT_SAFE; +extern std::string VL_SFORMATF_N_NX(const std::string& format, int argc, ...) 
VL_MT_SAFE; +extern void VL_TIMEFORMAT_IINI(bool hasUnits, int units, bool hasPrecision, int precision, + bool hasSuffix, const std::string& suffix, bool hasWidth, int width, + VerilatedContext* contextp) VL_MT_SAFE; +extern IData VL_VALUEPLUSARGS_INW(int rbits, const std::string& ld, WDataOutP rwp) VL_MT_SAFE; +inline IData VL_VALUEPLUSARGS_IND(int rbits, const std::string& ld, double& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_CVT_D_Q(VL_SET_QW(rwp)); + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, CData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, SData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, IData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, QData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_SET_QW(rwp); + return got; +} +inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, double& rdr) VL_MT_SAFE { + VlWide<2> rwp; + const IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_CVT_D_Q(VL_SET_QW(rwp)); + return got; +} +extern IData VL_VALUEPLUSARGS_INN(int, const std::string& ld, std::string& rdr) VL_MT_SAFE; + +uint64_t VL_MURMUR64_HASH(const char* key) VL_PURE; + +//====================================================================== + +#endif // Guard diff --git a/remove_duplicates.py b/remove_duplicates.py new file mode 100644 index 000000000..89f0463c2 --- /dev/null +++ 
b/remove_duplicates.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +import re + +def remove_duplicates(input_file, output_file): + with open(input_file, 'r') as f: + lines = f.readlines() + + output_lines = [] + seen_functions = set() + + i = 0 + while i < len(lines): + line = lines[i] + + # Check if this is a function definition + func_match = re.match(r'\s*(static|inline)?\s+\w+\s+(\w+)_4STATE_(\w+)\s*\(', line) + if func_match: + func_name = f"{func_match.group(2)}_4STATE_{func_match.group(3)}" + + # Check if we've seen this function before + if func_name in seen_functions: + # Skip this duplicate function + # Find the end of this function + while i < len(lines) and not re.match(r'\s*};?\s*$', lines[i]): + i += 1 + # Skip the closing brace/line + if i < len(lines): + i += 1 + continue + else: + seen_functions.add(func_name) + output_lines.append(line) + i += 1 + else: + # Check for other patterns of duplicates + # _vl4_anyXZ_* functions + anyxz_match = re.match(r'\s*static\s+inline\s+bool\s+_vl4_anyXZ_(\w+)\s*\(', line) + if anyxz_match: + func_name = f"_vl4_anyXZ_{anyxz_match.group(1)}" + if func_name in seen_functions: + while i < len(lines) and not re.match(r'\s*};?\s*$', lines[i]): + i += 1 + if i < len(lines): + i += 1 + continue + else: + seen_functions.add(func_name) + output_lines.append(line) + i += 1 + else: + output_lines.append(line) + i += 1 + + with open(output_file, 'w') as f: + f.writelines(output_lines) + +if __name__ == "__main__": + input_file = 'verilated_funcs.h' + output_file = 'verilated_funcs_cleaned.h' + remove_duplicates(input_file, output_file) + print(f"Duplicates removed. 
Saved to {output_file}") + print(f"Original: {len(open(input_file).readlines())} lines") + print(f"Cleaned: {len(open(output_file).readlines())} lines") \ No newline at end of file diff --git a/remove_duplicates2.py b/remove_duplicates2.py new file mode 100644 index 000000000..23e3c03a1 --- /dev/null +++ b/remove_duplicates2.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +import re + +def remove_all_duplicates(input_file, output_file): + with open(input_file, 'r') as f: + lines = f.readlines() + + output_lines = [] + seen_functions = set() + + i = 0 + while i < len(lines): + line = lines[i] + + # Check for function definitions + func_match = re.match(r'\s*(static|inline)?\s+\w+\s+(\w+)\s*\(', line) + if func_match: + func_name = func_match.group(2) + + # Check for specific patterns we want to deduplicate + if (func_name.startswith("VL_EQ_4STATE_") or + func_name.startswith("VL_NEQ_4STATE_") or + func_name.startswith("_vl4_anyXZ_") or + func_name.startswith("VL_ADD_4STATE_") or + func_name.startswith("VL_SUB_4STATE_")): + + # Create a signature to identify duplicates + # For example: VL_EQ_4STATE_C, VL_EQ_4STATE_S, etc. are all the same function + base_name = func_name.split('_')[0] + "_4STATE" + if base_name in seen_functions: + # Skip this duplicate function + while i < len(lines) and not re.match(r'\s*};?\s*$', lines[i]): + i += 1 + if i < len(lines): + i += 1 + continue + else: + seen_functions.add(base_name) + output_lines.append(line) + i += 1 + else: + output_lines.append(line) + i += 1 + else: + output_lines.append(line) + i += 1 + + with open(output_file, 'w') as f: + f.writelines(output_lines) + +if __name__ == "__main__": + input_file = 'verilated_funcs.h' + output_file = 'verilated_funcs_cleaned2.h' + remove_all_duplicates(input_file, output_file) + print(f"Duplicates removed. 
Saved to {output_file}") + print(f"Original: {len(open(input_file).readlines())} lines") + print(f"Cleaned: {len(open(output_file).readlines())} lines") \ No newline at end of file diff --git a/remove_manual.py b/remove_manual.py new file mode 100644 index 000000000..d590590fe --- /dev/null +++ b/remove_manual.py @@ -0,0 +1,104 @@ +import re + +def remove_manual_duplicates(input_file, output_file): + with open(input_file, 'r') as f: + lines = f.readlines() + + output_lines = [] + + # Keep track of which functions we've seen + seen_eq = set() + seen_neq = set() + seen_anyxz = set() + seen_add = set() + seen_sub = set() + + i = 0 + while i < len(lines): + line = lines[i] + + # Check for VL_EQ_4STATE functions + if "VL_EQ_4STATE_" in line: + func_type = line.split("VL_EQ_4STATE_")[1].split()[0].strip() + if func_type not in seen_eq: + seen_eq.add(func_type) + output_lines.append(line) + i += 1 + else: + # Skip this duplicate function + while i < len(lines) and not re.match(r'\s*};?\s*$', lines[i]): + i += 1 + if i < len(lines): + i += 1 + continue + + # Check for VL_NEQ_4STATE functions + elif "VL_NEQ_4STATE_" in line: + func_type = line.split("VL_NEQ_4STATE_")[1].split()[0].strip() + if func_type not in seen_neq: + seen_neq.add(func_type) + output_lines.append(line) + i += 1 + else: + while i < len(lines) and not re.match(r'\s*};?\s*$', lines[i]): + i += 1 + if i < len(lines): + i += 1 + continue + + # Check for _vl4_anyXZ functions + elif "_vl4_anyXZ_" in line: + func_type = line.split("_vl4_anyXZ_")[1].split()[0].strip() + if func_type not in seen_anyxz: + seen_anyxz.add(func_type) + output_lines.append(line) + i += 1 + else: + while i < len(lines) and not re.match(r'\s*};?\s*$', lines[i]): + i += 1 + if i < len(lines): + i += 1 + continue + + # Check for VL_ADD_4STATE functions + elif "VL_ADD_4STATE_" in line: + func_type = line.split("VL_ADD_4STATE_")[1].split()[0].strip() + if func_type not in seen_add: + seen_add.add(func_type) + output_lines.append(line) + i 
+= 1 + else: + while i < len(lines) and not re.match(r'\s*};?\s*$', lines[i]): + i += 1 + if i < len(lines): + i += 1 + continue + + # Check for VL_SUB_4STATE functions + elif "VL_SUB_4STATE_" in line: + func_type = line.split("VL_SUB_4STATE_")[1].split()[0].strip() + if func_type not in seen_sub: + seen_sub.add(func_type) + output_lines.append(line) + i += 1 + else: + while i < len(lines) and not re.match(r'\s*};?\s*$', lines[i]): + i += 1 + if i < len(lines): + i += 1 + continue + + else: + output_lines.append(line) + i += 1 + + with open(output_file, 'w') as f: + f.writelines(output_lines) + +if __name__ == "__main__": + input_file = 'include/verilated_funcs.h' + output_file = 'include/verilated_funcs_cleaned_manual.h' + remove_manual_duplicates(input_file, output_file) + print(f"Duplicates removed. Saved to {output_file}") + print(f"Original: {len(open(input_file).readlines())} lines") + print(f"Cleaned: {len(open(output_file).readlines())} lines") \ No newline at end of file diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 5067b5d69..f14fb5b9e 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1947,8 +1947,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, } }); DECL_OPTION("-x-initial-edge", OnOff, &m_xInitialEdge); - DECL_OPTION("-x-sim", OnOff, &m_xFourState, - "Enable four-state simulation with X/Z support"); + DECL_OPTION("-x-sim", OnOff, &m_xFourState); DECL_OPTION("-y", CbVal, [this, &optdir](const char* valp) { addIncDirUser(parseFileArg(optdir, string{valp})); diff --git a/test_regress/t/t_x_sim_basic.v b/test_regress/t/t_x_sim_basic.v index b1d092988..7aea3b2de 100644 --- a/test_regress/t/t_x_sim_basic.v +++ b/test_regress/t/t_x_sim_basic.v @@ -1,64 +1,51 @@ // DESCRIPTION: Verilator: Test X/Z four-state simulation with --x-sim // -// This test verifies X and Z value propagation when --x-sim is enabled. +// This test verifies four-state signal initialization when --x-sim is enabled. 
+// Uninitialized signals should be X, not 0. // // SPDX-FileCopyrightText: 2026 // SPDX-License-Identifier: LGPL-3.0-only -module t(input clk); +module t; + +logic [3:0] a; // Uninitialized - should be X with --x-sim +logic [3:0] b = 4'b1010; // Initialized -logic [3:0] a; -logic [3:0] b; logic [3:0] y_and; logic [3:0] y_or; logic [3:0] y_xor; logic [3:0] y_add; logic [3:0] y_sub; -logic y_eq; -logic y_neq; -// Test X propagation through logical operations -always @(posedge clk) begin - a <= 4'b1010; - b <= 4'b01xz; // Contains X and Z -end +initial begin + // a is uninitialized - with --x-sim it should be X + + // Test operations with X + // AND with all 1s: X & 1 = X + y_and = a & b; + + // OR with all 0s: X | 0 = X + y_or = a | 4'b0000; + + // XOR with all 0s: X ^ 0 = X + y_xor = a ^ 4'b0000; + + // Add: X + anything = X + y_add = a + b; + + // Sub: X - anything = X + y_sub = a - b; -// AND: X & anything = X, Z & anything = X -assign y_and = a & b; - -// OR -assign y_or = a | b; - -// XOR -assign y_xor = a ^ b; - -// Addition: X + anything = X -assign y_add = a + b; - -// Subtraction -assign y_sub = a - b; - -// Comparisons with X return false (for !==) -assign y_eq = (a == b); -assign y_neq = (a != b); - -// Check results -always @(posedge clk) begin - // With --x-sim, b has X/Z, so results should propagate X - // We just verify the simulator runs without crashing - if (a == 4'b1010) begin - $write("a = %b (expected 1010)\n", a); - $write("b = %b (expected 01xz)\n", b); - $write("a & b = %b\n", y_and); - $write("a | b = %b\n", y_or); - $write("a ^ b = %b\n", y_xor); - $write("a + b = %b\n", y_add); - $write("a - b = %b\n", y_sub); - $write("a == b = %b (should be 0 or x due to X)\n", y_eq); - $write("a != b = %b (should be 1 or x due to X)\n", y_neq); - $write("*-* All Finished *-*\n"); - $finish; - end + $write("Testing four-state simulation with --x-sim:\n"); + $write("b = %b (initialized to 1010)\n", b); + $write("a (uninitialized) = %b (should be xxxx with 
--x-sim)\n", a); + $write("a & b = %b (should be xxxx if a is X)\n", y_and); + $write("a | 0000 = %b (should be xxxx if a is X)\n", y_or); + $write("a ^ 0000 = %b (should be xxxx if a is X)\n", y_xor); + $write("a + b = %b (should be xxxx if a is X)\n", y_add); + $write("a - b = %b (should be xxxx if a is X)\n", y_sub); + $write("*-* All Finished *-*\n"); + $finish; end endmodule diff --git a/test_regress/t/t_x_sim_edge_cases.py b/test_regress/t/t_x_sim_edge_cases.py new file mode 100644 index 000000000..08b2780d4 --- /dev/null +++ b/test_regress/t/t_x_sim_edge_cases.py @@ -0,0 +1,82 @@ +import os +import subprocess +import sys + +def run_verilator_test(test_name, verilog_file, options=""): + print(f"\n=== Running {test_name} ===") + + # Run Verilator + verilator_cmd = f"verilator --x-sim -cc {verilog_file} --exe t_{test_name}.cpp -Mdir obj_vlt/{test_name} {options}" + result = subprocess.run(verilator_cmd, shell=True, capture_output=True, text=True) + + if result.returncode != 0: + print("Verilator compilation failed!") + print(result.stderr) + return False + + print("Verilator compilation successful.") + + # Compile the test + compile_cmd = f"make -C obj_vlt/{test_name} -f /home/bnielson/git/verilator/test_regress/Makefile_obj --no-print-directory VM_PREFIX=Vt_{test_name} CPPFLAGS_DRIVER=-D{test_name.upper()} {test_name}" + result = subprocess.run(compile_cmd, shell=True, capture_output=True, text=True) + + if result.returncode != 0: + print("Test compilation failed!") + print(result.stderr) + return False + + print("Test compilation successful.") + + # Run the test + run_cmd = f"obj_vlt/{test_name}/{test_name}" + result = subprocess.run(run_cmd, shell=True, capture_output=True, text=True) + + print(result.stdout) + + if result.returncode != 0: + print("Test execution failed!") + print(result.stderr) + return False + + print(f"{test_name} passed!") + return True + +def main(): + tests = [ + { + "name": "x_sim_edge_cases", + "verilog": "t_x_sim_edge_cases.v", + 
    "description": "Edge cases with nested operations, mixed bit widths, arrays, and complex expressions" + } + ] + + print("Verilator X/Z Four-State Simulation Edge Case Tests") + print("=" * 60) + + passed = 0 + failed = 0 + + for test in tests: + print("\n" + "=" * 40) + print(f"Test: {test['name']}") + print(f"Description: {test['description']}") + print("=" * 40) + + if run_verilator_test(test["name"], test["verilog"]): + passed += 1 + else: + failed += 1 + + print("\n" + "=" * 60) + print(f"Test Summary: {passed} passed, {failed} failed") + print("=" * 60) + + if failed == 0: + print("✅ All edge case tests passed!") + return 0 + else: + print("❌ Some tests failed.") + return 1 + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/test_regress/t/t_x_sim_edge_cases.v b/test_regress/t/t_x_sim_edge_cases.v new file mode 100644 index 000000000..c781c8e41 --- /dev/null +++ b/test_regress/t/t_x_sim_edge_cases.v @@ -0,0 +1,99 @@ +// Test file for X/Z four-state simulation edge cases +// This tests nested operations, mixed bit widths, arrays, and complex expressions + +module t_x_sim_edge_cases; + + // Test signals with various bit widths + wire [3:0] a4 = 4'b1010; + wire [7:0] b8 = 8'b11001100; + wire [15:0] c16 = 16'hABCD; + + // Four-state signals with X and Z values + reg [3:0] a4_4state = 4'b1010; + reg [7:0] b8_4state = 8'b11001100; + reg [15:0] c16_4state = 16'hABCD; + + // Initialize with X and Z values + initial begin + a4_4state[0] = 1'bX; // First bit is X + b8_4state[4] = 1'bZ; // Middle bit is Z + c16_4state[7:4] = 4'bXZ10; // Mixed X/Z in middle + end + + // Four-state signals with X/Z + reg [3:0] x4 = 4'bX1X0; + reg [7:0] z8 = 8'bZZZZ1010; + reg [15:0] xz16 = 16'bXZ10_XZ10_XZ10_XZ10; + + // Results for nested operations + wire [3:0] res1; + wire [7:0] res2; + wire [15:0] res3; + + // Nested operations with X/Z propagation + assign res1 = (a4_4state & x4) | (b8_4state ^ z8); + assign res2 = (c16_4state + xz16) - 
(a4_4state * z8); + assign res3 = (res1 << 2) | (res2 >> 4); + + // Mixed bit width operations + wire [7:0] mixed1; + wire [15:0] mixed2; + + assign mixed1 = {a4_4state, b8_4state[3:0]}; // 4-bit + 4-bit = 8-bit + assign mixed2 = {b8_4state, c16_4state[7:0]}; // 8-bit + 8-bit = 16-bit + + // Array of four-state signals + reg [3:0] array4state [0:3]; + + initial begin + array4state[0] = 4'b1010; // Deterministic + array4state[1] = 4'bX1X0; // Has X + array4state[2] = 4'bZ0Z1; // Has Z + array4state[3] = 4'bXZ10; // Mixed X/Z + end + + // Operations on array elements + wire [3:0] array_res1; + wire [3:0] array_res2; + + assign array_res1 = array4state[0] & array4state[1]; // Deterministic & X + assign array_res2 = array4state[2] | array4state[3]; // Z & Mixed X/Z + + // Complex expressions with multiple X/Z + wire [7:0] complex1; + wire [15:0] complex2; + + assign complex1 = (a4_4state + x4) * (b8_4state - z8); + assign complex2 = ((c16_4state ^ xz16) + 16'hFFFF) & mixed2; + + // Test $display with four-state signals + initial begin + $display("=== Edge Case Tests ==="); + $display("a4_4state (4-bit with X): %b", a4_4state); + $display("b8_4state (8-bit with Z): %b", b8_4state); + $display("c16_4state (16-bit with X/Z): %b", c16_4state); + $display("x4 (X values): %b", x4); + $display("z8 (Z values): %b", z8); + $display("xz16 (mixed X/Z): %b", xz16); + + $display("\n=== Nested Operations ==="); + $display("res1 = (a4_4state & x4) | (b8_4state ^ z8): %b", res1); + $display("res2 = (c16_4state + xz16) - (a4_4state * z8): %b", res2); + $display("res3 = (res1 << 2) | (res2 >> 4): %b", res3); + + $display("\n=== Mixed Bit Width Operations ==="); + $display("mixed1 = {a4_4state, b8_4state[3:0]}: %b", mixed1); + $display("mixed2 = {b8_4state, c16_4state[7:0]}: %b", mixed2); + + $display("\n=== Array Operations ==="); + $display("array_res1 = array4state[0] & array4state[1]: %b", array_res1); + $display("array_res2 = array4state[2] | array4state[3]: %b", array_res2); + + 
$display("\n=== Complex Expressions ==="); + $display("complex1 = (a4_4state + x4) * (b8_4state - z8): %b", complex1); + $display("complex2 = ((c16_4state ^ xz16) + 16'hFFFF) & mixed2: %b", complex2); + + #10 $finish; + end + +endmodule \ No newline at end of file