From b90fce55f418d35587383d3bc08ad7029774ffca Mon Sep 17 00:00:00 2001 From: Wilson Snyder Date: Sat, 24 Jul 2021 10:00:33 -0400 Subject: [PATCH] Includes: Refactor verilated.h and deprecate verilated_heavy.h (#2701). --- Changes | 1 + docs/guide/deprecations.rst | 5 + include/verilated.cpp | 1 - include/verilated.h | 2151 +-------------------------- include/verilated_dpi.h | 1 - include/verilated_funcs.h | 2252 +++++++++++++++++++++++++++++ include/verilated_heavy.h | 960 +----------- include/verilated_imp.h | 1 - include/verilated_save.h | 2 +- include/verilated_syms.h | 2 +- include/verilated_types.h | 897 ++++++++++++ src/V3EmitCConstPool.cpp | 2 +- src/V3EmitCHeaders.cpp | 2 +- src/V3EmitCImp.cpp | 2 +- src/V3EmitCModel.cpp | 2 +- src/V3EmitCSyms.cpp | 2 +- test_regress/t/t_verilated_all.pl | 1 + 17 files changed, 3183 insertions(+), 3101 deletions(-) create mode 100644 include/verilated_funcs.h create mode 100644 include/verilated_types.h diff --git a/Changes b/Changes index e2e7cf0da..40413b760 100644 --- a/Changes +++ b/Changes @@ -18,6 +18,7 @@ Verilator 4.211 devel in order to aid incremental compilation via ccache (#3071). [Geza Lore] * Parameter values are now emitted as 'static constexpr' instead of enum. C++ direct references to parameters might require updating (#3077). [Geza Lore] +* Refactored Verilated include files; include verilated.h not verilated_heavy.h. * Fix -G to treat simple integer literals as signed (#3060). [Anikin1610] * Fix emitted string array initializers (#2895). [Iztok Jeras] diff --git a/docs/guide/deprecations.rst b/docs/guide/deprecations.rst index aa9fcb5df..8d3124175 100644 --- a/docs/guide/deprecations.rst +++ b/docs/guide/deprecations.rst @@ -11,6 +11,11 @@ C++11 compiler support require C++14 or newer compilers for both compiling Verilator and compiling Verilated models no sooner than January 2022. +Verilated_heavy.h + The legacy "verilated_heavy.h" include was replaced with just including + "verilated.h". Verilated_heavy.h is planned for removal no sooner than + July 2022. + Configuration File -msg The :vlopt:`lint_off` "-msg" option has been replaced with the "-rule" option. "-msg" is planned for removal no sooner than January 2021. 
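To make the deprecation concrete, a minimal sketch of a user C++ driver after this change is shown below; it is illustrative only and not part of this patch. The model class "Vtop" and its "clk" port are placeholders for whatever the user's Verilated model provides. The only source change most users need is dropping the verilated_heavy.h include, since verilated.h now pulls in the type and function headers itself:

    // #include "verilated_heavy.h"  // Legacy include; deprecated by this patch
    #include "verilated.h"           // Now sufficient on its own
    #include "Vtop.h"                // Placeholder Verilated model header
    #include <memory>

    int main(int argc, char** argv) {
        const std::unique_ptr<VerilatedContext> contextp{new VerilatedContext};
        contextp->commandArgs(argc, argv);           // Pass plusargs to the runtime
        const std::unique_ptr<Vtop> topp{new Vtop{contextp.get()}};
        topp->clk = 0;                               // "clk" is an assumed port name
        while (!contextp->gotFinish()) {             // Run until $finish
            contextp->timeInc(1);                    // Advance simulated time
            topp->clk = !topp->clk;                  // Toggle the placeholder clock
            topp->eval();
        }
        topp->final();
        return 0;
    }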
diff --git a/include/verilated.cpp b/include/verilated.cpp index 375647d2d..e01f32a78 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -1347,7 +1347,6 @@ IData VL_FGETS_IXI(int obits, void* destp, IData fpi) VL_MT_SAFE { return got; } -// declared in verilated_heavy.h IData VL_FGETS_NI(std::string& dest, IData fpi) VL_MT_SAFE { return getLine(dest, fpi, std::numeric_limits::max()); } diff --git a/include/verilated.h b/include/verilated.h index f71f13308..1866e34fb 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -29,6 +29,7 @@ #ifndef VERILATOR_VERILATED_H_ #define VERILATOR_VERILATED_H_ +#define VERILATOR_VERILATED_H_INTERNAL_ // clang-format off #include "verilatedos.h" @@ -36,18 +37,22 @@ # include "verilated_sc.h" // Get SYSTEMC_VERSION and time declarations #endif +#include +#include #include #include #include #include #include #include +#include +#include #include +#include #include +#include #include // avoided to reduce compile time -// avoided and instead in verilated_heavy.h to reduce compile time -// avoided and instead in verilated_heavy.h to reduce compile time #ifdef VL_THREADED # include # include @@ -259,31 +264,7 @@ public: const char* name() const { return m_namep; } ///< Return name of module }; -//========================================================================= -// Declare nets - -#define VL_SIG8(name, msb, lsb) CData name ///< Declare signal, 1-8 bits -#define VL_SIG16(name, msb, lsb) SData name ///< Declare signal, 9-16 bits -#define VL_SIG64(name, msb, lsb) QData name ///< Declare signal, 33-64 bits -#define VL_SIG(name, msb, lsb) IData name ///< Declare signal, 17-32 bits -#define VL_SIGW(name, msb, lsb, words) WData name[words] ///< Declare signal, 65+ bits -#define VL_IN8(name, msb, lsb) CData name ///< Declare input signal, 1-8 bits -#define VL_IN16(name, msb, lsb) SData name ///< Declare input signal, 9-16 bits -#define VL_IN64(name, msb, lsb) QData name ///< Declare input signal, 33-64 bits -#define VL_IN(name, msb, lsb) IData name ///< Declare input signal, 17-32 bits -#define VL_INW(name, msb, lsb, words) WData name[words] ///< Declare input signal, 65+ bits -#define VL_INOUT8(name, msb, lsb) CData name ///< Declare bidir signal, 1-8 bits -#define VL_INOUT16(name, msb, lsb) SData name ///< Declare bidir signal, 9-16 bits -#define VL_INOUT64(name, msb, lsb) QData name ///< Declare bidir signal, 33-64 bits -#define VL_INOUT(name, msb, lsb) IData name ///< Declare bidir signal, 17-32 bits -#define VL_INOUTW(name, msb, lsb, words) WData name[words] ///< Declare bidir signal, 65+ bits -#define VL_OUT8(name, msb, lsb) CData name ///< Declare output signal, 1-8 bits -#define VL_OUT16(name, msb, lsb) SData name ///< Declare output signal, 9-16 bits -#define VL_OUT64(name, msb, lsb) QData name ///< Declare output signal, 33-64bits -#define VL_OUT(name, msb, lsb) IData name ///< Declare output signal, 17-32 bits -#define VL_OUTW(name, msb, lsb, words) WData name[words] ///< Declare output signal, 65+ bits - -///< Declare a module, ala SC_MODULE +/// Declare a module, ala SC_MODULE #define VL_MODULE(modname) class modname VL_NOT_FINAL : public VerilatedModule // Not class final in VL_MODULE, as users might be abstracting our models (--hierarchical) @@ -887,2122 +868,16 @@ inline void VerilatedContext::debug(int val) VL_MT_SAFE { Verilated::debug(val); inline int VerilatedContext::debug() VL_MT_SAFE { return Verilated::debug(); } //========================================================================= -// Extern functions -- User 
may override -- See verilated.cpp +// Data Types -/// Routine to call for $finish -/// User code may wish to replace this function, to do so, define VL_USER_FINISH. -/// This code does not have to be thread safe. -/// Verilator internal code must call VL_FINISH_MT instead, which eventually calls this. -extern void vl_finish(const char* filename, int linenum, const char* hier); - -/// Routine to call for $stop and non-fatal error -/// User code may wish to replace this function, to do so, define VL_USER_STOP. -/// This code does not have to be thread safe. -/// Verilator internal code must call VL_STOP_MT instead, which eventually calls this. -extern void vl_stop(const char* filename, int linenum, const char* hier); - -/// Routine to call for a couple of fatal messages -/// User code may wish to replace this function, to do so, define VL_USER_FATAL. -/// This code does not have to be thread safe. -/// Verilator internal code must call VL_FATAL_MT instead, which eventually calls this. -extern void vl_fatal(const char* filename, int linenum, const char* hier, const char* msg); +#include "verilated_types.h" //========================================================================= -// Extern functions -- Slow path +// Functions -/// Multithread safe wrapper for calls to $finish -extern void VL_FINISH_MT(const char* filename, int linenum, const char* hier) VL_MT_SAFE; -/// Multithread safe wrapper for calls to $stop -extern void VL_STOP_MT(const char* filename, int linenum, const char* hier, - bool maybe = true) VL_MT_SAFE; -/// Multithread safe wrapper to call for a couple of fatal messages -extern void VL_FATAL_MT(const char* filename, int linenum, const char* hier, - const char* msg) VL_MT_SAFE; - -// clang-format off -/// Print a string, multithread safe. Eventually VL_PRINTF will get called. -#ifdef VL_THREADED -extern void VL_PRINTF_MT(const char* formatp, ...) VL_ATTR_PRINTF(1) VL_MT_SAFE; -#else -# define VL_PRINTF_MT VL_PRINTF // The following parens will take care of themselves -#endif -// clang-format on - -/// Print a debug message from internals with standard prefix, with printf style format -extern void VL_DBG_MSGF(const char* formatp, ...)
VL_ATTR_PRINTF(1) VL_MT_SAFE; - -extern vluint64_t vl_rand64() VL_MT_SAFE; -inline IData VL_RANDOM_I(int obits) VL_MT_SAFE { return vl_rand64() & VL_MASK_I(obits); } -inline QData VL_RANDOM_Q(int obits) VL_MT_SAFE { return vl_rand64() & VL_MASK_Q(obits); } -#ifndef VL_NO_LEGACY -extern WDataOutP VL_RANDOM_W(int obits, WDataOutP outwp); -#endif -extern IData VL_RANDOM_SEEDED_II(int obits, IData seed) VL_MT_SAFE; -inline IData VL_URANDOM_RANGE_I(IData hi, IData lo) { - vluint64_t rnd = vl_rand64(); - if (VL_LIKELY(hi > lo)) { - // Modulus isn't very fast but it's common that hi-low is power-of-two - return (rnd % (hi - lo + 1)) + lo; - } else { - return (rnd % (lo - hi + 1)) + hi; - } -} - -// These are init time only, so slow is fine -/// Random reset a signal of given width -extern IData VL_RAND_RESET_I(int obits); -/// Random reset a signal of given width -extern QData VL_RAND_RESET_Q(int obits); -/// Random reset a signal of given width -extern WDataOutP VL_RAND_RESET_W(int obits, WDataOutP outwp); -/// Zero reset a signal (slow - else use VL_ZERO_W) -extern WDataOutP VL_ZERO_RESET_W(int obits, WDataOutP outwp); - -#if VL_THREADED -/// Return high-precision counter for profiling, or 0x0 if not available -inline QData VL_RDTSC_Q() { - vluint64_t val; - VL_RDTSC(val); - return val; -} -#endif - -extern void VL_PRINTTIMESCALE(const char* namep, const char* timeunitp, - const VerilatedContext* contextp) VL_MT_SAFE; - -extern WDataOutP _vl_moddiv_w(int lbits, WDataOutP owp, WDataInP const lwp, WDataInP const rwp, - bool is_modulus); - -extern IData VL_FGETS_IXI(int obits, void* destp, IData fpi); - -extern void VL_FFLUSH_I(IData fdi); -extern IData VL_FSEEK_I(IData fdi, IData offset, IData origin); -extern IData VL_FTELL_I(IData fdi); -extern void VL_FCLOSE_I(IData fdi); - -extern IData VL_FREAD_I(int width, int array_lsb, int array_size, void* memp, IData fpi, - IData start, IData count); - -extern void VL_WRITEF(const char* formatp, ...); -extern void VL_FWRITEF(IData fpi, const char* formatp, ...); - -extern IData VL_FSCANF_IX(IData fpi, const char* formatp, ...); -extern IData VL_SSCANF_IIX(int lbits, IData ld, const char* formatp, ...); -extern IData VL_SSCANF_IQX(int lbits, QData ld, const char* formatp, ...); -extern IData VL_SSCANF_IWX(int lbits, WDataInP const lwp, const char* formatp, ...); - -extern void VL_SFORMAT_X(int obits, CData& destr, const char* formatp, ...); -extern void VL_SFORMAT_X(int obits, SData& destr, const char* formatp, ...); -extern void VL_SFORMAT_X(int obits, IData& destr, const char* formatp, ...); -extern void VL_SFORMAT_X(int obits, QData& destr, const char* formatp, ...); -extern void VL_SFORMAT_X(int obits, void* destp, const char* formatp, ...); - -extern IData VL_SYSTEM_IW(int lhswords, WDataInP const lhsp); -extern IData VL_SYSTEM_IQ(QData lhs); -inline IData VL_SYSTEM_II(IData lhs) VL_MT_SAFE { return VL_SYSTEM_IQ(lhs); } - -extern IData VL_TESTPLUSARGS_I(const char* formatp); -extern const char* vl_mc_scan_plusargs(const char* prefixp); // PLIish - -//========================================================================= -// Base macros - -// Return true if data[bit] set; not 0/1 return, but 0/non-zero return. 
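// Illustrative usage sketch (added for clarity, not from verilated.h): typical
// use of the bit-test macros defined just below. They return 0/non-zero rather
// than 0/1, so normalize with "!= 0" where a strict boolean/0-or-1 value is
// required. The helper names here are hypothetical.
static inline bool example_bit70_set(WDataInP const sigp) VL_MT_SAFE {
    return VL_BITISSET_W(sigp, 70) != 0;  // Test bit 70 of a wide (65+ bit) signal
}
static inline bool example_flag_bit0(IData flags) VL_PURE {
    return VL_BITISSET_I(flags, 0) != 0;  // Test bit 0 of a <= 32 bit value
}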
-#define VL_BITISSET_I(data, bit) ((data) & (VL_UL(1) << VL_BITBIT_I(bit))) -#define VL_BITISSET_Q(data, bit) ((data) & (1ULL << VL_BITBIT_Q(bit))) -#define VL_BITISSET_E(data, bit) ((data) & (VL_EUL(1) << VL_BITBIT_E(bit))) -#define VL_BITISSET_W(data, bit) ((data)[VL_BITWORD_E(bit)] & (VL_EUL(1) << VL_BITBIT_E(bit))) -#define VL_BITISSETLIMIT_W(data, width, bit) (((bit) < (width)) && VL_BITISSET_W(data, bit)) - -// Shift appropriate word by bit. Does not account for wrapping between two words -#define VL_BITRSHIFT_W(data, bit) ((data)[VL_BITWORD_E(bit)] >> VL_BITBIT_E(bit)) - -// Create two 32-bit words from quadword -// WData is always at least 2 words; does not clean upper bits -#define VL_SET_WQ(owp, data) \ - do { \ - (owp)[0] = static_cast(data); \ - (owp)[1] = static_cast((data) >> VL_EDATASIZE); \ - } while (false) -#define VL_SET_WI(owp, data) \ - do { \ - (owp)[0] = static_cast(data); \ - (owp)[1] = 0; \ - } while (false) -#define VL_SET_QW(lwp) \ - ((static_cast((lwp)[0])) \ - | (static_cast((lwp)[1]) << (static_cast(VL_EDATASIZE)))) -#define VL_SET_QII(ld, rd) ((static_cast(ld) << 32ULL) | static_cast(rd)) - -// Return FILE* from IData -extern FILE* VL_CVT_I_FP(IData lhs) VL_MT_SAFE; - -// clang-format off -// Use a union to avoid cast-to-different-size warnings -// Return void* from QData -static inline void* VL_CVT_Q_VP(QData lhs) VL_PURE { - union { void* fp; QData q; } u; - u.q = lhs; - return u.fp; -} -// Return QData from const void* -static inline QData VL_CVT_VP_Q(const void* fp) VL_PURE { - union { const void* fp; QData q; } u; - u.q = 0; - u.fp = fp; - return u.q; -} -// Return double from QData (bits, not numerically) -static inline double VL_CVT_D_Q(QData lhs) VL_PURE { - union { double d; QData q; } u; - u.q = lhs; - return u.d; -} -// Return QData from double (bits, not numerically) -static inline QData VL_CVT_Q_D(double lhs) VL_PURE { - union { double d; QData q; } u; - u.d = lhs; - return u.q; -} -// clang-format on - -// Return double from lhs (numeric) unsigned -double VL_ITOR_D_W(int lbits, WDataInP const lwp) VL_PURE; -static inline double VL_ITOR_D_I(int, IData lhs) VL_PURE { - return static_cast(static_cast(lhs)); -} -static inline double VL_ITOR_D_Q(int, QData lhs) VL_PURE { - return static_cast(static_cast(lhs)); -} -// Return double from lhs (numeric) signed -double VL_ISTOR_D_W(int lbits, WDataInP const lwp) VL_PURE; -static inline double VL_ISTOR_D_I(int lbits, IData lhs) VL_PURE { - if (lbits == 32) return static_cast(static_cast(lhs)); - WData lwp[VL_WQ_WORDS_E]; - VL_SET_WI(lwp, lhs); - return VL_ISTOR_D_W(lbits, lwp); -} -static inline double VL_ISTOR_D_Q(int lbits, QData lhs) VL_PURE { - if (lbits == 64) return static_cast(static_cast(lhs)); - WData lwp[VL_WQ_WORDS_E]; - VL_SET_WQ(lwp, lhs); - return VL_ISTOR_D_W(lbits, lwp); -} -// Return QData from double (numeric) -static inline IData VL_RTOI_I_D(double lhs) VL_PURE { - return static_cast(VL_TRUNC(lhs)); -} - -// Sign extend such that if MSB set, we get ffff_ffff, else 0s -// (Requires clean input) -#define VL_SIGN_I(nbits, lhs) ((lhs) >> VL_BITBIT_I((nbits)-VL_UL(1))) -#define VL_SIGN_Q(nbits, lhs) ((lhs) >> VL_BITBIT_Q((nbits)-1ULL)) -#define VL_SIGN_E(nbits, lhs) ((lhs) >> VL_BITBIT_E((nbits)-VL_EUL(1))) -#define VL_SIGN_W(nbits, rwp) \ - ((rwp)[VL_BITWORD_E((nbits)-VL_EUL(1))] >> VL_BITBIT_E((nbits)-VL_EUL(1))) -#define VL_SIGNONES_E(nbits, lhs) (-(VL_SIGN_E(nbits, lhs))) - -// Sign bit extended up to MSB, doesn't include unsigned portion -// Optimization bug in GCC 3.3 returns different 
bitmasks to later states for -static inline IData VL_EXTENDSIGN_I(int lbits, IData lhs) VL_PURE { - return (-((lhs) & (VL_UL(1) << (lbits - 1)))); -} -static inline QData VL_EXTENDSIGN_Q(int lbits, QData lhs) VL_PURE { - return (-((lhs) & (1ULL << (lbits - 1)))); -} - -// Debugging prints -extern void _vl_debug_print_w(int lbits, WDataInP const iwp); - -//========================================================================= -// Pli macros - -extern int VL_TIME_STR_CONVERT(const char* strp) VL_PURE; - -// These are deprecated and used only to establish the default precision/units. -// Use Verilator timescale-override for better control. -// clang-format off -#ifndef VL_TIME_PRECISION -# ifdef VL_TIME_PRECISION_STR -# define VL_TIME_PRECISION VL_TIME_STR_CONVERT(VL_STRINGIFY(VL_TIME_PRECISION_STR)) -# else -# define VL_TIME_PRECISION (-12) ///< Timescale default units if not in Verilog - picoseconds -# endif -#endif -#ifndef VL_TIME_UNIT -# ifdef VL_TIME_UNIT_STR -# define VL_TIME_UNIT VL_TIME_STR_CONVERT(VL_STRINGIFY(VL_TIME_PRECISION_STR)) -# else -# define VL_TIME_UNIT (-12) ///< Timescale default units if not in Verilog - picoseconds -# endif -#endif - -#if defined(SYSTEMC_VERSION) -/// Return current simulation time -// Already defined: extern sc_time sc_time_stamp(); -inline vluint64_t vl_time_stamp64() { return sc_time_stamp().value(); } -#else // Non-SystemC -# if !defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY) -# ifdef VL_TIME_STAMP64 -// vl_time_stamp64() may be optionally defined by the user to return time. -// On MSVC++ weak symbols are not supported so must be declared, or define -// VL_TIME_CONTEXT. -extern vluint64_t vl_time_stamp64() VL_ATTR_WEAK; -# else -// sc_time_stamp() may be optionally defined by the user to return time. -// On MSVC++ weak symbols are not supported so must be declared, or define -// VL_TIME_CONTEXT. -extern double sc_time_stamp() VL_ATTR_WEAK; // Verilator 4.032 and newer -inline vluint64_t vl_time_stamp64() { - // clang9.0.1 requires & although we really do want the weak symbol value - return VL_LIKELY(&sc_time_stamp) ? static_cast(sc_time_stamp()) : 0; -} -# endif -# endif -#endif - -inline vluint64_t VerilatedContext::time() const VL_MT_SAFE { - // When using non-default context, fastest path is return time - if (VL_LIKELY(m_s.m_time)) return m_s.m_time; -#if defined(SYSTEMC_VERSION) || (!defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY)) - // Zero time could mean really at zero, or using callback - // clang9.0.1 requires & although we really do want the weak symbol value - if (VL_LIKELY(&vl_time_stamp64)) { // else is weak symbol that is not defined - return vl_time_stamp64(); - } -#endif - return 0; -} - -#define VL_TIME_Q() (Verilated::threadContextp()->time()) -#define VL_TIME_D() (static_cast(VL_TIME_Q())) - -// Time scaled from 1-per-precision into a module's time units ("Unit"-ed, not "United") -// Optimized assuming scale is always constant. -// Can't use multiply in Q flavor, as might lose precision -#define VL_TIME_UNITED_Q(scale) (VL_TIME_Q() / static_cast(scale)) -#define VL_TIME_UNITED_D(scale) (VL_TIME_D() / static_cast(scale)) - -// Return time precision as multiplier of time units -double vl_time_multiplier(int scale) VL_PURE; -// Return power of 10. e.g. 
returns 100 if n==2 -vluint64_t vl_time_pow10(int n) VL_PURE; - -#ifdef VL_DEBUG -/// Evaluate statement if Verilated::debug() enabled -# define VL_DEBUG_IF(stmt) \ - do { \ - if (VL_UNLIKELY(Verilated::debug())) {stmt} \ - } while (false) -#else -// We intentionally do not compile the stmt to improve compile speed -# define VL_DEBUG_IF(stmt) do {} while (false) -#endif - -// clang-format on - -//========================================================================= -// Functional macros/routines -// These all take the form -// VL_func_IW(bits, bits, op, op) -// VL_func_WW(bits, bits, out, op, op) -// The I/W indicates if it's a integer or wide for the output and each operand. -// The bits indicate the bit width of the output and each operand. -// If wide output, a temporary storage location is specified. - -//=================================================================== -// SETTING OPERATORS - -// Output clean -// EMIT_RULE: VL_CLEAN: oclean=clean; obits=lbits; -#define VL_CLEAN_II(obits, lbits, lhs) ((lhs)&VL_MASK_I(obits)) -#define VL_CLEAN_QQ(obits, lbits, lhs) ((lhs)&VL_MASK_Q(obits)) - -// EMIT_RULE: VL_ASSIGNCLEAN: oclean=clean; obits==lbits; -#define VL_ASSIGNCLEAN_W(obits, owp, lwp) VL_CLEAN_WW((obits), (obits), (owp), (lwp)) -static inline WDataOutP _vl_clean_inplace_w(int obits, WDataOutP owp) VL_MT_SAFE { - const int words = VL_WORDS_I(obits); - owp[words - 1] &= VL_MASK_E(obits); - return owp; -} -static inline WDataOutP VL_CLEAN_WW(int obits, int, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { - const int words = VL_WORDS_I(obits); - for (int i = 0; (i < (words - 1)); ++i) owp[i] = lwp[i]; - owp[words - 1] = lwp[words - 1] & VL_MASK_E(obits); - return owp; -} -static inline WDataOutP VL_ZERO_W(int obits, WDataOutP owp) VL_MT_SAFE { - const int words = VL_WORDS_I(obits); - for (int i = 0; i < words; ++i) owp[i] = 0; - return owp; -} -static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE { - const int words = VL_WORDS_I(obits); - for (int i = 0; i < (words - 1); ++i) owp[i] = ~VL_EUL(0); - owp[words - 1] = VL_MASK_E(obits); - return owp; -} - -// EMIT_RULE: VL_ASSIGN: oclean=rclean; obits==lbits; -// For now, we always have a clean rhs. -// Note: If a ASSIGN isn't clean, use VL_ASSIGNCLEAN instead to do the same thing. -static inline WDataOutP VL_ASSIGN_W(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { - const int words = VL_WORDS_I(obits); - for (int i = 0; i < words; ++i) owp[i] = lwp[i]; - return owp; -} - -// EMIT_RULE: VL_ASSIGNBIT: rclean=clean; -static inline void VL_ASSIGNBIT_II(int, int bit, CData& lhsr, IData rhs) VL_PURE { - lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); -} -static inline void VL_ASSIGNBIT_II(int, int bit, SData& lhsr, IData rhs) VL_PURE { - lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); -} -static inline void VL_ASSIGNBIT_II(int, int bit, IData& lhsr, IData rhs) VL_PURE { - lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); -} -static inline void VL_ASSIGNBIT_QI(int, int bit, QData& lhsr, QData rhs) VL_PURE { - lhsr = ((lhsr & ~(1ULL << VL_BITBIT_Q(bit))) | (static_cast(rhs) << VL_BITBIT_Q(bit))); -} -static inline void VL_ASSIGNBIT_WI(int, int bit, WDataOutP owp, IData rhs) VL_MT_SAFE { - EData orig = owp[VL_BITWORD_E(bit)]; - owp[VL_BITWORD_E(bit)] = ((orig & ~(VL_EUL(1) << VL_BITBIT_E(bit))) - | (static_cast(rhs) << VL_BITBIT_E(bit))); -} -// Alternative form that is an instruction faster when rhs is constant one. 
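// Illustrative sketch (added for clarity, not from verilated.h): a bit-select
// assignment such as "sig[3] = val;" roughly corresponds to the general forms
// above, while a constant-one right-hand side such as "sig[3] = 1'b1;" can use
// the _*O forms below, which skip the read-mask-merge of the old bit. The
// helper name is hypothetical.
static inline void example_assignbit(IData& sigr, IData val) {
    VL_ASSIGNBIT_II(32, 3, sigr, val & 1);  // General form: insert (clean) val into bit 3
    VL_ASSIGNBIT_IO(32, 3, sigr, 1);        // Constant-one form: just OR in bit 3
}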
-static inline void VL_ASSIGNBIT_IO(int, int bit, CData& lhsr, IData) VL_PURE { - lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); -} -static inline void VL_ASSIGNBIT_IO(int, int bit, SData& lhsr, IData) VL_PURE { - lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); -} -static inline void VL_ASSIGNBIT_IO(int, int bit, IData& lhsr, IData) VL_PURE { - lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); -} -static inline void VL_ASSIGNBIT_QO(int, int bit, QData& lhsr, IData) VL_PURE { - lhsr = (lhsr | (1ULL << VL_BITBIT_Q(bit))); -} -static inline void VL_ASSIGNBIT_WO(int, int bit, WDataOutP owp, IData) VL_MT_SAFE { - const EData orig = owp[VL_BITWORD_E(bit)]; - owp[VL_BITWORD_E(bit)] = (orig | (VL_EUL(1) << VL_BITBIT_E(bit))); -} - -//=================================================================== -// SYSTEMC OPERATORS -// Copying verilog format to systemc integers and bit vectors. -// Get a SystemC variable - -#define VL_ASSIGN_ISI(obits, vvar, svar) \ - { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read()); } -#define VL_ASSIGN_QSQ(obits, vvar, svar) \ - { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read()); } - -#define VL_ASSIGN_ISW(obits, od, svar) \ - { (od) = ((svar).read().get_word(0)) & VL_MASK_I(obits); } -#define VL_ASSIGN_QSW(obits, od, svar) \ - { \ - (od) = ((static_cast((svar).read().get_word(1))) << VL_IDATASIZE \ - | (svar).read().get_word(0)) \ - & VL_MASK_Q(obits); \ - } -#define VL_ASSIGN_WSW(obits, owp, svar) \ - { \ - const int words = VL_WORDS_I(obits); \ - for (int i = 0; i < words; ++i) (owp)[i] = (svar).read().get_word(i); \ - (owp)[words - 1] &= VL_MASK_E(obits); \ - } - -#define VL_ASSIGN_ISU(obits, vvar, svar) \ - { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read().to_uint()); } -#define VL_ASSIGN_QSU(obits, vvar, svar) \ - { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read().to_uint64()); } -#define VL_ASSIGN_WSB(obits, owp, svar) \ - { \ - const int words = VL_WORDS_I(obits); \ - sc_biguint<(obits)> _butemp = (svar).read(); \ - for (int i = 0; i < words; ++i) { \ - int msb = ((i + 1) * VL_IDATASIZE) - 1; \ - msb = (msb >= (obits)) ? ((obits)-1) : msb; \ - (owp)[i] = _butemp.range(msb, i * VL_IDATASIZE).to_uint(); \ - } \ - (owp)[words - 1] &= VL_MASK_E(obits); \ - } - -// Copying verilog format from systemc integers and bit vectors. -// Set a SystemC variable - -#define VL_ASSIGN_SII(obits, svar, vvar) \ - { (svar).write(vvar); } -#define VL_ASSIGN_SQQ(obits, svar, vvar) \ - { (svar).write(vvar); } - -#define VL_ASSIGN_SWI(obits, svar, rd) \ - { \ - sc_bv<(obits)> _bvtemp; \ - _bvtemp.set_word(0, (rd)); \ - (svar).write(_bvtemp); \ - } -#define VL_ASSIGN_SWQ(obits, svar, rd) \ - { \ - sc_bv<(obits)> _bvtemp; \ - _bvtemp.set_word(0, static_cast(rd)); \ - _bvtemp.set_word(1, static_cast((rd) >> VL_IDATASIZE)); \ - (svar).write(_bvtemp); \ - } -#define VL_ASSIGN_SWW(obits, svar, rwp) \ - { \ - sc_bv<(obits)> _bvtemp; \ - for (int i = 0; i < VL_WORDS_I(obits); ++i) _bvtemp.set_word(i, (rwp)[i]); \ - (svar).write(_bvtemp); \ - } - -#define VL_ASSIGN_SUI(obits, svar, rd) \ - { (svar).write(rd); } -#define VL_ASSIGN_SUQ(obits, svar, rd) \ - { (svar).write(rd); } -#define VL_ASSIGN_SBI(obits, svar, rd) \ - { (svar).write(rd); } -#define VL_ASSIGN_SBQ(obits, svar, rd) \ - { (svar).write(rd); } -#define VL_ASSIGN_SBW(obits, svar, rwp) \ - { \ - sc_biguint<(obits)> _butemp; \ - for (int i = 0; i < VL_WORDS_I(obits); ++i) { \ - int msb = ((i + 1) * VL_IDATASIZE) - 1; \ - msb = (msb >= (obits)) ? 
((obits)-1) : msb; \ - _butemp.range(msb, i* VL_IDATASIZE) = (rwp)[i]; \ - } \ - (svar).write(_butemp); \ - } - -//=================================================================== -// Extending sizes - -// CAREFUL, we're width changing, so obits!=lbits - -// Right must be clean because otherwise size increase would pick up bad bits -// EMIT_RULE: VL_EXTEND: oclean=clean; rclean==clean; -#define VL_EXTEND_II(obits, lbits, lhs) ((lhs)) -#define VL_EXTEND_QI(obits, lbits, lhs) (static_cast(lhs)) -#define VL_EXTEND_QQ(obits, lbits, lhs) ((lhs)) - -static inline WDataOutP VL_EXTEND_WI(int obits, int, WDataOutP owp, IData ld) VL_MT_SAFE { - // Note for extracts that obits != lbits - owp[0] = ld; - for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - return owp; -} -static inline WDataOutP VL_EXTEND_WQ(int obits, int, WDataOutP owp, QData ld) VL_MT_SAFE { - VL_SET_WQ(owp, ld); - for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - return owp; -} -static inline WDataOutP VL_EXTEND_WW(int obits, int lbits, WDataOutP owp, - WDataInP const lwp) VL_MT_SAFE { - for (int i = 0; i < VL_WORDS_I(lbits); ++i) owp[i] = lwp[i]; - for (int i = VL_WORDS_I(lbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0; - return owp; -} - -// EMIT_RULE: VL_EXTENDS: oclean=*dirty*; obits=lbits; -// Sign extension; output dirty -static inline IData VL_EXTENDS_II(int, int lbits, IData lhs) VL_PURE { - return VL_EXTENDSIGN_I(lbits, lhs) | lhs; -} -static inline QData VL_EXTENDS_QI(int, int lbits, QData lhs /*Q_as_need_extended*/) VL_PURE { - return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; -} -static inline QData VL_EXTENDS_QQ(int, int lbits, QData lhs) VL_PURE { - return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; -} - -static inline WDataOutP VL_EXTENDS_WI(int obits, int lbits, WDataOutP owp, IData ld) VL_MT_SAFE { - const EData sign = VL_SIGNONES_E(lbits, static_cast(ld)); - owp[0] = ld | (sign & ~VL_MASK_E(lbits)); - for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = sign; - return owp; -} -static inline WDataOutP VL_EXTENDS_WQ(int obits, int lbits, WDataOutP owp, QData ld) VL_MT_SAFE { - VL_SET_WQ(owp, ld); - const EData sign = VL_SIGNONES_E(lbits, owp[1]); - owp[1] |= sign & ~VL_MASK_E(lbits); - for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = sign; - return owp; -} -static inline WDataOutP VL_EXTENDS_WW(int obits, int lbits, WDataOutP owp, - WDataInP const lwp) VL_MT_SAFE { - for (int i = 0; i < VL_WORDS_I(lbits) - 1; ++i) owp[i] = lwp[i]; - const int lmsw = VL_WORDS_I(lbits) - 1; - const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]); - owp[lmsw] = lwp[lmsw] | (sign & ~VL_MASK_E(lbits)); - for (int i = VL_WORDS_I(lbits); i < VL_WORDS_I(obits); ++i) owp[i] = sign; - return owp; -} - -//=================================================================== -// REDUCTION OPERATORS - -// EMIT_RULE: VL_REDAND: oclean=clean; lclean==clean; obits=1; -#define VL_REDAND_II(obits, lbits, lhs) ((lhs) == VL_MASK_I(lbits)) -#define VL_REDAND_IQ(obits, lbits, lhs) ((lhs) == VL_MASK_Q(lbits)) -static inline IData VL_REDAND_IW(int, int lbits, WDataInP const lwp) VL_MT_SAFE { - const int words = VL_WORDS_I(lbits); - EData combine = lwp[0]; - for (int i = 1; i < words - 1; ++i) combine &= lwp[i]; - combine &= ~VL_MASK_E(lbits) | lwp[words - 1]; - return ((~combine) == 0); -} - -// EMIT_RULE: VL_REDOR: oclean=clean; lclean==clean; obits=1; -#define VL_REDOR_I(lhs) ((lhs) != 0) -#define VL_REDOR_Q(lhs) ((lhs) != 0) -static inline IData VL_REDOR_W(int words, WDataInP const lwp) VL_MT_SAFE { - EData equal = 0; - for (int i = 
0; i < words; ++i) equal |= lwp[i]; - return (equal != 0); -} - -// EMIT_RULE: VL_REDXOR: oclean=dirty; obits=1; -static inline IData VL_REDXOR_2(IData r) VL_PURE { - // Experiments show VL_REDXOR_2 is faster than __builtin_parityl - r = (r ^ (r >> 1)); - return r; -} -static inline IData VL_REDXOR_4(IData r) VL_PURE { -#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) - return __builtin_parityl(r); -#else - r = (r ^ (r >> 1)); - r = (r ^ (r >> 2)); - return r; -#endif -} -static inline IData VL_REDXOR_8(IData r) VL_PURE { -#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) - return __builtin_parityl(r); -#else - r = (r ^ (r >> 1)); - r = (r ^ (r >> 2)); - r = (r ^ (r >> 4)); - return r; -#endif -} -static inline IData VL_REDXOR_16(IData r) VL_PURE { -#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) - return __builtin_parityl(r); -#else - r = (r ^ (r >> 1)); - r = (r ^ (r >> 2)); - r = (r ^ (r >> 4)); - r = (r ^ (r >> 8)); - return r; -#endif -} -static inline IData VL_REDXOR_32(IData r) VL_PURE { -#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) - return __builtin_parityl(r); -#else - r = (r ^ (r >> 1)); - r = (r ^ (r >> 2)); - r = (r ^ (r >> 4)); - r = (r ^ (r >> 8)); - r = (r ^ (r >> 16)); - return r; -#endif -} -static inline IData VL_REDXOR_64(QData r) VL_PURE { -#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) - return __builtin_parityll(r); -#else - r = (r ^ (r >> 1)); - r = (r ^ (r >> 2)); - r = (r ^ (r >> 4)); - r = (r ^ (r >> 8)); - r = (r ^ (r >> 16)); - r = (r ^ (r >> 32)); - return static_cast(r); -#endif -} -static inline IData VL_REDXOR_W(int words, WDataInP const lwp) VL_MT_SAFE { - EData r = lwp[0]; - for (int i = 1; i < words; ++i) r ^= lwp[i]; - return VL_REDXOR_32(r); -} - -// EMIT_RULE: VL_COUNTONES_II: oclean = false; lhs clean -static inline IData VL_COUNTONES_I(IData lhs) VL_PURE { - // This is faster than __builtin_popcountl - IData r = lhs - ((lhs >> 1) & 033333333333) - ((lhs >> 2) & 011111111111); - r = (r + (r >> 3)) & 030707070707; - r = (r + (r >> 6)); - r = (r + (r >> 12) + (r >> 24)) & 077; - return r; -} -static inline IData VL_COUNTONES_Q(QData lhs) VL_PURE { - return VL_COUNTONES_I(static_cast(lhs)) + VL_COUNTONES_I(static_cast(lhs >> 32)); -} -#define VL_COUNTONES_E VL_COUNTONES_I -static inline IData VL_COUNTONES_W(int words, WDataInP const lwp) VL_MT_SAFE { - EData r = 0; - for (int i = 0; i < words; ++i) r += VL_COUNTONES_E(lwp[i]); - return r; -} - -// EMIT_RULE: VL_COUNTBITS_II: oclean = false; lhs clean -static inline IData VL_COUNTBITS_I(int lbits, IData lhs, IData ctrl0, IData ctrl1, - IData ctrl2) VL_PURE { - int ctrlSum = (ctrl0 & 0x1) + (ctrl1 & 0x1) + (ctrl2 & 0x1); - if (ctrlSum == 3) { - return VL_COUNTONES_I(lhs); - } else if (ctrlSum == 0) { - IData mask = (lbits == 32) ? -1 : ((1 << lbits) - 1); - return VL_COUNTONES_I(~lhs & mask); - } else { - return (lbits == 32) ? 
32 : lbits; - } -} -static inline IData VL_COUNTBITS_Q(int lbits, QData lhs, IData ctrl0, IData ctrl1, - IData ctrl2) VL_PURE { - return VL_COUNTBITS_I(32, static_cast(lhs), ctrl0, ctrl1, ctrl2) - + VL_COUNTBITS_I(lbits - 32, static_cast(lhs >> 32), ctrl0, ctrl1, ctrl2); -} -#define VL_COUNTBITS_E VL_COUNTBITS_I -static inline IData VL_COUNTBITS_W(int lbits, int words, WDataInP const lwp, IData ctrl0, - IData ctrl1, IData ctrl2) VL_MT_SAFE { - EData r = 0; - IData wordLbits = 32; - for (int i = 0; i < words; ++i) { - if (i == words - 1) wordLbits = lbits % 32; - r += VL_COUNTBITS_E(wordLbits, lwp[i], ctrl0, ctrl1, ctrl2); - } - return r; -} - -static inline IData VL_ONEHOT_I(IData lhs) VL_PURE { - return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); -} -static inline IData VL_ONEHOT_Q(QData lhs) VL_PURE { - return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); -} -static inline IData VL_ONEHOT_W(int words, WDataInP const lwp) VL_MT_SAFE { - EData one = 0; - for (int i = 0; (i < words); ++i) { - if (lwp[i]) { - if (one) return 0; - one = 1; - if (lwp[i] & (lwp[i] - 1)) return 0; - } - } - return one; -} - -static inline IData VL_ONEHOT0_I(IData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } -static inline IData VL_ONEHOT0_Q(QData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } -static inline IData VL_ONEHOT0_W(int words, WDataInP const lwp) VL_MT_SAFE { - bool one = false; - for (int i = 0; (i < words); ++i) { - if (lwp[i]) { - if (one) return 0; - one = true; - if (lwp[i] & (lwp[i] - 1)) return 0; - } - } - return 1; -} - -static inline IData VL_CLOG2_I(IData lhs) VL_PURE { - // There are faster algorithms, or fls GCC4 builtins, but rarely used - if (VL_UNLIKELY(!lhs)) return 0; - --lhs; - int shifts = 0; - for (; lhs != 0; ++shifts) lhs = lhs >> 1; - return shifts; -} -static inline IData VL_CLOG2_Q(QData lhs) VL_PURE { - if (VL_UNLIKELY(!lhs)) return 0; - --lhs; - int shifts = 0; - for (; lhs != 0; ++shifts) lhs = lhs >> 1ULL; - return shifts; -} -static inline IData VL_CLOG2_W(int words, WDataInP const lwp) VL_MT_SAFE { - EData adjust = (VL_COUNTONES_W(words, lwp) == 1) ? 0 : 1; - for (int i = words - 1; i >= 0; --i) { - if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken - for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { - if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) { - return i * VL_EDATASIZE + bit + adjust; - } - } - // Can't get here - one bit must be set - } - } - return 0; -} - -static inline IData VL_MOSTSETBITP1_W(int words, WDataInP const lwp) VL_MT_SAFE { - // MSB set bit plus one; similar to FLS. 
0=value is zero - for (int i = words - 1; i >= 0; --i) { - if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken - for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { - if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) return i * VL_EDATASIZE + bit + 1; - } - // Can't get here - one bit must be set - } - } - return 0; -} - -//=================================================================== -// SIMPLE LOGICAL OPERATORS - -// EMIT_RULE: VL_AND: oclean=lclean||rclean; obits=lbits; lbits==rbits; -static inline WDataOutP VL_AND_W(int words, WDataOutP owp, WDataInP const lwp, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] & rwp[i]); - return owp; -} -// EMIT_RULE: VL_OR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; -static inline WDataOutP VL_OR_W(int words, WDataOutP owp, WDataInP const lwp, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] | rwp[i]); - return owp; -} -// EMIT_RULE: VL_CHANGEXOR: oclean=1; obits=32; lbits==rbits; -static inline IData VL_CHANGEXOR_W(int words, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { - IData od = 0; - for (int i = 0; (i < words); ++i) od |= (lwp[i] ^ rwp[i]); - return od; -} -// EMIT_RULE: VL_XOR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; -static inline WDataOutP VL_XOR_W(int words, WDataOutP owp, WDataInP const lwp, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] ^ rwp[i]); - return owp; -} -// EMIT_RULE: VL_NOT: oclean=dirty; obits=lbits; -static inline WDataOutP VL_NOT_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { - for (int i = 0; i < words; ++i) owp[i] = ~(lwp[i]); - return owp; -} - -//========================================================================= -// Logical comparisons - -// EMIT_RULE: VL_EQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; -// EMIT_RULE: VL_NEQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; -// EMIT_RULE: VL_LT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; -// EMIT_RULE: VL_GT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; -// EMIT_RULE: VL_GTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; -// EMIT_RULE: VL_LTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; -#define VL_NEQ_W(words, lwp, rwp) (!VL_EQ_W(words, lwp, rwp)) -#define VL_LT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) < 0) -#define VL_LTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) <= 0) -#define VL_GT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) > 0) -#define VL_GTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) >= 0) - -// Output clean, AND MUST BE CLEAN -static inline IData VL_EQ_W(int words, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { - EData nequal = 0; - for (int i = 0; (i < words); ++i) nequal |= (lwp[i] ^ rwp[i]); - return (nequal == 0); -} - -// Internal usage -static inline int _vl_cmp_w(int words, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { - for (int i = words - 1; i >= 0; --i) { - if (lwp[i] > rwp[i]) return 1; - if (lwp[i] < rwp[i]) return -1; - } - return 0; // == -} - -#define VL_LTS_IWW(obits, lbits, rbbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) < 0) -#define VL_LTES_IWW(obits, lbits, rbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) <= 0) -#define VL_GTS_IWW(obits, lbits, rbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) > 0) -#define VL_GTES_IWW(obits, lbits, rbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) >= 0) - -static inline IData 
VL_GTS_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { - // For lbits==32, this becomes just a single instruction, otherwise ~5. - // GCC 3.3.4 sign extension bugs on AMD64 architecture force us to use quad logic - const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc - const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc - return lhs_signed > rhs_signed; -} -static inline IData VL_GTS_IQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { - const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); - const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); - return lhs_signed > rhs_signed; -} - -static inline IData VL_GTES_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { - const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc - const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc - return lhs_signed >= rhs_signed; -} -static inline IData VL_GTES_IQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { - const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); - const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); - return lhs_signed >= rhs_signed; -} - -static inline IData VL_LTS_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { - const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc - const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc - return lhs_signed < rhs_signed; -} -static inline IData VL_LTS_IQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { - const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); - const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); - return lhs_signed < rhs_signed; -} - -static inline IData VL_LTES_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { - const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc - const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc - return lhs_signed <= rhs_signed; -} -static inline IData VL_LTES_IQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { - const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); - const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); - return lhs_signed <= rhs_signed; -} - -static inline int _vl_cmps_w(int lbits, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { - const int words = VL_WORDS_I(lbits); - int i = words - 1; - // We need to flip sense if negative comparison - const EData lsign = VL_SIGN_E(lbits, lwp[i]); - const EData rsign = VL_SIGN_E(lbits, rwp[i]); - if (!lsign && rsign) return 1; // + > - - if (lsign && !rsign) return -1; // - < + - for (; i >= 0; --i) { - if (lwp[i] > rwp[i]) return 1; - if (lwp[i] < rwp[i]) return -1; - } - return 0; // == -} - -//========================================================================= -// Math - -// Output NOT clean -static inline WDataOutP VL_NEGATE_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { - EData carry = 1; - for (int i = 0; i < words; ++i) { - owp[i] = ~lwp[i] + carry; - carry = (owp[i] < ~lwp[i]); - } - return owp; -} -static inline void VL_NEGATE_INPLACE_W(int words, WDataOutP owp_lwp) VL_MT_SAFE { - EData carry = 1; - for (int i = 0; i < words; ++i) { - EData word = ~owp_lwp[i] + carry; - carry = (word < ~owp_lwp[i]); - owp_lwp[i] = word; - } -} - -// EMIT_RULE: VL_MUL: oclean=dirty; lclean==clean; rclean==clean; -// EMIT_RULE: VL_DIV: oclean=dirty; lclean==clean; rclean==clean; -// EMIT_RULE: VL_MODDIV: oclean=dirty; lclean==clean; rclean==clean; -#define VL_DIV_III(lbits, lhs, 
rhs) (((rhs) == 0) ? 0 : (lhs) / (rhs)) -#define VL_DIV_QQQ(lbits, lhs, rhs) (((rhs) == 0) ? 0 : (lhs) / (rhs)) -#define VL_DIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 0)) -#define VL_MODDIV_III(lbits, lhs, rhs) (((rhs) == 0) ? 0 : (lhs) % (rhs)) -#define VL_MODDIV_QQQ(lbits, lhs, rhs) (((rhs) == 0) ? 0 : (lhs) % (rhs)) -#define VL_MODDIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 1)) - -static inline WDataOutP VL_ADD_W(int words, WDataOutP owp, WDataInP const lwp, - WDataInP const rwp) VL_MT_SAFE { - QData carry = 0; - for (int i = 0; i < words; ++i) { - carry = carry + static_cast(lwp[i]) + static_cast(rwp[i]); - owp[i] = (carry & 0xffffffffULL); - carry = (carry >> 32ULL) & 0xffffffffULL; - } - // Last output word is dirty - return owp; -} - -static inline WDataOutP VL_SUB_W(int words, WDataOutP owp, WDataInP const lwp, - WDataInP const rwp) VL_MT_SAFE { - QData carry = 0; - for (int i = 0; i < words; ++i) { - carry = (carry + static_cast(lwp[i]) - + static_cast(static_cast(~rwp[i]))); - if (i == 0) ++carry; // Negation of rwp - owp[i] = (carry & 0xffffffffULL); - carry = (carry >> 32ULL) & 0xffffffffULL; - } - // Last output word is dirty - return owp; -} - -static inline WDataOutP VL_MUL_W(int words, WDataOutP owp, WDataInP const lwp, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 0; i < words; ++i) owp[i] = 0; - for (int lword = 0; lword < words; ++lword) { - for (int rword = 0; rword < words; ++rword) { - QData mul = static_cast(lwp[lword]) * static_cast(rwp[rword]); - for (int qword = lword + rword; qword < words; ++qword) { - mul += static_cast(owp[qword]); - owp[qword] = (mul & 0xffffffffULL); - mul = (mul >> 32ULL) & 0xffffffffULL; - } - } - } - // Last output word is dirty - return owp; -} - -static inline IData VL_MULS_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { - const vlsint32_t lhs_signed = VL_EXTENDS_II(32, lbits, lhs); - const vlsint32_t rhs_signed = VL_EXTENDS_II(32, lbits, rhs); - return lhs_signed * rhs_signed; -} -static inline QData VL_MULS_QQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { - const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); - const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); - return lhs_signed * rhs_signed; -} - -static inline WDataOutP VL_MULS_WWW(int, int lbits, int, WDataOutP owp, WDataInP const lwp, - WDataInP const rwp) VL_MT_SAFE { - const int words = VL_WORDS_I(lbits); - // cppcheck-suppress variableScope - WData lwstore[VL_MULS_MAX_WORDS]; // Fixed size, as MSVC++ doesn't allow [words] here - // cppcheck-suppress variableScope - WData rwstore[VL_MULS_MAX_WORDS]; - WDataInP lwusp = lwp; - WDataInP rwusp = rwp; - EData lneg = VL_SIGN_E(lbits, lwp[words - 1]); - if (lneg) { // Negate lhs - lwusp = lwstore; - VL_NEGATE_W(words, lwstore, lwp); - lwstore[words - 1] &= VL_MASK_E(lbits); // Clean it - } - EData rneg = VL_SIGN_E(lbits, rwp[words - 1]); - if (rneg) { // Negate rhs - rwusp = rwstore; - VL_NEGATE_W(words, rwstore, rwp); - rwstore[words - 1] &= VL_MASK_E(lbits); // Clean it - } - VL_MUL_W(words, owp, lwusp, rwusp); - owp[words - 1] &= VL_MASK_E( - lbits); // Clean. 
Note it's ok for the multiply to overflow into the sign bit - if ((lneg ^ rneg) & 1) { // Negate output (not using NEGATE, as owp==lwp) - QData carry = 0; - for (int i = 0; i < words; ++i) { - carry = carry + static_cast(static_cast(~owp[i])); - if (i == 0) ++carry; // Negation of temp2 - owp[i] = (carry & 0xffffffffULL); - carry = (carry >> 32ULL) & 0xffffffffULL; - } - // Not needed: owp[words-1] |= 1< 0) power = power * power; - if (rhs & (1ULL << i)) out *= power; - } - return out; -} -static inline QData VL_POW_QQQ(int, int, int rbits, QData lhs, QData rhs) VL_PURE { - if (VL_UNLIKELY(rhs == 0)) return 1; - if (VL_UNLIKELY(lhs == 0)) return 0; - QData power = lhs; - QData out = 1ULL; - for (int i = 0; i < rbits; ++i) { - if (i > 0) power = power * power; - if (rhs & (1ULL << i)) out *= power; - } - return out; -} -WDataOutP VL_POW_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, - WDataInP const rwp); -WDataOutP VL_POW_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, QData rhs); -QData VL_POW_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp); - -#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ - VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) -#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ - VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) -#define VL_POWSS_IIW(obits, lbits, rbits, lhs, rwp, lsign, rsign) \ - VL_POWSS_QQW(obits, lbits, rbits, lhs, rwp, lsign, rsign) -#define VL_POWSS_QQI(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ - VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) -#define VL_POWSS_WWI(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) \ - VL_POWSS_WWQ(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) - -static inline IData VL_POWSS_III(int obits, int, int rbits, IData lhs, IData rhs, bool lsign, - bool rsign) VL_MT_SAFE { - if (VL_UNLIKELY(rhs == 0)) return 1; - if (rsign && VL_SIGN_I(rbits, rhs)) { - if (lhs == 0) { - return 0; // "X" - } else if (lhs == 1) { - return 1; - } else if (lsign && lhs == VL_MASK_I(obits)) { // -1 - if (rhs & 1) { - return VL_MASK_I(obits); // -1^odd=-1 - } else { - return 1; // -1^even=1 - } - } - return 0; - } - return VL_POW_III(obits, rbits, rbits, lhs, rhs); -} -static inline QData VL_POWSS_QQQ(int obits, int, int rbits, QData lhs, QData rhs, bool lsign, - bool rsign) VL_MT_SAFE { - if (VL_UNLIKELY(rhs == 0)) return 1; - if (rsign && VL_SIGN_Q(rbits, rhs)) { - if (lhs == 0) { - return 0; // "X" - } else if (lhs == 1) { - return 1; - } else if (lsign && lhs == VL_MASK_Q(obits)) { // -1 - if (rhs & 1) { - return VL_MASK_Q(obits); // -1^odd=-1 - } else { - return 1; // -1^even=1 - } - } - return 0; - } - return VL_POW_QQQ(obits, rbits, rbits, lhs, rhs); -} -WDataOutP VL_POWSS_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, - WDataInP const rwp, bool lsign, bool rsign); -WDataOutP VL_POWSS_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, QData rhs, - bool lsign, bool rsign); -QData VL_POWSS_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp, bool lsign, - bool rsign); - -//=================================================================== -// Concat/replication - -// INTERNAL: Stuff LHS bit 0++ into OUTPUT at specified offset -// ld may be "dirty", output is clean -static inline void _vl_insert_II(int, CData& lhsr, IData ld, int hbit, int lbit, - int rbits) VL_PURE { - const IData cleanmask = VL_MASK_I(rbits); - const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; - lhsr 
= (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); -} -static inline void _vl_insert_II(int, SData& lhsr, IData ld, int hbit, int lbit, - int rbits) VL_PURE { - const IData cleanmask = VL_MASK_I(rbits); - const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; - lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); -} -static inline void _vl_insert_II(int, IData& lhsr, IData ld, int hbit, int lbit, - int rbits) VL_PURE { - const IData cleanmask = VL_MASK_I(rbits); - const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; - lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); -} -static inline void _vl_insert_QQ(int, QData& lhsr, QData ld, int hbit, int lbit, - int rbits) VL_PURE { - const QData cleanmask = VL_MASK_Q(rbits); - const QData insmask = (VL_MASK_Q(hbit - lbit + 1)) << lbit; - lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); -} -static inline void _vl_insert_WI(int, WDataOutP owp, IData ld, int hbit, int lbit, - int rbits = 0) VL_MT_SAFE { - const int hoffset = VL_BITBIT_E(hbit); - const int loffset = VL_BITBIT_E(lbit); - const int roffset = VL_BITBIT_E(rbits); - const int hword = VL_BITWORD_E(hbit); - const int lword = VL_BITWORD_E(lbit); - const int rword = VL_BITWORD_E(rbits); - const EData cleanmask = hword == rword ? VL_MASK_E(roffset) : VL_MASK_E(0); - - if (hoffset == VL_SIZEBITS_E && loffset == 0) { - // Fast and common case, word based insertion - owp[VL_BITWORD_E(lbit)] = ld & cleanmask; - } else { - const EData lde = static_cast(ld); - if (hword == lword) { // know < EData bits because above checks it - // Assignment is contained within one word of destination - const EData insmask = (VL_MASK_E(hoffset - loffset + 1)) << loffset; - owp[lword] = (owp[lword] & ~insmask) | ((lde << loffset) & (insmask & cleanmask)); - } else { - // Assignment crosses a word boundary in destination - const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; - const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; - const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword - owp[lword] = (owp[lword] & ~linsmask) | ((lde << loffset) & linsmask); - owp[hword] - = (owp[hword] & ~hinsmask) | ((lde >> nbitsonright) & (hinsmask & cleanmask)); - } - } -} - -// INTERNAL: Stuff large LHS bit 0++ into OUTPUT at specified offset -// lwp may be "dirty" -static inline void _vl_insert_WW(int, WDataOutP owp, WDataInP const lwp, int hbit, int lbit, - int rbits = 0) VL_MT_SAFE { - const int hoffset = VL_BITBIT_E(hbit); - const int loffset = VL_BITBIT_E(lbit); - const int roffset = VL_BITBIT_E(rbits); - const int lword = VL_BITWORD_E(lbit); - const int hword = VL_BITWORD_E(hbit); - const int rword = VL_BITWORD_E(rbits); - const int words = VL_WORDS_I(hbit - lbit + 1); - // Cleaning mask, only applied to top word of the assignment. Is a no-op - // if we don't assign to the top word of the destination. - const EData cleanmask = hword == rword ? 
VL_MASK_E(roffset) : VL_MASK_E(0); - - if (hoffset == VL_SIZEBITS_E && loffset == 0) { - // Fast and common case, word based insertion - for (int i = 0; i < (words - 1); ++i) owp[lword + i] = lwp[i]; - owp[hword] = lwp[words - 1] & cleanmask; - } else if (loffset == 0) { - // Non-32bit, but nicely aligned, so stuff all but the last word - for (int i = 0; i < (words - 1); ++i) owp[lword + i] = lwp[i]; - // Know it's not a full word as above fast case handled it - const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)); - owp[hword] = (owp[hword] & ~hinsmask) | (lwp[words - 1] & (hinsmask & cleanmask)); - } else { - const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; - const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; - const int nbitsonright - = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) - // Middle words - for (int i = 0; i < words; ++i) { - { // Lower word - const int oword = lword + i; - const EData d = lwp[i] << loffset; - const EData od = (owp[oword] & ~linsmask) | (d & linsmask); - if (oword == hword) { - owp[oword] = (owp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); - } else { - owp[oword] = od; - } - } - { // Upper word - const int oword = lword + i + 1; - if (oword <= hword) { - const EData d = lwp[i] >> nbitsonright; - const EData od = (d & ~linsmask) | (owp[oword] & linsmask); - if (oword == hword) { - owp[oword] = (owp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); - } else { - owp[oword] = od; - } - } - } - } - } -} - -static inline void _vl_insert_WQ(int obits, WDataOutP owp, QData ld, int hbit, int lbit, - int rbits = 0) VL_MT_SAFE { - WData lwp[VL_WQ_WORDS_E]; - VL_SET_WQ(lwp, ld); - _vl_insert_WW(obits, owp, lwp, hbit, lbit, rbits); -} - -// EMIT_RULE: VL_REPLICATE: oclean=clean>width32, dirty<=width32; lclean=clean; rclean==clean; -// RHS MUST BE CLEAN CONSTANT. -#define VL_REPLICATE_IOI(obits, lbits, rbits, ld, rep) (-(ld)) // Iff lbits==1 -#define VL_REPLICATE_QOI(obits, lbits, rbits, ld, rep) (-(static_cast(ld))) // Iff lbits==1 - -static inline IData VL_REPLICATE_III(int, int lbits, int, IData ld, IData rep) VL_PURE { - IData returndata = ld; - for (unsigned i = 1; i < rep; ++i) { - returndata = returndata << lbits; - returndata |= ld; - } - return returndata; -} -static inline QData VL_REPLICATE_QII(int, int lbits, int, IData ld, IData rep) VL_PURE { - QData returndata = ld; - for (unsigned i = 1; i < rep; ++i) { - returndata = returndata << lbits; - returndata |= static_cast(ld); - } - return returndata; -} -static inline WDataOutP VL_REPLICATE_WII(int obits, int lbits, int, WDataOutP owp, IData ld, - IData rep) VL_MT_SAFE { - owp[0] = ld; - for (unsigned i = 1; i < rep; ++i) { - _vl_insert_WI(obits, owp, ld, i * lbits + lbits - 1, i * lbits); - } - return owp; -} -static inline WDataOutP VL_REPLICATE_WQI(int obits, int lbits, int, WDataOutP owp, QData ld, - IData rep) VL_MT_SAFE { - VL_SET_WQ(owp, ld); - for (unsigned i = 1; i < rep; ++i) { - _vl_insert_WQ(obits, owp, ld, i * lbits + lbits - 1, i * lbits); - } - return owp; -} -static inline WDataOutP VL_REPLICATE_WWI(int obits, int lbits, int, WDataOutP owp, - WDataInP const lwp, IData rep) VL_MT_SAFE { - for (int i = 0; i < VL_WORDS_I(lbits); ++i) owp[i] = lwp[i]; - for (unsigned i = 1; i < rep; ++i) { - _vl_insert_WW(obits, owp, lwp, i * lbits + lbits - 1, i * lbits); - } - return owp; -} - -// Left stream operator. Output will always be clean. LHS and RHS must be clean. -// Special "fast" versions for slice sizes that are a power of 2. 
These use -// shifts and masks to execute faster than the slower for-loop approach where a -// subset of bits is copied in during each iteration. -static inline IData VL_STREAML_FAST_III(int, int lbits, int, IData ld, IData rd_log2) VL_PURE { - // Pre-shift bits in most-significant slice: - // - // If lbits is not a multiple of the slice size (i.e., lbits % rd != 0), - // then we end up with a "gap" in our reversed result. For example, if we - // have a 5-bit Verlilog signal (lbits=5) in an 8-bit C data type: - // - // ld = ---43210 - // - // (where numbers are the Verilog signal bit numbers and '-' is an unused bit). - // Executing the switch statement below with a slice size of two (rd=2, - // rd_log2=1) produces: - // - // ret = 1032-400 - // - // Pre-shifting the bits in the most-significant slice allows us to avoid - // this gap in the shuffled data: - // - // ld_adjusted = --4-3210 - // ret = 10324--- - IData ret = ld; - if (rd_log2) { - const vluint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); // max multiple of rd <= lbits - const vluint32_t lbitsRem = lbits - lbitsFloor; // number of bits in most-sig slice (MSS) - const IData msbMask = VL_MASK_I(lbitsRem) << lbitsFloor; // mask to sel only bits in MSS - ret = (ret & ~msbMask) | ((ret & msbMask) << ((VL_UL(1) << rd_log2) - lbitsRem)); - } - switch (rd_log2) { - case 0: ret = ((ret >> 1) & VL_UL(0x55555555)) | ((ret & VL_UL(0x55555555)) << 1); // FALLTHRU - case 1: ret = ((ret >> 2) & VL_UL(0x33333333)) | ((ret & VL_UL(0x33333333)) << 2); // FALLTHRU - case 2: ret = ((ret >> 4) & VL_UL(0x0f0f0f0f)) | ((ret & VL_UL(0x0f0f0f0f)) << 4); // FALLTHRU - case 3: ret = ((ret >> 8) & VL_UL(0x00ff00ff)) | ((ret & VL_UL(0x00ff00ff)) << 8); // FALLTHRU - case 4: ret = ((ret >> 16) | (ret << 16)); // FALLTHRU - default:; - } - return ret >> (VL_IDATASIZE - lbits); -} - -static inline QData VL_STREAML_FAST_QQI(int, int lbits, int, QData ld, IData rd_log2) VL_PURE { - // Pre-shift bits in most-significant slice (see comment in VL_STREAML_FAST_III) - QData ret = ld; - if (rd_log2) { - const vluint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); - const vluint32_t lbitsRem = lbits - lbitsFloor; - const QData msbMask = VL_MASK_Q(lbitsRem) << lbitsFloor; - ret = (ret & ~msbMask) | ((ret & msbMask) << ((1ULL << rd_log2) - lbitsRem)); - } - switch (rd_log2) { - case 0: - ret = (((ret >> 1) & 0x5555555555555555ULL) - | ((ret & 0x5555555555555555ULL) << 1)); // FALLTHRU - case 1: - ret = (((ret >> 2) & 0x3333333333333333ULL) - | ((ret & 0x3333333333333333ULL) << 2)); // FALLTHRU - case 2: - ret = (((ret >> 4) & 0x0f0f0f0f0f0f0f0fULL) - | ((ret & 0x0f0f0f0f0f0f0f0fULL) << 4)); // FALLTHRU - case 3: - ret = (((ret >> 8) & 0x00ff00ff00ff00ffULL) - | ((ret & 0x00ff00ff00ff00ffULL) << 8)); // FALLTHRU - case 4: - ret = (((ret >> 16) & 0x0000ffff0000ffffULL) - | ((ret & 0x0000ffff0000ffffULL) << 16)); // FALLTHRU - case 5: ret = ((ret >> 32) | (ret << 32)); // FALLTHRU - default:; - } - return ret >> (VL_QUADSIZE - lbits); -} - -// Regular "slow" streaming operators -static inline IData VL_STREAML_III(int, int lbits, int, IData ld, IData rd) VL_PURE { - IData ret = 0; - // Slice size should never exceed the lhs width - const IData mask = VL_MASK_I(rd); - for (int istart = 0; istart < lbits; istart += rd) { - int ostart = lbits - rd - istart; - ostart = ostart > 0 ? 
ostart : 0; - ret |= ((ld >> istart) & mask) << ostart; - } - return ret; -} - -static inline QData VL_STREAML_QQI(int, int lbits, int, QData ld, IData rd) VL_PURE { - QData ret = 0; - // Slice size should never exceed the lhs width - const QData mask = VL_MASK_Q(rd); - for (int istart = 0; istart < lbits; istart += rd) { - int ostart = lbits - rd - istart; - ostart = ostart > 0 ? ostart : 0; - ret |= ((ld >> istart) & mask) << ostart; - } - return ret; -} - -static inline WDataOutP VL_STREAML_WWI(int, int lbits, int, WDataOutP owp, WDataInP const lwp, - IData rd) VL_MT_SAFE { - VL_ZERO_W(lbits, owp); - // Slice size should never exceed the lhs width - const int ssize = (rd < static_cast(lbits)) ? rd : (static_cast(lbits)); - for (int istart = 0; istart < lbits; istart += rd) { - int ostart = lbits - rd - istart; - ostart = ostart > 0 ? ostart : 0; - for (int sbit = 0; sbit < ssize && sbit < lbits - istart; ++sbit) { - // Extract a single bit from lwp and shift it to the correct - // location for owp. - EData bit = (VL_BITRSHIFT_W(lwp, (istart + sbit)) & 1) << VL_BITBIT_E(ostart + sbit); - owp[VL_BITWORD_E(ostart + sbit)] |= bit; - } - } - return owp; -} - -// Because concats are common and wide, it's valuable to always have a clean output. -// Thus we specify inputs must be clean, so we don't need to clean the output. -// Note the bit shifts are always constants, so the adds in these constify out. -// Casts required, as args may be 8 bit entities, and need to shift to appropriate output size -#define VL_CONCAT_III(obits, lbits, rbits, ld, rd) \ - (static_cast(ld) << (rbits) | static_cast(rd)) -#define VL_CONCAT_QII(obits, lbits, rbits, ld, rd) \ - (static_cast(ld) << (rbits) | static_cast(rd)) -#define VL_CONCAT_QIQ(obits, lbits, rbits, ld, rd) \ - (static_cast(ld) << (rbits) | static_cast(rd)) -#define VL_CONCAT_QQI(obits, lbits, rbits, ld, rd) \ - (static_cast(ld) << (rbits) | static_cast(rd)) -#define VL_CONCAT_QQQ(obits, lbits, rbits, ld, rd) \ - (static_cast(ld) << (rbits) | static_cast(rd)) - -static inline WDataOutP VL_CONCAT_WII(int obits, int lbits, int rbits, WDataOutP owp, IData ld, - IData rd) VL_MT_SAFE { - owp[0] = rd; - for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WI(obits, owp, ld, rbits + lbits - 1, rbits); - return owp; -} -static inline WDataOutP VL_CONCAT_WWI(int obits, int lbits, int rbits, WDataOutP owp, - WDataInP const lwp, IData rd) VL_MT_SAFE { - owp[0] = rd; - for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WW(obits, owp, lwp, rbits + lbits - 1, rbits); - return owp; -} -static inline WDataOutP VL_CONCAT_WIW(int obits, int lbits, int rbits, WDataOutP owp, IData ld, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 0; i < VL_WORDS_I(rbits); ++i) owp[i] = rwp[i]; - for (int i = VL_WORDS_I(rbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WI(obits, owp, ld, rbits + lbits - 1, rbits); - return owp; -} -static inline WDataOutP VL_CONCAT_WIQ(int obits, int lbits, int rbits, WDataOutP owp, IData ld, - QData rd) VL_MT_SAFE { - VL_SET_WQ(owp, rd); - for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WI(obits, owp, ld, rbits + lbits - 1, rbits); - return owp; -} -static inline WDataOutP VL_CONCAT_WQI(int obits, int lbits, int rbits, WDataOutP owp, QData ld, - IData rd) VL_MT_SAFE { - owp[0] = rd; - for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WQ(obits, owp, ld, rbits + lbits - 1, rbits); - return owp; -} -static inline WDataOutP VL_CONCAT_WQQ(int obits, int lbits, 
int rbits, WDataOutP owp, QData ld, - QData rd) VL_MT_SAFE { - VL_SET_WQ(owp, rd); - for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WQ(obits, owp, ld, rbits + lbits - 1, rbits); - return owp; -} -static inline WDataOutP VL_CONCAT_WWQ(int obits, int lbits, int rbits, WDataOutP owp, - WDataInP const lwp, QData rd) VL_MT_SAFE { - VL_SET_WQ(owp, rd); - for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WW(obits, owp, lwp, rbits + lbits - 1, rbits); - return owp; -} -static inline WDataOutP VL_CONCAT_WQW(int obits, int lbits, int rbits, WDataOutP owp, QData ld, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 0; i < VL_WORDS_I(rbits); ++i) owp[i] = rwp[i]; - for (int i = VL_WORDS_I(rbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WQ(obits, owp, ld, rbits + lbits - 1, rbits); - return owp; -} -static inline WDataOutP VL_CONCAT_WWW(int obits, int lbits, int rbits, WDataOutP owp, - WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { - for (int i = 0; i < VL_WORDS_I(rbits); ++i) owp[i] = rwp[i]; - for (int i = VL_WORDS_I(rbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WW(obits, owp, lwp, rbits + lbits - 1, rbits); - return owp; -} - -//=================================================================== -// Shifts - -// Static shift, used by internal functions -// The output is the same as the input - it overlaps! -static inline void _vl_shiftl_inplace_w(int obits, WDataOutP iowp, - IData rd /*1 or 4*/) VL_MT_SAFE { - const int words = VL_WORDS_I(obits); - const EData linsmask = VL_MASK_E(rd); - for (int i = words - 1; i >= 1; --i) { - iowp[i] - = ((iowp[i] << rd) & ~linsmask) | ((iowp[i - 1] >> (VL_EDATASIZE - rd)) & linsmask); - } - iowp[0] = ((iowp[0] << rd) & ~linsmask); - iowp[VL_WORDS_I(obits) - 1] &= VL_MASK_E(obits); -} - -// EMIT_RULE: VL_SHIFTL: oclean=lclean; rclean==clean; -// Important: Unlike most other funcs, the shift might well be a computed -// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) 
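As the note above says, the shift amount can be a runtime expression, so the wide routines below split it into a whole-word offset plus a residual bit offset and treat overshift as a special case. The following is a minimal standalone sketch of that decomposition on plain 32-bit words (illustrative names only, not the Verilated types; the real VL_SHIFTL_WWI additionally goes through _vl_insert_WW and masks the result to obits):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Shift 'in' left by 'shift' bits into 'out'; both hold 32-bit words with the
// least significant word first, mirroring the word order used by WData.
static void wide_shiftl(std::vector<uint32_t>& out, const std::vector<uint32_t>& in,
                        unsigned shift) {
    const unsigned word_shift = shift / 32;  // whole words the data moves up
    const unsigned bit_shift = shift % 32;   // remaining bit offset
    out.assign(in.size(), 0);
    if (word_shift >= in.size()) return;  // overshift: everything shifted out
    for (std::size_t i = in.size(); i-- > word_shift;) {
        uint32_t v = in[i - word_shift] << bit_shift;
        if (bit_shift != 0 && i - word_shift > 0)
            v |= in[i - word_shift - 1] >> (32 - bit_shift);  // bits carried up from the word below
        out[i] = v;
    }
}

int main() {
    const std::vector<uint32_t> in{0x80000001u, 0x00000000u};  // 64-bit value 0x0000000080000001
    std::vector<uint32_t> out;
    wide_shiftl(out, in, 1);  // expect 0x0000000100000002
    assert(out[0] == 0x00000002u && out[1] == 0x00000001u);
    return 0;
}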
-static inline WDataOutP VL_SHIFTL_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, - IData rd) VL_MT_SAFE { - const int word_shift = VL_BITWORD_E(rd); - const int bit_shift = VL_BITBIT_E(rd); - if (rd >= static_cast(obits)) { // rd may be huge with MSB set - for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - } else if (bit_shift == 0) { // Aligned word shift (<<0,<<32,<<64 etc) - for (int i = 0; i < word_shift; ++i) owp[i] = 0; - for (int i = word_shift; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i - word_shift]; - } else { - for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - _vl_insert_WW(obits, owp, lwp, obits - 1, rd); - } - return owp; -} -static inline WDataOutP VL_SHIFTL_WWW(int obits, int lbits, int rbits, WDataOutP owp, - WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { - for (int i = 1; i < VL_WORDS_I(rbits); ++i) { - if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more - return VL_ZERO_W(obits, owp); - } - } - return VL_SHIFTL_WWI(obits, lbits, 32, owp, lwp, rwp[0]); -} -static inline WDataOutP VL_SHIFTL_WWQ(int obits, int lbits, int rbits, WDataOutP owp, - WDataInP const lwp, QData rd) VL_MT_SAFE { - WData rwp[VL_WQ_WORDS_E]; - VL_SET_WQ(rwp, rd); - return VL_SHIFTL_WWW(obits, lbits, rbits, owp, lwp, rwp); -} -static inline IData VL_SHIFTL_IIW(int obits, int, int rbits, IData lhs, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 1; i < VL_WORDS_I(rbits); ++i) { - if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more - return 0; - } - } - return VL_CLEAN_II(obits, obits, lhs << rwp[0]); -} -static inline IData VL_SHIFTL_IIQ(int obits, int, int, IData lhs, QData rhs) VL_MT_SAFE { - if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; - return VL_CLEAN_II(obits, obits, lhs << rhs); -} -static inline QData VL_SHIFTL_QQW(int obits, int, int rbits, QData lhs, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 1; i < VL_WORDS_I(rbits); ++i) { - if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more - return 0; - } - } - // Above checks rwp[1]==0 so not needed in below shift - return VL_CLEAN_QQ(obits, obits, lhs << (static_cast(rwp[0]))); -} -static inline QData VL_SHIFTL_QQQ(int obits, int, int, QData lhs, QData rhs) VL_MT_SAFE { - if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; - return VL_CLEAN_QQ(obits, obits, lhs << rhs); -} - -// EMIT_RULE: VL_SHIFTR: oclean=lclean; rclean==clean; -// Important: Unlike most other funcs, the shift might well be a computed -// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) 
-static inline WDataOutP VL_SHIFTR_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, - IData rd) VL_MT_SAFE { - const int word_shift = VL_BITWORD_E(rd); // Maybe 0 - const int bit_shift = VL_BITBIT_E(rd); - if (rd >= static_cast(obits)) { // rd may be huge with MSB set - for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) - const int copy_words = (VL_WORDS_I(obits) - word_shift); - for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; - for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - } else { - const int loffset = rd & VL_SIZEBITS_E; - const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword (know - // loffset!=0) Middle words - const int words = VL_WORDS_I(obits - rd); - for (int i = 0; i < words; ++i) { - owp[i] = lwp[i + word_shift] >> loffset; - const int upperword = i + word_shift + 1; - if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; - } - for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - } - return owp; -} -static inline WDataOutP VL_SHIFTR_WWW(int obits, int lbits, int rbits, WDataOutP owp, - WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { - for (int i = 1; i < VL_WORDS_I(rbits); ++i) { - if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more - return VL_ZERO_W(obits, owp); - } - } - return VL_SHIFTR_WWI(obits, lbits, 32, owp, lwp, rwp[0]); -} -static inline WDataOutP VL_SHIFTR_WWQ(int obits, int lbits, int rbits, WDataOutP owp, - WDataInP const lwp, QData rd) VL_MT_SAFE { - WData rwp[VL_WQ_WORDS_E]; - VL_SET_WQ(rwp, rd); - return VL_SHIFTR_WWW(obits, lbits, rbits, owp, lwp, rwp); -} - -static inline IData VL_SHIFTR_IIW(int obits, int, int rbits, IData lhs, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 1; i < VL_WORDS_I(rbits); ++i) { - if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more - return 0; - } - } - return VL_CLEAN_II(obits, obits, lhs >> rwp[0]); -} -static inline QData VL_SHIFTR_QQW(int obits, int, int rbits, QData lhs, - WDataInP const rwp) VL_MT_SAFE { - for (int i = 1; i < VL_WORDS_I(rbits); ++i) { - if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more - return 0; - } - } - // Above checks rwp[1]==0 so not needed in below shift - return VL_CLEAN_QQ(obits, obits, lhs >> (static_cast(rwp[0]))); -} -static inline IData VL_SHIFTR_IIQ(int obits, int, int, IData lhs, QData rhs) VL_MT_SAFE { - if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; - return VL_CLEAN_QQ(obits, obits, lhs >> rhs); -} -static inline QData VL_SHIFTR_QQQ(int obits, int, int, QData lhs, QData rhs) VL_MT_SAFE { - if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; - return VL_CLEAN_QQ(obits, obits, lhs >> rhs); -} - -// EMIT_RULE: VL_SHIFTRS: oclean=false; lclean=clean, rclean==clean; -static inline IData VL_SHIFTRS_III(int obits, int lbits, int, IData lhs, IData rhs) VL_PURE { - // Note the C standard does not specify the >> operator as a arithmetic shift! - // IEEE says signed if output signed, but bit position from lbits; - // must use lbits for sign; lbits might != obits, - // an EXTEND(SHIFTRS(...)) can became a SHIFTRS(...) 
within same 32/64 bit word length - const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative - const IData signext = ~(VL_MASK_I(lbits) >> rhs); // One with bits where we've shifted "past" - return (lhs >> rhs) | (sign & VL_CLEAN_II(obits, obits, signext)); -} -static inline QData VL_SHIFTRS_QQI(int obits, int lbits, int, QData lhs, IData rhs) VL_PURE { - const QData sign = -(lhs >> (lbits - 1)); - const QData signext = ~(VL_MASK_Q(lbits) >> rhs); - return (lhs >> rhs) | (sign & VL_CLEAN_QQ(obits, obits, signext)); -} -static inline IData VL_SHIFTRS_IQI(int obits, int lbits, int rbits, QData lhs, IData rhs) VL_PURE { - return static_cast(VL_SHIFTRS_QQI(obits, lbits, rbits, lhs, rhs)); -} -static inline WDataOutP VL_SHIFTRS_WWI(int obits, int lbits, int, WDataOutP owp, - WDataInP const lwp, IData rd) VL_MT_SAFE { - const int word_shift = VL_BITWORD_E(rd); - const int bit_shift = VL_BITBIT_E(rd); - const int lmsw = VL_WORDS_I(obits) - 1; - const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]); - if (rd >= static_cast(obits)) { // Shifting past end, sign in all of lbits - for (int i = 0; i <= lmsw; ++i) owp[i] = sign; - owp[lmsw] &= VL_MASK_E(lbits); - } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) - const int copy_words = (VL_WORDS_I(obits) - word_shift); - for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; - if (copy_words >= 0) owp[copy_words - 1] |= ~VL_MASK_E(obits) & sign; - for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; - owp[lmsw] &= VL_MASK_E(lbits); - } else { - const int loffset = rd & VL_SIZEBITS_E; - int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) - // Middle words - const int words = VL_WORDS_I(obits - rd); - for (int i = 0; i < words; ++i) { - owp[i] = lwp[i + word_shift] >> loffset; - const int upperword = i + word_shift + 1; - if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; - } - if (words) owp[words - 1] |= sign & ~VL_MASK_E(obits - loffset); - for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; - owp[lmsw] &= VL_MASK_E(lbits); - } - return owp; -} -static inline WDataOutP VL_SHIFTRS_WWW(int obits, int lbits, int rbits, WDataOutP owp, - WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { - EData overshift = 0; // Huge shift 1>>32 or more - for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; - if (VL_UNLIKELY(overshift || rwp[0] >= obits)) { - const int lmsw = VL_WORDS_I(obits) - 1; - const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]); - for (int j = 0; j <= lmsw; ++j) owp[j] = sign; - owp[lmsw] &= VL_MASK_E(lbits); - return owp; - } - return VL_SHIFTRS_WWI(obits, lbits, 32, owp, lwp, rwp[0]); -} -static inline WDataOutP VL_SHIFTRS_WWQ(int obits, int lbits, int rbits, WDataOutP owp, - WDataInP const lwp, QData rd) VL_MT_SAFE { - WData rwp[VL_WQ_WORDS_E]; - VL_SET_WQ(rwp, rd); - return VL_SHIFTRS_WWW(obits, lbits, rbits, owp, lwp, rwp); -} -static inline IData VL_SHIFTRS_IIW(int obits, int lbits, int rbits, IData lhs, - WDataInP const rwp) VL_MT_SAFE { - EData overshift = 0; // Huge shift 1>>32 or more - for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; - if (VL_UNLIKELY(overshift || rwp[0] >= obits)) { - const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative - return VL_CLEAN_II(obits, obits, sign); - } - return VL_SHIFTRS_III(obits, lbits, 32, lhs, rwp[0]); -} -static inline QData VL_SHIFTRS_QQW(int obits, int lbits, int rbits, QData lhs, - WDataInP const rwp) VL_MT_SAFE { - EData 
overshift = 0; // Huge shift 1>>32 or more - for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; - if (VL_UNLIKELY(overshift || rwp[0] >= obits)) { - const QData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative - return VL_CLEAN_QQ(obits, obits, sign); - } - return VL_SHIFTRS_QQI(obits, lbits, 32, lhs, rwp[0]); -} -static inline IData VL_SHIFTRS_IIQ(int obits, int lbits, int rbits, IData lhs, - QData rhs) VL_MT_SAFE { - WData rwp[VL_WQ_WORDS_E]; - VL_SET_WQ(rwp, rhs); - return VL_SHIFTRS_IIW(obits, lbits, rbits, lhs, rwp); -} -static inline QData VL_SHIFTRS_QQQ(int obits, int lbits, int rbits, QData lhs, QData rhs) VL_PURE { - WData rwp[VL_WQ_WORDS_E]; - VL_SET_WQ(rwp, rhs); - return VL_SHIFTRS_QQW(obits, lbits, rbits, lhs, rwp); -} - -//=================================================================== -// Bit selection - -// EMIT_RULE: VL_BITSEL: oclean=dirty; rclean==clean; -#define VL_BITSEL_IIII(obits, lbits, rbits, zbits, lhs, rhs) ((lhs) >> (rhs)) -#define VL_BITSEL_QIII(obits, lbits, rbits, zbits, lhs, rhs) ((lhs) >> (rhs)) -#define VL_BITSEL_QQII(obits, lbits, rbits, zbits, lhs, rhs) ((lhs) >> (rhs)) -#define VL_BITSEL_IQII(obits, lbits, rbits, zbits, lhs, rhs) (static_cast((lhs) >> (rhs))) - -static inline IData VL_BITSEL_IWII(int, int lbits, int, int, WDataInP const lwp, - IData rd) VL_MT_SAFE { - int word = VL_BITWORD_E(rd); - if (VL_UNLIKELY(rd > static_cast(lbits))) { - return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. - // We return all 1's as that's more likely to find bugs (?) than 0's. - } else { - return (lwp[word] >> VL_BITBIT_E(rd)); - } -} - -// EMIT_RULE: VL_RANGE: oclean=lclean; out=dirty -// & MUST BE CLEAN (currently constant) -#define VL_SEL_IIII(obits, lbits, rbits, tbits, lhs, lsb, width) ((lhs) >> (lsb)) -#define VL_SEL_QQII(obits, lbits, rbits, tbits, lhs, lsb, width) ((lhs) >> (lsb)) -#define VL_SEL_IQII(obits, lbits, rbits, tbits, lhs, lsb, width) \ - (static_cast((lhs) >> (lsb))) - -static inline IData VL_SEL_IWII(int, int lbits, int, int, WDataInP const lwp, IData lsb, - IData width) VL_MT_SAFE { - int msb = lsb + width - 1; - if (VL_UNLIKELY(msb >= lbits)) { - return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. - } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { - return VL_BITRSHIFT_W(lwp, lsb); - } else { - // 32 bit extraction may span two words - int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); // bits that come from low word - return ((lwp[VL_BITWORD_E(msb)] << nbitsfromlow) | VL_BITRSHIFT_W(lwp, lsb)); - } -} - -static inline QData VL_SEL_QWII(int, int lbits, int, int, WDataInP const lwp, IData lsb, - IData width) VL_MT_SAFE { - const int msb = lsb + width - 1; - if (VL_UNLIKELY(msb > lbits)) { - return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. 
- } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { - return VL_BITRSHIFT_W(lwp, lsb); - } else if (VL_BITWORD_E(msb) == 1 + VL_BITWORD_E(static_cast(lsb))) { - const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); - const QData hi = (lwp[VL_BITWORD_E(msb)]); - const QData lo = VL_BITRSHIFT_W(lwp, lsb); - return (hi << nbitsfromlow) | lo; - } else { - // 64 bit extraction may span three words - int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); - const QData hi = (lwp[VL_BITWORD_E(msb)]); - const QData mid = (lwp[VL_BITWORD_E(lsb) + 1]); - const QData lo = VL_BITRSHIFT_W(lwp, lsb); - return (hi << (nbitsfromlow + VL_EDATASIZE)) | (mid << nbitsfromlow) | lo; - } -} - -static inline WDataOutP VL_SEL_WWII(int obits, int lbits, int, int, WDataOutP owp, - WDataInP const lwp, IData lsb, IData width) VL_MT_SAFE { - const int msb = lsb + width - 1; - const int word_shift = VL_BITWORD_E(lsb); - if (VL_UNLIKELY(msb > lbits)) { // Outside bounds, - for (int i = 0; i < VL_WORDS_I(obits) - 1; ++i) owp[i] = ~0; - owp[VL_WORDS_I(obits) - 1] = VL_MASK_E(obits); - } else if (VL_BITBIT_E(lsb) == 0) { - // Just a word extract - for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i + word_shift]; - } else { - // Not a _vl_insert because the bits come from any bit number and goto bit 0 - const int loffset = lsb & VL_SIZEBITS_E; - const int nbitsfromlow = VL_EDATASIZE - loffset; // bits that end up in lword (know - // loffset!=0) Middle words - const int words = VL_WORDS_I(msb - lsb + 1); - for (int i = 0; i < words; ++i) { - owp[i] = lwp[i + word_shift] >> loffset; - const int upperword = i + word_shift + 1; - if (upperword <= static_cast(VL_BITWORD_E(msb))) { - owp[i] |= lwp[upperword] << nbitsfromlow; - } - } - for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; - } - return owp; -} - -//====================================================================== -// Math needing insert/select - -// Return QData from double (numeric) -// EMIT_RULE: VL_RTOIROUND_Q_D: oclean=dirty; lclean==clean/real -static inline QData VL_RTOIROUND_Q_D(int, double lhs) VL_PURE { - // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa - // This does not need to support subnormals as they are sub-integral - lhs = VL_ROUND(lhs); - if (lhs == 0.0) return 0; - const QData q = VL_CVT_Q_D(lhs); - const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; - const vluint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); - vluint64_t out = 0; - if (lsb < 0) { - out = mantissa >> -lsb; - } else if (lsb < 64) { - out = mantissa << lsb; - } - if (lhs < 0) out = -out; - return out; -} -static inline IData VL_RTOIROUND_I_D(int bits, double lhs) VL_PURE { - return static_cast(VL_RTOIROUND_Q_D(bits, lhs)); -} -static inline WDataOutP VL_RTOIROUND_W_D(int obits, WDataOutP owp, double lhs) VL_PURE { - // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa - // This does not need to support subnormals as they are sub-integral - lhs = VL_ROUND(lhs); - VL_ZERO_W(obits, owp); - if (lhs == 0.0) return owp; - const QData q = VL_CVT_Q_D(lhs); - const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; - const vluint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); - if (lsb < 0) { - VL_SET_WQ(owp, mantissa >> -lsb); - } else if (lsb < obits) { - _vl_insert_WQ(obits, owp, mantissa, lsb + 52, lsb); - } - if (lhs < 0) VL_NEGATE_INPLACE_W(VL_WORDS_I(obits), owp); - return owp; -} - -//====================================================================== -// Range assignments - -// 
EMIT_RULE: VL_ASSIGNRANGE: rclean=dirty; -static inline void VL_ASSIGNSEL_IIII(int rbits, int obits, int lsb, CData& lhsr, - IData rhs) VL_PURE { - _vl_insert_II(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); -} -static inline void VL_ASSIGNSEL_IIII(int rbits, int obits, int lsb, SData& lhsr, - IData rhs) VL_PURE { - _vl_insert_II(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); -} -static inline void VL_ASSIGNSEL_IIII(int rbits, int obits, int lsb, IData& lhsr, - IData rhs) VL_PURE { - _vl_insert_II(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); -} -static inline void VL_ASSIGNSEL_QIII(int rbits, int obits, int lsb, QData& lhsr, - IData rhs) VL_PURE { - _vl_insert_QQ(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); -} -static inline void VL_ASSIGNSEL_QQII(int rbits, int obits, int lsb, QData& lhsr, - QData rhs) VL_PURE { - _vl_insert_QQ(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); -} -static inline void VL_ASSIGNSEL_QIIQ(int rbits, int obits, int lsb, QData& lhsr, - QData rhs) VL_PURE { - _vl_insert_QQ(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); -} -// static inline void VL_ASSIGNSEL_IIIW(int obits, int lsb, IData& lhsr, WDataInP const rwp) -// VL_MT_SAFE { Illegal, as lhs width >= rhs width -static inline void VL_ASSIGNSEL_WIII(int rbits, int obits, int lsb, WDataOutP owp, - IData rhs) VL_MT_SAFE { - _vl_insert_WI(obits, owp, rhs, lsb + obits - 1, lsb, rbits); -} -static inline void VL_ASSIGNSEL_WIIQ(int rbits, int obits, int lsb, WDataOutP owp, - QData rhs) VL_MT_SAFE { - _vl_insert_WQ(obits, owp, rhs, lsb + obits - 1, lsb, rbits); -} -static inline void VL_ASSIGNSEL_WIIW(int rbits, int obits, int lsb, WDataOutP owp, - WDataInP const rwp) VL_MT_SAFE { - _vl_insert_WW(obits, owp, rwp, lsb + obits - 1, lsb, rbits); -} - -//====================================================================== -// Triops - -static inline WDataOutP VL_COND_WIWW(int obits, int, int, int, WDataOutP owp, int cond, - WDataInP const w1p, WDataInP const w2p) VL_MT_SAFE { - const int words = VL_WORDS_I(obits); - for (int i = 0; i < words; ++i) owp[i] = cond ? w1p[i] : w2p[i]; - return owp; -} - -//====================================================================== -// Constification - -// VL_CONST_W_#X(int obits, WDataOutP owp, IData data0, .... IData data(#-1)) -// Sets wide vector words to specified constant words. -// These macros are used when o might represent more words then are given as constants, -// hence all upper words must be zeroed. 
-// If changing the number of functions here, also change EMITCINLINES_NUM_CONSTW - -#define VL_C_END_(obits, wordsSet) \ - for (int i = (wordsSet); i < VL_WORDS_I(obits); ++i) o[i] = 0; \ - return o - -// clang-format off -static inline WDataOutP VL_CONST_W_1X(int obits, WDataOutP o, EData d0) VL_MT_SAFE { - o[0] = d0; - VL_C_END_(obits, 1); -} -static inline WDataOutP VL_CONST_W_2X(int obits, WDataOutP o, EData d1, EData d0) VL_MT_SAFE { - o[0] = d0; o[1] = d1; - VL_C_END_(obits, 2); -} -static inline WDataOutP VL_CONST_W_3X(int obits, WDataOutP o, EData d2, EData d1, - EData d0) VL_MT_SAFE { - o[0] = d0; o[1] = d1; o[2] = d2; - VL_C_END_(obits,3); -} -static inline WDataOutP VL_CONST_W_4X(int obits, WDataOutP o, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - VL_C_END_(obits,4); -} -static inline WDataOutP VL_CONST_W_5X(int obits, WDataOutP o, - EData d4, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - o[4] = d4; - VL_C_END_(obits,5); -} -static inline WDataOutP VL_CONST_W_6X(int obits, WDataOutP o, - EData d5, EData d4, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - o[4] = d4; o[5] = d5; - VL_C_END_(obits,6); -} -static inline WDataOutP VL_CONST_W_7X(int obits, WDataOutP o, - EData d6, EData d5, EData d4, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - o[4] = d4; o[5] = d5; o[6] = d6; - VL_C_END_(obits,7); -} -static inline WDataOutP VL_CONST_W_8X(int obits, WDataOutP o, - EData d7, EData d6, EData d5, EData d4, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; - VL_C_END_(obits,8); -} -// -static inline WDataOutP VL_CONSTHI_W_1X(int obits, int lsb, WDataOutP obase, - EData d0) VL_MT_SAFE { - WDataOutP o = obase + VL_WORDS_I(lsb); - o[0] = d0; - VL_C_END_(obits, VL_WORDS_I(lsb) + 1); -} -static inline WDataOutP VL_CONSTHI_W_2X(int obits, int lsb, WDataOutP obase, - EData d1, EData d0) VL_MT_SAFE { - WDataOutP o = obase + VL_WORDS_I(lsb); - o[0] = d0; o[1] = d1; - VL_C_END_(obits, VL_WORDS_I(lsb) + 2); -} -static inline WDataOutP VL_CONSTHI_W_3X(int obits, int lsb, WDataOutP obase, - EData d2, EData d1, EData d0) VL_MT_SAFE { - WDataOutP o = obase + VL_WORDS_I(lsb); - o[0] = d0; o[1] = d1; o[2] = d2; - VL_C_END_(obits, VL_WORDS_I(lsb) + 3); -} -static inline WDataOutP VL_CONSTHI_W_4X(int obits, int lsb, WDataOutP obase, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - WDataOutP o = obase + VL_WORDS_I(lsb); - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - VL_C_END_(obits, VL_WORDS_I(lsb) + 4); -} -static inline WDataOutP VL_CONSTHI_W_5X(int obits, int lsb, WDataOutP obase, - EData d4, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - WDataOutP o = obase + VL_WORDS_I(lsb); - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - o[4] = d4; - VL_C_END_(obits, VL_WORDS_I(lsb) + 5); -} -static inline WDataOutP VL_CONSTHI_W_6X(int obits, int lsb, WDataOutP obase, - EData d5, EData d4, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - WDataOutP o = obase + VL_WORDS_I(lsb); - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - o[4] = d4; o[5] = d5; - VL_C_END_(obits, VL_WORDS_I(lsb) + 6); -} -static inline WDataOutP VL_CONSTHI_W_7X(int obits, int lsb, WDataOutP obase, - EData d6, EData d5, EData d4, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - WDataOutP o = obase + 
VL_WORDS_I(lsb); - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - o[4] = d4; o[5] = d5; o[6] = d6; - VL_C_END_(obits, VL_WORDS_I(lsb) + 7); -} -static inline WDataOutP VL_CONSTHI_W_8X(int obits, int lsb, WDataOutP obase, - EData d7, EData d6, EData d5, EData d4, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - WDataOutP o = obase + VL_WORDS_I(lsb); - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; - o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; - VL_C_END_(obits, VL_WORDS_I(lsb) + 8); -} - -#undef VL_C_END_ - -// Partial constant, lower words of vector wider than 8*32, starting at bit number lsb -static inline void VL_CONSTLO_W_8X(int lsb, WDataOutP obase, - EData d7, EData d6, EData d5, EData d4, - EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { - WDataOutP o = obase + VL_WORDS_I(lsb); - o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; -} -// clang-format on +#include "verilated_funcs.h" //====================================================================== +#undef VERILATOR_VERILATED_H_INTERNAL_ #endif // Guard diff --git a/include/verilated_dpi.h b/include/verilated_dpi.h index d86e741fa..c1638f5d1 100644 --- a/include/verilated_dpi.h +++ b/include/verilated_dpi.h @@ -28,7 +28,6 @@ #include "verilatedos.h" #include "verilated.h" // Also presumably included by caller -#include "verilated_heavy.h" // Also presumably included by caller #include "verilated_sym_props.h" #include "svdpi.h" diff --git a/include/verilated_funcs.h b/include/verilated_funcs.h new file mode 100644 index 000000000..0c11c6551 --- /dev/null +++ b/include/verilated_funcs.h @@ -0,0 +1,2252 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +//************************************************************************* +// +// Code available from: https://verilator.org +// +// Copyright 2003-2021 by Wilson Snyder. This program is free software; you can +// redistribute it and/or modify it under the terms of either the GNU +// Lesser General Public License Version 3 or the Perl Artistic License +// Version 2.0. +// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 +// +//************************************************************************* +/// +/// \file +/// \brief Verilated common functions +/// +/// verilated.h should be included instead of this file. +/// +/// Those macro/function/variable starting or ending in _ are internal, +/// however many of the other function/macros here are also internal. +/// +//************************************************************************* + +#ifndef VERILATOR_VERILATED_FUNCS_H_ +#define VERILATOR_VERILATED_FUNCS_H_ + +#ifndef VERILATOR_VERILATED_H_INTERNAL_ +#error "verilated_funcs.h should only be included by verilated.h" +#endif + +//========================================================================= +// Extern functions -- User may override -- See verilated.cpp + +/// Routine to call for $finish +/// User code may wish to replace this function, to do so, define VL_USER_FINISH. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_FINISH_MT instead, which eventually calls this. +extern void vl_finish(const char* filename, int linenum, const char* hier); + +/// Routine to call for $stop and non-fatal error +/// User code may wish to replace this function, to do so, define VL_USER_STOP. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_FINISH_MT instead, which eventually calls this. 
+extern void vl_stop(const char* filename, int linenum, const char* hier); + +/// Routine to call for a couple of fatal messages +/// User code may wish to replace this function, to do so, define VL_USER_FATAL. +/// This code does not have to be thread safe. +/// Verilator internal code must call VL_FINISH_MT instead, which eventually calls this. +extern void vl_fatal(const char* filename, int linenum, const char* hier, const char* msg); + +//========================================================================= +// Extern functions -- Slow path + +/// Multithread safe wrapper for calls to $finish +extern void VL_FINISH_MT(const char* filename, int linenum, const char* hier) VL_MT_SAFE; +/// Multithread safe wrapper for calls to $stop +extern void VL_STOP_MT(const char* filename, int linenum, const char* hier, + bool maybe = true) VL_MT_SAFE; +/// Multithread safe wrapper to call for a couple of fatal messages +extern void VL_FATAL_MT(const char* filename, int linenum, const char* hier, + const char* msg) VL_MT_SAFE; + +// clang-format off +/// Print a string, multithread safe. Eventually VL_PRINTF will get called. +#ifdef VL_THREADED +extern void VL_PRINTF_MT(const char* formatp, ...) VL_ATTR_PRINTF(1) VL_MT_SAFE; +#else +# define VL_PRINTF_MT VL_PRINTF // The following parens will take care of themselves +#endif +// clang-format on + +/// Print a debug message from internals with standard prefix, with printf style format +extern void VL_DBG_MSGF(const char* formatp, ...) VL_ATTR_PRINTF(1) VL_MT_SAFE; + +inline IData VL_RANDOM_I(int obits) VL_MT_SAFE { return vl_rand64() & VL_MASK_I(obits); } +inline QData VL_RANDOM_Q(int obits) VL_MT_SAFE { return vl_rand64() & VL_MASK_Q(obits); } +#ifndef VL_NO_LEGACY +extern WDataOutP VL_RANDOM_W(int obits, WDataOutP outwp); +#endif +extern IData VL_RANDOM_SEEDED_II(int obits, IData seed) VL_MT_SAFE; +inline IData VL_URANDOM_RANGE_I(IData hi, IData lo) { + vluint64_t rnd = vl_rand64(); + if (VL_LIKELY(hi > lo)) { + // Modulus isn't very fast but it's common that hi-low is power-of-two + return (rnd % (hi - lo + 1)) + lo; + } else { + return (rnd % (lo - hi + 1)) + hi; + } +} + +// These are init time only, so slow is fine +/// Random reset a signal of given width +extern IData VL_RAND_RESET_I(int obits); +/// Random reset a signal of given width +extern QData VL_RAND_RESET_Q(int obits); +/// Random reset a signal of given width +extern WDataOutP VL_RAND_RESET_W(int obits, WDataOutP outwp); +/// Zero reset a signal (slow - else use VL_ZERO_W) +extern WDataOutP VL_ZERO_RESET_W(int obits, WDataOutP outwp); + +#if VL_THREADED +/// Return high-precision counter for profiling, or 0x0 if not available +inline QData VL_RDTSC_Q() { + vluint64_t val; + VL_RDTSC(val); + return val; +} +#endif + +extern void VL_PRINTTIMESCALE(const char* namep, const char* timeunitp, + const VerilatedContext* contextp) VL_MT_SAFE; + +extern WDataOutP _vl_moddiv_w(int lbits, WDataOutP owp, WDataInP const lwp, WDataInP const rwp, + bool is_modulus); + +extern IData VL_FGETS_IXI(int obits, void* destp, IData fpi); + +extern void VL_FFLUSH_I(IData fdi); +extern IData VL_FSEEK_I(IData fdi, IData offset, IData origin); +extern IData VL_FTELL_I(IData fdi); +extern void VL_FCLOSE_I(IData fdi); + +extern IData VL_FREAD_I(int width, int array_lsb, int array_size, void* memp, IData fpi, + IData start, IData count); + +extern void VL_WRITEF(const char* formatp, ...); +extern void VL_FWRITEF(IData fpi, const char* formatp, ...); + +extern IData VL_FSCANF_IX(IData fpi, const char* formatp, 
...); +extern IData VL_SSCANF_IIX(int lbits, IData ld, const char* formatp, ...); +extern IData VL_SSCANF_IQX(int lbits, QData ld, const char* formatp, ...); +extern IData VL_SSCANF_IWX(int lbits, WDataInP const lwp, const char* formatp, ...); + +extern void VL_SFORMAT_X(int obits, CData& destr, const char* formatp, ...); +extern void VL_SFORMAT_X(int obits, SData& destr, const char* formatp, ...); +extern void VL_SFORMAT_X(int obits, IData& destr, const char* formatp, ...); +extern void VL_SFORMAT_X(int obits, QData& destr, const char* formatp, ...); +extern void VL_SFORMAT_X(int obits, void* destp, const char* formatp, ...); + +extern IData VL_SYSTEM_IW(int lhswords, WDataInP const lhsp); +extern IData VL_SYSTEM_IQ(QData lhs); +inline IData VL_SYSTEM_II(IData lhs) VL_MT_SAFE { return VL_SYSTEM_IQ(lhs); } + +extern IData VL_TESTPLUSARGS_I(const char* formatp); +extern const char* vl_mc_scan_plusargs(const char* prefixp); // PLIish + +//========================================================================= +// Base macros + +// Return true if data[bit] set; not 0/1 return, but 0/non-zero return. +#define VL_BITISSET_I(data, bit) ((data) & (VL_UL(1) << VL_BITBIT_I(bit))) +#define VL_BITISSET_Q(data, bit) ((data) & (1ULL << VL_BITBIT_Q(bit))) +#define VL_BITISSET_E(data, bit) ((data) & (VL_EUL(1) << VL_BITBIT_E(bit))) +#define VL_BITISSET_W(data, bit) ((data)[VL_BITWORD_E(bit)] & (VL_EUL(1) << VL_BITBIT_E(bit))) +#define VL_BITISSETLIMIT_W(data, width, bit) (((bit) < (width)) && VL_BITISSET_W(data, bit)) + +// Shift appropriate word by bit. Does not account for wrapping between two words +#define VL_BITRSHIFT_W(data, bit) ((data)[VL_BITWORD_E(bit)] >> VL_BITBIT_E(bit)) + +// Create two 32-bit words from quadword +// WData is always at least 2 words; does not clean upper bits +#define VL_SET_WQ(owp, data) \ + do { \ + (owp)[0] = static_cast(data); \ + (owp)[1] = static_cast((data) >> VL_EDATASIZE); \ + } while (false) +#define VL_SET_WI(owp, data) \ + do { \ + (owp)[0] = static_cast(data); \ + (owp)[1] = 0; \ + } while (false) +#define VL_SET_QW(lwp) \ + ((static_cast((lwp)[0])) \ + | (static_cast((lwp)[1]) << (static_cast(VL_EDATASIZE)))) +#define VL_SET_QII(ld, rd) ((static_cast(ld) << 32ULL) | static_cast(rd)) + +// Return FILE* from IData +extern FILE* VL_CVT_I_FP(IData lhs) VL_MT_SAFE; + +// clang-format off +// Use a union to avoid cast-to-different-size warnings +// Return void* from QData +static inline void* VL_CVT_Q_VP(QData lhs) VL_PURE { + union { void* fp; QData q; } u; + u.q = lhs; + return u.fp; +} +// Return QData from const void* +static inline QData VL_CVT_VP_Q(const void* fp) VL_PURE { + union { const void* fp; QData q; } u; + u.q = 0; + u.fp = fp; + return u.q; +} +// Return double from QData (bits, not numerically) +static inline double VL_CVT_D_Q(QData lhs) VL_PURE { + union { double d; QData q; } u; + u.q = lhs; + return u.d; +} +// Return QData from double (bits, not numerically) +static inline QData VL_CVT_Q_D(double lhs) VL_PURE { + union { double d; QData q; } u; + u.d = lhs; + return u.q; +} +// clang-format on + +// Return double from lhs (numeric) unsigned +double VL_ITOR_D_W(int lbits, WDataInP const lwp) VL_PURE; +static inline double VL_ITOR_D_I(int, IData lhs) VL_PURE { + return static_cast(static_cast(lhs)); +} +static inline double VL_ITOR_D_Q(int, QData lhs) VL_PURE { + return static_cast(static_cast(lhs)); +} +// Return double from lhs (numeric) signed +double VL_ISTOR_D_W(int lbits, WDataInP const lwp) VL_PURE; +static inline double VL_ISTOR_D_I(int 
lbits, IData lhs) VL_PURE { + if (lbits == 32) return static_cast(static_cast(lhs)); + VlWide lwp; + VL_SET_WI(lwp, lhs); + return VL_ISTOR_D_W(lbits, lwp); +} +static inline double VL_ISTOR_D_Q(int lbits, QData lhs) VL_PURE { + if (lbits == 64) return static_cast(static_cast(lhs)); + VlWide lwp; + VL_SET_WQ(lwp, lhs); + return VL_ISTOR_D_W(lbits, lwp); +} +// Return QData from double (numeric) +static inline IData VL_RTOI_I_D(double lhs) VL_PURE { + return static_cast(VL_TRUNC(lhs)); +} + +// Sign extend such that if MSB set, we get ffff_ffff, else 0s +// (Requires clean input) +#define VL_SIGN_I(nbits, lhs) ((lhs) >> VL_BITBIT_I((nbits)-VL_UL(1))) +#define VL_SIGN_Q(nbits, lhs) ((lhs) >> VL_BITBIT_Q((nbits)-1ULL)) +#define VL_SIGN_E(nbits, lhs) ((lhs) >> VL_BITBIT_E((nbits)-VL_EUL(1))) +#define VL_SIGN_W(nbits, rwp) \ + ((rwp)[VL_BITWORD_E((nbits)-VL_EUL(1))] >> VL_BITBIT_E((nbits)-VL_EUL(1))) +#define VL_SIGNONES_E(nbits, lhs) (-(VL_SIGN_E(nbits, lhs))) + +// Sign bit extended up to MSB, doesn't include unsigned portion +// Optimization bug in GCC 3.3 returns different bitmasks to later states for +static inline IData VL_EXTENDSIGN_I(int lbits, IData lhs) VL_PURE { + return (-((lhs) & (VL_UL(1) << (lbits - 1)))); +} +static inline QData VL_EXTENDSIGN_Q(int lbits, QData lhs) VL_PURE { + return (-((lhs) & (1ULL << (lbits - 1)))); +} + +// Debugging prints +extern void _vl_debug_print_w(int lbits, WDataInP const iwp); + +//========================================================================= +// Pli macros + +extern int VL_TIME_STR_CONVERT(const char* strp) VL_PURE; + +// These are deprecated and used only to establish the default precision/units. +// Use Verilator timescale-override for better control. +// clang-format off +#ifndef VL_TIME_PRECISION +# ifdef VL_TIME_PRECISION_STR +# define VL_TIME_PRECISION VL_TIME_STR_CONVERT(VL_STRINGIFY(VL_TIME_PRECISION_STR)) +# else +# define VL_TIME_PRECISION (-12) ///< Timescale default units if not in Verilog - picoseconds +# endif +#endif +#ifndef VL_TIME_UNIT +# ifdef VL_TIME_UNIT_STR +# define VL_TIME_UNIT VL_TIME_STR_CONVERT(VL_STRINGIFY(VL_TIME_PRECISION_STR)) +# else +# define VL_TIME_UNIT (-12) ///< Timescale default units if not in Verilog - picoseconds +# endif +#endif + +#if defined(SYSTEMC_VERSION) +/// Return current simulation time +// Already defined: extern sc_time sc_time_stamp(); +inline vluint64_t vl_time_stamp64() { return sc_time_stamp().value(); } +#else // Non-SystemC +# if !defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY) +# ifdef VL_TIME_STAMP64 +// vl_time_stamp64() may be optionally defined by the user to return time. +// On MSVC++ weak symbols are not supported so must be declared, or define +// VL_TIME_CONTEXT. +extern vluint64_t vl_time_stamp64() VL_ATTR_WEAK; +# else +// sc_time_stamp() may be optionally defined by the user to return time. +// On MSVC++ weak symbols are not supported so must be declared, or define +// VL_TIME_CONTEXT. +extern double sc_time_stamp() VL_ATTR_WEAK; // Verilator 4.032 and newer +inline vluint64_t vl_time_stamp64() { + // clang9.0.1 requires & although we really do want the weak symbol value + return VL_LIKELY(&sc_time_stamp) ? 
static_cast(sc_time_stamp()) : 0; +} +# endif +# endif +#endif + +inline vluint64_t VerilatedContext::time() const VL_MT_SAFE { + // When using non-default context, fastest path is return time + if (VL_LIKELY(m_s.m_time)) return m_s.m_time; +#if defined(SYSTEMC_VERSION) || (!defined(VL_TIME_CONTEXT) && !defined(VL_NO_LEGACY)) + // Zero time could mean really at zero, or using callback + // clang9.0.1 requires & although we really do want the weak symbol value + if (VL_LIKELY(&vl_time_stamp64)) { // else is weak symbol that is not defined + return vl_time_stamp64(); + } +#endif + return 0; +} + +#define VL_TIME_Q() (Verilated::threadContextp()->time()) +#define VL_TIME_D() (static_cast(VL_TIME_Q())) + +// Time scaled from 1-per-precision into a module's time units ("Unit"-ed, not "United") +// Optimized assuming scale is always constant. +// Can't use multiply in Q flavor, as might lose precision +#define VL_TIME_UNITED_Q(scale) (VL_TIME_Q() / static_cast(scale)) +#define VL_TIME_UNITED_D(scale) (VL_TIME_D() / static_cast(scale)) + +// Return time precision as multiplier of time units +double vl_time_multiplier(int scale) VL_PURE; +// Return power of 10. e.g. returns 100 if n==2 +vluint64_t vl_time_pow10(int n) VL_PURE; + +#ifdef VL_DEBUG +/// Evaluate statement if Verilated::debug() enabled +# define VL_DEBUG_IF(stmt) \ + do { \ + if (VL_UNLIKELY(Verilated::debug())) {stmt} \ + } while (false) +#else +// We intentionally do not compile the stmt to improve compile speed +# define VL_DEBUG_IF(stmt) do {} while (false) +#endif + +// clang-format on + +//========================================================================= +// Functional macros/routines +// These all take the form +// VL_func_IW(bits, bits, op, op) +// VL_func_WW(bits, bits, out, op, op) +// The I/W indicates if it's a integer or wide for the output and each operand. +// The bits indicate the bit width of the output and each operand. +// If wide output, a temporary storage location is specified. + +//=================================================================== +// SETTING OPERATORS + +// Output clean +// EMIT_RULE: VL_CLEAN: oclean=clean; obits=lbits; +#define VL_CLEAN_II(obits, lbits, lhs) ((lhs)&VL_MASK_I(obits)) +#define VL_CLEAN_QQ(obits, lbits, lhs) ((lhs)&VL_MASK_Q(obits)) + +// EMIT_RULE: VL_ASSIGNCLEAN: oclean=clean; obits==lbits; +#define VL_ASSIGNCLEAN_W(obits, owp, lwp) VL_CLEAN_WW((obits), (obits), (owp), (lwp)) +static inline WDataOutP _vl_clean_inplace_w(int obits, WDataOutP owp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + owp[words - 1] &= VL_MASK_E(obits); + return owp; +} +static inline WDataOutP VL_CLEAN_WW(int obits, int, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + for (int i = 0; (i < (words - 1)); ++i) owp[i] = lwp[i]; + owp[words - 1] = lwp[words - 1] & VL_MASK_E(obits); + return owp; +} +static inline WDataOutP VL_ZERO_W(int obits, WDataOutP owp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + for (int i = 0; i < words; ++i) owp[i] = 0; + return owp; +} +static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + for (int i = 0; i < (words - 1); ++i) owp[i] = ~VL_EUL(0); + owp[words - 1] = VL_MASK_E(obits); + return owp; +} + +// EMIT_RULE: VL_ASSIGN: oclean=rclean; obits==lbits; +// For now, we always have a clean rhs. +// Note: If a ASSIGN isn't clean, use VL_ASSIGNCLEAN instead to do the same thing. 
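For readers new to these EMIT_RULE annotations: "clean" means every bit above the declared width is zero, "dirty" means those bits may hold stale data, and VL_CLEAN_II above is just a mask. A minimal standalone illustration in plain C++ (hypothetical helper name, not the Verilated macro):

#include <cassert>
#include <cstdint>

// Keep only the low 'obits' bits of a 32-bit value (1 <= obits <= 32),
// the same masking that VL_MASK_I / VL_CLEAN_II perform for IData-sized signals.
static uint32_t clean32(int obits, uint32_t dirty) {
    const uint32_t mask = (obits >= 32) ? 0xffffffffu : ((1u << obits) - 1u);
    return dirty & mask;
}

int main() {
    // A 5-bit signal stored in a 32-bit word: bits [31:5] may hold garbage.
    const uint32_t dirty = 0xdeadbe1fu;
    assert(clean32(5, dirty) == 0x1fu);  // the "clean" value keeps only bits [4:0]
    return 0;
}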
+static inline WDataOutP VL_ASSIGN_W(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + for (int i = 0; i < words; ++i) owp[i] = lwp[i]; + return owp; +} + +// EMIT_RULE: VL_ASSIGNBIT: rclean=clean; +static inline void VL_ASSIGNBIT_II(int, int bit, CData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_II(int, int bit, SData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_II(int, int bit, IData& lhsr, IData rhs) VL_PURE { + lhsr = ((lhsr & ~(VL_UL(1) << VL_BITBIT_I(bit))) | (rhs << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_QI(int, int bit, QData& lhsr, QData rhs) VL_PURE { + lhsr = ((lhsr & ~(1ULL << VL_BITBIT_Q(bit))) | (static_cast(rhs) << VL_BITBIT_Q(bit))); +} +static inline void VL_ASSIGNBIT_WI(int, int bit, WDataOutP owp, IData rhs) VL_MT_SAFE { + EData orig = owp[VL_BITWORD_E(bit)]; + owp[VL_BITWORD_E(bit)] = ((orig & ~(VL_EUL(1) << VL_BITBIT_E(bit))) + | (static_cast(rhs) << VL_BITBIT_E(bit))); +} +// Alternative form that is an instruction faster when rhs is constant one. +static inline void VL_ASSIGNBIT_IO(int, int bit, CData& lhsr, IData) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_IO(int, int bit, SData& lhsr, IData) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_IO(int, int bit, IData& lhsr, IData) VL_PURE { + lhsr = (lhsr | (VL_UL(1) << VL_BITBIT_I(bit))); +} +static inline void VL_ASSIGNBIT_QO(int, int bit, QData& lhsr, IData) VL_PURE { + lhsr = (lhsr | (1ULL << VL_BITBIT_Q(bit))); +} +static inline void VL_ASSIGNBIT_WO(int, int bit, WDataOutP owp, IData) VL_MT_SAFE { + const EData orig = owp[VL_BITWORD_E(bit)]; + owp[VL_BITWORD_E(bit)] = (orig | (VL_EUL(1) << VL_BITBIT_E(bit))); +} + +//=================================================================== +// SYSTEMC OPERATORS +// Copying verilog format to systemc integers and bit vectors. +// Get a SystemC variable + +#define VL_ASSIGN_ISI(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read()); } +#define VL_ASSIGN_QSQ(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read()); } + +#define VL_ASSIGN_ISW(obits, od, svar) \ + { (od) = ((svar).read().get_word(0)) & VL_MASK_I(obits); } +#define VL_ASSIGN_QSW(obits, od, svar) \ + { \ + (od) = ((static_cast((svar).read().get_word(1))) << VL_IDATASIZE \ + | (svar).read().get_word(0)) \ + & VL_MASK_Q(obits); \ + } +#define VL_ASSIGN_WSW(obits, owp, svar) \ + { \ + const int words = VL_WORDS_I(obits); \ + for (int i = 0; i < words; ++i) (owp)[i] = (svar).read().get_word(i); \ + (owp)[words - 1] &= VL_MASK_E(obits); \ + } + +#define VL_ASSIGN_ISU(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_II((obits), (obits), (svar).read().to_uint()); } +#define VL_ASSIGN_QSU(obits, vvar, svar) \ + { (vvar) = VL_CLEAN_QQ((obits), (obits), (svar).read().to_uint64()); } +#define VL_ASSIGN_WSB(obits, owp, svar) \ + { \ + const int words = VL_WORDS_I(obits); \ + sc_biguint<(obits)> _butemp = (svar).read(); \ + for (int i = 0; i < words; ++i) { \ + int msb = ((i + 1) * VL_IDATASIZE) - 1; \ + msb = (msb >= (obits)) ? 
((obits)-1) : msb; \ + (owp)[i] = _butemp.range(msb, i * VL_IDATASIZE).to_uint(); \ + } \ + (owp)[words - 1] &= VL_MASK_E(obits); \ + } + +// Copying verilog format from systemc integers and bit vectors. +// Set a SystemC variable + +#define VL_ASSIGN_SII(obits, svar, vvar) \ + { (svar).write(vvar); } +#define VL_ASSIGN_SQQ(obits, svar, vvar) \ + { (svar).write(vvar); } + +#define VL_ASSIGN_SWI(obits, svar, rd) \ + { \ + sc_bv<(obits)> _bvtemp; \ + _bvtemp.set_word(0, (rd)); \ + (svar).write(_bvtemp); \ + } +#define VL_ASSIGN_SWQ(obits, svar, rd) \ + { \ + sc_bv<(obits)> _bvtemp; \ + _bvtemp.set_word(0, static_cast(rd)); \ + _bvtemp.set_word(1, static_cast((rd) >> VL_IDATASIZE)); \ + (svar).write(_bvtemp); \ + } +#define VL_ASSIGN_SWW(obits, svar, rwp) \ + { \ + sc_bv<(obits)> _bvtemp; \ + for (int i = 0; i < VL_WORDS_I(obits); ++i) _bvtemp.set_word(i, (rwp)[i]); \ + (svar).write(_bvtemp); \ + } + +#define VL_ASSIGN_SUI(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SUQ(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBI(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBQ(obits, svar, rd) \ + { (svar).write(rd); } +#define VL_ASSIGN_SBW(obits, svar, rwp) \ + { \ + sc_biguint<(obits)> _butemp; \ + for (int i = 0; i < VL_WORDS_I(obits); ++i) { \ + int msb = ((i + 1) * VL_IDATASIZE) - 1; \ + msb = (msb >= (obits)) ? ((obits)-1) : msb; \ + _butemp.range(msb, i* VL_IDATASIZE) = (rwp)[i]; \ + } \ + (svar).write(_butemp); \ + } + +//=================================================================== +// Extending sizes + +// CAREFUL, we're width changing, so obits!=lbits + +// Right must be clean because otherwise size increase would pick up bad bits +// EMIT_RULE: VL_EXTEND: oclean=clean; rclean==clean; +#define VL_EXTEND_II(obits, lbits, lhs) ((lhs)) +#define VL_EXTEND_QI(obits, lbits, lhs) (static_cast(lhs)) +#define VL_EXTEND_QQ(obits, lbits, lhs) ((lhs)) + +static inline WDataOutP VL_EXTEND_WI(int obits, int, WDataOutP owp, IData ld) VL_MT_SAFE { + // Note for extracts that obits != lbits + owp[0] = ld; + for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + return owp; +} +static inline WDataOutP VL_EXTEND_WQ(int obits, int, WDataOutP owp, QData ld) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + return owp; +} +static inline WDataOutP VL_EXTEND_WW(int obits, int lbits, WDataOutP owp, + WDataInP const lwp) VL_MT_SAFE { + for (int i = 0; i < VL_WORDS_I(lbits); ++i) owp[i] = lwp[i]; + for (int i = VL_WORDS_I(lbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0; + return owp; +} + +// EMIT_RULE: VL_EXTENDS: oclean=*dirty*; obits=lbits; +// Sign extension; output dirty +static inline IData VL_EXTENDS_II(int, int lbits, IData lhs) VL_PURE { + return VL_EXTENDSIGN_I(lbits, lhs) | lhs; +} +static inline QData VL_EXTENDS_QI(int, int lbits, QData lhs /*Q_as_need_extended*/) VL_PURE { + return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; +} +static inline QData VL_EXTENDS_QQ(int, int lbits, QData lhs) VL_PURE { + return VL_EXTENDSIGN_Q(lbits, lhs) | lhs; +} + +static inline WDataOutP VL_EXTENDS_WI(int obits, int lbits, WDataOutP owp, IData ld) VL_MT_SAFE { + const EData sign = VL_SIGNONES_E(lbits, static_cast(ld)); + owp[0] = ld | (sign & ~VL_MASK_E(lbits)); + for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + return owp; +} +static inline WDataOutP VL_EXTENDS_WQ(int obits, int lbits, WDataOutP owp, QData ld) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + const EData sign = VL_SIGNONES_E(lbits, owp[1]); + owp[1] 
|= sign & ~VL_MASK_E(lbits); + for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + return owp; +} +static inline WDataOutP VL_EXTENDS_WW(int obits, int lbits, WDataOutP owp, + WDataInP const lwp) VL_MT_SAFE { + for (int i = 0; i < VL_WORDS_I(lbits) - 1; ++i) owp[i] = lwp[i]; + const int lmsw = VL_WORDS_I(lbits) - 1; + const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]); + owp[lmsw] = lwp[lmsw] | (sign & ~VL_MASK_E(lbits)); + for (int i = VL_WORDS_I(lbits); i < VL_WORDS_I(obits); ++i) owp[i] = sign; + return owp; +} + +//=================================================================== +// REDUCTION OPERATORS + +// EMIT_RULE: VL_REDAND: oclean=clean; lclean==clean; obits=1; +#define VL_REDAND_II(obits, lbits, lhs) ((lhs) == VL_MASK_I(lbits)) +#define VL_REDAND_IQ(obits, lbits, lhs) ((lhs) == VL_MASK_Q(lbits)) +static inline IData VL_REDAND_IW(int, int lbits, WDataInP const lwp) VL_MT_SAFE { + const int words = VL_WORDS_I(lbits); + EData combine = lwp[0]; + for (int i = 1; i < words - 1; ++i) combine &= lwp[i]; + combine &= ~VL_MASK_E(lbits) | lwp[words - 1]; + return ((~combine) == 0); +} + +// EMIT_RULE: VL_REDOR: oclean=clean; lclean==clean; obits=1; +#define VL_REDOR_I(lhs) ((lhs) != 0) +#define VL_REDOR_Q(lhs) ((lhs) != 0) +static inline IData VL_REDOR_W(int words, WDataInP const lwp) VL_MT_SAFE { + EData equal = 0; + for (int i = 0; i < words; ++i) equal |= lwp[i]; + return (equal != 0); +} + +// EMIT_RULE: VL_REDXOR: oclean=dirty; obits=1; +static inline IData VL_REDXOR_2(IData r) VL_PURE { + // Experiments show VL_REDXOR_2 is faster than __builtin_parityl + r = (r ^ (r >> 1)); + return r; +} +static inline IData VL_REDXOR_4(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + return r; +#endif +} +static inline IData VL_REDXOR_8(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + return r; +#endif +} +static inline IData VL_REDXOR_16(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + return r; +#endif +} +static inline IData VL_REDXOR_32(IData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityl(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + r = (r ^ (r >> 16)); + return r; +#endif +} +static inline IData VL_REDXOR_64(QData r) VL_PURE { +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(VL_NO_BUILTINS) + return __builtin_parityll(r); +#else + r = (r ^ (r >> 1)); + r = (r ^ (r >> 2)); + r = (r ^ (r >> 4)); + r = (r ^ (r >> 8)); + r = (r ^ (r >> 16)); + r = (r ^ (r >> 32)); + return static_cast(r); +#endif +} +static inline IData VL_REDXOR_W(int words, WDataInP const lwp) VL_MT_SAFE { + EData r = lwp[0]; + for (int i = 1; i < words; ++i) r ^= lwp[i]; + return VL_REDXOR_32(r); +} + +// EMIT_RULE: VL_COUNTONES_II: oclean = false; lhs clean +static inline IData VL_COUNTONES_I(IData lhs) VL_PURE { + // This is faster than __builtin_popcountl + IData r = lhs - ((lhs >> 1) & 033333333333) - ((lhs >> 2) & 011111111111); + r = (r + (r >> 3)) & 030707070707; + r = (r + (r >> 6)); + r = (r + (r >> 12) + (r >> 24)) & 077; + return r; +} 
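VL_COUNTONES_I above counts set bits three at a time with octal masks and then folds the partial sums, rather than looping or calling __builtin_popcount. Below is a small standalone sketch that reuses the same constants and checks them against a naive bit-by-bit count (illustrative names, not the Verilated API):

#include <cassert>
#include <cstdint>

// Same octal-mask technique as VL_COUNTONES_I: per-3-bit partial sums that are
// then folded together; the constants are octal on purpose.
static uint32_t popcount_octal(uint32_t x) {
    uint32_t r = x - ((x >> 1) & 033333333333u) - ((x >> 2) & 011111111111u);
    r = (r + (r >> 3)) & 030707070707u;
    r = r + (r >> 6);
    r = (r + (r >> 12) + (r >> 24)) & 077u;
    return r;
}

// Naive reference: count one bit per iteration.
static uint32_t popcount_naive(uint32_t x) {
    uint32_t n = 0;
    for (; x != 0; x >>= 1) n += x & 1u;
    return n;
}

int main() {
    const uint32_t tests[] = {0u, 1u, 0x80000001u, 0x12345678u, 0xffffffffu};
    for (uint32_t x : tests) assert(popcount_octal(x) == popcount_naive(x));
    return 0;
}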
+static inline IData VL_COUNTONES_Q(QData lhs) VL_PURE { + return VL_COUNTONES_I(static_cast(lhs)) + VL_COUNTONES_I(static_cast(lhs >> 32)); +} +#define VL_COUNTONES_E VL_COUNTONES_I +static inline IData VL_COUNTONES_W(int words, WDataInP const lwp) VL_MT_SAFE { + EData r = 0; + for (int i = 0; i < words; ++i) r += VL_COUNTONES_E(lwp[i]); + return r; +} + +// EMIT_RULE: VL_COUNTBITS_II: oclean = false; lhs clean +static inline IData VL_COUNTBITS_I(int lbits, IData lhs, IData ctrl0, IData ctrl1, + IData ctrl2) VL_PURE { + int ctrlSum = (ctrl0 & 0x1) + (ctrl1 & 0x1) + (ctrl2 & 0x1); + if (ctrlSum == 3) { + return VL_COUNTONES_I(lhs); + } else if (ctrlSum == 0) { + IData mask = (lbits == 32) ? -1 : ((1 << lbits) - 1); + return VL_COUNTONES_I(~lhs & mask); + } else { + return (lbits == 32) ? 32 : lbits; + } +} +static inline IData VL_COUNTBITS_Q(int lbits, QData lhs, IData ctrl0, IData ctrl1, + IData ctrl2) VL_PURE { + return VL_COUNTBITS_I(32, static_cast(lhs), ctrl0, ctrl1, ctrl2) + + VL_COUNTBITS_I(lbits - 32, static_cast(lhs >> 32), ctrl0, ctrl1, ctrl2); +} +#define VL_COUNTBITS_E VL_COUNTBITS_I +static inline IData VL_COUNTBITS_W(int lbits, int words, WDataInP const lwp, IData ctrl0, + IData ctrl1, IData ctrl2) VL_MT_SAFE { + EData r = 0; + IData wordLbits = 32; + for (int i = 0; i < words; ++i) { + if (i == words - 1) wordLbits = lbits % 32; + r += VL_COUNTBITS_E(wordLbits, lwp[i], ctrl0, ctrl1, ctrl2); + } + return r; +} + +static inline IData VL_ONEHOT_I(IData lhs) VL_PURE { + return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); +} +static inline IData VL_ONEHOT_Q(QData lhs) VL_PURE { + return (((lhs & (lhs - 1)) == 0) & (lhs != 0)); +} +static inline IData VL_ONEHOT_W(int words, WDataInP const lwp) VL_MT_SAFE { + EData one = 0; + for (int i = 0; (i < words); ++i) { + if (lwp[i]) { + if (one) return 0; + one = 1; + if (lwp[i] & (lwp[i] - 1)) return 0; + } + } + return one; +} + +static inline IData VL_ONEHOT0_I(IData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } +static inline IData VL_ONEHOT0_Q(QData lhs) VL_PURE { return ((lhs & (lhs - 1)) == 0); } +static inline IData VL_ONEHOT0_W(int words, WDataInP const lwp) VL_MT_SAFE { + bool one = false; + for (int i = 0; (i < words); ++i) { + if (lwp[i]) { + if (one) return 0; + one = true; + if (lwp[i] & (lwp[i] - 1)) return 0; + } + } + return 1; +} + +static inline IData VL_CLOG2_I(IData lhs) VL_PURE { + // There are faster algorithms, or fls GCC4 builtins, but rarely used + if (VL_UNLIKELY(!lhs)) return 0; + --lhs; + int shifts = 0; + for (; lhs != 0; ++shifts) lhs = lhs >> 1; + return shifts; +} +static inline IData VL_CLOG2_Q(QData lhs) VL_PURE { + if (VL_UNLIKELY(!lhs)) return 0; + --lhs; + int shifts = 0; + for (; lhs != 0; ++shifts) lhs = lhs >> 1ULL; + return shifts; +} +static inline IData VL_CLOG2_W(int words, WDataInP const lwp) VL_MT_SAFE { + EData adjust = (VL_COUNTONES_W(words, lwp) == 1) ? 0 : 1; + for (int i = words - 1; i >= 0; --i) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken + for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { + if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) { + return i * VL_EDATASIZE + bit + adjust; + } + } + // Can't get here - one bit must be set + } + } + return 0; +} + +static inline IData VL_MOSTSETBITP1_W(int words, WDataInP const lwp) VL_MT_SAFE { + // MSB set bit plus one; similar to FLS. 
0=value is zero + for (int i = words - 1; i >= 0; --i) { + if (VL_UNLIKELY(lwp[i])) { // Shorter worst case if predict not taken + for (int bit = VL_EDATASIZE - 1; bit >= 0; --bit) { + if (VL_UNLIKELY(VL_BITISSET_E(lwp[i], bit))) return i * VL_EDATASIZE + bit + 1; + } + // Can't get here - one bit must be set + } + } + return 0; +} + +//=================================================================== +// SIMPLE LOGICAL OPERATORS + +// EMIT_RULE: VL_AND: oclean=lclean||rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_AND_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] & rwp[i]); + return owp; +} +// EMIT_RULE: VL_OR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_OR_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] | rwp[i]); + return owp; +} +// EMIT_RULE: VL_CHANGEXOR: oclean=1; obits=32; lbits==rbits; +static inline IData VL_CHANGEXOR_W(int words, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + IData od = 0; + for (int i = 0; (i < words); ++i) od |= (lwp[i] ^ rwp[i]); + return od; +} +// EMIT_RULE: VL_XOR: oclean=lclean&&rclean; obits=lbits; lbits==rbits; +static inline WDataOutP VL_XOR_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; (i < words); ++i) owp[i] = (lwp[i] ^ rwp[i]); + return owp; +} +// EMIT_RULE: VL_NOT: oclean=dirty; obits=lbits; +static inline WDataOutP VL_NOT_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + for (int i = 0; i < words; ++i) owp[i] = ~(lwp[i]); + return owp; +} + +//========================================================================= +// Logical comparisons + +// EMIT_RULE: VL_EQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_NEQ: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_LT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_GT: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_GTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +// EMIT_RULE: VL_LTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; +#define VL_NEQ_W(words, lwp, rwp) (!VL_EQ_W(words, lwp, rwp)) +#define VL_LT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) < 0) +#define VL_LTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) <= 0) +#define VL_GT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) > 0) +#define VL_GTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) >= 0) + +// Output clean, AND MUST BE CLEAN +static inline IData VL_EQ_W(int words, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + EData nequal = 0; + for (int i = 0; (i < words); ++i) nequal |= (lwp[i] ^ rwp[i]); + return (nequal == 0); +} + +// Internal usage +static inline int _vl_cmp_w(int words, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = words - 1; i >= 0; --i) { + if (lwp[i] > rwp[i]) return 1; + if (lwp[i] < rwp[i]) return -1; + } + return 0; // == +} + +#define VL_LTS_IWW(obits, lbits, rbbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) < 0) +#define VL_LTES_IWW(obits, lbits, rbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) <= 0) +#define VL_GTS_IWW(obits, lbits, rbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) > 0) +#define VL_GTES_IWW(obits, lbits, rbits, lwp, rwp) (_vl_cmps_w(lbits, lwp, rwp) >= 0) + +static inline IData 
VL_GTS_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { + // For lbits==32, this becomes just a single instruction, otherwise ~5. + // GCC 3.3.4 sign extension bugs on AMD64 architecture force us to use quad logic + const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed > rhs_signed; +} +static inline IData VL_GTS_IQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { + const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed > rhs_signed; +} + +static inline IData VL_GTES_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { + const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed >= rhs_signed; +} +static inline IData VL_GTES_IQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { + const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed >= rhs_signed; +} + +static inline IData VL_LTS_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { + const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed < rhs_signed; +} +static inline IData VL_LTS_IQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { + const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed < rhs_signed; +} + +static inline IData VL_LTES_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { + const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); // Q for gcc + const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); // Q for gcc + return lhs_signed <= rhs_signed; +} +static inline IData VL_LTES_IQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { + const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed <= rhs_signed; +} + +static inline int _vl_cmps_w(int lbits, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + const int words = VL_WORDS_I(lbits); + int i = words - 1; + // We need to flip sense if negative comparison + const EData lsign = VL_SIGN_E(lbits, lwp[i]); + const EData rsign = VL_SIGN_E(lbits, rwp[i]); + if (!lsign && rsign) return 1; // + > - + if (lsign && !rsign) return -1; // - < + + for (; i >= 0; --i) { + if (lwp[i] > rwp[i]) return 1; + if (lwp[i] < rwp[i]) return -1; + } + return 0; // == +} + +//========================================================================= +// Math + +// Output NOT clean +static inline WDataOutP VL_NEGATE_W(int words, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE { + EData carry = 1; + for (int i = 0; i < words; ++i) { + owp[i] = ~lwp[i] + carry; + carry = (owp[i] < ~lwp[i]); + } + return owp; +} +static inline void VL_NEGATE_INPLACE_W(int words, WDataOutP owp_lwp) VL_MT_SAFE { + EData carry = 1; + for (int i = 0; i < words; ++i) { + EData word = ~owp_lwp[i] + carry; + carry = (word < ~owp_lwp[i]); + owp_lwp[i] = word; + } +} + +// EMIT_RULE: VL_MUL: oclean=dirty; lclean==clean; rclean==clean; +// EMIT_RULE: VL_DIV: oclean=dirty; lclean==clean; rclean==clean; +// EMIT_RULE: VL_MODDIV: oclean=dirty; lclean==clean; rclean==clean; +#define VL_DIV_III(lbits, lhs, 
rhs) (((rhs) == 0) ? 0 : (lhs) / (rhs)) +#define VL_DIV_QQQ(lbits, lhs, rhs) (((rhs) == 0) ? 0 : (lhs) / (rhs)) +#define VL_DIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 0)) +#define VL_MODDIV_III(lbits, lhs, rhs) (((rhs) == 0) ? 0 : (lhs) % (rhs)) +#define VL_MODDIV_QQQ(lbits, lhs, rhs) (((rhs) == 0) ? 0 : (lhs) % (rhs)) +#define VL_MODDIV_WWW(lbits, owp, lwp, rwp) (_vl_moddiv_w(lbits, owp, lwp, rwp, 1)) + +static inline WDataOutP VL_ADD_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = carry + static_cast(lwp[i]) + static_cast(rwp[i]); + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Last output word is dirty + return owp; +} + +static inline WDataOutP VL_SUB_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = (carry + static_cast(lwp[i]) + + static_cast(static_cast(~rwp[i]))); + if (i == 0) ++carry; // Negation of rwp + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Last output word is dirty + return owp; +} + +static inline WDataOutP VL_MUL_W(int words, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; i < words; ++i) owp[i] = 0; + for (int lword = 0; lword < words; ++lword) { + for (int rword = 0; rword < words; ++rword) { + QData mul = static_cast(lwp[lword]) * static_cast(rwp[rword]); + for (int qword = lword + rword; qword < words; ++qword) { + mul += static_cast(owp[qword]); + owp[qword] = (mul & 0xffffffffULL); + mul = (mul >> 32ULL) & 0xffffffffULL; + } + } + } + // Last output word is dirty + return owp; +} + +static inline IData VL_MULS_III(int, int lbits, int, IData lhs, IData rhs) VL_PURE { + const vlsint32_t lhs_signed = VL_EXTENDS_II(32, lbits, lhs); + const vlsint32_t rhs_signed = VL_EXTENDS_II(32, lbits, rhs); + return lhs_signed * rhs_signed; +} +static inline QData VL_MULS_QQQ(int, int lbits, int, QData lhs, QData rhs) VL_PURE { + const vlsint64_t lhs_signed = VL_EXTENDS_QQ(64, lbits, lhs); + const vlsint64_t rhs_signed = VL_EXTENDS_QQ(64, lbits, rhs); + return lhs_signed * rhs_signed; +} + +static inline WDataOutP VL_MULS_WWW(int, int lbits, int, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp) VL_MT_SAFE { + const int words = VL_WORDS_I(lbits); + // cppcheck-suppress variableScope + WData lwstore[VL_MULS_MAX_WORDS]; // Fixed size, as MSVC++ doesn't allow [words] here + // cppcheck-suppress variableScope + WData rwstore[VL_MULS_MAX_WORDS]; + WDataInP lwusp = lwp; + WDataInP rwusp = rwp; + EData lneg = VL_SIGN_E(lbits, lwp[words - 1]); + if (lneg) { // Negate lhs + lwusp = lwstore; + VL_NEGATE_W(words, lwstore, lwp); + lwstore[words - 1] &= VL_MASK_E(lbits); // Clean it + } + EData rneg = VL_SIGN_E(lbits, rwp[words - 1]); + if (rneg) { // Negate rhs + rwusp = rwstore; + VL_NEGATE_W(words, rwstore, rwp); + rwstore[words - 1] &= VL_MASK_E(lbits); // Clean it + } + VL_MUL_W(words, owp, lwusp, rwusp); + owp[words - 1] &= VL_MASK_E( + lbits); // Clean. 
Note it's ok for the multiply to overflow into the sign bit + if ((lneg ^ rneg) & 1) { // Negate output (not using NEGATE, as owp==lwp) + QData carry = 0; + for (int i = 0; i < words; ++i) { + carry = carry + static_cast(static_cast(~owp[i])); + if (i == 0) ++carry; // Negation of temp2 + owp[i] = (carry & 0xffffffffULL); + carry = (carry >> 32ULL) & 0xffffffffULL; + } + // Not needed: owp[words-1] |= 1< 0) power = power * power; + if (rhs & (1ULL << i)) out *= power; + } + return out; +} +static inline QData VL_POW_QQQ(int, int, int rbits, QData lhs, QData rhs) VL_PURE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (VL_UNLIKELY(lhs == 0)) return 0; + QData power = lhs; + QData out = 1ULL; + for (int i = 0; i < rbits; ++i) { + if (i > 0) power = power * power; + if (rhs & (1ULL << i)) out *= power; + } + return out; +} +WDataOutP VL_POW_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp); +WDataOutP VL_POW_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, QData rhs); +QData VL_POW_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp); + +#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_IIQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_IIW(obits, lbits, rbits, lhs, rwp, lsign, rsign) \ + VL_POWSS_QQW(obits, lbits, rbits, lhs, rwp, lsign, rsign) +#define VL_POWSS_QQI(obits, lbits, rbits, lhs, rhs, lsign, rsign) \ + VL_POWSS_QQQ(obits, lbits, rbits, lhs, rhs, lsign, rsign) +#define VL_POWSS_WWI(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) \ + VL_POWSS_WWQ(obits, lbits, rbits, owp, lwp, rhs, lsign, rsign) + +static inline IData VL_POWSS_III(int obits, int, int rbits, IData lhs, IData rhs, bool lsign, + bool rsign) VL_MT_SAFE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (rsign && VL_SIGN_I(rbits, rhs)) { + if (lhs == 0) { + return 0; // "X" + } else if (lhs == 1) { + return 1; + } else if (lsign && lhs == VL_MASK_I(obits)) { // -1 + if (rhs & 1) { + return VL_MASK_I(obits); // -1^odd=-1 + } else { + return 1; // -1^even=1 + } + } + return 0; + } + return VL_POW_III(obits, rbits, rbits, lhs, rhs); +} +static inline QData VL_POWSS_QQQ(int obits, int, int rbits, QData lhs, QData rhs, bool lsign, + bool rsign) VL_MT_SAFE { + if (VL_UNLIKELY(rhs == 0)) return 1; + if (rsign && VL_SIGN_Q(rbits, rhs)) { + if (lhs == 0) { + return 0; // "X" + } else if (lhs == 1) { + return 1; + } else if (lsign && lhs == VL_MASK_Q(obits)) { // -1 + if (rhs & 1) { + return VL_MASK_Q(obits); // -1^odd=-1 + } else { + return 1; // -1^even=1 + } + } + return 0; + } + return VL_POW_QQQ(obits, rbits, rbits, lhs, rhs); +} +WDataOutP VL_POWSS_WWW(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, + WDataInP const rwp, bool lsign, bool rsign); +WDataOutP VL_POWSS_WWQ(int obits, int, int rbits, WDataOutP owp, WDataInP const lwp, QData rhs, + bool lsign, bool rsign); +QData VL_POWSS_QQW(int obits, int, int rbits, QData lhs, WDataInP const rwp, bool lsign, + bool rsign); + +//=================================================================== +// Concat/replication + +// INTERNAL: Stuff LHS bit 0++ into OUTPUT at specified offset +// ld may be "dirty", output is clean +static inline void _vl_insert_II(int, CData& lhsr, IData ld, int hbit, int lbit, + int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr 
= (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_II(int, SData& lhsr, IData ld, int hbit, int lbit, + int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_II(int, IData& lhsr, IData ld, int hbit, int lbit, + int rbits) VL_PURE { + const IData cleanmask = VL_MASK_I(rbits); + const IData insmask = (VL_MASK_I(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_QQ(int, QData& lhsr, QData ld, int hbit, int lbit, + int rbits) VL_PURE { + const QData cleanmask = VL_MASK_Q(rbits); + const QData insmask = (VL_MASK_Q(hbit - lbit + 1)) << lbit; + lhsr = (lhsr & ~insmask) | ((ld << lbit) & (insmask & cleanmask)); +} +static inline void _vl_insert_WI(int, WDataOutP owp, IData ld, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int roffset = VL_BITBIT_E(rbits); + const int hword = VL_BITWORD_E(hbit); + const int lword = VL_BITWORD_E(lbit); + const int rword = VL_BITWORD_E(rbits); + const EData cleanmask = hword == rword ? VL_MASK_E(roffset) : VL_MASK_E(0); + + if (hoffset == VL_SIZEBITS_E && loffset == 0) { + // Fast and common case, word based insertion + owp[VL_BITWORD_E(lbit)] = ld & cleanmask; + } else { + const EData lde = static_cast(ld); + if (hword == lword) { // know < EData bits because above checks it + // Assignment is contained within one word of destination + const EData insmask = (VL_MASK_E(hoffset - loffset + 1)) << loffset; + owp[lword] = (owp[lword] & ~insmask) | ((lde << loffset) & (insmask & cleanmask)); + } else { + // Assignment crosses a word boundary in destination + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; + const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; + const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword + owp[lword] = (owp[lword] & ~linsmask) | ((lde << loffset) & linsmask); + owp[hword] + = (owp[hword] & ~hinsmask) | ((lde >> nbitsonright) & (hinsmask & cleanmask)); + } + } +} + +// INTERNAL: Stuff large LHS bit 0++ into OUTPUT at specified offset +// lwp may be "dirty" +static inline void _vl_insert_WW(int, WDataOutP owp, WDataInP const lwp, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + const int hoffset = VL_BITBIT_E(hbit); + const int loffset = VL_BITBIT_E(lbit); + const int roffset = VL_BITBIT_E(rbits); + const int lword = VL_BITWORD_E(lbit); + const int hword = VL_BITWORD_E(hbit); + const int rword = VL_BITWORD_E(rbits); + const int words = VL_WORDS_I(hbit - lbit + 1); + // Cleaning mask, only applied to top word of the assignment. Is a no-op + // if we don't assign to the top word of the destination. + const EData cleanmask = hword == rword ? 
VL_MASK_E(roffset) : VL_MASK_E(0); + + if (hoffset == VL_SIZEBITS_E && loffset == 0) { + // Fast and common case, word based insertion + for (int i = 0; i < (words - 1); ++i) owp[lword + i] = lwp[i]; + owp[hword] = lwp[words - 1] & cleanmask; + } else if (loffset == 0) { + // Non-32bit, but nicely aligned, so stuff all but the last word + for (int i = 0; i < (words - 1); ++i) owp[lword + i] = lwp[i]; + // Know it's not a full word as above fast case handled it + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)); + owp[hword] = (owp[hword] & ~hinsmask) | (lwp[words - 1] & (hinsmask & cleanmask)); + } else { + const EData hinsmask = (VL_MASK_E(hoffset - 0 + 1)) << 0; + const EData linsmask = (VL_MASK_E((VL_EDATASIZE - 1) - loffset + 1)) << loffset; + const int nbitsonright + = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) + // Middle words + for (int i = 0; i < words; ++i) { + { // Lower word + const int oword = lword + i; + const EData d = lwp[i] << loffset; + const EData od = (owp[oword] & ~linsmask) | (d & linsmask); + if (oword == hword) { + owp[oword] = (owp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); + } else { + owp[oword] = od; + } + } + { // Upper word + const int oword = lword + i + 1; + if (oword <= hword) { + const EData d = lwp[i] >> nbitsonright; + const EData od = (d & ~linsmask) | (owp[oword] & linsmask); + if (oword == hword) { + owp[oword] = (owp[oword] & ~hinsmask) | (od & (hinsmask & cleanmask)); + } else { + owp[oword] = od; + } + } + } + } + } +} + +static inline void _vl_insert_WQ(int obits, WDataOutP owp, QData ld, int hbit, int lbit, + int rbits = 0) VL_MT_SAFE { + VlWide lwp; + VL_SET_WQ(lwp, ld); + _vl_insert_WW(obits, owp, lwp, hbit, lbit, rbits); +} + +// EMIT_RULE: VL_REPLICATE: oclean=clean>width32, dirty<=width32; lclean=clean; rclean==clean; +// RHS MUST BE CLEAN CONSTANT. +#define VL_REPLICATE_IOI(obits, lbits, rbits, ld, rep) (-(ld)) // Iff lbits==1 +#define VL_REPLICATE_QOI(obits, lbits, rbits, ld, rep) (-(static_cast(ld))) // Iff lbits==1 + +static inline IData VL_REPLICATE_III(int, int lbits, int, IData ld, IData rep) VL_PURE { + IData returndata = ld; + for (unsigned i = 1; i < rep; ++i) { + returndata = returndata << lbits; + returndata |= ld; + } + return returndata; +} +static inline QData VL_REPLICATE_QII(int, int lbits, int, IData ld, IData rep) VL_PURE { + QData returndata = ld; + for (unsigned i = 1; i < rep; ++i) { + returndata = returndata << lbits; + returndata |= static_cast(ld); + } + return returndata; +} +static inline WDataOutP VL_REPLICATE_WII(int obits, int lbits, int, WDataOutP owp, IData ld, + IData rep) VL_MT_SAFE { + owp[0] = ld; + for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WI(obits, owp, ld, i * lbits + lbits - 1, i * lbits); + } + return owp; +} +static inline WDataOutP VL_REPLICATE_WQI(int obits, int lbits, int, WDataOutP owp, QData ld, + IData rep) VL_MT_SAFE { + VL_SET_WQ(owp, ld); + for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WQ(obits, owp, ld, i * lbits + lbits - 1, i * lbits); + } + return owp; +} +static inline WDataOutP VL_REPLICATE_WWI(int obits, int lbits, int, WDataOutP owp, + WDataInP const lwp, IData rep) VL_MT_SAFE { + for (int i = 0; i < VL_WORDS_I(lbits); ++i) owp[i] = lwp[i]; + for (unsigned i = 1; i < rep; ++i) { + _vl_insert_WW(obits, owp, lwp, i * lbits + lbits - 1, i * lbits); + } + return owp; +} + +// Left stream operator. Output will always be clean. LHS and RHS must be clean. +// Special "fast" versions for slice sizes that are a power of 2. 
These use +// shifts and masks to execute faster than the slower for-loop approach where a +// subset of bits is copied in during each iteration. +static inline IData VL_STREAML_FAST_III(int, int lbits, int, IData ld, IData rd_log2) VL_PURE { + // Pre-shift bits in most-significant slice: + // + // If lbits is not a multiple of the slice size (i.e., lbits % rd != 0), + // then we end up with a "gap" in our reversed result. For example, if we + // have a 5-bit Verlilog signal (lbits=5) in an 8-bit C data type: + // + // ld = ---43210 + // + // (where numbers are the Verilog signal bit numbers and '-' is an unused bit). + // Executing the switch statement below with a slice size of two (rd=2, + // rd_log2=1) produces: + // + // ret = 1032-400 + // + // Pre-shifting the bits in the most-significant slice allows us to avoid + // this gap in the shuffled data: + // + // ld_adjusted = --4-3210 + // ret = 10324--- + IData ret = ld; + if (rd_log2) { + const vluint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); // max multiple of rd <= lbits + const vluint32_t lbitsRem = lbits - lbitsFloor; // number of bits in most-sig slice (MSS) + const IData msbMask = VL_MASK_I(lbitsRem) << lbitsFloor; // mask to sel only bits in MSS + ret = (ret & ~msbMask) | ((ret & msbMask) << ((VL_UL(1) << rd_log2) - lbitsRem)); + } + switch (rd_log2) { + case 0: ret = ((ret >> 1) & VL_UL(0x55555555)) | ((ret & VL_UL(0x55555555)) << 1); // FALLTHRU + case 1: ret = ((ret >> 2) & VL_UL(0x33333333)) | ((ret & VL_UL(0x33333333)) << 2); // FALLTHRU + case 2: ret = ((ret >> 4) & VL_UL(0x0f0f0f0f)) | ((ret & VL_UL(0x0f0f0f0f)) << 4); // FALLTHRU + case 3: ret = ((ret >> 8) & VL_UL(0x00ff00ff)) | ((ret & VL_UL(0x00ff00ff)) << 8); // FALLTHRU + case 4: ret = ((ret >> 16) | (ret << 16)); // FALLTHRU + default:; + } + return ret >> (VL_IDATASIZE - lbits); +} + +static inline QData VL_STREAML_FAST_QQI(int, int lbits, int, QData ld, IData rd_log2) VL_PURE { + // Pre-shift bits in most-significant slice (see comment in VL_STREAML_FAST_III) + QData ret = ld; + if (rd_log2) { + const vluint32_t lbitsFloor = lbits & ~VL_MASK_I(rd_log2); + const vluint32_t lbitsRem = lbits - lbitsFloor; + const QData msbMask = VL_MASK_Q(lbitsRem) << lbitsFloor; + ret = (ret & ~msbMask) | ((ret & msbMask) << ((1ULL << rd_log2) - lbitsRem)); + } + switch (rd_log2) { + case 0: + ret = (((ret >> 1) & 0x5555555555555555ULL) + | ((ret & 0x5555555555555555ULL) << 1)); // FALLTHRU + case 1: + ret = (((ret >> 2) & 0x3333333333333333ULL) + | ((ret & 0x3333333333333333ULL) << 2)); // FALLTHRU + case 2: + ret = (((ret >> 4) & 0x0f0f0f0f0f0f0f0fULL) + | ((ret & 0x0f0f0f0f0f0f0f0fULL) << 4)); // FALLTHRU + case 3: + ret = (((ret >> 8) & 0x00ff00ff00ff00ffULL) + | ((ret & 0x00ff00ff00ff00ffULL) << 8)); // FALLTHRU + case 4: + ret = (((ret >> 16) & 0x0000ffff0000ffffULL) + | ((ret & 0x0000ffff0000ffffULL) << 16)); // FALLTHRU + case 5: ret = ((ret >> 32) | (ret << 32)); // FALLTHRU + default:; + } + return ret >> (VL_QUADSIZE - lbits); +} + +// Regular "slow" streaming operators +static inline IData VL_STREAML_III(int, int lbits, int, IData ld, IData rd) VL_PURE { + IData ret = 0; + // Slice size should never exceed the lhs width + const IData mask = VL_MASK_I(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? 
ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + return ret; +} + +static inline QData VL_STREAML_QQI(int, int lbits, int, QData ld, IData rd) VL_PURE { + QData ret = 0; + // Slice size should never exceed the lhs width + const QData mask = VL_MASK_Q(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + return ret; +} + +static inline WDataOutP VL_STREAML_WWI(int, int lbits, int, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + VL_ZERO_W(lbits, owp); + // Slice size should never exceed the lhs width + const int ssize = (rd < static_cast(lbits)) ? rd : (static_cast(lbits)); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + for (int sbit = 0; sbit < ssize && sbit < lbits - istart; ++sbit) { + // Extract a single bit from lwp and shift it to the correct + // location for owp. + EData bit = (VL_BITRSHIFT_W(lwp, (istart + sbit)) & 1) << VL_BITBIT_E(ostart + sbit); + owp[VL_BITWORD_E(ostart + sbit)] |= bit; + } + } + return owp; +} + +// Because concats are common and wide, it's valuable to always have a clean output. +// Thus we specify inputs must be clean, so we don't need to clean the output. +// Note the bit shifts are always constants, so the adds in these constify out. +// Casts required, as args may be 8 bit entities, and need to shift to appropriate output size +#define VL_CONCAT_III(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QII(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QIQ(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QQI(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) +#define VL_CONCAT_QQQ(obits, lbits, rbits, ld, rd) \ + (static_cast(ld) << (rbits) | static_cast(rd)) + +static inline WDataOutP VL_CONCAT_WII(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + IData rd) VL_MT_SAFE { + owp[0] = rd; + for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WI(obits, owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWI(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, IData rd) VL_MT_SAFE { + owp[0] = rd; + for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WW(obits, owp, lwp, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WIW(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; i < VL_WORDS_I(rbits); ++i) owp[i] = rwp[i]; + for (int i = VL_WORDS_I(rbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WI(obits, owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WIQ(int obits, int lbits, int rbits, WDataOutP owp, IData ld, + QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WI(obits, owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQI(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + IData rd) VL_MT_SAFE { + owp[0] = rd; + for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WQ(obits, owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQQ(int obits, int lbits, 
int rbits, WDataOutP owp, QData ld, + QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WQ(obits, owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VL_SET_WQ(owp, rd); + for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WW(obits, owp, lwp, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WQW(int obits, int lbits, int rbits, WDataOutP owp, QData ld, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; i < VL_WORDS_I(rbits); ++i) owp[i] = rwp[i]; + for (int i = VL_WORDS_I(rbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WQ(obits, owp, ld, rbits + lbits - 1, rbits); + return owp; +} +static inline WDataOutP VL_CONCAT_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = 0; i < VL_WORDS_I(rbits); ++i) owp[i] = rwp[i]; + for (int i = VL_WORDS_I(rbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WW(obits, owp, lwp, rbits + lbits - 1, rbits); + return owp; +} + +//=================================================================== +// Shifts + +// Static shift, used by internal functions +// The output is the same as the input - it overlaps! +static inline void _vl_shiftl_inplace_w(int obits, WDataOutP iowp, + IData rd /*1 or 4*/) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + const EData linsmask = VL_MASK_E(rd); + for (int i = words - 1; i >= 1; --i) { + iowp[i] + = ((iowp[i] << rd) & ~linsmask) | ((iowp[i - 1] >> (VL_EDATASIZE - rd)) & linsmask); + } + iowp[0] = ((iowp[0] << rd) & ~linsmask); + iowp[VL_WORDS_I(obits) - 1] &= VL_MASK_E(obits); +} + +// EMIT_RULE: VL_SHIFTL: oclean=lclean; rclean==clean; +// Important: Unlike most other funcs, the shift might well be a computed +// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) 
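To make the note above concrete, here is a hedged sketch of how an oversized, runtime-computed shift amount behaves with the helpers defined below (the 8-bit width and the names are hypothetical; plain C++ << is undefined at or past the operand width, so the helpers guard and clean instead):

// Sketch only: an 8-bit left shift by a 64-bit computed amount.
static IData shiftl_demo(IData lhs8, QData amount) {
    // VL_SHIFTL_IIQ returns 0 outright once amount >= 32; over-shifts in the
    // 8..31 range are masked away by its final VL_CLEAN_II to obits = 8.
    return VL_SHIFTL_IIQ(8, 8, 64, lhs8 & VL_MASK_I(8), amount);
}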
+static inline WDataOutP VL_SHIFTL_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); + const int bit_shift = VL_BITBIT_E(rd); + if (rd >= static_cast(obits)) { // rd may be huge with MSB set + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else if (bit_shift == 0) { // Aligned word shift (<<0,<<32,<<64 etc) + for (int i = 0; i < word_shift; ++i) owp[i] = 0; + for (int i = word_shift; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i - word_shift]; + } else { + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + _vl_insert_WW(obits, owp, lwp, obits - 1, rd); + } + return owp; +} +static inline WDataOutP VL_SHIFTL_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return VL_ZERO_W(obits, owp); + } + } + return VL_SHIFTL_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTL_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTL_WWW(obits, lbits, rbits, owp, lwp, rwp); +} +static inline IData VL_SHIFTL_IIW(int obits, int, int rbits, IData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + return VL_CLEAN_II(obits, obits, lhs << rwp[0]); +} +static inline IData VL_SHIFTL_IIQ(int obits, int, int, IData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return VL_CLEAN_II(obits, obits, lhs << rhs); +} +static inline QData VL_SHIFTL_QQW(int obits, int, int rbits, QData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + // Above checks rwp[1]==0 so not needed in below shift + return VL_CLEAN_QQ(obits, obits, lhs << (static_cast(rwp[0]))); +} +static inline QData VL_SHIFTL_QQQ(int obits, int, int, QData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return VL_CLEAN_QQ(obits, obits, lhs << rhs); +} + +// EMIT_RULE: VL_SHIFTR: oclean=lclean; rclean==clean; +// Important: Unlike most other funcs, the shift might well be a computed +// expression. Thus consider this when optimizing. (And perhaps have 2 funcs?) 
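Likewise for the right-shift helpers defined below, a hedged sketch with a hypothetical 128-bit signal: the shift amount is passed whole, and an amount at or past the width (including any set bit in the upper words of a wide amount) yields all zeros:

// Sketch only: out = a >> b for a 128-bit 'a' (VlWide<4>) and a 64-bit 'b'.
static void shiftr_demo(VlWide<4>& out, const VlWide<4>& a, QData b) {
    VL_SHIFTR_WWQ(128, 128, 64, out, a, b);  // zeroes 'out' whenever b >= 128
}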
+static inline WDataOutP VL_SHIFTR_WWI(int obits, int, int, WDataOutP owp, WDataInP const lwp, + IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); // Maybe 0 + const int bit_shift = VL_BITBIT_E(rd); + if (rd >= static_cast(obits)) { // rd may be huge with MSB set + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) + const int copy_words = (VL_WORDS_I(obits) - word_shift); + for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; + for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } else { + const int loffset = rd & VL_SIZEBITS_E; + const int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword (know + // loffset!=0) Middle words + const int words = VL_WORDS_I(obits - rd); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int upperword = i + word_shift + 1; + if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; + } + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } + return owp; +} +static inline WDataOutP VL_SHIFTR_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return VL_ZERO_W(obits, owp); + } + } + return VL_SHIFTR_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTR_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTR_WWW(obits, lbits, rbits, owp, lwp, rwp); +} + +static inline IData VL_SHIFTR_IIW(int obits, int, int rbits, IData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + return VL_CLEAN_II(obits, obits, lhs >> rwp[0]); +} +static inline QData VL_SHIFTR_QQW(int obits, int, int rbits, QData lhs, + WDataInP const rwp) VL_MT_SAFE { + for (int i = 1; i < VL_WORDS_I(rbits); ++i) { + if (VL_UNLIKELY(rwp[i])) { // Huge shift 1>>32 or more + return 0; + } + } + // Above checks rwp[1]==0 so not needed in below shift + return VL_CLEAN_QQ(obits, obits, lhs >> (static_cast(rwp[0]))); +} +static inline IData VL_SHIFTR_IIQ(int obits, int, int, IData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_IDATASIZE)) return 0; + return VL_CLEAN_QQ(obits, obits, lhs >> rhs); +} +static inline QData VL_SHIFTR_QQQ(int obits, int, int, QData lhs, QData rhs) VL_MT_SAFE { + if (VL_UNLIKELY(rhs >= VL_QUADSIZE)) return 0; + return VL_CLEAN_QQ(obits, obits, lhs >> rhs); +} + +// EMIT_RULE: VL_SHIFTRS: oclean=false; lclean=clean, rclean==clean; +static inline IData VL_SHIFTRS_III(int obits, int lbits, int, IData lhs, IData rhs) VL_PURE { + // Note the C standard does not specify the >> operator as a arithmetic shift! + // IEEE says signed if output signed, but bit position from lbits; + // must use lbits for sign; lbits might != obits, + // an EXTEND(SHIFTRS(...)) can became a SHIFTRS(...) 
within same 32/64 bit word length + const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + const IData signext = ~(VL_MASK_I(lbits) >> rhs); // One with bits where we've shifted "past" + return (lhs >> rhs) | (sign & VL_CLEAN_II(obits, obits, signext)); +} +static inline QData VL_SHIFTRS_QQI(int obits, int lbits, int, QData lhs, IData rhs) VL_PURE { + const QData sign = -(lhs >> (lbits - 1)); + const QData signext = ~(VL_MASK_Q(lbits) >> rhs); + return (lhs >> rhs) | (sign & VL_CLEAN_QQ(obits, obits, signext)); +} +static inline IData VL_SHIFTRS_IQI(int obits, int lbits, int rbits, QData lhs, IData rhs) VL_PURE { + return static_cast(VL_SHIFTRS_QQI(obits, lbits, rbits, lhs, rhs)); +} +static inline WDataOutP VL_SHIFTRS_WWI(int obits, int lbits, int, WDataOutP owp, + WDataInP const lwp, IData rd) VL_MT_SAFE { + const int word_shift = VL_BITWORD_E(rd); + const int bit_shift = VL_BITBIT_E(rd); + const int lmsw = VL_WORDS_I(obits) - 1; + const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]); + if (rd >= static_cast(obits)) { // Shifting past end, sign in all of lbits + for (int i = 0; i <= lmsw; ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } else if (bit_shift == 0) { // Aligned word shift (>>0,>>32,>>64 etc) + const int copy_words = (VL_WORDS_I(obits) - word_shift); + for (int i = 0; i < copy_words; ++i) owp[i] = lwp[i + word_shift]; + if (copy_words >= 0) owp[copy_words - 1] |= ~VL_MASK_E(obits) & sign; + for (int i = copy_words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } else { + const int loffset = rd & VL_SIZEBITS_E; + int nbitsonright = VL_EDATASIZE - loffset; // bits that end up in lword (know loffset!=0) + // Middle words + const int words = VL_WORDS_I(obits - rd); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int upperword = i + word_shift + 1; + if (upperword < VL_WORDS_I(obits)) owp[i] |= lwp[upperword] << nbitsonright; + } + if (words) owp[words - 1] |= sign & ~VL_MASK_E(obits - loffset); + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + } + return owp; +} +static inline WDataOutP VL_SHIFTRS_WWW(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= obits)) { + const int lmsw = VL_WORDS_I(obits) - 1; + const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]); + for (int j = 0; j <= lmsw; ++j) owp[j] = sign; + owp[lmsw] &= VL_MASK_E(lbits); + return owp; + } + return VL_SHIFTRS_WWI(obits, lbits, 32, owp, lwp, rwp[0]); +} +static inline WDataOutP VL_SHIFTRS_WWQ(int obits, int lbits, int rbits, WDataOutP owp, + WDataInP const lwp, QData rd) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rd); + return VL_SHIFTRS_WWW(obits, lbits, rbits, owp, lwp, rwp); +} +static inline IData VL_SHIFTRS_IIW(int obits, int lbits, int rbits, IData lhs, + WDataInP const rwp) VL_MT_SAFE { + EData overshift = 0; // Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= obits)) { + const IData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + return VL_CLEAN_II(obits, obits, sign); + } + return VL_SHIFTRS_III(obits, lbits, 32, lhs, rwp[0]); +} +static inline QData VL_SHIFTRS_QQW(int obits, int lbits, int rbits, QData lhs, + WDataInP const rwp) VL_MT_SAFE { + EData overshift = 0; // 
Huge shift 1>>32 or more + for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i]; + if (VL_UNLIKELY(overshift || rwp[0] >= obits)) { + const QData sign = -(lhs >> (lbits - 1)); // ffff_ffff if negative + return VL_CLEAN_QQ(obits, obits, sign); + } + return VL_SHIFTRS_QQI(obits, lbits, 32, lhs, rwp[0]); +} +static inline IData VL_SHIFTRS_IIQ(int obits, int lbits, int rbits, IData lhs, + QData rhs) VL_MT_SAFE { + VlWide rwp; + VL_SET_WQ(rwp, rhs); + return VL_SHIFTRS_IIW(obits, lbits, rbits, lhs, rwp); +} +static inline QData VL_SHIFTRS_QQQ(int obits, int lbits, int rbits, QData lhs, QData rhs) VL_PURE { + VlWide rwp; + VL_SET_WQ(rwp, rhs); + return VL_SHIFTRS_QQW(obits, lbits, rbits, lhs, rwp); +} + +//=================================================================== +// Bit selection + +// EMIT_RULE: VL_BITSEL: oclean=dirty; rclean==clean; +#define VL_BITSEL_IIII(obits, lbits, rbits, zbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_QIII(obits, lbits, rbits, zbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_QQII(obits, lbits, rbits, zbits, lhs, rhs) ((lhs) >> (rhs)) +#define VL_BITSEL_IQII(obits, lbits, rbits, zbits, lhs, rhs) (static_cast((lhs) >> (rhs))) + +static inline IData VL_BITSEL_IWII(int, int lbits, int, int, WDataInP const lwp, + IData rd) VL_MT_SAFE { + int word = VL_BITWORD_E(rd); + if (VL_UNLIKELY(rd > static_cast(lbits))) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. + // We return all 1's as that's more likely to find bugs (?) than 0's. + } else { + return (lwp[word] >> VL_BITBIT_E(rd)); + } +} + +// EMIT_RULE: VL_RANGE: oclean=lclean; out=dirty +// & MUST BE CLEAN (currently constant) +#define VL_SEL_IIII(obits, lbits, rbits, tbits, lhs, lsb, width) ((lhs) >> (lsb)) +#define VL_SEL_QQII(obits, lbits, rbits, tbits, lhs, lsb, width) ((lhs) >> (lsb)) +#define VL_SEL_IQII(obits, lbits, rbits, tbits, lhs, lsb, width) \ + (static_cast((lhs) >> (lsb))) + +static inline IData VL_SEL_IWII(int, int lbits, int, int, WDataInP const lwp, IData lsb, + IData width) VL_MT_SAFE { + int msb = lsb + width - 1; + if (VL_UNLIKELY(msb >= lbits)) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. + } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { + return VL_BITRSHIFT_W(lwp, lsb); + } else { + // 32 bit extraction may span two words + int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); // bits that come from low word + return ((lwp[VL_BITWORD_E(msb)] << nbitsfromlow) | VL_BITRSHIFT_W(lwp, lsb)); + } +} + +static inline QData VL_SEL_QWII(int, int lbits, int, int, WDataInP const lwp, IData lsb, + IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + if (VL_UNLIKELY(msb > lbits)) { + return ~0; // Spec says you can go outside the range of a array. Don't coredump if so. 
+ } else if (VL_BITWORD_E(msb) == VL_BITWORD_E(static_cast(lsb))) { + return VL_BITRSHIFT_W(lwp, lsb); + } else if (VL_BITWORD_E(msb) == 1 + VL_BITWORD_E(static_cast(lsb))) { + const int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); + const QData hi = (lwp[VL_BITWORD_E(msb)]); + const QData lo = VL_BITRSHIFT_W(lwp, lsb); + return (hi << nbitsfromlow) | lo; + } else { + // 64 bit extraction may span three words + int nbitsfromlow = VL_EDATASIZE - VL_BITBIT_E(lsb); + const QData hi = (lwp[VL_BITWORD_E(msb)]); + const QData mid = (lwp[VL_BITWORD_E(lsb) + 1]); + const QData lo = VL_BITRSHIFT_W(lwp, lsb); + return (hi << (nbitsfromlow + VL_EDATASIZE)) | (mid << nbitsfromlow) | lo; + } +} + +static inline WDataOutP VL_SEL_WWII(int obits, int lbits, int, int, WDataOutP owp, + WDataInP const lwp, IData lsb, IData width) VL_MT_SAFE { + const int msb = lsb + width - 1; + const int word_shift = VL_BITWORD_E(lsb); + if (VL_UNLIKELY(msb > lbits)) { // Outside bounds, + for (int i = 0; i < VL_WORDS_I(obits) - 1; ++i) owp[i] = ~0; + owp[VL_WORDS_I(obits) - 1] = VL_MASK_E(obits); + } else if (VL_BITBIT_E(lsb) == 0) { + // Just a word extract + for (int i = 0; i < VL_WORDS_I(obits); ++i) owp[i] = lwp[i + word_shift]; + } else { + // Not a _vl_insert because the bits come from any bit number and goto bit 0 + const int loffset = lsb & VL_SIZEBITS_E; + const int nbitsfromlow = VL_EDATASIZE - loffset; // bits that end up in lword (know + // loffset!=0) Middle words + const int words = VL_WORDS_I(msb - lsb + 1); + for (int i = 0; i < words; ++i) { + owp[i] = lwp[i + word_shift] >> loffset; + const int upperword = i + word_shift + 1; + if (upperword <= static_cast(VL_BITWORD_E(msb))) { + owp[i] |= lwp[upperword] << nbitsfromlow; + } + } + for (int i = words; i < VL_WORDS_I(obits); ++i) owp[i] = 0; + } + return owp; +} + +//====================================================================== +// Math needing insert/select + +// Return QData from double (numeric) +// EMIT_RULE: VL_RTOIROUND_Q_D: oclean=dirty; lclean==clean/real +static inline QData VL_RTOIROUND_Q_D(int, double lhs) VL_PURE { + // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa + // This does not need to support subnormals as they are sub-integral + lhs = VL_ROUND(lhs); + if (lhs == 0.0) return 0; + const QData q = VL_CVT_Q_D(lhs); + const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; + const vluint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); + vluint64_t out = 0; + if (lsb < 0) { + out = mantissa >> -lsb; + } else if (lsb < 64) { + out = mantissa << lsb; + } + if (lhs < 0) out = -out; + return out; +} +static inline IData VL_RTOIROUND_I_D(int bits, double lhs) VL_PURE { + return static_cast(VL_RTOIROUND_Q_D(bits, lhs)); +} +static inline WDataOutP VL_RTOIROUND_W_D(int obits, WDataOutP owp, double lhs) VL_PURE { + // IEEE format: [63]=sign [62:52]=exp+1023 [51:0]=mantissa + // This does not need to support subnormals as they are sub-integral + lhs = VL_ROUND(lhs); + VL_ZERO_W(obits, owp); + if (lhs == 0.0) return owp; + const QData q = VL_CVT_Q_D(lhs); + const int lsb = static_cast((q >> 52ULL) & VL_MASK_Q(11)) - 1023 - 52; + const vluint64_t mantissa = (q & VL_MASK_Q(52)) | (1ULL << 52); + if (lsb < 0) { + VL_SET_WQ(owp, mantissa >> -lsb); + } else if (lsb < obits) { + _vl_insert_WQ(obits, owp, mantissa, lsb + 52, lsb); + } + if (lhs < 0) VL_NEGATE_INPLACE_W(VL_WORDS_I(obits), owp); + return owp; +} + +//====================================================================== +// Range assignments + +// 
EMIT_RULE: VL_ASSIGNRANGE: rclean=dirty; +static inline void VL_ASSIGNSEL_IIII(int rbits, int obits, int lsb, CData& lhsr, + IData rhs) VL_PURE { + _vl_insert_II(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_IIII(int rbits, int obits, int lsb, SData& lhsr, + IData rhs) VL_PURE { + _vl_insert_II(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_IIII(int rbits, int obits, int lsb, IData& lhsr, + IData rhs) VL_PURE { + _vl_insert_II(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_QIII(int rbits, int obits, int lsb, QData& lhsr, + IData rhs) VL_PURE { + _vl_insert_QQ(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_QQII(int rbits, int obits, int lsb, QData& lhsr, + QData rhs) VL_PURE { + _vl_insert_QQ(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_QIIQ(int rbits, int obits, int lsb, QData& lhsr, + QData rhs) VL_PURE { + _vl_insert_QQ(obits, lhsr, rhs, lsb + obits - 1, lsb, rbits); +} +// static inline void VL_ASSIGNSEL_IIIW(int obits, int lsb, IData& lhsr, WDataInP const rwp) +// VL_MT_SAFE { Illegal, as lhs width >= rhs width +static inline void VL_ASSIGNSEL_WIII(int rbits, int obits, int lsb, WDataOutP owp, + IData rhs) VL_MT_SAFE { + _vl_insert_WI(obits, owp, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_WIIQ(int rbits, int obits, int lsb, WDataOutP owp, + QData rhs) VL_MT_SAFE { + _vl_insert_WQ(obits, owp, rhs, lsb + obits - 1, lsb, rbits); +} +static inline void VL_ASSIGNSEL_WIIW(int rbits, int obits, int lsb, WDataOutP owp, + WDataInP const rwp) VL_MT_SAFE { + _vl_insert_WW(obits, owp, rwp, lsb + obits - 1, lsb, rbits); +} + +//====================================================================== +// Triops + +static inline WDataOutP VL_COND_WIWW(int obits, int, int, int, WDataOutP owp, int cond, + WDataInP const w1p, WDataInP const w2p) VL_MT_SAFE { + const int words = VL_WORDS_I(obits); + for (int i = 0; i < words; ++i) owp[i] = cond ? w1p[i] : w2p[i]; + return owp; +} + +//====================================================================== +// Constification + +// VL_CONST_W_#X(int obits, WDataOutP owp, IData data0, .... IData data(#-1)) +// Sets wide vector words to specified constant words. +// These macros are used when o might represent more words then are given as constants, +// hence all upper words must be zeroed. 
+// If changing the number of functions here, also change EMITCINLINES_NUM_CONSTW + +#define VL_C_END_(obits, wordsSet) \ + for (int i = (wordsSet); i < VL_WORDS_I(obits); ++i) o[i] = 0; \ + return o + +// clang-format off +static inline WDataOutP VL_CONST_W_1X(int obits, WDataOutP o, EData d0) VL_MT_SAFE { + o[0] = d0; + VL_C_END_(obits, 1); +} +static inline WDataOutP VL_CONST_W_2X(int obits, WDataOutP o, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; + VL_C_END_(obits, 2); +} +static inline WDataOutP VL_CONST_W_3X(int obits, WDataOutP o, EData d2, EData d1, + EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; + VL_C_END_(obits,3); +} +static inline WDataOutP VL_CONST_W_4X(int obits, WDataOutP o, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + VL_C_END_(obits,4); +} +static inline WDataOutP VL_CONST_W_5X(int obits, WDataOutP o, + EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; + VL_C_END_(obits,5); +} +static inline WDataOutP VL_CONST_W_6X(int obits, WDataOutP o, + EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; + VL_C_END_(obits,6); +} +static inline WDataOutP VL_CONST_W_7X(int obits, WDataOutP o, + EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; + VL_C_END_(obits,7); +} +static inline WDataOutP VL_CONST_W_8X(int obits, WDataOutP o, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; + VL_C_END_(obits,8); +} +// +static inline WDataOutP VL_CONSTHI_W_1X(int obits, int lsb, WDataOutP obase, + EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; + VL_C_END_(obits, VL_WORDS_I(lsb) + 1); +} +static inline WDataOutP VL_CONSTHI_W_2X(int obits, int lsb, WDataOutP obase, + EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; + VL_C_END_(obits, VL_WORDS_I(lsb) + 2); +} +static inline WDataOutP VL_CONSTHI_W_3X(int obits, int lsb, WDataOutP obase, + EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; + VL_C_END_(obits, VL_WORDS_I(lsb) + 3); +} +static inline WDataOutP VL_CONSTHI_W_4X(int obits, int lsb, WDataOutP obase, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + VL_C_END_(obits, VL_WORDS_I(lsb) + 4); +} +static inline WDataOutP VL_CONSTHI_W_5X(int obits, int lsb, WDataOutP obase, + EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; + VL_C_END_(obits, VL_WORDS_I(lsb) + 5); +} +static inline WDataOutP VL_CONSTHI_W_6X(int obits, int lsb, WDataOutP obase, + EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; + VL_C_END_(obits, VL_WORDS_I(lsb) + 6); +} +static inline WDataOutP VL_CONSTHI_W_7X(int obits, int lsb, WDataOutP obase, + EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + 
VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; + VL_C_END_(obits, VL_WORDS_I(lsb) + 7); +} +static inline WDataOutP VL_CONSTHI_W_8X(int obits, int lsb, WDataOutP obase, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; + o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; + VL_C_END_(obits, VL_WORDS_I(lsb) + 8); +} + +#undef VL_C_END_ + +// Partial constant, lower words of vector wider than 8*32, starting at bit number lsb +static inline void VL_CONSTLO_W_8X(int lsb, WDataOutP obase, + EData d7, EData d6, EData d5, EData d4, + EData d3, EData d2, EData d1, EData d0) VL_MT_SAFE { + WDataOutP o = obase + VL_WORDS_I(lsb); + o[0] = d0; o[1] = d1; o[2] = d2; o[3] = d3; o[4] = d4; o[5] = d5; o[6] = d6; o[7] = d7; +} +// clang-format on + +//====================================================================== +// Strings + +extern std::string VL_PUTC_N(const std::string& lhs, IData rhs, CData ths) VL_PURE; +extern CData VL_GETC_N(const std::string& lhs, IData rhs) VL_PURE; +extern std::string VL_SUBSTR_N(const std::string& lhs, IData rhs, IData ths) VL_PURE; + +inline IData VL_CMP_NN(const std::string& lhs, const std::string& rhs, bool ignoreCase) VL_PURE { + // SystemVerilog does not allow a string variable to contain '\0'. + // So C functions such as strcmp() can correctly compare strings. + if (ignoreCase) { + return VL_STRCASECMP(lhs.c_str(), rhs.c_str()); + } else { + return std::strcmp(lhs.c_str(), rhs.c_str()); + } +} + +extern IData VL_ATOI_N(const std::string& str, int base) VL_PURE; + +extern IData VL_FGETS_NI(std::string& dest, IData fpi); + +//====================================================================== +// Conversion functions + +extern std::string VL_CVT_PACK_STR_NW(int lwords, const WDataInP lwp) VL_MT_SAFE; +inline std::string VL_CVT_PACK_STR_NQ(QData lhs) VL_PURE { + VlWide lw; + VL_SET_WQ(lw, lhs); + return VL_CVT_PACK_STR_NW(VL_WQ_WORDS_E, lw); +} +inline std::string VL_CVT_PACK_STR_NN(const std::string& lhs) VL_PURE { return lhs; } +inline std::string& VL_CVT_PACK_STR_NN(std::string& lhs) VL_PURE { return lhs; } +inline std::string VL_CVT_PACK_STR_NI(IData lhs) VL_PURE { + VlWide lw; + VL_SET_WI(lw, lhs); + return VL_CVT_PACK_STR_NW(1, lw); +} +inline std::string VL_CONCATN_NNN(const std::string& lhs, const std::string& rhs) VL_PURE { + return lhs + rhs; +} +inline std::string VL_REPLICATEN_NNQ(int, int, int, const std::string& lhs, IData rep) VL_PURE { + std::string out; + out.reserve(lhs.length() * rep); + for (unsigned times = 0; times < rep; ++times) out += lhs; + return out; +} +inline std::string VL_REPLICATEN_NNI(int obits, int lbits, int rbits, const std::string& lhs, + IData rep) VL_PURE { + return VL_REPLICATEN_NNQ(obits, lbits, rbits, lhs, rep); +} + +inline IData VL_LEN_IN(const std::string& ld) { return ld.length(); } +extern std::string VL_TOLOWER_NN(const std::string& ld); +extern std::string VL_TOUPPER_NN(const std::string& ld); + +extern IData VL_FERROR_IN(IData fpi, std::string& outputr) VL_MT_SAFE; +extern IData VL_FOPEN_NN(const std::string& filename, const std::string& mode) VL_MT_SAFE; +extern IData VL_FOPEN_MCD_N(const std::string& filename) VL_MT_SAFE; +extern void VL_READMEM_N(bool hex, int bits, QData depth, int array_lsb, + const std::string& filename, void* memp, QData start, + QData end) VL_MT_SAFE; +extern void VL_WRITEMEM_N(bool hex, int bits, QData depth, int 
array_lsb, + const std::string& filename, const void* memp, QData start, + QData end) VL_MT_SAFE; +extern IData VL_SSCANF_INX(int lbits, const std::string& ld, const char* formatp, ...) VL_MT_SAFE; +extern void VL_SFORMAT_X(int obits_ignored, std::string& output, const char* formatp, + ...) VL_MT_SAFE; +extern std::string VL_SFORMATF_NX(const char* formatp, ...) VL_MT_SAFE; +extern void VL_TIMEFORMAT_IINI(int units, int precision, const std::string& suffix, int width, + VerilatedContext* contextp) VL_MT_SAFE; +extern IData VL_VALUEPLUSARGS_INW(int rbits, const std::string& ld, WDataOutP rwp) VL_MT_SAFE; +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, CData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, SData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, IData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = rwp[0]; + return got; +} +inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, QData& rdr) VL_MT_SAFE { + VlWide<2> rwp; + IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_SET_QW(rwp); + return got; +} +inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, double& rdr) VL_MT_SAFE { + VlWide<2> rwp; + IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); + if (got) rdr = VL_CVT_D_Q(VL_SET_QW(rwp)); + return got; +} +extern IData VL_VALUEPLUSARGS_INN(int, const std::string& ld, std::string& rdr) VL_MT_SAFE; + +//====================================================================== + +#endif // Guard diff --git a/include/verilated_heavy.h b/include/verilated_heavy.h index 598a99400..80129772f 100644 --- a/include/verilated_heavy.h +++ b/include/verilated_heavy.h @@ -12,966 +12,20 @@ //************************************************************************* /// /// \file -/// \brief Verilated string and data-type header +/// \brief Verilated old string and data-type header /// -/// This file is included automatically by Verilator at the top of -/// all C++ files it generates. It is used when strings or other -/// heavyweight types are required; these contents are not part of -/// verilated.h to save compile time when such types aren't used. +/// This file is deprecated, and provided for backwards compatibility. +/// Include verilated.h instead. 
/// //************************************************************************* #ifndef VERILATOR_VERILATED_HEAVY_H_ #define VERILATOR_VERILATED_HEAVY_H_ +#ifdef VL_NO_LEGACY +#error "Include instead of " +#endif + #include "verilated.h" -#include -#include -#include -#include -#include -#include -#include -#include - -//=================================================================== -// String formatters (required by below containers) - -extern std::string VL_TO_STRING(CData lhs); -extern std::string VL_TO_STRING(SData lhs); -extern std::string VL_TO_STRING(IData lhs); -extern std::string VL_TO_STRING(QData lhs); -inline std::string VL_TO_STRING(const std::string& obj) { return "\"" + obj + "\""; } -extern std::string VL_TO_STRING_W(int words, const WDataInP obj); - -//=================================================================== -// Shuffle RNG - -class VlURNG final { -public: - using result_type = size_t; - static constexpr size_t min() { return 0; } - static constexpr size_t max() { return 1ULL << 31; } - size_t operator()() { return VL_MASK_I(31) & VL_RANDOM_I(32); } -}; - -//=================================================================== -// Readmem/Writemem operation classes - -class VlReadMem final { - bool m_hex; // Hex format - int m_bits; // Bit width of values - const std::string& m_filename; // Filename - QData m_end; // End address (as specified by user) - FILE* m_fp; // File handle for filename - QData m_addr; // Next address to read - int m_linenum; // Line number last read from file -public: - VlReadMem(bool hex, int bits, const std::string& filename, QData start, QData end); - ~VlReadMem(); - bool isOpen() const { return m_fp != nullptr; } - int linenum() const { return m_linenum; } - bool get(QData& addrr, std::string& valuer); - void setData(void* valuep, const std::string& rhs); -}; - -class VlWriteMem final { - bool m_hex; // Hex format - int m_bits; // Bit width of values - FILE* m_fp; // File handle for filename - QData m_addr; // Next address to write -public: - VlWriteMem(bool hex, int bits, const std::string& filename, QData start, QData end); - ~VlWriteMem(); - bool isOpen() const { return m_fp != nullptr; } - void print(QData addr, bool addrstamp, const void* valuep); -}; - -//=================================================================== -/// Verilog wide packed bit container. -/// Similar to std::array, but lighter weight, only methods needed -/// by Verilator, to help compile time. -/// -/// A 'struct' as we want this to be an aggregate type that allows -/// static aggregate initialization. Consider data members private. -/// -/// For example a Verilog "bit [94:0]" will become a VlWide<3> because 3*32 -/// bits are needed to hold the 95 bits. The MSB (bit 96) must always be -/// zero in memory, but during intermediate operations in the Verilated -/// internals is unpredictable. - -template struct VlWide final { - // MEMBERS - // This should be the only data member, otherwise generated static initializers need updating - EData m_storage[T_Words]; // Contents of the packed array - - // CONSTRUCTORS - // Default constructors and destructor are used. Note however that C++20 requires that - // aggregate types do not have a user declared constructor, not even an explicitly defaulted - // one. - - // OPERATOR METHODS - // Default copy assignment operators are used. 
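// Illustrative sketch, not part of the patch: a minimal use of VlWide, which
// this change moves verbatim into verilated_types.h further down. A Verilog
// "bit [94:0]" lands in a VlWide<3>; the conversion operators declared just
// below let it decay to WDataOutP/WDataInP so the word-based VL_* helpers can
// operate on it directly.
#include "verilated.h"

static std::string exampleWide() {
    VlWide<3> w;                 // 3 x 32-bit words holding 95 significant bits
    w.at(0) = 0xdeadbeefU;       // bits 31:0
    w.at(1) = 0x12345678U;       // bits 63:32
    w.at(2) = 0x7fffffffU;       // bits 94:64; unused top storage bit kept zero
    return VL_TO_STRING_W(3, w.data());  // format all words, e.g. for debug printing
}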
- operator WDataOutP() { return &m_storage[0]; } // This also allows [] - operator WDataInP() const { return &m_storage[0]; } // This also allows [] - - // METHODS - const EData& at(size_t index) const { return m_storage[index]; } - EData& at(size_t index) { return m_storage[index]; } - WData* data() { return &m_storage[0]; } - const WData* data() const { return &m_storage[0]; } - bool operator<(const VlWide& rhs) const { - return VL_LT_W(T_Words, data(), rhs.data()); - } -}; - -// Convert a C array to std::array reference by pointer magic, without copy. -// Data type (second argument) is so the function template can automatically generate. -template -VlWide& VL_CVT_W_A(const WDataInP inp, const VlWide&) { - return *((VlWide*)inp); -} - -template std::string VL_TO_STRING(const VlWide& obj) { - return VL_TO_STRING_W(T_Words, obj.data()); -} - -//=================================================================== -// Verilog queue and dynamic array container -// There are no multithreaded locks on this; the base variable must -// be protected by other means -// -// Bound here is the maximum size() allowed, e.g. 1 + SystemVerilog bound -// For dynamic arrays it is always zero -template class VlQueue final { -private: - // TYPES - using Deque = std::deque; - -public: - using const_iterator = typename Deque::const_iterator; - -private: - // MEMBERS - Deque m_deque; // State of the assoc array - T_Value m_defaultValue; // Default value - -public: - // CONSTRUCTORS - // m_defaultValue isn't defaulted. Caller's constructor must do it. - VlQueue() = default; - ~VlQueue() = default; - VlQueue(const VlQueue&) = default; - VlQueue(VlQueue&&) = default; - VlQueue& operator=(const VlQueue&) = default; - VlQueue& operator=(VlQueue&&) = default; - - // Standard copy constructor works. Verilog: assoca = assocb - // Also must allow conversion from a different T_MaxSize queue - template VlQueue operator=(const VlQueue& rhs) { - m_deque = rhs.privateDeque(); - if (VL_UNLIKELY(T_MaxSize && T_MaxSize < m_deque.size())) m_deque.resize(T_MaxSize - 1); - return *this; - } - - static VlQueue cons(const T_Value& lhs) { - VlQueue out; - out.push_back(lhs); - return out; - } - static VlQueue cons(const T_Value& lhs, const T_Value& rhs) { - VlQueue out; - out.push_back(rhs); - out.push_back(lhs); - return out; - } - static VlQueue cons(const VlQueue& lhs, const T_Value& rhs) { - VlQueue out = lhs; - out.push_front(rhs); - return out; - } - static VlQueue cons(const T_Value& lhs, const VlQueue& rhs) { - VlQueue out = rhs; - out.push_back(lhs); - return out; - } - static VlQueue cons(const VlQueue& lhs, const VlQueue& rhs) { - VlQueue out = rhs; - for (const auto& i : lhs.m_deque) out.push_back(i); - return out; - } - - // METHODS - T_Value& atDefault() { return m_defaultValue; } - const T_Value& atDefault() const { return m_defaultValue; } - const Deque& privateDeque() const { return m_deque; } - - // Size. Verilog: function int size(), or int num() - int size() const { return m_deque.size(); } - // Clear array. 
Verilog: function void delete([input index]) - void clear() { m_deque.clear(); } - void erase(vlsint32_t index) { - if (VL_LIKELY(index >= 0 && index < m_deque.size())) - m_deque.erase(m_deque.begin() + index); - } - - // Dynamic array new[] becomes a renew() - void renew(size_t size) { - clear(); - m_deque.resize(size, atDefault()); - } - // Dynamic array new[]() becomes a renew_copy() - void renew_copy(size_t size, const VlQueue& rhs) { - if (size == 0) { - clear(); - } else { - *this = rhs; - m_deque.resize(size, atDefault()); - } - } - - // function void q.push_front(value) - void push_front(const T_Value& value) { - m_deque.push_front(value); - if (VL_UNLIKELY(T_MaxSize != 0 && m_deque.size() > T_MaxSize)) m_deque.pop_back(); - } - // function void q.push_back(value) - void push_back(const T_Value& value) { - if (VL_LIKELY(T_MaxSize == 0 || m_deque.size() < T_MaxSize)) m_deque.push_back(value); - } - // function value_t q.pop_front(); - T_Value pop_front() { - if (m_deque.empty()) return m_defaultValue; - T_Value v = m_deque.front(); - m_deque.pop_front(); - return v; - } - // function value_t q.pop_back(); - T_Value pop_back() { - if (m_deque.empty()) return m_defaultValue; - T_Value v = m_deque.back(); - m_deque.pop_back(); - return v; - } - - // Setting. Verilog: assoc[index] = v - // Can't just overload operator[] or provide a "at" reference to set, - // because we need to be able to insert only when the value is set - T_Value& at(vlsint32_t index) { - static T_Value s_throwAway; - // Needs to work for dynamic arrays, so does not use T_MaxSize - if (VL_UNLIKELY(index < 0 || index >= m_deque.size())) { - s_throwAway = atDefault(); - return s_throwAway; - } else { - return m_deque[index]; - } - } - // Accessing. Verilog: v = assoc[index] - const T_Value& at(vlsint32_t index) const { - static T_Value s_throwAway; - // Needs to work for dynamic arrays, so does not use T_MaxSize - if (VL_UNLIKELY(index < 0 || index >= m_deque.size())) { - return atDefault(); - } else { - return m_deque[index]; - } - } - // function void q.insert(index, value); - void insert(vlsint32_t index, const T_Value& value) { - if (VL_UNLIKELY(index < 0 || index >= m_deque.size())) return; - m_deque.insert(m_deque.begin() + index, value); - } - - // Return slice q[lsb:msb] - VlQueue slice(vlsint32_t lsb, vlsint32_t msb) const { - VlQueue out; - if (VL_UNLIKELY(lsb < 0)) lsb = 0; - if (VL_UNLIKELY(lsb >= m_deque.size())) lsb = m_deque.size() - 1; - if (VL_UNLIKELY(msb >= m_deque.size())) msb = m_deque.size() - 1; - for (vlsint32_t i = lsb; i <= msb; ++i) out.push_back(m_deque[i]); - return out; - } - - // For save/restore - const_iterator begin() const { return m_deque.begin(); } - const_iterator end() const { return m_deque.end(); } - - // Methods - void sort() { std::sort(m_deque.begin(), m_deque.end()); } - template void sort(Func with_func) { - // with_func returns arbitrary type to use for the sort comparison - std::sort(m_deque.begin(), m_deque.end(), [=](const T_Value& a, const T_Value& b) { - // index number is meaninless with sort, as it changes - return with_func(0, a) < with_func(0, b); - }); - } - void rsort() { std::sort(m_deque.rbegin(), m_deque.rend()); } - template void rsort(Func with_func) { - // with_func returns arbitrary type to use for the sort comparison - std::sort(m_deque.rbegin(), m_deque.rend(), [=](const T_Value& a, const T_Value& b) { - // index number is meaninless with sort, as it changes - return with_func(0, a) < with_func(0, b); - }); - } - void reverse() { 
std::reverse(m_deque.begin(), m_deque.end()); } - void shuffle() { std::shuffle(m_deque.begin(), m_deque.end(), VlURNG{}); } - VlQueue unique() const { - VlQueue out; - std::unordered_set saw; - for (const auto& i : m_deque) { - auto it = saw.find(i); - if (it == saw.end()) { - saw.insert(it, i); - out.push_back(i); - } - } - return out; - } - VlQueue unique_index() const { - VlQueue out; - IData index = 0; - std::unordered_set saw; - for (const auto& i : m_deque) { - auto it = saw.find(i); - if (it == saw.end()) { - saw.insert(it, i); - out.push_back(index); - } - ++index; - } - return out; - } - template VlQueue find(Func with_func) const { - VlQueue out; - IData index = 0; - for (const auto& i : m_deque) { - if (with_func(index, i)) out.push_back(i); - ++index; - } - return out; - } - template VlQueue find_index(Func with_func) const { - VlQueue out; - IData index = 0; - for (const auto& i : m_deque) { - if (with_func(index, i)) out.push_back(index); - ++index; - } - return out; - } - template VlQueue find_first(Func with_func) const { - // Can't use std::find_if as need index number - IData index = 0; - for (const auto& i : m_deque) { - if (with_func(index, i)) return VlQueue::cons(i); - ++index; - } - return VlQueue{}; - } - template VlQueue find_first_index(Func with_func) const { - IData index = 0; - for (const auto& i : m_deque) { - if (with_func(index, i)) return VlQueue::cons(index); - ++index; - } - return VlQueue{}; - } - template VlQueue find_last(Func with_func) const { - IData index = m_deque.size() - 1; - for (auto it = m_deque.rbegin(); it != m_deque.rend(); ++it) { - if (with_func(index, *it)) return VlQueue::cons(*it); - --index; - } - return VlQueue{}; - } - template VlQueue find_last_index(Func with_func) const { - IData index = m_deque.size() - 1; - for (auto it = m_deque.rbegin(); it != m_deque.rend(); ++it) { - if (with_func(index, *it)) return VlQueue::cons(index); - --index; - } - return VlQueue{}; - } - - // Reduction operators - VlQueue min() const { - if (m_deque.empty()) return VlQueue{}; - const auto it = std::min_element(m_deque.begin(), m_deque.end()); - return VlQueue::cons(*it); - } - VlQueue max() const { - if (m_deque.empty()) return VlQueue{}; - const auto it = std::max_element(m_deque.begin(), m_deque.end()); - return VlQueue::cons(*it); - } - - T_Value r_sum() const { - T_Value out(0); // Type must have assignment operator - for (const auto& i : m_deque) out += i; - return out; - } - template T_Value r_sum(Func with_func) const { - T_Value out(0); // Type must have assignment operator - IData index = 0; - for (const auto& i : m_deque) out += with_func(index++, i); - return out; - } - T_Value r_product() const { - if (m_deque.empty()) return T_Value(0); - auto it = m_deque.begin(); - T_Value out{*it}; - ++it; - for (; it != m_deque.end(); ++it) out *= *it; - return out; - } - template T_Value r_product(Func with_func) const { - if (m_deque.empty()) return T_Value(0); - auto it = m_deque.begin(); - IData index = 0; - T_Value out{with_func(index, *it)}; - ++it; - ++index; - for (; it != m_deque.end(); ++it) out *= with_func(index++, *it); - return out; - } - T_Value r_and() const { - if (m_deque.empty()) return T_Value(0); - auto it = m_deque.begin(); - T_Value out{*it}; - ++it; - for (; it != m_deque.end(); ++it) out &= *it; - return out; - } - template T_Value r_and(Func with_func) const { - if (m_deque.empty()) return T_Value(0); - auto it = m_deque.begin(); - IData index = 0; - T_Value out{with_func(index, *it)}; - ++it; - ++index; - for (; it != 
m_deque.end(); ++it) out &= with_func(index, *it); - return out; - } - T_Value r_or() const { - T_Value out(0); // Type must have assignment operator - for (const auto& i : m_deque) out |= i; - return out; - } - template T_Value r_or(Func with_func) const { - T_Value out(0); // Type must have assignment operator - IData index = 0; - for (const auto& i : m_deque) out |= with_func(index++, i); - return out; - } - T_Value r_xor() const { - T_Value out(0); // Type must have assignment operator - for (const auto& i : m_deque) out ^= i; - return out; - } - template T_Value r_xor(Func with_func) const { - T_Value out(0); // Type must have assignment operator - IData index = 0; - for (const auto& i : m_deque) out ^= with_func(index++, i); - return out; - } - - // Dumping. Verilog: str = $sformatf("%p", assoc) - std::string to_string() const { - if (m_deque.empty()) return "'{}"; // No trailing space - std::string out = "'{"; - std::string comma; - for (const auto& i : m_deque) { - out += comma + VL_TO_STRING(i); - comma = ", "; - } - return out + "} "; - } -}; - -template std::string VL_TO_STRING(const VlQueue& obj) { - return obj.to_string(); -} - -//=================================================================== -// Verilog associative array container -// There are no multithreaded locks on this; the base variable must -// be protected by other means -// -template class VlAssocArray final { -private: - // TYPES - using Map = std::map; - -public: - using const_iterator = typename Map::const_iterator; - -private: - // MEMBERS - Map m_map; // State of the assoc array - T_Value m_defaultValue; // Default value - -public: - // CONSTRUCTORS - // m_defaultValue isn't defaulted. Caller's constructor must do it. - VlAssocArray() = default; - ~VlAssocArray() = default; - VlAssocArray(const VlAssocArray&) = default; - VlAssocArray(VlAssocArray&&) = default; - VlAssocArray& operator=(const VlAssocArray&) = default; - VlAssocArray& operator=(VlAssocArray&&) = default; - - // METHODS - T_Value& atDefault() { return m_defaultValue; } - const T_Value& atDefault() const { return m_defaultValue; } - - // Size of array. Verilog: function int size(), or int num() - int size() const { return m_map.size(); } - // Clear array. Verilog: function void delete([input index]) - void clear() { m_map.clear(); } - void erase(const T_Key& index) { m_map.erase(index); } - // Return 0/1 if element exists. Verilog: function int exists(input index) - int exists(const T_Key& index) const { return m_map.find(index) != m_map.end(); } - // Return first element. Verilog: function int first(ref index); - int first(T_Key& indexr) const { - const auto it = m_map.cbegin(); - if (it == m_map.end()) return 0; - indexr = it->first; - return 1; - } - // Return last element. Verilog: function int last(ref index) - int last(T_Key& indexr) const { - const auto it = m_map.crbegin(); - if (it == m_map.rend()) return 0; - indexr = it->first; - return 1; - } - // Return next element. Verilog: function int next(ref index) - int next(T_Key& indexr) const { - auto it = m_map.find(indexr); - if (VL_UNLIKELY(it == m_map.end())) return 0; - ++it; - if (VL_UNLIKELY(it == m_map.end())) return 0; - indexr = it->first; - return 1; - } - // Return prev element. Verilog: function int prev(ref index) - int prev(T_Key& indexr) const { - auto it = m_map.find(indexr); - if (VL_UNLIKELY(it == m_map.end())) return 0; - if (VL_UNLIKELY(it == m_map.begin())) return 0; - --it; - indexr = it->first; - return 1; - } - // Setting. 
Verilog: assoc[index] = v - // Can't just overload operator[] or provide a "at" reference to set, - // because we need to be able to insert only when the value is set - T_Value& at(const T_Key& index) { - const auto it = m_map.find(index); - if (it == m_map.end()) { - std::pair pit = m_map.emplace(index, m_defaultValue); - return pit.first->second; - } - return it->second; - } - // Accessing. Verilog: v = assoc[index] - const T_Value& at(const T_Key& index) const { - const auto it = m_map.find(index); - if (it == m_map.end()) { - return m_defaultValue; - } else { - return it->second; - } - } - // Setting as a chained operation - VlAssocArray& set(const T_Key& index, const T_Value& value) { - at(index) = value; - return *this; - } - VlAssocArray& setDefault(const T_Value& value) { - atDefault() = value; - return *this; - } - - // For save/restore - const_iterator begin() const { return m_map.begin(); } - const_iterator end() const { return m_map.end(); } - - // Methods - VlQueue unique() const { - VlQueue out; - std::set saw; - for (const auto& i : m_map) { - auto it = saw.find(i.second); - if (it == saw.end()) { - saw.insert(it, i.second); - out.push_back(i.second); - } - } - return out; - } - VlQueue unique_index() const { - VlQueue out; - std::set saw; - for (const auto& i : m_map) { - auto it = saw.find(i.second); - if (it == saw.end()) { - saw.insert(it, i.second); - out.push_back(i.first); - } - } - return out; - } - template VlQueue find(Func with_func) const { - VlQueue out; - for (const auto& i : m_map) - if (with_func(i.first, i.second)) out.push_back(i.second); - return out; - } - template VlQueue find_index(Func with_func) const { - VlQueue out; - for (const auto& i : m_map) - if (with_func(i.first, i.second)) out.push_back(i.first); - return out; - } - template VlQueue find_first(Func with_func) const { - const auto it - = std::find_if(m_map.begin(), m_map.end(), [=](const std::pair& i) { - return with_func(i.first, i.second); - }); - if (it == m_map.end()) return VlQueue{}; - return VlQueue::cons(it->second); - } - template VlQueue find_first_index(Func with_func) const { - const auto it - = std::find_if(m_map.begin(), m_map.end(), [=](const std::pair& i) { - return with_func(i.first, i.second); - }); - if (it == m_map.end()) return VlQueue{}; - return VlQueue::cons(it->first); - } - template VlQueue find_last(Func with_func) const { - const auto it - = std::find_if(m_map.rbegin(), m_map.rend(), [=](const std::pair& i) { - return with_func(i.first, i.second); - }); - if (it == m_map.rend()) return VlQueue{}; - return VlQueue::cons(it->second); - } - template VlQueue find_last_index(Func with_func) const { - const auto it - = std::find_if(m_map.rbegin(), m_map.rend(), [=](const std::pair& i) { - return with_func(i.first, i.second); - }); - if (it == m_map.rend()) return VlQueue{}; - return VlQueue::cons(it->first); - } - - // Reduction operators - VlQueue min() const { - if (m_map.empty()) return VlQueue(); - const auto it = std::min_element( - m_map.begin(), m_map.end(), - [](const std::pair& a, const std::pair& b) { - return a.second < b.second; - }); - return VlQueue::cons(it->second); - } - VlQueue max() const { - if (m_map.empty()) return VlQueue(); - const auto it = std::max_element( - m_map.begin(), m_map.end(), - [](const std::pair& a, const std::pair& b) { - return a.second < b.second; - }); - return VlQueue::cons(it->second); - } - - T_Value r_sum() const { - T_Value out(0); // Type must have assignment operator - for (const auto& i : m_map) out += i.second; - return 
out; - } - template T_Value r_sum(Func with_func) const { - T_Value out(0); // Type must have assignment operator - for (const auto& i : m_map) out += with_func(i.first, i.second); - return out; - } - T_Value r_product() const { - if (m_map.empty()) return T_Value(0); - auto it = m_map.begin(); - T_Value out{it->second}; - ++it; - for (; it != m_map.end(); ++it) out *= it->second; - return out; - } - template T_Value r_product(Func with_func) const { - if (m_map.empty()) return T_Value(0); - auto it = m_map.begin(); - T_Value out{with_func(it->first, it->second)}; - ++it; - for (; it != m_map.end(); ++it) out *= with_func(it->first, it->second); - return out; - } - T_Value r_and() const { - if (m_map.empty()) return T_Value(0); - auto it = m_map.begin(); - T_Value out{it->second}; - ++it; - for (; it != m_map.end(); ++it) out &= it->second; - return out; - } - template T_Value r_and(Func with_func) const { - if (m_map.empty()) return T_Value(0); - auto it = m_map.begin(); - T_Value out{with_func(it->first, it->second)}; - ++it; - for (; it != m_map.end(); ++it) out &= with_func(it->first, it->second); - return out; - } - T_Value r_or() const { - T_Value out(0); // Type must have assignment operator - for (const auto& i : m_map) out |= i.second; - return out; - } - template T_Value r_or(Func with_func) const { - T_Value out(0); // Type must have assignment operator - for (const auto& i : m_map) out |= with_func(i.first, i.second); - return out; - } - T_Value r_xor() const { - T_Value out(0); // Type must have assignment operator - for (const auto& i : m_map) out ^= i.second; - return out; - } - template T_Value r_xor(Func with_func) const { - T_Value out(0); // Type must have assignment operator - for (const auto& i : m_map) out ^= with_func(i.first, i.second); - return out; - } - - // Dumping. Verilog: str = $sformatf("%p", assoc) - std::string to_string() const { - if (m_map.empty()) return "'{}"; // No trailing space - std::string out = "'{"; - std::string comma; - for (const auto& i : m_map) { - out += comma + VL_TO_STRING(i.first) + ":" + VL_TO_STRING(i.second); - comma = ", "; - } - // Default not printed - maybe random init data - return out + "} "; - } -}; - -template -std::string VL_TO_STRING(const VlAssocArray& obj) { - return obj.to_string(); -} - -template -void VL_READMEM_N(bool hex, int bits, const std::string& filename, - VlAssocArray& obj, QData start, QData end) VL_MT_SAFE { - VlReadMem rmem{hex, bits, filename, start, end}; - if (VL_UNLIKELY(!rmem.isOpen())) return; - while (true) { - QData addr; - std::string data; - if (rmem.get(addr /*ref*/, data /*ref*/)) { - rmem.setData(&(obj.at(addr)), data); - } else { - break; - } - } -} - -template -void VL_WRITEMEM_N(bool hex, int bits, const std::string& filename, - const VlAssocArray& obj, QData start, QData end) VL_MT_SAFE { - VlWriteMem wmem{hex, bits, filename, start, end}; - if (VL_UNLIKELY(!wmem.isOpen())) return; - for (const auto& i : obj) { - const QData addr = i.first; - if (addr >= start && addr <= end) wmem.print(addr, true, &(i.second)); - } -} - -//=================================================================== -/// Verilog unpacked array container -/// For when a standard C++[] array is not sufficient, e.g. an -/// array under a queue, or methods operating on the array. -/// -/// A 'struct' as we want this to be an aggregate type that allows -/// static aggregate initialization. Consider data members private. 
-/// -/// This class may get exposed to a Verilated Model's top I/O, if the top -/// IO has an unpacked array. - -template struct VlUnpacked final { - // MEMBERS - // This should be the only data member, otherwise generated static initializers need updating - T_Value m_storage[T_Depth]; // Contents of the unpacked array - - // CONSTRUCTORS - // Default constructors and destructor are used. Note however that C++20 requires that - // aggregate types do not have a user declared constructor, not even an explicitly defaulted - // one. - - // OPERATOR METHODS - // Default copy assignment operators are used. - - // METHODS - // Raw access - WData* data() { return &m_storage[0]; } - const WData* data() const { return &m_storage[0]; } - - T_Value& operator[](size_t index) { return m_storage[index]; }; - const T_Value& operator[](size_t index) const { return m_storage[index]; }; - - // Dumping. Verilog: str = $sformatf("%p", assoc) - std::string to_string() const { - std::string out = "'{"; - std::string comma; - for (int i = 0; i < T_Depth; ++i) { - out += comma + VL_TO_STRING(m_storage[i]); - comma = ", "; - } - return out + "} "; - } -}; - -template -std::string VL_TO_STRING(const VlUnpacked& obj) { - return obj.to_string(); -} - -//=================================================================== -// Verilog class reference container -// There are no multithreaded locks on this; the base variable must -// be protected by other means - -#define VlClassRef std::shared_ptr - -template // T typically of type VlClassRef -inline T VL_NULL_CHECK(T t, const char* filename, int linenum) { - if (VL_UNLIKELY(!t)) Verilated::nullPointerError(filename, linenum); - return t; -} - -template -static inline bool VL_CAST_DYNAMIC(VlClassRef in, VlClassRef& outr) { - VlClassRef casted = std::dynamic_pointer_cast(in); - if (VL_LIKELY(casted)) { - outr = casted; - return true; - } else { - return false; - } -} - -//====================================================================== -// Conversion functions - -extern std::string VL_CVT_PACK_STR_NW(int lwords, const WDataInP lwp) VL_MT_SAFE; -inline std::string VL_CVT_PACK_STR_NQ(QData lhs) VL_PURE { - VlWide lw; - VL_SET_WQ(lw, lhs); - return VL_CVT_PACK_STR_NW(VL_WQ_WORDS_E, lw); -} -inline std::string VL_CVT_PACK_STR_NN(const std::string& lhs) VL_PURE { return lhs; } -inline std::string& VL_CVT_PACK_STR_NN(std::string& lhs) VL_PURE { return lhs; } -inline std::string VL_CVT_PACK_STR_NI(IData lhs) VL_PURE { - VlWide lw; - VL_SET_WI(lw, lhs); - return VL_CVT_PACK_STR_NW(1, lw); -} -inline std::string VL_CONCATN_NNN(const std::string& lhs, const std::string& rhs) VL_PURE { - return lhs + rhs; -} -inline std::string VL_REPLICATEN_NNQ(int, int, int, const std::string& lhs, IData rep) VL_PURE { - std::string out; - out.reserve(lhs.length() * rep); - for (unsigned times = 0; times < rep; ++times) out += lhs; - return out; -} -inline std::string VL_REPLICATEN_NNI(int obits, int lbits, int rbits, const std::string& lhs, - IData rep) VL_PURE { - return VL_REPLICATEN_NNQ(obits, lbits, rbits, lhs, rep); -} - -inline IData VL_LEN_IN(const std::string& ld) { return ld.length(); } -extern std::string VL_TOLOWER_NN(const std::string& ld); -extern std::string VL_TOUPPER_NN(const std::string& ld); - -extern IData VL_FERROR_IN(IData fpi, std::string& outputr) VL_MT_SAFE; -extern IData VL_FOPEN_NN(const std::string& filename, const std::string& mode) VL_MT_SAFE; -extern IData VL_FOPEN_MCD_N(const std::string& filename) VL_MT_SAFE; -extern void VL_READMEM_N(bool hex, int bits, 
QData depth, int array_lsb, - const std::string& filename, void* memp, QData start, - QData end) VL_MT_SAFE; -extern void VL_WRITEMEM_N(bool hex, int bits, QData depth, int array_lsb, - const std::string& filename, const void* memp, QData start, - QData end) VL_MT_SAFE; -extern IData VL_SSCANF_INX(int lbits, const std::string& ld, const char* formatp, ...) VL_MT_SAFE; -extern void VL_SFORMAT_X(int obits_ignored, std::string& output, const char* formatp, - ...) VL_MT_SAFE; -extern std::string VL_SFORMATF_NX(const char* formatp, ...) VL_MT_SAFE; -extern void VL_TIMEFORMAT_IINI(int units, int precision, const std::string& suffix, int width, - VerilatedContext* contextp) VL_MT_SAFE; -extern IData VL_VALUEPLUSARGS_INW(int rbits, const std::string& ld, WDataOutP rwp) VL_MT_SAFE; -inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, CData& rdr) VL_MT_SAFE { - VlWide<2> rwp; // WData must always be at least 2 - IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); - if (got) rdr = rwp[0]; - return got; -} -inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, SData& rdr) VL_MT_SAFE { - VlWide<2> rwp; // WData must always be at least 2 - IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); - if (got) rdr = rwp[0]; - return got; -} -inline IData VL_VALUEPLUSARGS_INI(int rbits, const std::string& ld, IData& rdr) VL_MT_SAFE { - VlWide<2> rwp; - IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); - if (got) rdr = rwp[0]; - return got; -} -inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, QData& rdr) VL_MT_SAFE { - VlWide<2> rwp; - IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); - if (got) rdr = VL_SET_QW(rwp); - return got; -} -inline IData VL_VALUEPLUSARGS_INQ(int rbits, const std::string& ld, double& rdr) VL_MT_SAFE { - VlWide<2> rwp; - IData got = VL_VALUEPLUSARGS_INW(rbits, ld, rwp); - if (got) rdr = VL_CVT_D_Q(VL_SET_QW(rwp)); - return got; -} -extern IData VL_VALUEPLUSARGS_INN(int, const std::string& ld, std::string& rdr) VL_MT_SAFE; - -//====================================================================== -// Strings - -extern std::string VL_PUTC_N(const std::string& lhs, IData rhs, CData ths) VL_PURE; -extern CData VL_GETC_N(const std::string& lhs, IData rhs) VL_PURE; -extern std::string VL_SUBSTR_N(const std::string& lhs, IData rhs, IData ths) VL_PURE; - -inline IData VL_CMP_NN(const std::string& lhs, const std::string& rhs, bool ignoreCase) VL_PURE { - // SystemVerilog does not allow a string variable to contain '\0'. - // So C functions such as strcmp() can correctly compare strings. 
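// Illustrative sketch, not part of the patch: what this helper returns for a
// pair of made-up values; the comments relating it to the SystemVerilog
// compare()/icompare() string methods are my reading of the surrounding code.
#include "verilated.h"

static void exampleStringCompare() {
    const std::string a = "Hello";
    const std::string b = "hello";
    const IData cs = VL_CMP_NN(a, b, false);  // strcmp-style, like a.compare(b); nonzero here
    const IData ci = VL_CMP_NN(a, b, true);   // case-insensitive, like a.icompare(b); zero here
    (void)cs;
    (void)ci;
}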
- if (ignoreCase) { - return VL_STRCASECMP(lhs.c_str(), rhs.c_str()); - } else { - return std::strcmp(lhs.c_str(), rhs.c_str()); - } -} - -extern IData VL_ATOI_N(const std::string& str, int base) VL_PURE; - -extern IData VL_FGETS_NI(std::string& dest, IData fpi); - #endif // Guard diff --git a/include/verilated_imp.h b/include/verilated_imp.h index fef103f7b..ed17f86e2 100644 --- a/include/verilated_imp.h +++ b/include/verilated_imp.h @@ -30,7 +30,6 @@ #include "verilatedos.h" #include "verilated.h" -#include "verilated_heavy.h" #include "verilated_syms.h" #include diff --git a/include/verilated_save.h b/include/verilated_save.h index 1a6c14e13..f49c15f91 100644 --- a/include/verilated_save.h +++ b/include/verilated_save.h @@ -23,7 +23,7 @@ #define VERILATOR_VERILATED_SAVE_C_H_ #include "verilatedos.h" -#include "verilated_heavy.h" +#include "verilated.h" #include diff --git a/include/verilated_syms.h b/include/verilated_syms.h index 1dc633e6e..160bd31c9 100644 --- a/include/verilated_syms.h +++ b/include/verilated_syms.h @@ -30,7 +30,7 @@ #define VERILATOR_VERILATED_SYMS_H_ #include "verilatedos.h" -#include "verilated_heavy.h" +#include "verilated.h" #include "verilated_sym_props.h" #include diff --git a/include/verilated_types.h b/include/verilated_types.h new file mode 100644 index 000000000..8a0dde96d --- /dev/null +++ b/include/verilated_types.h @@ -0,0 +1,897 @@ +// -*- mode: C++; c-file-style: "cc-mode" -*- +//************************************************************************* +// +// Code available from: https://verilator.org +// +// Copyright 2003-2021 by Wilson Snyder. This program is free software; you can +// redistribute it and/or modify it under the terms of either the GNU +// Lesser General Public License Version 3 or the Perl Artistic License +// Version 2.0. +// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 +// +//************************************************************************* +/// +/// \file +/// \brief Verilated common data type containers +/// +/// verilated.h should be included instead of this file. +/// +/// Those macro/function/variable starting or ending in _ are internal, +/// however many of the other function/macros here are also internal. 
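// Illustrative sketch, not part of the patch: after this refactor user code does
// not include this header directly; verilated.h defines
// VERILATOR_VERILATED_H_INTERNAL_ and then pulls it in, so the guard just below
// rejects a direct include.
#include "verilated.h"          // correct: verilated_types.h arrives transitively
// #include "verilated_types.h" // would trip the #error guard below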
+/// +//************************************************************************* + +#ifndef VERILATOR_VERILATED_TYPES_H_ +#define VERILATOR_VERILATED_TYPES_H_ + +#ifndef VERILATOR_VERILATED_H_INTERNAL_ +#error "verilated_types.h should only be included by verilated.h" +#endif + +//=================================================================== +// String formatters (required by below containers) + +extern std::string VL_TO_STRING(CData lhs); +extern std::string VL_TO_STRING(SData lhs); +extern std::string VL_TO_STRING(IData lhs); +extern std::string VL_TO_STRING(QData lhs); +inline std::string VL_TO_STRING(const std::string& obj) { return "\"" + obj + "\""; } +extern std::string VL_TO_STRING_W(int words, const WDataInP obj); + +//========================================================================= +// Declare net data types + +#define VL_SIG8(name, msb, lsb) CData name ///< Declare signal, 1-8 bits +#define VL_SIG16(name, msb, lsb) SData name ///< Declare signal, 9-16 bits +#define VL_SIG64(name, msb, lsb) QData name ///< Declare signal, 33-64 bits +#define VL_SIG(name, msb, lsb) IData name ///< Declare signal, 17-32 bits +#define VL_SIGW(name, msb, lsb, words) WData name[words] ///< Declare signal, 65+ bits +#define VL_IN8(name, msb, lsb) CData name ///< Declare input signal, 1-8 bits +#define VL_IN16(name, msb, lsb) SData name ///< Declare input signal, 9-16 bits +#define VL_IN64(name, msb, lsb) QData name ///< Declare input signal, 33-64 bits +#define VL_IN(name, msb, lsb) IData name ///< Declare input signal, 17-32 bits +#define VL_INW(name, msb, lsb, words) WData name[words] ///< Declare input signal, 65+ bits +#define VL_INOUT8(name, msb, lsb) CData name ///< Declare bidir signal, 1-8 bits +#define VL_INOUT16(name, msb, lsb) SData name ///< Declare bidir signal, 9-16 bits +#define VL_INOUT64(name, msb, lsb) QData name ///< Declare bidir signal, 33-64 bits +#define VL_INOUT(name, msb, lsb) IData name ///< Declare bidir signal, 17-32 bits +#define VL_INOUTW(name, msb, lsb, words) WData name[words] ///< Declare bidir signal, 65+ bits +#define VL_OUT8(name, msb, lsb) CData name ///< Declare output signal, 1-8 bits +#define VL_OUT16(name, msb, lsb) SData name ///< Declare output signal, 9-16 bits +#define VL_OUT64(name, msb, lsb) QData name ///< Declare output signal, 33-64bits +#define VL_OUT(name, msb, lsb) IData name ///< Declare output signal, 17-32 bits +#define VL_OUTW(name, msb, lsb, words) WData name[words] ///< Declare output signal, 65+ bits + +//=================================================================== +// Shuffle RNG + +extern vluint64_t vl_rand64() VL_MT_SAFE; + +class VlURNG final { +public: + using result_type = size_t; + static constexpr size_t min() { return 0; } + static constexpr size_t max() { return 1ULL << 31; } + size_t operator()() { return VL_MASK_I(31) & vl_rand64(); } +}; + +//=================================================================== +// Readmem/Writemem operation classes + +class VlReadMem final { + bool m_hex; // Hex format + int m_bits; // Bit width of values + const std::string& m_filename; // Filename + QData m_end; // End address (as specified by user) + FILE* m_fp; // File handle for filename + QData m_addr; // Next address to read + int m_linenum; // Line number last read from file +public: + VlReadMem(bool hex, int bits, const std::string& filename, QData start, QData end); + ~VlReadMem(); + bool isOpen() const { return m_fp != nullptr; } + int linenum() const { return m_linenum; } + bool get(QData& addrr, std::string& 
valuer); + void setData(void* valuep, const std::string& rhs); +}; + +class VlWriteMem final { + bool m_hex; // Hex format + int m_bits; // Bit width of values + FILE* m_fp; // File handle for filename + QData m_addr; // Next address to write +public: + VlWriteMem(bool hex, int bits, const std::string& filename, QData start, QData end); + ~VlWriteMem(); + bool isOpen() const { return m_fp != nullptr; } + void print(QData addr, bool addrstamp, const void* valuep); +}; + +//=================================================================== +/// Verilog wide packed bit container. +/// Similar to std::array, but lighter weight, only methods needed +/// by Verilator, to help compile time. +/// +/// A 'struct' as we want this to be an aggregate type that allows +/// static aggregate initialization. Consider data members private. +/// +/// For example a Verilog "bit [94:0]" will become a VlWide<3> because 3*32 +/// bits are needed to hold the 95 bits. The MSB (bit 96) must always be +/// zero in memory, but during intermediate operations in the Verilated +/// internals is unpredictable. + +static int _vl_cmp_w(int words, WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE; + +template struct VlWide final { + // MEMBERS + // This should be the only data member, otherwise generated static initializers need updating + EData m_storage[T_Words]; // Contents of the packed array + + // CONSTRUCTORS + // Default constructors and destructor are used. Note however that C++20 requires that + // aggregate types do not have a user declared constructor, not even an explicitly defaulted + // one. + + // OPERATOR METHODS + // Default copy assignment operators are used. + operator WDataOutP() { return &m_storage[0]; } // This also allows [] + operator WDataInP() const { return &m_storage[0]; } // This also allows [] + + // METHODS + const EData& at(size_t index) const { return m_storage[index]; } + EData& at(size_t index) { return m_storage[index]; } + WData* data() { return &m_storage[0]; } + const WData* data() const { return &m_storage[0]; } + bool operator<(const VlWide& rhs) const { + return _vl_cmp_w(T_Words, data(), rhs.data()) < 0; + } +}; + +// Convert a C array to std::array reference by pointer magic, without copy. +// Data type (second argument) is so the function template can automatically generate. +template +VlWide& VL_CVT_W_A(const WDataInP inp, const VlWide&) { + return *((VlWide*)inp); +} + +template std::string VL_TO_STRING(const VlWide& obj) { + return VL_TO_STRING_W(T_Words, obj.data()); +} + +//=================================================================== +// Verilog queue and dynamic array container +// There are no multithreaded locks on this; the base variable must +// be protected by other means +// +// Bound here is the maximum size() allowed, e.g. 1 + SystemVerilog bound +// For dynamic arrays it is always zero +template class VlQueue final { +private: + // TYPES + using Deque = std::deque; + +public: + using const_iterator = typename Deque::const_iterator; + +private: + // MEMBERS + Deque m_deque; // State of the assoc array + T_Value m_defaultValue; // Default value + +public: + // CONSTRUCTORS + // m_defaultValue isn't defaulted. Caller's constructor must do it. + VlQueue() = default; + ~VlQueue() = default; + VlQueue(const VlQueue&) = default; + VlQueue(VlQueue&&) = default; + VlQueue& operator=(const VlQueue&) = default; + VlQueue& operator=(VlQueue&&) = default; + + // Standard copy constructor works. 
Verilog: assoca = assocb + // Also must allow conversion from a different T_MaxSize queue + template VlQueue operator=(const VlQueue& rhs) { + m_deque = rhs.privateDeque(); + if (VL_UNLIKELY(T_MaxSize && T_MaxSize < m_deque.size())) m_deque.resize(T_MaxSize - 1); + return *this; + } + + static VlQueue cons(const T_Value& lhs) { + VlQueue out; + out.push_back(lhs); + return out; + } + static VlQueue cons(const T_Value& lhs, const T_Value& rhs) { + VlQueue out; + out.push_back(rhs); + out.push_back(lhs); + return out; + } + static VlQueue cons(const VlQueue& lhs, const T_Value& rhs) { + VlQueue out = lhs; + out.push_front(rhs); + return out; + } + static VlQueue cons(const T_Value& lhs, const VlQueue& rhs) { + VlQueue out = rhs; + out.push_back(lhs); + return out; + } + static VlQueue cons(const VlQueue& lhs, const VlQueue& rhs) { + VlQueue out = rhs; + for (const auto& i : lhs.m_deque) out.push_back(i); + return out; + } + + // METHODS + T_Value& atDefault() { return m_defaultValue; } + const T_Value& atDefault() const { return m_defaultValue; } + const Deque& privateDeque() const { return m_deque; } + + // Size. Verilog: function int size(), or int num() + int size() const { return m_deque.size(); } + // Clear array. Verilog: function void delete([input index]) + void clear() { m_deque.clear(); } + void erase(vlsint32_t index) { + if (VL_LIKELY(index >= 0 && index < m_deque.size())) + m_deque.erase(m_deque.begin() + index); + } + + // Dynamic array new[] becomes a renew() + void renew(size_t size) { + clear(); + m_deque.resize(size, atDefault()); + } + // Dynamic array new[]() becomes a renew_copy() + void renew_copy(size_t size, const VlQueue& rhs) { + if (size == 0) { + clear(); + } else { + *this = rhs; + m_deque.resize(size, atDefault()); + } + } + + // function void q.push_front(value) + void push_front(const T_Value& value) { + m_deque.push_front(value); + if (VL_UNLIKELY(T_MaxSize != 0 && m_deque.size() > T_MaxSize)) m_deque.pop_back(); + } + // function void q.push_back(value) + void push_back(const T_Value& value) { + if (VL_LIKELY(T_MaxSize == 0 || m_deque.size() < T_MaxSize)) m_deque.push_back(value); + } + // function value_t q.pop_front(); + T_Value pop_front() { + if (m_deque.empty()) return m_defaultValue; + T_Value v = m_deque.front(); + m_deque.pop_front(); + return v; + } + // function value_t q.pop_back(); + T_Value pop_back() { + if (m_deque.empty()) return m_defaultValue; + T_Value v = m_deque.back(); + m_deque.pop_back(); + return v; + } + + // Setting. Verilog: assoc[index] = v + // Can't just overload operator[] or provide a "at" reference to set, + // because we need to be able to insert only when the value is set + T_Value& at(vlsint32_t index) { + static T_Value s_throwAway; + // Needs to work for dynamic arrays, so does not use T_MaxSize + if (VL_UNLIKELY(index < 0 || index >= m_deque.size())) { + s_throwAway = atDefault(); + return s_throwAway; + } else { + return m_deque[index]; + } + } + // Accessing. 
Verilog: v = assoc[index] + const T_Value& at(vlsint32_t index) const { + static T_Value s_throwAway; + // Needs to work for dynamic arrays, so does not use T_MaxSize + if (VL_UNLIKELY(index < 0 || index >= m_deque.size())) { + return atDefault(); + } else { + return m_deque[index]; + } + } + // function void q.insert(index, value); + void insert(vlsint32_t index, const T_Value& value) { + if (VL_UNLIKELY(index < 0 || index >= m_deque.size())) return; + m_deque.insert(m_deque.begin() + index, value); + } + + // Return slice q[lsb:msb] + VlQueue slice(vlsint32_t lsb, vlsint32_t msb) const { + VlQueue out; + if (VL_UNLIKELY(lsb < 0)) lsb = 0; + if (VL_UNLIKELY(lsb >= m_deque.size())) lsb = m_deque.size() - 1; + if (VL_UNLIKELY(msb >= m_deque.size())) msb = m_deque.size() - 1; + for (vlsint32_t i = lsb; i <= msb; ++i) out.push_back(m_deque[i]); + return out; + } + + // For save/restore + const_iterator begin() const { return m_deque.begin(); } + const_iterator end() const { return m_deque.end(); } + + // Methods + void sort() { std::sort(m_deque.begin(), m_deque.end()); } + template void sort(Func with_func) { + // with_func returns arbitrary type to use for the sort comparison + std::sort(m_deque.begin(), m_deque.end(), [=](const T_Value& a, const T_Value& b) { + // index number is meaninless with sort, as it changes + return with_func(0, a) < with_func(0, b); + }); + } + void rsort() { std::sort(m_deque.rbegin(), m_deque.rend()); } + template void rsort(Func with_func) { + // with_func returns arbitrary type to use for the sort comparison + std::sort(m_deque.rbegin(), m_deque.rend(), [=](const T_Value& a, const T_Value& b) { + // index number is meaninless with sort, as it changes + return with_func(0, a) < with_func(0, b); + }); + } + void reverse() { std::reverse(m_deque.begin(), m_deque.end()); } + void shuffle() { std::shuffle(m_deque.begin(), m_deque.end(), VlURNG{}); } + VlQueue unique() const { + VlQueue out; + std::unordered_set saw; + for (const auto& i : m_deque) { + auto it = saw.find(i); + if (it == saw.end()) { + saw.insert(it, i); + out.push_back(i); + } + } + return out; + } + VlQueue unique_index() const { + VlQueue out; + IData index = 0; + std::unordered_set saw; + for (const auto& i : m_deque) { + auto it = saw.find(i); + if (it == saw.end()) { + saw.insert(it, i); + out.push_back(index); + } + ++index; + } + return out; + } + template VlQueue find(Func with_func) const { + VlQueue out; + IData index = 0; + for (const auto& i : m_deque) { + if (with_func(index, i)) out.push_back(i); + ++index; + } + return out; + } + template VlQueue find_index(Func with_func) const { + VlQueue out; + IData index = 0; + for (const auto& i : m_deque) { + if (with_func(index, i)) out.push_back(index); + ++index; + } + return out; + } + template VlQueue find_first(Func with_func) const { + // Can't use std::find_if as need index number + IData index = 0; + for (const auto& i : m_deque) { + if (with_func(index, i)) return VlQueue::cons(i); + ++index; + } + return VlQueue{}; + } + template VlQueue find_first_index(Func with_func) const { + IData index = 0; + for (const auto& i : m_deque) { + if (with_func(index, i)) return VlQueue::cons(index); + ++index; + } + return VlQueue{}; + } + template VlQueue find_last(Func with_func) const { + IData index = m_deque.size() - 1; + for (auto it = m_deque.rbegin(); it != m_deque.rend(); ++it) { + if (with_func(index, *it)) return VlQueue::cons(*it); + --index; + } + return VlQueue{}; + } + template VlQueue find_last_index(Func with_func) const { + IData 
index = m_deque.size() - 1; + for (auto it = m_deque.rbegin(); it != m_deque.rend(); ++it) { + if (with_func(index, *it)) return VlQueue::cons(index); + --index; + } + return VlQueue{}; + } + + // Reduction operators + VlQueue min() const { + if (m_deque.empty()) return VlQueue{}; + const auto it = std::min_element(m_deque.begin(), m_deque.end()); + return VlQueue::cons(*it); + } + VlQueue max() const { + if (m_deque.empty()) return VlQueue{}; + const auto it = std::max_element(m_deque.begin(), m_deque.end()); + return VlQueue::cons(*it); + } + + T_Value r_sum() const { + T_Value out(0); // Type must have assignment operator + for (const auto& i : m_deque) out += i; + return out; + } + template T_Value r_sum(Func with_func) const { + T_Value out(0); // Type must have assignment operator + IData index = 0; + for (const auto& i : m_deque) out += with_func(index++, i); + return out; + } + T_Value r_product() const { + if (m_deque.empty()) return T_Value(0); + auto it = m_deque.begin(); + T_Value out{*it}; + ++it; + for (; it != m_deque.end(); ++it) out *= *it; + return out; + } + template T_Value r_product(Func with_func) const { + if (m_deque.empty()) return T_Value(0); + auto it = m_deque.begin(); + IData index = 0; + T_Value out{with_func(index, *it)}; + ++it; + ++index; + for (; it != m_deque.end(); ++it) out *= with_func(index++, *it); + return out; + } + T_Value r_and() const { + if (m_deque.empty()) return T_Value(0); + auto it = m_deque.begin(); + T_Value out{*it}; + ++it; + for (; it != m_deque.end(); ++it) out &= *it; + return out; + } + template T_Value r_and(Func with_func) const { + if (m_deque.empty()) return T_Value(0); + auto it = m_deque.begin(); + IData index = 0; + T_Value out{with_func(index, *it)}; + ++it; + ++index; + for (; it != m_deque.end(); ++it) out &= with_func(index, *it); + return out; + } + T_Value r_or() const { + T_Value out(0); // Type must have assignment operator + for (const auto& i : m_deque) out |= i; + return out; + } + template T_Value r_or(Func with_func) const { + T_Value out(0); // Type must have assignment operator + IData index = 0; + for (const auto& i : m_deque) out |= with_func(index++, i); + return out; + } + T_Value r_xor() const { + T_Value out(0); // Type must have assignment operator + for (const auto& i : m_deque) out ^= i; + return out; + } + template T_Value r_xor(Func with_func) const { + T_Value out(0); // Type must have assignment operator + IData index = 0; + for (const auto& i : m_deque) out ^= with_func(index++, i); + return out; + } + + // Dumping. Verilog: str = $sformatf("%p", assoc) + std::string to_string() const { + if (m_deque.empty()) return "'{}"; // No trailing space + std::string out = "'{"; + std::string comma; + for (const auto& i : m_deque) { + out += comma + VL_TO_STRING(i); + comma = ", "; + } + return out + "} "; + } +}; + +template std::string VL_TO_STRING(const VlQueue& obj) { + return obj.to_string(); +} + +//=================================================================== +// Verilog associative array container +// There are no multithreaded locks on this; the base variable must +// be protected by other means +// +template class VlAssocArray final { +private: + // TYPES + using Map = std::map; + +public: + using const_iterator = typename Map::const_iterator; + +private: + // MEMBERS + Map m_map; // State of the assoc array + T_Value m_defaultValue; // Default value + +public: + // CONSTRUCTORS + // m_defaultValue isn't defaulted. Caller's constructor must do it. 
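// Illustrative sketch, not part of the patch: driving VlAssocArray from
// standalone test-bench code, roughly mirroring a SystemVerilog "int aa[string]"
// with a foreach traversal. Keys and values are made up; note the caller
// establishes the default value, per the comment above.
#include "verilated.h"

static void exampleAssoc() {
    VlAssocArray<std::string, IData> aa;
    aa.atDefault() = 0;                  // caller-provided default, as noted above
    aa.at("apples") = 3;                 // aa["apples"] = 3;
    aa.set("pears", 5).set("plums", 7);  // chained setter form
    if (aa.exists("pears")) {            // aa.exists("pears")
        std::string k;
        for (int ok = aa.first(k); ok; ok = aa.next(k)) {  // foreach (aa[k]) ...
            const IData v = aa.at(k);    // value stored under key k
            (void)v;
        }
    }
}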
+ VlAssocArray() = default; + ~VlAssocArray() = default; + VlAssocArray(const VlAssocArray&) = default; + VlAssocArray(VlAssocArray&&) = default; + VlAssocArray& operator=(const VlAssocArray&) = default; + VlAssocArray& operator=(VlAssocArray&&) = default; + + // METHODS + T_Value& atDefault() { return m_defaultValue; } + const T_Value& atDefault() const { return m_defaultValue; } + + // Size of array. Verilog: function int size(), or int num() + int size() const { return m_map.size(); } + // Clear array. Verilog: function void delete([input index]) + void clear() { m_map.clear(); } + void erase(const T_Key& index) { m_map.erase(index); } + // Return 0/1 if element exists. Verilog: function int exists(input index) + int exists(const T_Key& index) const { return m_map.find(index) != m_map.end(); } + // Return first element. Verilog: function int first(ref index); + int first(T_Key& indexr) const { + const auto it = m_map.cbegin(); + if (it == m_map.end()) return 0; + indexr = it->first; + return 1; + } + // Return last element. Verilog: function int last(ref index) + int last(T_Key& indexr) const { + const auto it = m_map.crbegin(); + if (it == m_map.rend()) return 0; + indexr = it->first; + return 1; + } + // Return next element. Verilog: function int next(ref index) + int next(T_Key& indexr) const { + auto it = m_map.find(indexr); + if (VL_UNLIKELY(it == m_map.end())) return 0; + ++it; + if (VL_UNLIKELY(it == m_map.end())) return 0; + indexr = it->first; + return 1; + } + // Return prev element. Verilog: function int prev(ref index) + int prev(T_Key& indexr) const { + auto it = m_map.find(indexr); + if (VL_UNLIKELY(it == m_map.end())) return 0; + if (VL_UNLIKELY(it == m_map.begin())) return 0; + --it; + indexr = it->first; + return 1; + } + // Setting. Verilog: assoc[index] = v + // Can't just overload operator[] or provide a "at" reference to set, + // because we need to be able to insert only when the value is set + T_Value& at(const T_Key& index) { + const auto it = m_map.find(index); + if (it == m_map.end()) { + std::pair pit = m_map.emplace(index, m_defaultValue); + return pit.first->second; + } + return it->second; + } + // Accessing. 
Verilog: v = assoc[index] + const T_Value& at(const T_Key& index) const { + const auto it = m_map.find(index); + if (it == m_map.end()) { + return m_defaultValue; + } else { + return it->second; + } + } + // Setting as a chained operation + VlAssocArray& set(const T_Key& index, const T_Value& value) { + at(index) = value; + return *this; + } + VlAssocArray& setDefault(const T_Value& value) { + atDefault() = value; + return *this; + } + + // For save/restore + const_iterator begin() const { return m_map.begin(); } + const_iterator end() const { return m_map.end(); } + + // Methods + VlQueue unique() const { + VlQueue out; + std::set saw; + for (const auto& i : m_map) { + auto it = saw.find(i.second); + if (it == saw.end()) { + saw.insert(it, i.second); + out.push_back(i.second); + } + } + return out; + } + VlQueue unique_index() const { + VlQueue out; + std::set saw; + for (const auto& i : m_map) { + auto it = saw.find(i.second); + if (it == saw.end()) { + saw.insert(it, i.second); + out.push_back(i.first); + } + } + return out; + } + template VlQueue find(Func with_func) const { + VlQueue out; + for (const auto& i : m_map) + if (with_func(i.first, i.second)) out.push_back(i.second); + return out; + } + template VlQueue find_index(Func with_func) const { + VlQueue out; + for (const auto& i : m_map) + if (with_func(i.first, i.second)) out.push_back(i.first); + return out; + } + template VlQueue find_first(Func with_func) const { + const auto it + = std::find_if(m_map.begin(), m_map.end(), [=](const std::pair& i) { + return with_func(i.first, i.second); + }); + if (it == m_map.end()) return VlQueue{}; + return VlQueue::cons(it->second); + } + template VlQueue find_first_index(Func with_func) const { + const auto it + = std::find_if(m_map.begin(), m_map.end(), [=](const std::pair& i) { + return with_func(i.first, i.second); + }); + if (it == m_map.end()) return VlQueue{}; + return VlQueue::cons(it->first); + } + template VlQueue find_last(Func with_func) const { + const auto it + = std::find_if(m_map.rbegin(), m_map.rend(), [=](const std::pair& i) { + return with_func(i.first, i.second); + }); + if (it == m_map.rend()) return VlQueue{}; + return VlQueue::cons(it->second); + } + template VlQueue find_last_index(Func with_func) const { + const auto it + = std::find_if(m_map.rbegin(), m_map.rend(), [=](const std::pair& i) { + return with_func(i.first, i.second); + }); + if (it == m_map.rend()) return VlQueue{}; + return VlQueue::cons(it->first); + } + + // Reduction operators + VlQueue min() const { + if (m_map.empty()) return VlQueue(); + const auto it = std::min_element( + m_map.begin(), m_map.end(), + [](const std::pair& a, const std::pair& b) { + return a.second < b.second; + }); + return VlQueue::cons(it->second); + } + VlQueue max() const { + if (m_map.empty()) return VlQueue(); + const auto it = std::max_element( + m_map.begin(), m_map.end(), + [](const std::pair& a, const std::pair& b) { + return a.second < b.second; + }); + return VlQueue::cons(it->second); + } + + T_Value r_sum() const { + T_Value out(0); // Type must have assignment operator + for (const auto& i : m_map) out += i.second; + return out; + } + template T_Value r_sum(Func with_func) const { + T_Value out(0); // Type must have assignment operator + for (const auto& i : m_map) out += with_func(i.first, i.second); + return out; + } + T_Value r_product() const { + if (m_map.empty()) return T_Value(0); + auto it = m_map.begin(); + T_Value out{it->second}; + ++it; + for (; it != m_map.end(); ++it) out *= it->second; + return 
out; + } + template T_Value r_product(Func with_func) const { + if (m_map.empty()) return T_Value(0); + auto it = m_map.begin(); + T_Value out{with_func(it->first, it->second)}; + ++it; + for (; it != m_map.end(); ++it) out *= with_func(it->first, it->second); + return out; + } + T_Value r_and() const { + if (m_map.empty()) return T_Value(0); + auto it = m_map.begin(); + T_Value out{it->second}; + ++it; + for (; it != m_map.end(); ++it) out &= it->second; + return out; + } + template T_Value r_and(Func with_func) const { + if (m_map.empty()) return T_Value(0); + auto it = m_map.begin(); + T_Value out{with_func(it->first, it->second)}; + ++it; + for (; it != m_map.end(); ++it) out &= with_func(it->first, it->second); + return out; + } + T_Value r_or() const { + T_Value out(0); // Type must have assignment operator + for (const auto& i : m_map) out |= i.second; + return out; + } + template T_Value r_or(Func with_func) const { + T_Value out(0); // Type must have assignment operator + for (const auto& i : m_map) out |= with_func(i.first, i.second); + return out; + } + T_Value r_xor() const { + T_Value out(0); // Type must have assignment operator + for (const auto& i : m_map) out ^= i.second; + return out; + } + template T_Value r_xor(Func with_func) const { + T_Value out(0); // Type must have assignment operator + for (const auto& i : m_map) out ^= with_func(i.first, i.second); + return out; + } + + // Dumping. Verilog: str = $sformatf("%p", assoc) + std::string to_string() const { + if (m_map.empty()) return "'{}"; // No trailing space + std::string out = "'{"; + std::string comma; + for (const auto& i : m_map) { + out += comma + VL_TO_STRING(i.first) + ":" + VL_TO_STRING(i.second); + comma = ", "; + } + // Default not printed - maybe random init data + return out + "} "; + } +}; + +template +std::string VL_TO_STRING(const VlAssocArray& obj) { + return obj.to_string(); +} + +template +void VL_READMEM_N(bool hex, int bits, const std::string& filename, + VlAssocArray& obj, QData start, QData end) VL_MT_SAFE { + VlReadMem rmem{hex, bits, filename, start, end}; + if (VL_UNLIKELY(!rmem.isOpen())) return; + while (true) { + QData addr; + std::string data; + if (rmem.get(addr /*ref*/, data /*ref*/)) { + rmem.setData(&(obj.at(addr)), data); + } else { + break; + } + } +} + +template +void VL_WRITEMEM_N(bool hex, int bits, const std::string& filename, + const VlAssocArray& obj, QData start, QData end) VL_MT_SAFE { + VlWriteMem wmem{hex, bits, filename, start, end}; + if (VL_UNLIKELY(!wmem.isOpen())) return; + for (const auto& i : obj) { + const QData addr = i.first; + if (addr >= start && addr <= end) wmem.print(addr, true, &(i.second)); + } +} + +//=================================================================== +/// Verilog unpacked array container +/// For when a standard C++[] array is not sufficient, e.g. an +/// array under a queue, or methods operating on the array. +/// +/// A 'struct' as we want this to be an aggregate type that allows +/// static aggregate initialization. Consider data members private. +/// +/// This class may get exposed to a Verilated Model's top I/O, if the top +/// IO has an unpacked array. + +template struct VlUnpacked final { + // MEMBERS + // This should be the only data member, otherwise generated static initializers need updating + T_Value m_storage[T_Depth]; // Contents of the unpacked array + + // CONSTRUCTORS + // Default constructors and destructor are used. 
+
+//===================================================================
+/// Verilog unpacked array container
+/// For when a standard C++[] array is not sufficient, e.g. an
+/// array under a queue, or methods operating on the array.
+///
+/// A 'struct' as we want this to be an aggregate type that allows
+/// static aggregate initialization. Consider data members private.
+///
+/// This class may get exposed to a Verilated Model's top I/O, if the top
+/// IO has an unpacked array.
+
+template <class T_Value, std::size_t T_Depth> struct VlUnpacked final {
+    // MEMBERS
+    // This should be the only data member, otherwise generated static initializers need updating
+    T_Value m_storage[T_Depth];  // Contents of the unpacked array
+
+    // CONSTRUCTORS
+    // Default constructors and destructor are used. Note however that C++20 requires that
+    // aggregate types do not have a user declared constructor, not even an explicitly defaulted
+    // one.
+
+    // OPERATOR METHODS
+    // Default copy assignment operators are used.
+
+    // METHODS
+    // Raw access
+    WData* data() { return &m_storage[0]; }
+    const WData* data() const { return &m_storage[0]; }
+
+    T_Value& operator[](size_t index) { return m_storage[index]; };
+    const T_Value& operator[](size_t index) const { return m_storage[index]; };
+
+    // Dumping. Verilog: str = $sformatf("%p", assoc)
+    std::string to_string() const {
+        std::string out = "'{";
+        std::string comma;
+        for (int i = 0; i < T_Depth; ++i) {
+            out += comma + VL_TO_STRING(m_storage[i]);
+            comma = ", ";
+        }
+        return out + "} ";
+    }
+};
+
+template <class T_Value, std::size_t T_Depth>
+std::string VL_TO_STRING(const VlUnpacked<T_Value, T_Depth>& obj) {
+    return obj.to_string();
+}
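Because the comment above stresses that VlUnpacked must remain an aggregate, here is a small sketch of the static aggregate initialization this preserves (values and names are illustrative, not from the patch):

    #include "verilated.h"

    // Verilog: int unpacked_array [0:3] = '{10, 20, 30, 40};
    static VlUnpacked<IData, 4> s_unpacked = {{10, 20, 30, 40}};  // Constant-initialized, no runtime ctor

    void unpacked_example() {
        s_unpacked[2] = 33;  // operator[] gives raw element access
        VL_PRINTF("%s\n", s_unpacked.to_string().c_str());  // Prints roughly '{10, 20, 33, 40}
    }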
\"verilated_threads.h\"\n"); if (v3Global.opt.savable()) puts("#include \"verilated_save.h\"\n"); if (v3Global.opt.coverage()) puts("#include \"verilated_cov.h\"\n"); diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index 7cd836c91..f86dd7487 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -389,7 +389,7 @@ void EmitCSyms::emitSymHdr() { puts("\n"); ofp()->putsIntTopInclude(); - puts("#include \"verilated_heavy.h\"\n"); + puts("#include \"verilated.h\"\n"); if (v3Global.needTraceDumper()) { puts("#include \"" + v3Global.opt.traceSourceLang() + ".h\"\n"); } diff --git a/test_regress/t/t_verilated_all.pl b/test_regress/t/t_verilated_all.pl index 1d8370346..44b2d3a72 100755 --- a/test_regress/t/t_verilated_all.pl +++ b/test_regress/t/t_verilated_all.pl @@ -53,6 +53,7 @@ foreach my $file (sort keys %hit) { if (!$hit{$file} && $file !~ /_sc/ && $file !~ /_fst/ + && $file !~ /_heavy/ && ($file !~ /_thread/ || $Self->cfg_with_threaded)) { error("Include file not covered by t_verilated_all test: ",$file); }