Improve W primitive operations with -Oz (#4733)
This commit is contained in:
parent
f048cff093
commit
5d99534d54
|
|
@ -325,6 +325,19 @@ uint64_t vl_time_pow10(int n) VL_PURE;
|
|||
//===================================================================
|
||||
// SETTING OPERATORS
|
||||
|
||||
// Clear 'words' consecutive EData words starting at owp to all-zero bytes.
// Returns owp. 'words' must not be negative: the byte count is formed as
// words * sizeof(EData), which converts a negative int to a huge unsigned size.
VL_ATTR_ALWINLINE
static WDataOutP VL_MEMSET_ZERO_W(WDataOutP owp, int words) VL_MT_SAFE {
    std::memset(owp, 0, words * sizeof(EData));
    return owp;
}
|
||||
// Set every byte of 'words' consecutive EData words starting at owp to 0xff.
// Returns owp. 'words' must not be negative: the byte count is formed as
// words * sizeof(EData), which converts a negative int to a huge unsigned size.
VL_ATTR_ALWINLINE
static WDataOutP VL_MEMSET_ONES_W(WDataOutP owp, int words) VL_MT_SAFE {
    std::memset(owp, 0xff, words * sizeof(EData));
    return owp;
}
|
||||
// Copy 'words' consecutive EData words from iwp to owp using std::memcpy.
// Returns owp. 'words' must not be negative: the byte count is formed as
// words * sizeof(EData), which converts a negative int to a huge unsigned size.
VL_ATTR_ALWINLINE
static WDataOutP VL_MEMCPY_W(WDataOutP owp, WDataInP const iwp, int words) VL_MT_SAFE {
    std::memcpy(owp, iwp, words * sizeof(EData));
    return owp;
}
|
||||
|
||||
// Output clean
|
||||
// EMIT_RULE: VL_CLEAN: oclean=clean; obits=lbits;
|
||||
// ANDs lhs with VL_MASK_I(obits); the lbits argument is accepted but not used.
#define VL_CLEAN_II(obits, lbits, lhs) ((lhs) & (VL_MASK_I(obits)))
|
||||
|
|
@ -339,18 +352,16 @@ static inline WDataOutP _vl_clean_inplace_w(int obits, WDataOutP owp) VL_MT_SAFE
|
|||
}
|
||||
// Copy lwp into owp over VL_WORDS_I(obits) words. All words below the top one
// are copied verbatim; the top word is ANDed with VL_MASK_E(obits).
// Returns owp.
static inline WDataOutP VL_CLEAN_WW(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE {
    const int words = VL_WORDS_I(obits);
    // Single bulk copy of the lower words (the element-wise loop duplicated this work)
    VL_MEMCPY_W(owp, lwp, words - 1);
    owp[words - 1] = lwp[words - 1] & VL_MASK_E(obits);
    return owp;
}
|
||||
// Zero all VL_WORDS_I(obits) words of owp. Returns owp (the value returned by
// VL_MEMSET_ZERO_W).
static inline WDataOutP VL_ZERO_W(int obits, WDataOutP owp) VL_MT_SAFE {
    return VL_MEMSET_ZERO_W(owp, VL_WORDS_I(obits));
}
|
||||
// Fill owp with ones over VL_WORDS_I(obits) words: every word below the top one
// gets all bits set, and the top word is set to VL_MASK_E(obits).
// Returns owp.
static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE {
    const int words = VL_WORDS_I(obits);
    VL_MEMSET_ONES_W(owp, words - 1);
    owp[words - 1] = VL_MASK_E(obits);
    return owp;
}
|
||||
|
|
@ -359,9 +370,7 @@ static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE {
|
|||
// For now, we always have a clean rhs.
|
||||
// Note: If an ASSIGN isn't clean, use VL_ASSIGNCLEAN instead to do the same thing.
|
||||
// Copy VL_WORDS_I(obits) words from lwp to owp with no masking. Returns owp.
// NOTE(review): the copy goes through std::memcpy (via VL_MEMCPY_W), so owp and
// lwp are presumed not to partially overlap - confirm against callers.
static inline WDataOutP VL_ASSIGN_W(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE {
    return VL_MEMCPY_W(owp, lwp, VL_WORDS_I(obits));
}
|
||||
|
||||
// EMIT_RULE: VL_ASSIGNBIT: rclean=clean;
|
||||
|
|
@ -519,19 +528,20 @@ static inline void VL_ASSIGNBIT_WO(int bit, WDataOutP owp) VL_MT_SAFE {
|
|||
// Zero-extend ld into owp: word 0 receives ld, and words 1 up to
// VL_WORDS_I(obits) - 1 are cleared. The second (unnamed) int argument is unused.
// Returns owp.
static inline WDataOutP VL_EXTEND_WI(int obits, int, WDataOutP owp, IData ld) VL_MT_SAFE {
    // Note for extracts that obits != lbits
    owp[0] = ld;
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    return owp;
}
|
||||
// Zero-extend ld into owp: VL_SET_WQ stores ld into owp, then the words from
// index VL_WQ_WORDS_E up to VL_WORDS_I(obits) - 1 are cleared. The second
// (unnamed) int argument is unused. Returns owp.
static inline WDataOutP VL_EXTEND_WQ(int obits, int, WDataOutP owp, QData ld) VL_MT_SAFE {
    VL_SET_WQ(owp, ld);
    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    return owp;
}
|
||||
// Zero-extend lwp into owp: the first VL_WORDS_I(lbits) words are copied from
// lwp and the remaining words up to VL_WORDS_I(obits) are cleared.
// Returns owp (the value returned by VL_MEMCPY_W).
static inline WDataOutP VL_EXTEND_WW(int obits, int lbits, WDataOutP owp,
                                     WDataInP const lwp) VL_MT_SAFE {
    const int lwords = VL_WORDS_I(lbits);
    // Request the source before clearing the upper words; the copy happens last
    VL_PREFETCH_RD(lwp);
    VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords);
    return VL_MEMCPY_W(owp, lwp, lwords);
}
|
||||
|
||||
// EMIT_RULE: VL_EXTENDS: oclean=*dirty*; obits=lbits;
|
||||
|
|
@ -547,26 +557,37 @@ static inline QData VL_EXTENDS_QQ(int, int lbits, QData lhs) VL_PURE {
|
|||
}
|
||||
|
||||
// Sign-extend ld (lbits wide) into owp. Word 0 receives ld. When
// VL_SIGN_E(lbits, owp[0]) is non-zero, the bits of word 0 outside
// VL_MASK_E(lbits) are set and words 1 up to VL_WORDS_I(obits) - 1 are filled
// with ones; otherwise those upper words are cleared. Returns owp.
static inline WDataOutP VL_EXTENDS_WI(int obits, int lbits, WDataOutP owp, IData ld) VL_MT_SAFE {
    owp[0] = ld;
    if (VL_SIGN_E(lbits, owp[0])) {
        owp[0] |= ~VL_MASK_E(lbits);
        VL_MEMSET_ONES_W(owp + 1, VL_WORDS_I(obits) - 1);
    } else {
        VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    }
    return owp;
}
|
||||
// Sign-extend ld (lbits wide) into owp. VL_SET_WQ stores ld into owp. When
// VL_SIGN_E(lbits, owp[1]) is non-zero, the bits of word 1 outside
// VL_MASK_E(lbits) are set and the words from index VL_WQ_WORDS_E up to
// VL_WORDS_I(obits) - 1 are filled with ones; otherwise those words are cleared.
// Returns owp.
static inline WDataOutP VL_EXTENDS_WQ(int obits, int lbits, WDataOutP owp, QData ld) VL_MT_SAFE {
    VL_SET_WQ(owp, ld);
    if (VL_SIGN_E(lbits, owp[1])) {
        owp[1] |= ~VL_MASK_E(lbits);
        VL_MEMSET_ONES_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    } else {
        VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    }
    return owp;
}
|
||||
// Sign-extend lwp (lbits wide) into owp. The top source word (index
// VL_WORDS_I(lbits) - 1) is copied first; when VL_SIGN_E(lbits, <top word>) is
// non-zero its bits outside VL_MASK_E(lbits) are set and the words above it, up
// to VL_WORDS_I(obits) - 1, are filled with ones; otherwise those words are
// cleared. The lower source words are copied last.
// Returns owp (the value returned by VL_MEMCPY_W).
static inline WDataOutP VL_EXTENDS_WW(int obits, int lbits, WDataOutP owp,
                                      WDataInP const lwp) VL_MT_SAFE {
    const int lwords = VL_WORDS_I(lbits);
    VL_PREFETCH_RD(lwp);
    owp[lwords - 1] = lwp[lwords - 1];
    if (VL_SIGN_E(lbits, lwp[lwords - 1])) {
        owp[lwords - 1] |= ~VL_MASK_E(lbits);
        VL_MEMSET_ONES_W(owp + lwords, VL_WORDS_I(obits) - lwords);
    } else {
        VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords);
    }
    return VL_MEMCPY_W(owp, lwp, lwords - 1);
}
|
||||
|
||||
//===================================================================
|
||||
|
|
@ -1549,63 +1570,66 @@ static inline QData VL_DYN_TO_Q(const VlQueue<T>& q, int elem_size) {
|
|||
// Build a concatenation in owp: rd is stored in word 0, words 1 up to
// VL_WORDS_I(obits) - 1 are cleared, then ld is placed by
// _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WII(int obits, int lbits, int rbits, WDataOutP owp, IData ld,
                                      IData rd) VL_MT_SAFE {
    owp[0] = rd;
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
|
||||
// Build a concatenation in owp: rd is stored in word 0, words 1 up to
// VL_WORDS_I(obits) - 1 are cleared, then lwp is placed by
// _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WWI(int obits, int lbits, int rbits, WDataOutP owp,
                                      WDataInP const lwp, IData rd) VL_MT_SAFE {
    owp[0] = rd;
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits);
    return owp;
}
|
||||
// Build a concatenation in owp: the first VL_WORDS_I(rbits) words are copied
// from rwp, the remaining words up to VL_WORDS_I(obits) are cleared, then ld is
// placed by _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WIW(int obits, int lbits, int rbits, WDataOutP owp, IData ld,
                                      WDataInP const rwp) VL_MT_SAFE {
    const int rwords = VL_WORDS_I(rbits);
    VL_MEMCPY_W(owp, rwp, rwords);
    VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords);
    _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
|
||||
// Build a concatenation in owp: VL_SET_WQ stores rd into owp, the words from
// index VL_WQ_WORDS_E up to VL_WORDS_I(obits) - 1 are cleared, then ld is
// placed by _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WIQ(int obits, int lbits, int rbits, WDataOutP owp, IData ld,
                                      QData rd) VL_MT_SAFE {
    VL_SET_WQ(owp, rd);
    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
|
||||
// Build a concatenation in owp: rd is stored in word 0, words 1 up to
// VL_WORDS_I(obits) - 1 are cleared, then ld is placed by
// _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WQI(int obits, int lbits, int rbits, WDataOutP owp, QData ld,
                                      IData rd) VL_MT_SAFE {
    owp[0] = rd;
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
|
||||
// Build a concatenation in owp: VL_SET_WQ stores rd into owp, the words from
// index VL_WQ_WORDS_E up to VL_WORDS_I(obits) - 1 are cleared, then ld is
// placed by _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WQQ(int obits, int lbits, int rbits, WDataOutP owp, QData ld,
                                      QData rd) VL_MT_SAFE {
    VL_SET_WQ(owp, rd);
    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
|
||||
// Build a concatenation in owp: VL_SET_WQ stores rd into owp, the words from
// index VL_WQ_WORDS_E up to VL_WORDS_I(obits) - 1 are cleared, then lwp is
// placed by _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WWQ(int obits, int lbits, int rbits, WDataOutP owp,
                                      WDataInP const lwp, QData rd) VL_MT_SAFE {
    VL_SET_WQ(owp, rd);
    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits);
    return owp;
}
|
||||
// Build a concatenation in owp: the first VL_WORDS_I(rbits) words are copied
// from rwp, the remaining words up to VL_WORDS_I(obits) are cleared, then ld is
// placed by _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WQW(int obits, int lbits, int rbits, WDataOutP owp, QData ld,
                                      WDataInP const rwp) VL_MT_SAFE {
    const int rwords = VL_WORDS_I(rbits);
    VL_MEMCPY_W(owp, rwp, rwords);
    VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords);
    _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
|
||||
// Build a concatenation in owp: the first VL_WORDS_I(rbits) words are copied
// from rwp, the remaining words up to VL_WORDS_I(obits) are cleared, then lwp
// is placed by _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WWW(int obits, int lbits, int rbits, WDataOutP owp,
                                      WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE {
    const int rwords = VL_WORDS_I(rbits);
    VL_MEMCPY_W(owp, rwp, rwords);
    VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords);
    _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits);
    return owp;
}
|
||||
|
|
@ -1829,10 +1853,13 @@ static inline WDataOutP VL_SHIFTRS_WWW(int obits, int lbits, int rbits, WDataOut
|
|||
EData overshift = 0; // Huge shift 1>>32 or more
|
||||
for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i];
|
||||
if (VL_UNLIKELY(overshift || rwp[0] >= static_cast<IData>(obits))) {
|
||||
const int lmsw = VL_WORDS_I(obits) - 1;
|
||||
const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]);
|
||||
for (int j = 0; j <= lmsw; ++j) owp[j] = sign;
|
||||
owp[lmsw] &= VL_MASK_E(lbits);
|
||||
const int owords = VL_WORDS_I(obits);
|
||||
if (VL_SIGN_E(lbits, lwp[owords - 1])) {
|
||||
VL_MEMSET_ONES_W(owp, owords);
|
||||
owp[owords - 1] &= VL_MASK_E(lbits);
|
||||
} else {
|
||||
VL_MEMSET_ZERO_W(owp, owords);
|
||||
}
|
||||
return owp;
|
||||
}
|
||||
return VL_SHIFTRS_WWI(obits, lbits, 32, owp, lwp, rwp[0]);
|
||||
|
|
@ -2043,9 +2070,7 @@ static inline void VL_ASSIGNSEL_WW(int rbits, int obits, int lsb, WDataOutP iowp
|
|||
|
||||
// Wide conditional select: copies VL_WORDS_I(obits) words into owp from w1p
// when cond is non-zero, otherwise from w2p. Returns owp.
// NOTE(review): the copy goes through std::memcpy (via VL_MEMCPY_W), so owp is
// presumed not to partially overlap the selected source - confirm against callers.
static inline WDataOutP VL_COND_WIWW(int obits, WDataOutP owp, int cond, WDataInP const w1p,
                                     WDataInP const w2p) VL_MT_SAFE {
    return VL_MEMCPY_W(owp, cond ? w1p : w2p, VL_WORDS_I(obits));
}
|
||||
|
||||
//======================================================================
|
||||
|
|
@ -2058,7 +2083,7 @@ static inline WDataOutP VL_COND_WIWW(int obits, WDataOutP owp, int cond, WDataIn
|
|||
// If changing the number of functions here, also change EMITCINLINES_NUM_CONSTW
|
||||
|
||||
// Tail shared by the constant-building functions: clears the words of 'o' from
// index wordsSet up to VL_WORDS_I(obits) - 1, then returns 'o'. The expansion
// ends in an unterminated 'return o', so the use site supplies the ';'.
// Relies on a variable named 'o' being in scope where the macro is expanded.
#define VL_C_END_(obits, wordsSet) \
    VL_MEMSET_ZERO_W(o + (wordsSet), VL_WORDS_I(obits) - (wordsSet)); \
    return o
|
||||
|
||||
// clang-format off
|
||||
|
|
|
|||
Loading…
Reference in New Issue