From 69b3c5f6d1a3485d0c81084d47b8005c157749bf Mon Sep 17 00:00:00 2001 From: Benjamin Collier Date: Wed, 20 May 2026 17:14:02 -0600 Subject: [PATCH] Support streaming on queues (#7597) --- docs/CONTRIBUTORS | 2 + include/verilated_funcs.h | 574 ++++++++++++++++++++- nodist/log_changes | 2 + src/V3Const.cpp | 23 +- src/V3EmitCFunc.cpp | 31 +- src/V3EmitCFunc.h | 66 ++- src/V3Expand.cpp | 7 +- src/V3FuncOpt.cpp | 3 +- src/V3Width.cpp | 5 + test_regress/t/t_stream_queue.py | 2 +- test_regress/t/t_stream_queue.v | 353 ++++++++++++- test_regress/t/t_stream_queue_interface.py | 20 + test_regress/t/t_stream_queue_interface.sv | 76 +++ test_regress/t/t_stream_trace.py | 2 +- 14 files changed, 1142 insertions(+), 24 deletions(-) create mode 100755 test_regress/t/t_stream_queue_interface.py create mode 100644 test_regress/t/t_stream_queue_interface.sv diff --git a/docs/CONTRIBUTORS b/docs/CONTRIBUTORS index 5900260e4..c74bbc43f 100644 --- a/docs/CONTRIBUTORS +++ b/docs/CONTRIBUTORS @@ -34,6 +34,7 @@ Artur Bieniek AUDIY Aylon Chaim Porat Bartłomiej Chmiel +Benjamin Collier Brian Li Cameron Kirk Cameron Waite @@ -251,6 +252,7 @@ Ryszard Rozak Samuel Riedel Sean Cross Sebastien Van Cauwenberghe +Secturion Sergey Fedorov Sergi Granell Seth Pellegrino diff --git a/include/verilated_funcs.h b/include/verilated_funcs.h index 2dacc5af8..44a658c37 100644 --- a/include/verilated_funcs.h +++ b/include/verilated_funcs.h @@ -779,7 +779,7 @@ static inline IData VL_COUNTBITS_W(int lbits, int words, WDataInP const lwp, IDa EData r = 0; IData wordLbits = 32; for (int i = 0; i < words; ++i) { - if (i == words - 1) wordLbits = lbits % 32; + if (i == words - 1) wordLbits = VL_BITBIT_I(lbits); r += VL_COUNTBITS_E(wordLbits, lwp[i], ctrl0, ctrl1, ctrl2); } return r; @@ -904,6 +904,7 @@ static inline WDataOutP VL_NOT_W(int words, WDataOutP owp, WDataInP const lwp) V // EMIT_RULE: VL_GTE: oclean=clean; lclean==clean; rclean==clean; obits=1; lbits==rbits; // EMIT_RULE: VL_LTE: oclean=clean; 
lclean==clean; rclean==clean; obits=1; lbits==rbits; #define VL_NEQ_W(words, lwp, rwp) (!VL_EQ_W(words, lwp, rwp)) +#define VL_NEQ_R(words, q, rwp) (!VL_EQ_R(words, q, rwp)) #define VL_LT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) < 0) #define VL_LTE_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) <= 0) #define VL_GT_W(words, lwp, rwp) (_vl_cmp_w(words, lwp, rwp) > 0) @@ -916,6 +917,73 @@ static inline IData VL_EQ_W(int words, WDataInP const lwp, WDataInP const rwp) V return (nequal == 0); } +template +static inline IData VL_EQ_W(int words, WDataInP const rwp, + const VlQueue>& q) VL_PURE { + return VL_EQ_R(words, q, rwp); +} + +template +static inline IData VL_EQ_W(int words, WDataInP const rwp, VlQueue q) VL_PURE { + return VL_EQ_R(words, q, rwp); +} + +template +static inline IData VL_EQ_R(int words, VlQueue q, WDataInP const rwp) VL_PURE { + EData nequal = 0; + const int wordsInQ = q.size() * sizeof(T) / sizeof(IData) - 1; + if (wordsInQ + 1 != words) return false; + if (sizeof(T) == 1) { + IData temp = 0; + for (int i = 0; (i < wordsInQ + 1); ++i) { + temp |= static_cast(q.at((wordsInQ - i) * sizeof(IData) + 3)); + temp |= static_cast(q.at((wordsInQ - i) * sizeof(IData) + 2)) << 8; + temp |= static_cast(q.at((wordsInQ - i) * sizeof(IData) + 1)) << 16; + temp |= static_cast(q.at((wordsInQ - i) * sizeof(IData))) << 24; + nequal |= (temp ^ rwp[i]); + temp = 0; + } + } else if (sizeof(T) == 2) { + IData temp = 0; + for (int i = 0; (i < wordsInQ + 1); ++i) { + temp |= q.at((wordsInQ - i) * sizeof(SData) + 1); + temp |= q.at((wordsInQ - i) * sizeof(SData)) << 16; + nequal |= (temp ^ rwp[i]); + temp = 0; + } + } else if (sizeof(T) == 4) { + for (int i = 0; (i < wordsInQ + 1); ++i) { nequal |= (q.at(wordsInQ - i) ^ rwp[i]); } + } else if (sizeof(T) == 8) { + QData temp = 0; + int qSize = q.size() - 1; + for (int i = 0; (i < qSize); i += 2) { + temp = q.at(qSize - i); + nequal |= (static_cast(q.at(qSize - i)) >> 32 ^ rwp[i + 1]); + temp = rwp[i + 1]; + nequal |= 
(static_cast(q.at(qSize - i)) ^ rwp[i]); + temp = rwp[i]; + } + } + return (nequal == 0); +} + +template +static inline IData VL_EQ_R(int words, const VlQueue>& q, + WDataInP const rwp) VL_PURE { + EData nequal = 0; + const int wordsInQ = q.size() * N_Words; + if ((q.size() * N_Words) != words) { return false; } + int count = 0; + for (int qIndex = q.size() - 1; qIndex >= 0; qIndex--) { + for (int wordInElement = 0; wordInElement < N_Words; wordInElement++) { + nequal |= (q.at(qIndex).at(wordInElement) ^ rwp[count]); + count++; + } + } + + return (nequal == 0); +} + // Internal usage static inline int _vl_cmp_w(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE { for (int i = words - 1; i >= 0; --i) { @@ -1558,6 +1626,300 @@ static inline QData VL_STREAML_FAST_QQI(int lbits, QData ld, IData rd_log2) VL_P return ret >> (VL_QUADSIZE - lbits); } +template +static inline void VL_STREAML_FAST_RQI(int lbits, VlQueue& q, QData ld, IData rd_log2) VL_PURE { + const QData ret = VL_STREAML_FAST_QQI(lbits, ld, rd_log2); + q.clear(); + const int numQData = 8 / sizeof(T); + const bool needsMask = sizeof(T) < 8; + for (int ii = numQData - 1; ii >= 0; ii--) { + if VL_CONSTEXPR_CXX17 (needsMask) { + VL_CONSTEXPR_CXX17 uint64_t mask = VL_MASK_Q(sizeof(T) * 8); + q.push_back(static_cast(ret >> (ii * sizeof(T) * 8)) & mask); + } else { + q.push_back(static_cast(ret)); + } + } +} + +template +static inline void VL_STREAML_FAST_RQI(int lbits, VlQueue>& q, QData ld, + IData rd_log2) VL_PURE { + const QData ret = VL_STREAML_FAST_QQI(lbits, ld, rd_log2); + q.clear(); + VlWide value; + value[N_Words - 1] = static_cast(ret >> 32); + value[N_Words - 2] = static_cast(ret); + for (int i = N_Words - 3; i >= 0; i--) value[i] = 0; + q.push_back(value); +} + +template +static inline void VL_STREAMR_RII(int lbits, VlQueue& q, IData ld, IData rd_log2) VL_PURE { + q.clear(); + VL_CONSTEXPR_CXX17 int valueSize = sizeof(T); + if VL_CONSTEXPR_CXX17 (valueSize < 4) { + VL_CONSTEXPR_CXX17 int mask 
= VL_MASK_I(valueSize * 8); + // Push all bytes of the 32-bit integer, MSB first (Big-Endian) + VL_CONSTEXPR_CXX17 int qElementsPerWord = 4 / valueSize; + for (int i = 0; i < qElementsPerWord; i++) { + q.push_back( + static_cast(((ld >> (qElementsPerWord - i - 1) * 8 * valueSize)) & mask)); + } + } else { + q.push_back(static_cast(ld)); + } +} + +template +static inline void VL_STREAMR_RII(int lbits, VlQueue>& q, IData ld, + IData rd_log2) VL_PURE { + q.clear(); + VlWide value; + VL_SET_WI(value, ld); + q.push_back(value); +} + +template +static inline void VL_STREAMR_RQI(int lbits, VlQueue& q, QData ld, IData rd_log2) VL_PURE { + q.clear(); // Empty the queue first + // If this is a queue of bytes (unsigned char) + if VL_CONSTEXPR_CXX17 (sizeof(T) == 1) { + // Push all 8 bytes of the 64-bit integer, MSB first (Big-Endian) + q.push_back(static_cast((ld >> 56) & 0xFF)); + q.push_back(static_cast((ld >> 48) & 0xFF)); + q.push_back(static_cast((ld >> 40) & 0xFF)); + q.push_back(static_cast((ld >> 32) & 0xFF)); + q.push_back(static_cast((ld >> 24) & 0xFF)); + q.push_back(static_cast((ld >> 16) & 0xFF)); + q.push_back(static_cast((ld >> 8) & 0xFF)); + q.push_back(static_cast(ld & 0xFF)); + } else { + const int numQData = 8 / sizeof(T); + for (int ii = numQData - 1; ii >= 0; ii--) { + q.push_back(static_cast(ld >> (ii * sizeof(T) * 8))); + } + } +} + +template +static inline IData VL_STREAMR_IRI(int lbits, VlQueue& q, IData rd_log2) VL_PURE { + IData value = 0; // Starts at 0. Out-of-range bits will remain 0. 
+ const size_t len = q.size(); + + if VL_CONSTEXPR_CXX17 (sizeof(T) == 1) { // If it is a queue of bytes + if (len > 0) value |= static_cast(q.at(0)) << 24; + if (len > 1) value |= static_cast(q.at(1)) << 16; + if (len > 2) value |= static_cast(q.at(2)) << 8; + if (len > 3) value |= static_cast(q.at(3)); + } else if VL_CONSTEXPR_CXX17 (sizeof(T) == 2) { + if (len > 0) value |= static_cast(q.at(0)) << 16; + if (len > 1) value |= static_cast(q.at(1)); + } else if VL_CONSTEXPR_CXX17 (sizeof(T) == 8) { + if (len > 0) value = static_cast(q.at(0)); + } else { // If it is a queue of larger types (e.g. ints) + VL_CONSTEXPR_CXX17 int shiftAmt = sizeof(T) > 4 ? 32 : 0; + if (len > 0) value = static_cast(q.at(0) >> shiftAmt); + } + + return value; +} + +template +static inline IData VL_STREAMR_QRI(int lbits, VlQueue& q, IData rd_log2) VL_PURE { + QData value = 0; + const size_t len = q.size(); + + if VL_CONSTEXPR_CXX17 (sizeof(T) == 1) { + // Must cast to QData BEFORE shifting to prevent 32-bit overflow! + if (len > 0) value |= static_cast(q.at(0)) << 56; + if (len > 1) value |= static_cast(q.at(1)) << 48; + if (len > 2) value |= static_cast(q.at(2)) << 40; + if (len > 3) value |= static_cast(q.at(3)) << 32; + if (len > 4) value |= static_cast(q.at(4)) << 24; + if (len > 5) value |= static_cast(q.at(5)) << 16; + if (len > 6) value |= static_cast(q.at(6)) << 8; + if (len > 7) value |= static_cast(q.at(7)); + } else { + // If it is a queue of larger types (e.g. 
ints/longs) + if (len > 0) value = static_cast(q.at(0)); + } + + return value; +} + +template +static inline void VL_STREAMR_RQI(int lbits, VlQueue>& q, QData ld, + IData rd_log2) VL_PURE { + q.clear(); // Empty the queue first + VlWide value; + VL_SET_WQ(value, ld); + q.push_back(value); +} + +template +static inline void VL_STREAMR_RWI(int lbits, VlQueue& q, WDataInP const lwp, + IData rd_log2) VL_PURE { + q.clear(); // Empty the queue first + const int numWords = VL_BITWORD_E(lbits); + QData qdataValue = 0; + for (int word = numWords - 1; word >= 0; word--) { + VL_CONSTEXPR_CXX17 int valueSize = sizeof(T); + if VL_CONSTEXPR_CXX17 (valueSize < 4) { + VL_CONSTEXPR_CXX17 int mask = VL_MASK_I(valueSize * 8); + // Push all bytes of the 32-bit integer, MSB first (Big-Endian) + VL_CONSTEXPR_CXX17 int qElementsPerWord = 4 / valueSize; + for (int i = 0; i < qElementsPerWord; i++) { + q.push_back(static_cast( + ((lwp[word] >> (qElementsPerWord - i - 1) * 8 * valueSize)) & mask)); + } + } else if VL_CONSTEXPR_CXX17 (sizeof(T) == 8) { + const int shiftAmt = (word & 0x1) << 5; + qdataValue |= static_cast(lwp[word]) << shiftAmt; + if ((word & 0x1) == 0) { + q.push_back(qdataValue); + qdataValue = 0; + } + } else { + q.push_back(static_cast(lwp[word])); + } + } +} + +template +static inline void VL_STREAMR_RWI(int lbits, VlQueue>& q, WDataInP const lwp, + IData rd_log2) VL_PURE { + q.clear(); // Empty the queue first + const int numWords = VL_BITWORD_E(lbits); + VlWide value; + for (int ii = 0; ii < N_Words; ii++) { value.at(ii) = 0; } + for (int word = numWords - 1; word >= 0; word--) { + value.at(word) = lwp[word]; + if ((word % N_Words) == 0) { q.push_back(value); } + } +} + +template +static inline VlQueue VL_STREAMR_RRI(int lbits, const VlQueue q, IData rd) VL_MT_SAFE { + return q; +} + +static inline VlQueue VL_STREAMR_NRI(int lbits, const VlQueue q, + IData rd) VL_MT_SAFE { + return q; +} + +template +static inline void VL_STREAMR_RRI(int lbits, VlQueue& to_q, + const 
VlQueue& from_q, IData rd) VL_MT_SAFE { + to_q.clear(); + VL_CONSTEXPR_CXX17 size_t otherSize = sizeof(T_Other); + VL_CONSTEXPR_CXX17 size_t sizeOfThis = sizeof(T_Value); + T_Value temp = 0; + if (otherSize > sizeOfThis) { + for (auto val : from_q) { + for (int ii = otherSize / sizeOfThis - 1; ii >= 0; ii--) { + temp = (static_cast(val >> (ii * 8 * sizeOfThis))); + to_q.push_back(temp); + } + } + } else { + // How many of the other element fits in this element. + size_t otherInElement = sizeOfThis / otherSize - 1; + for (auto val : from_q) { + // Shift the element into the correct position and merge + temp |= (static_cast(val) << (otherInElement * 8 * otherSize)); + otherInElement--; + if (otherInElement == -1) { + to_q.push_back(temp); + temp = 0; + otherInElement = sizeOfThis - 1; + } + } + // Push any remaining leftover elements (upper bits will remain zero-padded) + if (otherInElement < sizeOfThis - 1) { to_q.push_back(temp); } + } +} + +template +static inline void VL_STREAMR_RRI(int lbits, VlQueue>& to_q, + const VlQueue& from_q, IData rd) VL_MT_SAFE { + to_q.clear(); + + VL_CONSTEXPR_CXX17 size_t otherSize = sizeof(T_Other); + VL_CONSTEXPR_CXX17 size_t sizeOfThis = 4 * N_Words; + VL_CONSTEXPR_CXX17 int numOtherInWord = 4 / otherSize; + VlWide temp; + for (int ii = 0; ii < N_Words; ii++) { temp.at(ii) = 0; } + if VL_CONSTEXPR_CXX17 (numOtherInWord > 0) { + size_t elementCount = sizeOfThis - 1; + for (auto val : from_q) { + temp.at((elementCount / numOtherInWord) % N_Words) + |= (static_cast(val) << (elementCount * 8 * otherSize)); + elementCount--; + // If we've collected enough elements for the target type, push and reset + if (elementCount == -1) { + to_q.push_back(temp); + for (int ii = 0; ii < N_Words; ii++) { temp.at(ii) = 0; } + elementCount = sizeOfThis - 1; + } + } + // Push any remaining leftover elements (upper bits will remain zero-padded) + if (elementCount < sizeOfThis - 1) { to_q.push_back(temp); } + } else { //QData + size_t wordCount = N_Words 
- 1; + for (auto val : from_q) { + temp.at(wordCount % N_Words) = (static_cast(static_cast(val) >> 32)); + wordCount--; + if (wordCount == -1) { + to_q.push_back(temp); + for (int ii = 0; ii < N_Words; ii++) { temp.at(ii) = 0; } + wordCount = N_Words - 1; + } + temp.at(wordCount % N_Words) = (static_cast(val)); + wordCount--; + if (wordCount == -1) { + to_q.push_back(temp); + for (int ii = 0; ii < N_Words; ii++) { temp.at(ii) = 0; } + wordCount = N_Words - 1; + } + } + // Push any remaining leftover elements (upper bits will remain zero-padded) + if (wordCount < N_Words - 1) { to_q.push_back(temp); } + } +} + +template +static inline void VL_STREAMR_RRI(int lbits, VlQueue& to_q, + const VlQueue>& from_q, IData rd) VL_MT_SAFE { + to_q.clear(); + + VL_CONSTEXPR_CXX17 size_t otherSize = 4 * N_Words; + VL_CONSTEXPR_CXX17 size_t sizeOfThis = sizeof(T_Value); + T_Value temp = 0; + for (auto val : from_q) { + if VL_CONSTEXPR_CXX17 (sizeof(T_Value) == 8) { + // iterate backwards because queues are msb first + for (int wordIndex = N_Words - 1; wordIndex >= 0; wordIndex -= 2) { + temp |= (static_cast(val.at(wordIndex)) << 32); + if (wordIndex - 1 >= 0) { temp |= (static_cast(val.at(wordIndex - 1))); } + to_q.push_back(temp); + temp = 0; + } + } else { + //iterate backwards because queues are msb first + for (int wordIndex = N_Words - 1; wordIndex >= 0; wordIndex--) { + for (int elemInWord = sizeof(EData) / sizeOfThis - 1; elemInWord >= 0; + elemInWord--) { + temp + = (static_cast(val.at(wordIndex) >> elemInWord * 8 * sizeOfThis)); + to_q.push_back(temp); + } + } + } + } +} + // Regular "slow" streaming operators static inline IData VL_STREAML_III(int lbits, IData ld, IData rd) VL_PURE { IData ret = 0; @@ -1571,6 +1933,112 @@ static inline IData VL_STREAML_III(int lbits, IData ld, IData rd) VL_PURE { return ret; } +template +static inline VlQueue VL_STREAML_RRI(int lbitsIn, const VlQueue q, IData rd) VL_MT_SAFE { + // TODO this function needs to have a temp variable made in 
verilator and passed in. + // dynamicly make our "temp variable" + // lbitsIn is always 0 + VlQueue out_queue; + const int lbits = q.size() * 8 * sizeof(T); + out_queue.renew(q.size()); + VL_CONSTEXPR_CXX17 unsigned int moduloMask = 8 * sizeof(T) - 1; + const int ssize = (rd < static_cast(lbits)) ? rd : (static_cast(lbits)); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + for (int sbit = 0; sbit < ssize && sbit < lbits - istart; ++sbit) { + + const int qIndex = (ostart + sbit) / (8 * sizeof(T)); + const int shiftLeft = (istart + sbit) & moduloMask; + const int shiftRight = ((ostart + sbit) & moduloMask); + const T bit = ((q.at(qIndex)) >> shiftRight & 1) << shiftLeft; + const int writeIndx = (istart + sbit) / (8 * sizeof(T)); + out_queue.atWrite(writeIndx) |= bit; + } + } + + return out_queue; +} + +template +static inline VlQueue> +VL_STREAML_RRI(int lbitsIn, const VlQueue> q, IData rd) VL_MT_SAFE { + // TODO this function needs to have a temp variable. + // dynamicly make our "temp variable" + // lbitsIn is always zero + const int lbits = q.size() * 8 * sizeof(IData) * N_Words; + VL_CONSTEXPR_CXX17 int sizeOfElement = 8 * sizeof(IData) * N_Words; + VlQueue> out_queue; + out_queue.renew(q.size()); + VL_CONSTEXPR_CXX17 unsigned int moduloMask = sizeOfElement - 1; + const int ssize = (rd < static_cast(lbits)) ? rd : (static_cast(lbits)); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? 
ostart : 0; + for (int sbit = 0; sbit < ssize && sbit < lbits - istart; ++sbit) { + + const int qIndex = (ostart + sbit) / (sizeOfElement); + const int shiftLeftTotal = (istart + sbit) & moduloMask; + const int shiftRightTotal = ((ostart + sbit) & moduloMask); + const int shiftRight = VL_MASK_I(shiftRightTotal); + const int wordIn = VL_BITWORD_E(shiftRightTotal); + const int shiftLeft = VL_MASK_I(shiftLeftTotal); + const int wordOut = VL_BITWORD_E(shiftLeftTotal); + const EData bit = ((q.at(qIndex).at(wordIn)) >> shiftRight & 1) << shiftLeft; + const int writeIndx = (istart + sbit) / (sizeOfElement); + out_queue.atWrite(writeIndx).at(wordOut) |= bit; + } + } + + return out_queue; +} + +template +static inline void VL_STREAML_RII(int lbits, int queueBits, VlQueue& q, IData ld, + IData rd) VL_MT_SAFE { + + IData ret = 0; + if (lbits < queueBits) { lbits = queueBits; } + // Slice size should never exceed the lhs width + const IData mask = VL_MASK_I(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + q.clear(); + VL_CONSTEXPR_CXX17 int numBitsPerQElem = sizeof(T) * 8; + const bool needsMask = sizeof(T) < 4; + VL_CONSTEXPR_CXX17 int elementMask = VL_MASK_I(numBitsPerQElem * needsMask); + VL_CONSTEXPR_CXX17 int qElementPerWord = numBitsPerQElem < 32 ? 
32 / numBitsPerQElem : 1; + for (int i = 0; i < qElementPerWord; i++) { + if VL_CONSTEXPR_CXX17 (needsMask) { + q.push_back(static_cast(((ret >> (qElementPerWord - i - 1) * numBitsPerQElem)) + & elementMask)); + } else { + q.push_back(static_cast((ret))); + } + } +} + +template +static inline void VL_STREAML_RII(int lbits, int queueBits, VlQueue>& q, IData ld, + IData rd) VL_MT_SAFE { + if (lbits < queueBits) { lbits = queueBits; } + IData ret = 0; + // Slice size should never exceed the lhs width + const IData mask = VL_MASK_I(rd); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? ostart : 0; + ret |= ((ld >> istart) & mask) << ostart; + } + q.clear(); + VlWide value; + value[0] = ret; + q.push_back(value); +} + static inline QData VL_STREAML_QQI(int lbits, QData ld, IData rd) VL_PURE { QData ret = 0; // Slice size should never exceed the lhs width @@ -1602,6 +2070,59 @@ static inline WDataOutP VL_STREAML_WWI(int lbits, WDataOutP owp, WDataInP const return owp; } +template +static inline void VL_STREAML_RWI(int lbits, int queueBits, VlQueue& q, WDataInP const lwp, + IData rd) VL_MT_SAFE { + const bool needsMask = sizeof(T) < 4; + VL_CONSTEXPR_CXX17 int numBitsInT = 8 * sizeof(T); + VL_CONSTEXPR_CXX17 int mask = VL_MASK_I(numBitsInT * needsMask); + q.renew(lbits / numBitsInT); + const int ssize = (rd < static_cast(lbits)) ? rd : (static_cast(lbits)); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? 
ostart : 0; + for (int sbit = 0; sbit < ssize && sbit < lbits - istart; ++sbit) { + const EData bit = (VL_BITRSHIFT_W(lwp, (istart + sbit)) & 1) + << VL_BITBIT_E(ostart + sbit); + int qIndex = istart / numBitsInT; + if VL_CONSTEXPR_CXX17 (needsMask) { + int elementInWord = VL_BITBIT_I(ostart + sbit) / numBitsInT; + elementInWord *= numBitsInT; + q.atWrite(qIndex) |= (bit >> elementInWord) & mask; + } else if VL_CONSTEXPR_CXX17 (sizeof(T) > 4) { + int wordInElement = VL_BITBIT_Q(ostart) > 32; + wordInElement *= 32; + q.atWrite(qIndex) |= static_cast(bit) << wordInElement; + } else { + q.atWrite(qIndex) |= (bit); + } + } + } +} + +template +static inline void VL_STREAML_RWI(int lbits, int queueBits, VlQueue>& q, + WDataInP const lwp, IData rd) VL_MT_SAFE { + VL_CONSTEXPR_CXX17 int numBitsInT = 4 * N_Words * 8; + if (lbits < queueBits) { // this handles the case where the queue is larger than the rhs + lbits = queueBits; + } + const int leftOver = (lbits % numBitsInT) > 0; + q.renew(lbits / numBitsInT + leftOver); + const int ssize = (rd < static_cast(lbits)) ? rd : (static_cast(lbits)); + for (int istart = 0; istart < lbits; istart += rd) { + int ostart = lbits - rd - istart; + ostart = ostart > 0 ? 
ostart : 0; + for (int sbit = 0; sbit < ssize && sbit < lbits - istart; ++sbit) { + const EData bit = (VL_BITRSHIFT_W(lwp, (istart + sbit)) & 1) + << VL_BITBIT_E(ostart + sbit); + int qIndex = istart / numBitsInT; + int wordInWide = VL_BITWORD_E(ostart % numBitsInT); + q.atWrite(qIndex).at(wordInWide) |= (bit); + } + } +} + static inline IData VL_PACK_I_RI(int /*obits*/, int lbits, const VlQueue& q) { IData ret = 0; for (size_t i = 0; i < q.size(); ++i) @@ -1696,6 +2217,19 @@ static inline QData VL_PACK_Q_RQ(int /*obits*/, int lbits, const VlQueue& return ret; } +static inline IData VL_PACK_I_RQ(int /*obits*/, int lbits, const VlQueue& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) ret |= q.at(q.size() - 1 - i) << (i * lbits); + return ret; +} + +template +static inline IData VL_PACK_I_RW(int /*obits*/, int lbits, const VlQueue>& q) { + IData ret = 0; + for (size_t i = 0; i < q.size(); ++i) ret |= q.at(q.size() - 1 - i)[0] << (i * lbits); + return ret; +} + template static inline QData VL_PACK_Q_UQ(int /*obits*/, int lbits, const VlUnpacked& q) { QData ret = 0; @@ -1835,6 +2369,7 @@ static inline WDataOutP VL_CONCAT_WWI(int obits, int lbits, int rbits, WDataOutP _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits); return owp; } + static inline WDataOutP VL_CONCAT_WIW(int obits, int lbits, int rbits, WDataOutP owp, IData ld, WDataInP const rwp) VL_MT_SAFE { const int rwords = VL_WORDS_I(rbits); @@ -2179,6 +2714,43 @@ static inline IData VL_BITSEL_IWII(int lbits, WDataInP const lwp, IData rd) VL_M #define VL_SEL_QQII(lbits, lhs, lsb, width) ((lhs) >> (lsb)) #define VL_SEL_IQII(lbits, lhs, lsb, width) (static_cast((lhs) >> (lsb))) +// #define VL_SEL_IRII(lbits, lhs, lsb, width) ((lhs) >> (lsb)) +template +static inline IData VL_SEL_IRII(int lbits, const VlQueue& lhs, IData lsb, + IData width) VL_MT_SAFE { + IData val = 0; + if (sizeof(T) == 8) { + const int offset = lhs.size() * sizeof(T) / sizeof(IData) - VL_BITWORD_E(lsb) - 1; + const int wordIndex = 
VL_BITWORD_E(VL_BITBIT_Q(lsb)); + const int shiftAmt = wordIndex << 5; + const int index = offset / 2; + val |= static_cast(lhs.at(index) >> shiftAmt); + return val; + } + const int qElemPerWord = 4 / sizeof(T); + const int shiftAmt = qElemPerWord > 1 ? sizeof(T) * 8 : 0; + for (int ii = 0; ii < qElemPerWord; ii++) { + const int offset = lhs.size() * sizeof(T) / sizeof(IData) - VL_BITWORD_E(lsb) - 1; + const int index = offset * qElemPerWord + (qElemPerWord - 1 - ii); + val |= static_cast(lhs.at(index)) << (shiftAmt * ii); + } + return val; +} + +template +static inline IData VL_SEL_IRII(int lbits, const VlQueue>& lhs, IData lsb, + IData width) VL_MT_SAFE { + IData val = 0; + + const int offset = lhs.size() * N_Words - VL_BITWORD_E(lsb) - 1; + const int wordIndex = VL_BITWORD_E(lsb % (N_Words * 32)); + const int shiftAmt = VL_BITBIT_I(lsb); + const int index = offset / N_Words; + val = lhs.at(index).at(wordIndex) >> shiftAmt; + + return val; +} + static inline IData VL_SEL_IWII(int lbits, WDataInP const lwp, IData lsb, IData width) VL_MT_SAFE { const int msb = lsb + width - 1; if (VL_UNLIKELY(msb >= lbits)) { diff --git a/nodist/log_changes b/nodist/log_changes index 3d62af224..e56bef01c 100755 --- a/nodist/log_changes +++ b/nodist/log_changes @@ -68,6 +68,8 @@ def process() -> None: author += ", Heidelberg University" if re.search(r'tenstorrent', email): author += ", Testorrent USA, Inc." + if re.search(r'secturion', email): + author += ", Secturion Systems, Inc." 
if re.search(r'github action', author): author = "" continue diff --git a/src/V3Const.cpp b/src/V3Const.cpp index 59236a2e9..2d5a07faf 100644 --- a/src/V3Const.cpp +++ b/src/V3Const.cpp @@ -1388,7 +1388,8 @@ class ConstVisitor final : public VNVisitor { && (!VN_AS(nodep->rhsp(), Const)->num().fitsInUInt() // > 2^32 shift || (VN_AS(nodep->rhsp(), Const)->toUInt() >= static_cast(nodep->lhsp()->width()))) - && nodep->lhsp()->isPure()); + && nodep->lhsp()->isPure() + && !(VN_IS(nodep->lhsp()->dtypep()->skipRefp(), StreamDType))); } bool operandIsTwo(const AstNode* nodep) { const AstConst* const constp = VN_CAST(nodep, Const); @@ -1831,6 +1832,7 @@ class ConstVisitor final : public VNVisitor { nodep->replaceWithKeepDType(childp); VL_DO_DANGLING(pushDeletep(nodep), nodep); } + void replaceWChildBool(AstNode* nodep, AstNodeExpr* childp) { // NODE(..., CHILD(...)) -> REDOR(CHILD(...)) childp->unlinkFrBack(); @@ -2354,7 +2356,8 @@ class ConstVisitor final : public VNVisitor { VL_DO_DANGLING(pushDeletep(conp), conp); // Further reduce, either node may have more reductions. return true; - } else if (m_doV && VN_IS(nodep->rhsp(), StreamR)) { + } else if (m_doV && VN_IS(nodep->rhsp(), StreamR) + && !VN_IS(nodep->lhsp()->dtypep()->skipRefp(), QueueDType)) { // The right-streaming operator on rhs of assignment does not // change the order of bits. Eliminate stream but keep its lhsp. // Add a cast if needed. @@ -2517,12 +2520,16 @@ class ConstVisitor final : public VNVisitor { // Source narrower than destination: left-justify by shifting left. // The right stream operator packs left-to-right, so remaining // LSBs are zero-filled (IEEE 1800-2023 11.4.14.2). 
- AstExtend* const extendp = new AstExtend{srcp->fileline(), srcp}; - extendp->dtypeSetLogicSized(dWidth, VSigning::UNSIGNED); - srcp = new AstShiftL{ - srcp->fileline(), extendp, - new AstConst{srcp->fileline(), static_cast(dWidth - sWidth)}, - dWidth}; + if (!VN_IS(srcp->dtypep()->skipRefp(), QueueDType)) { + AstExtend* const extendp = new AstExtend{srcp->fileline(), srcp}; + extendp->dtypeSetLogicSized(dWidth, VSigning::UNSIGNED); + srcp = new AstShiftL{ + srcp->fileline(), extendp, + new AstConst{srcp->fileline(), static_cast(dWidth - sWidth)}, + dWidth}; + } else { // if it is a queue we dont need do do a shiftL + srcp = new AstExtend{srcp->fileline(), srcp}; + } } } nodep->lhsp(dstp); diff --git a/src/V3EmitCFunc.cpp b/src/V3EmitCFunc.cpp index 81b15a6f2..97d8decc7 100644 --- a/src/V3EmitCFunc.cpp +++ b/src/V3EmitCFunc.cpp @@ -57,6 +57,7 @@ void EmitCFunc::emitOpName(AstNode* nodep, const string& format, AstNode* lhsp, putnbs(nodep, ""); bool needComma = false; + bool usesQueue = false; string nextComma; auto commaOut = [&out, &nextComma]() { if (!nextComma.empty()) { @@ -109,7 +110,7 @@ void EmitCFunc::emitOpName(AstNode* nodep, const string& format, AstNode* lhsp, detailp = thsp; break; case 'P': - if (nodep->isWide()) { + if (nodep->isWide() && !usesQueue) { UASSERT_OBJ(m_wideTempRefp, nodep, "Wide op " << nodep->prettyTypeName() << " w/ no temp, perhaps missing op in V3EmitC?"); @@ -122,7 +123,13 @@ void EmitCFunc::emitOpName(AstNode* nodep, const string& format, AstNode* lhsp, out += m_wideTempRefp->varp()->nameProtect(); m_wideTempRefp = nullptr; needComma = true; + } else if (usesQueue) { + commaOut(); + putOut(); + iterateAndNextConstNull(nodep->backp()->op2p()); + needComma = true; } + break; default: nodep->v3fatalSrc("Unknown emitOperator format code: %" << pos[0]); break; } @@ -132,7 +139,17 @@ void EmitCFunc::emitOpName(AstNode* nodep, const string& format, AstNode* lhsp, switch (pos[0]) { case 'q': putOut(); - emitIQW(detailp); + // If we are 
assigning this to a queue we need to get the return type + if (VN_IS(detailp->backp(), Assign) + && VN_IS(detailp->backp()->op2p()->dtypep()->skipRefp(), QueueDType)) { + puts("R"); // R for queue + usesQueue = true; + } else if (VN_IS(detailp->dtypep()->skipRefp(), QueueDType) + || VN_IS(detailp->dtypep()->skipRefp(), StreamDType)) { + puts("R"); // R for queue + } else { + emitIQW(detailp); + } break; case 'w': commaOut(); @@ -144,6 +161,10 @@ void EmitCFunc::emitOpName(AstNode* nodep, const string& format, AstNode* lhsp, commaOut(); out += cvtToStr(lhsp->widthWords()); needComma = true; + } else if (VN_IS(lhsp, StreamR)) { + commaOut(); + out += cvtToStr(rhsp->widthWords()); + needComma = true; } break; case 'i': @@ -324,7 +345,11 @@ void EmitCFunc::displayNode(AstNode* nodep, AstSFormatF* fmtp, // fmtp is nullp const bool addrof = isScan || formatAttr.isString() || formatAttr.isComplex(); puts(","); if (addrof) puts("&("); - iterateConst(subargp); + if (VN_IS(subargp, StreamR)) + emitStreamR( + VN_CAST(subargp, StreamR), + nodep); // This has to be done here because streamR doesn't know what it returns + else { iterateConst(subargp); } if (addrof) puts(")"); if (!addrof) emitDatap(argp); ofp()->indentDec(); diff --git a/src/V3EmitCFunc.h b/src/V3EmitCFunc.h index 184046f70..3c05a6055 100644 --- a/src/V3EmitCFunc.h +++ b/src/V3EmitCFunc.h @@ -627,6 +627,15 @@ public: puts(cvtToStr(nodep->widthMin()) + ", "); iterateAndNextConstNull(nodep->lhsp()); puts(", "); + } else if (VN_IS(nodep->lhsp()->dtypep()->skipRefp(), QueueDType) + && (VN_IS(nodep->rhsp(), StreamL) || VN_IS(nodep->lhsp(), StreamL) + || VN_IS(nodep->rhsp(), StreamR) || VN_IS(nodep->lhsp(), StreamR) + || VN_IS(nodep->rhsp(), StreamR))) { + //if either side is streamL or streamR don't emit lhsp everything will be passed by + //reference + + paren = false; + } else { paren = false; iterateAndNextConstNull(nodep->lhsp()); @@ -1599,6 +1608,33 @@ public: emitOpName(nodep, nodep->emitC(), nodep->srcp(), 
nodep->countp(), nullptr); } } + void emitStreamR(AstStreamR* nodep, AstNode* parent) { + //TODO: This might need to handle more cases like the visit(AstStreamR) function + emitOpName(nodep, nodep->emitC(), nodep->lhsp(), nodep->rhsp(), nullptr); + } + void visit(AstStreamR* nodep) override { + //The parrent node of our AstStreamR will give just enough info for what streamR should + //output if nodep->backp() is not the parent then emitStreamR should have been used. throw + //an error + bool backpIsParent = (nodep->backp()->op1p() == nodep || nodep->backp()->op2p() == nodep); + UASSERT(backpIsParent, "can not find return type for streamR"); + if ((VN_IS(nodep->backp()->dtypep()->skipRefp(), QueueDType))) { + emitOpName(nodep, "VL_STREAMR_%nq%lq%rq(%lw, %P, %li, %ri)", nodep->lhsp(), + nodep->rhsp(), nullptr); + } else if (VN_IS(nodep->lhsp()->dtypep()->skipRefp(), QueueDType)) { + if (!((nodep->backp()->op1p() && nodep->backp()->op1p()->isWide()) + || (nodep->backp()->op2p() && nodep->backp()->op2p()->isWide()))) { + //If our lhsp is a queue make sure we streamR and return the correct type. + //If either side is wide or the previous node is string type dont use this case + emitOpName(nodep->backp(), "VL_STREAMR_%nq%lq%rq(%lw, %P, %li, %ri)", + nodep->lhsp(), nodep->rhsp(), nullptr); + } else { + emitOpName(nodep, nodep->emitC(), nodep->lhsp(), nodep->rhsp(), nullptr); + } + } else { + emitOpName(nodep, nodep->emitC(), nodep->lhsp(), nodep->rhsp(), nullptr); + } + } void visit(AstStreamL* nodep) override { // Attempt to use a "fast" stream function for slice size = power of 2 if (!nodep->isWide()) { @@ -1606,11 +1642,26 @@ public: const uint32_t sliceSize = VN_AS(nodep->rhsp(), Const)->toUInt(); if (isPow2 && sliceSize <= (nodep->isQuad() ? sizeof(uint64_t) : sizeof(uint32_t))) { putns(nodep, "VL_STREAML_FAST_"); - emitIQW(nodep); + bool usesQueue = false; + AstQueueDType* qtypep + = nodep->backp()->op2p() + ? 
VN_CAST(nodep->backp()->op2p()->dtypep()->skipRefp(), QueueDType) + : nullptr; + if (VN_IS(nodep->backp(), Assign) + && qtypep) { // If we are assigning to a queue then emit the correct symbol + puts("R"); // R for queue + usesQueue = true; + } else { + emitIQW(nodep); + } emitIQW(nodep->lhsp()); puts("I("); puts(cvtToStr(nodep->lhsp()->widthMin())); puts(", "); + if (usesQueue) { + iterateAndNextConstNull(nodep->backp()->op2p()); + puts(", "); + } iterateAndNextConstNull(nodep->lhsp()); puts(", "); const uint32_t rd_log2 = V3Number::log2b(VN_AS(nodep->rhsp(), Const)->toUInt()); @@ -1618,7 +1669,18 @@ public: return; } } - emitOpName(nodep, nodep->emitC(), nodep->lhsp(), nodep->rhsp(), nullptr); + if (VN_IS(nodep->backp(), Assign) + && VN_IS(nodep->backp()->op2p()->dtypep()->skipRefp(), QueueDType)) { + int queueWidth + = nodep->backp()->op2p()->dtypep()->subDTypep()->width(); //We need to know the + //width of both sides + emitOpName(nodep, + "VL_STREAML_%nq%lq%rq(%lw," + std::to_string(queueWidth) + + ", %P, %li, %ri)", + nodep->lhsp(), nodep->rhsp(), nullptr); + } else { + emitOpName(nodep, nodep->emitC(), nodep->lhsp(), nodep->rhsp(), nullptr); + } } void visit(AstCastDynamic* nodep) override { putnbs(nodep, "VL_CAST_DYNAMIC("); diff --git a/src/V3Expand.cpp b/src/V3Expand.cpp index 83d9d35ea..267360e15 100644 --- a/src/V3Expand.cpp +++ b/src/V3Expand.cpp @@ -508,6 +508,9 @@ class ExpandVisitor final : public VNVisitor { // Sel is an LHS assignment select } else if (nodep->isWide()) { // See under ASSIGN(WIDE) + } else if (VN_IS(nodep->fromp()->dtypep(), StreamDType) + || VN_IS(nodep->fromp()->dtypep(), QueueDType)) { + //sel stream or queue } else if (nodep->fromp()->isWide()) { if (isImpure(nodep)) return; UINFO(8, " SEL(wide) " << nodep); @@ -962,7 +965,9 @@ class ExpandVisitor final : public VNVisitor { void visitEqNeq(AstNodeBiop* nodep) { if (nodep->user1SetOnce()) return; // Process once iterateChildren(nodep); - if (nodep->lhsp()->isWide()) { + if 
(nodep->lhsp()->isWide() + && !(VN_IS(nodep->lhsp()->dtypep()->skipRefp(), StreamDType) + || VN_IS(nodep->rhsp()->dtypep()->skipRefp(), StreamDType))) { if (isImpure(nodep)) return; if (!doExpandWide(nodep->lhsp())) return; if (!doExpandWide(nodep->rhsp())) return; diff --git a/src/V3FuncOpt.cpp b/src/V3FuncOpt.cpp index 4e4423161..1fb6527e3 100644 --- a/src/V3FuncOpt.cpp +++ b/src/V3FuncOpt.cpp @@ -264,7 +264,8 @@ class FuncOptVisitor final : public VNVisitor { if (!VN_IS(rhsp, Concat) && !VN_IS(rhsp, Extend)) return false; // Will need the LHS AstNodeExpr* lhsp = nodep->lhsp(); - UASSERT_OBJ(lhsp->width() == rhsp->width(), nodep, "Inconsistent assignment"); + if (!VN_IS(lhsp->dtypep()->skipRefp(), QueueDType)) + UASSERT_OBJ(lhsp->width() == rhsp->width(), nodep, "Inconsistent assignment"); // Only consider pure assignments. Nodes inserted below are safe. if (!nodep->user1() && (!lhsp->isPure() || !rhsp->isPure())) return false; // Do not split assignments to SC variables, they cannot be assigned in parts diff --git a/src/V3Width.cpp b/src/V3Width.cpp index 9bcfd5ffe..924bfd4e8 100644 --- a/src/V3Width.cpp +++ b/src/V3Width.cpp @@ -6229,6 +6229,11 @@ class WidthVisitor final : public VNVisitor { << lwidth << " bits) is narrower than the stream (" << rwidth << " bits) (IEEE 1800-2023 11.4.14)"); } + if (VN_IS(streamp->lhsp()->dtypep()->skipRefp(), QueueDType) + && !VN_IS(nodep->lhsp()->dtypep()->skipRefp(), QueueDType)) { + const int queueElementSize = streamp->lhsp()->dtypep()->subDTypep()->width(); + UASSERT_OBJ(queueElementSize <= lwidth, nodep, "LHS < RHS"); + } if (VN_IS(lhsDTypeSkippedRefp, UnpackArrayDType)) { streamp->unlinkFrBack(); nodep->rhsp(new AstCvtPackedToArray{streamp->fileline(), streamp, diff --git a/test_regress/t/t_stream_queue.py b/test_regress/t/t_stream_queue.py index 84b274f68..3a46a7545 100755 --- a/test_regress/t/t_stream_queue.py +++ b/test_regress/t/t_stream_queue.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') 
-test.compile() +test.compile(verilator_flags2=["--timing"]) test.execute() diff --git a/test_regress/t/t_stream_queue.v b/test_regress/t/t_stream_queue.v index 5c15e4e78..7e6b333b9 100644 --- a/test_regress/t/t_stream_queue.v +++ b/test_regress/t/t_stream_queue.v @@ -4,31 +4,372 @@ // SPDX-FileCopyrightText: 2025 Wilson Snyder // SPDX-License-Identifier: CC0-1.0 +`define stop $stop +`define checks(gotv,expv) do if ((gotv) != (expv)) begin $write("%%Error: %s:%0d: got='%h' exp='%h'\n", `__FILE__,`__LINE__, (gotv), (expv)); `stop; end while(0); module t; + logic [7:0] i_char; + logic [15:0] i_short; int i_header; int i_len; - byte i_data[]; + int i_data; int i_crc; + logic [7:0] o_char; + logic [15:0] o_short; int o_header; int o_len; - byte o_data[]; + int o_data; int o_crc; + logic [128:0] wide129; + initial begin - byte pkt[$]; + byte byte_pkt[$]; + logic [15:0] sdata_pkt[$]; + int int_pkt[$]; + logic [63:0] qdata_pkt[$]; + logic [128:0] vlwide_pkt_129[$];//this is off by one to test edge cases + logic [127:0] vlwide_pkt_128[$]; +/* verilator lint_off ASCRANGE */ + logic [0:7] byte_pkt_rev[$]; + logic [0:15] sdata_pkt_rev[$]; + logic [0:31] int_pkt_rev[$]; + logic [0:63] qdata_pkt_rev[$]; + logic [0:128] vlwide_pkt_129_rev[$];//this is off by one to test edge cases + logic [0:127] vlwide_pkt_128_rev[$]; + i_header = 12; i_len = 5; - i_data = new[5]; + i_data = 11; i_crc = 42; + i_char = 15; + i_short = 16'hFF; + #0; // this forces no-life + //TODO make this work with V3Life + //-------------------- STREAML ------------------------------------ + //----------- CData QUEUE -------- + byte_pkt = {<<8{i_char}}; + o_char = {<<8{byte_pkt}}; + `checks(o_char,i_char); - pkt = {<<8{i_header, i_len, i_data, i_crc}}; + byte_pkt = {<<8{i_short}}; + o_short = {<<8{byte_pkt}}; + `checks(o_short,i_short); - {<<8{o_header, o_len, o_data, o_crc}} = pkt; + byte_pkt = {<<8{i_header}}; + o_header = {<<8{byte_pkt}}; + `checks(o_header,i_header); + byte_pkt = {<<8{i_header,i_len}}; + 
{<<8{o_header,o_len}} = byte_pkt; + `checks({i_header,i_len},{o_header,o_len}); + + byte_pkt = {<<8{i_header,i_len,i_crc,i_data}}; + {<<8{o_header,o_len,o_crc,o_data}} = byte_pkt; + + `checks({>>{byte_pkt}},{<<8{i_header,i_len,i_crc,i_data}}); + `checks({i_header,i_len,i_crc,i_data},{<<8{byte_pkt}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + // //----------- SData QUEUE -------- + // sdata_pkt = {<<8{i_char}}; + //TODO This should compile + // o_char = {{<<8{sdata_pkt}}}[7:0]; + // `checks(o_char,i_char); + + sdata_pkt = {<<8{i_short}}; + o_short = {<<8{sdata_pkt}}; + `checks(o_short,i_short); + + sdata_pkt = {<<8{i_header}}; + o_header = {<<8{sdata_pkt}}; + `checks(o_header,i_header); + + //test with QData + sdata_pkt = {<<8{i_header,i_len}}; + {<<8{o_header,o_len}} = sdata_pkt; + `checks({i_header,i_len},{o_header,o_len}); + + sdata_pkt = {<<8{i_header,i_len,i_crc,i_data}}; + {<<8{o_header,o_len,o_crc,o_data}} = sdata_pkt; + + `checks({>>{sdata_pkt}},{<<8{i_header,i_len,i_crc,i_data}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + //----------- IData QUEUE -------- + int_pkt = {<<8{i_header}}; + o_header = {<<8{int_pkt}}; + `checks(o_header,i_header); + + //test with QData + int_pkt = {<<8{i_header,i_len}}; + {<<8{o_header,o_len}} = int_pkt; + `checks({i_header,i_len},{o_header,o_len}); + + int_pkt = {<<8{i_header,i_len,i_crc,i_data}}; + {<<8{o_header,o_len,o_crc,o_data}} = int_pkt; + + `checks({>>{int_pkt}},{<<8{i_header,i_len,i_crc,i_data}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + //----------- QData QUEUE -------- + qdata_pkt = {<<8{i_header}}; + // o_header = {<<8{qdata_pkt}}; + //TODO This should compile + // o_header = {{<<8{sdata_pkt}}}[32:0]; + // `checks(o_header,i_header); + + //test with QData + qdata_pkt = {<<8{i_header,i_len}}; + {<<8{o_header,o_len}} = qdata_pkt; + `checks({i_header,i_len},{o_header,o_len}); + + + qdata_pkt = 
{<<8{i_header,i_len,i_crc,i_data}}; + {<<8{o_header,o_len,o_crc,o_data}} = qdata_pkt; + + `checks({>>{qdata_pkt}},{<<8{i_header,i_len,i_crc,i_data}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + // ----------- VLWide QUEUE -------- + // test with QData + vlwide_pkt_129 = {<<8{i_header,i_len}}; + {<<8{o_header,o_len}} = vlwide_pkt_129; //TODO this shouldn't compile: lhs should not be smaller than rhs + // `checks({i_header,i_len},{o_header,o_len}); + + vlwide_pkt_129 = {<<8{i_header,i_len,i_crc,i_data}}; + + /* verilator lint_off WIDTHEXPAND */ + wide129 = {<<8{i_header,i_len,i_crc,i_data}}; + `checks({>>{vlwide_pkt_129}},wide129); + /* verilator lint_on WIDTHEXPAND */ + + //------------------------------- REVERSE ENDIAN ------------------------------ + //----------- CData QUEUE -------- + byte_pkt_rev = {<<8{i_char}}; + o_char = {<<8{byte_pkt_rev}}; + `checks(o_char,i_char); + + byte_pkt_rev = {<<8{i_short}}; + o_short = {<<8{byte_pkt_rev}}; + `checks(o_short,i_short); + + byte_pkt_rev = {<<8{i_header}}; + o_header = {<<8{byte_pkt_rev}}; + `checks(o_header,i_header); + + byte_pkt_rev = {<<8{i_header,i_len}}; + {<<8{o_header,o_len}} = byte_pkt_rev; + `checks({i_header,i_len},{o_header,o_len}); + + byte_pkt_rev = {<<8{i_header,i_len,i_crc,i_data}}; + {<<8{o_header,o_len,o_crc,o_data}} = byte_pkt_rev; + + `checks({>>{byte_pkt_rev}},{<<8{i_header,i_len,i_crc,i_data}}); + `checks({i_header,i_len,i_crc,i_data},{<<8{byte_pkt_rev}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + //----------- SData QUEUE -------- + sdata_pkt_rev = {<<8{i_short}}; + o_short = {<<8{sdata_pkt_rev}}; + `checks(o_short,i_short); + + sdata_pkt_rev = {<<8{i_header}}; + o_header = {<<8{sdata_pkt_rev}}; + `checks(o_header,i_header); + + //test with QData + sdata_pkt_rev = {<<8{i_header,i_len}}; + {<<8{o_header,o_len}} = sdata_pkt_rev; + `checks({i_header,i_len},{o_header,o_len}); + + sdata_pkt_rev = {<<8{i_header,i_len,i_crc,i_data}}; +
{<<8{o_header,o_len,o_crc,o_data}} = sdata_pkt_rev; + + `checks({>>{sdata_pkt_rev}},{<<8{i_header,i_len,i_crc,i_data}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + //----------- IData QUEUE -------- + int_pkt_rev = {<<8{i_header}}; + o_header = {<<8{int_pkt_rev}}; + `checks(o_header,i_header); + + //test with QData + int_pkt_rev = {<<8{i_header,i_len}}; + {<<8{o_header,o_len}} = int_pkt_rev; + `checks({i_header,i_len},{o_header,o_len}); + + int_pkt_rev = {<<8{i_header,i_len,i_crc,i_data}}; + {<<8{o_header,o_len,o_crc,o_data}} = int_pkt_rev; + + `checks({>>{int_pkt_rev}},{<<8{i_header,i_len,i_crc,i_data}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + //----------- QData QUEUE -------- + + //test with QData + qdata_pkt_rev = {<<8{i_header,i_len}}; + {<<8{o_header,o_len}} = qdata_pkt_rev; + `checks({i_header,i_len},{o_header,o_len}); + + + qdata_pkt_rev = {<<8{i_header,i_len,i_crc,i_data}}; + {<<8{o_header,o_len,o_crc,o_data}} = qdata_pkt_rev; + + `checks({>>{qdata_pkt_rev}},{<<8{i_header,i_len,i_crc,i_data}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + // ----------- VLWide QUEUE -------- + + vlwide_pkt_129_rev = {<<8{i_header,i_len,i_crc,i_data}}; + /* verilator lint_off WIDTHEXPAND */ + wide129 = {<<8{i_header,i_len,i_crc,i_data}}; + /* verilator lint_on WIDTHEXPAND */ + `checks({>>{vlwide_pkt_129_rev}},wide129); + + // // -------------------- STREAMR ------------------------------------ + // //----------- CData QUEUE -------- + byte_pkt = {>>{i_header}}; + o_header = {>>{byte_pkt}}; + `checks(o_header,i_header); + + byte_pkt = {>>{i_header,i_len}}; + {>>{o_header,o_len}} = byte_pkt; + `checks({>>{i_header,i_len}},{>>{o_header,o_len}}); + `checks({i_header,i_len},{o_header,o_len}); + + byte_pkt = {>>{i_header,i_len,i_crc,i_data}}; + {>>{o_header,o_len,o_crc,o_data}} = byte_pkt; + + `checks({>>{byte_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + 
`checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + //----------- IData QUEUE -------- + int_pkt = {>>{i_header}}; + o_header = {>>{int_pkt}}; + `checks(o_header,i_header); + `checks(o_header,{>>{int_pkt}}); + `checks({>>{o_header}},{>>{int_pkt}}); + + //test with QData + int_pkt = {>>{i_header,i_len}}; + {>>{o_header,o_len}} = int_pkt; + `checks({i_header,i_len},{o_header,o_len}); + + int_pkt = {>>{i_header,i_len,i_crc,i_data}}; + {>>{o_header,o_len,o_crc,o_data}} = int_pkt; + + `checks({>>{int_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + //----------- QData QUEUE -------- + + // test with QData + qdata_pkt = {>>{i_header,i_len}}; + {>>{o_header,o_len}} = qdata_pkt; + `checks({i_header,i_len},{o_header,o_len}); + + qdata_pkt = {>>{i_header,i_len,i_crc,i_data}}; + {>>{o_header,o_len,o_crc,o_data}} = qdata_pkt; + + `checks({>>{qdata_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + // ----------- VLWide QUEUE -------- + + // test with QData + vlwide_pkt_129 = {>>{i_header,i_len}}; + {>>{o_header,o_len}} = vlwide_pkt_129; + `checks({i_header,i_len},{o_header,o_len}); + + + vlwide_pkt_129 = {>>{i_header,i_len,i_crc,i_data}}; + {>>{o_header,o_len,o_crc,o_data}} = vlwide_pkt_129; + + `checks({>>{vlwide_pkt_129}},{>>{1'b0,i_header,i_len,i_crc,i_data}}); + `checks({o_header,o_len,o_crc,o_data} ,{i_header,i_len,i_crc,i_data}); + + //---------- into other queues ------ + int_pkt = {>>{i_header,i_len,i_crc,i_data}}; + byte_pkt = {>>{int_pkt}}; + `checks({>>{byte_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + byte_pkt = {>>{i_header,i_len,i_crc,i_data}}; + int_pkt = {>>{byte_pkt}}; + `checks({>>{int_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + byte_pkt = {>>{i_header,i_len,i_crc,i_data}}; + int_pkt = {>>{byte_pkt}}; + `checks({>>{int_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + sdata_pkt = 
{>>{i_header,i_len,i_crc,i_data}}; + byte_pkt = {>>{sdata_pkt}}; + `checks({>>{byte_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + byte_pkt = {>>{i_header,i_len,i_crc,i_data}}; + sdata_pkt = {>>{byte_pkt}}; + `checks({>>{sdata_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + byte_pkt = {>>{i_header,i_len,i_crc,i_data}}; + qdata_pkt = {>>{byte_pkt}}; + `checks({>>{qdata_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + qdata_pkt = {>>{i_header,i_len,i_crc,i_data}}; + byte_pkt = {>>{qdata_pkt}}; + `checks({>>{byte_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + qdata_pkt = {>>{i_header,i_len,i_crc,i_data}}; + int_pkt = {>>{qdata_pkt}}; + `checks({>>{int_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + int_pkt = {>>{i_header,i_len,i_crc,i_data}}; + qdata_pkt = {>>{int_pkt}}; + `checks({>>{qdata_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + byte_pkt = {>>{i_header,i_len,i_crc,i_data}}; + vlwide_pkt_128 = {>>{byte_pkt}}; + `checks({>>{vlwide_pkt_128}},{>>{i_header,i_len,i_crc,i_data}}); + + vlwide_pkt_128 = {>>{i_header,i_len,i_crc,i_data}}; + byte_pkt = {>>{vlwide_pkt_128}}; + `checks({i_header,i_len,i_crc,i_data},{>>{byte_pkt}}); + `checks({>>{byte_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + int_pkt = {>>{i_header,i_len,i_crc,i_data}}; + vlwide_pkt_128 = {>>{int_pkt}}; + `checks({i_header,i_len,i_crc,i_data},{>>{vlwide_pkt_128}}); + `checks({>>{vlwide_pkt_128}},{>>{i_header,i_len,i_crc,i_data}}); + + vlwide_pkt_128 = {>>{i_header,i_len,i_crc,i_data}}; + int_pkt = {>>{vlwide_pkt_128}}; + `checks({i_header,i_len,i_crc,i_data},{>>{int_pkt}}); + `checks({>>{int_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + + qdata_pkt = {>>{i_header,i_len,i_crc,i_data}}; + vlwide_pkt_128 = {>>{qdata_pkt}}; + `checks({i_header,i_len,i_crc,i_data},{>>{vlwide_pkt_128}}); + `checks({>>{vlwide_pkt_128}},{>>{i_header,i_len,i_crc,i_data}}); + + qdata_pkt = {>>{i_header,i_len,i_crc,i_data,i_header,i_len,i_crc,i_data}}; + vlwide_pkt_128 = {>>{qdata_pkt}}; + 
`checks({i_header,i_len,i_crc,i_data,i_header,i_len,i_crc,i_data},{>>{vlwide_pkt_128}}); + `checks({>>{vlwide_pkt_128}},{>>{i_header,i_len,i_crc,i_data,i_header,i_len,i_crc,i_data}}); + + qdata_pkt = {>>{i_header,i_len,i_crc,i_data,i_header,i_len,i_crc}}; + vlwide_pkt_128 = {>>{qdata_pkt}}; + `checks({32'h0,i_header,i_len,i_crc,i_data,i_header,i_len,i_crc},{>>{vlwide_pkt_128}}); + `checks({>>{vlwide_pkt_128}},{>>{32'h0,i_header,i_len,i_crc,i_data,i_header,i_len,i_crc}}); + + vlwide_pkt_128 = {>>{i_header,i_len,i_crc,i_data}}; + qdata_pkt = {>>{vlwide_pkt_128}}; + `checks({i_header,i_len,i_crc,i_data},{>>{vlwide_pkt_128}}); + `checks({>>{qdata_pkt}},{>>{i_header,i_len,i_crc,i_data}}); + $write("*-* All Finished *-*\n"); $finish; + end + endmodule diff --git a/test_regress/t/t_stream_queue_interface.py b/test_regress/t/t_stream_queue_interface.py new file mode 100755 index 000000000..da1d399fc --- /dev/null +++ b/test_regress/t/t_stream_queue_interface.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. +# SPDX-FileCopyrightText: 2025 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('simulator') + +test.top_filename = "t/t_stream_queue_interface.sv" + +test.compile(verilator_flags2=["--timing -Wno-WIDTHEXPAND"]) + +test.execute() + +test.passes() diff --git a/test_regress/t/t_stream_queue_interface.sv b/test_regress/t/t_stream_queue_interface.sv new file mode 100644 index 000000000..290a174d7 --- /dev/null +++ b/test_regress/t/t_stream_queue_interface.sv @@ -0,0 +1,76 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain. 
+// SPDX-FileCopyrightText: 2025 Wilson Snyder +// SPDX-License-Identifier: CC0-1.0 + +`define stop $stop +`define checks(gotv,expv) do if ((gotv) != (expv)) begin $write("%%Error: %s:%0d: got='%h' exp='%h'\n", `__FILE__,`__LINE__, (gotv), (expv)); end while(0); +module t; + logic clk; + int i_header; + int i_len; + int i_data; + int i_crc; + + int o_header; + int o_len; + int o_data; + int o_crc; + + pkt_if pkt_if_init (clk); + //this will not compile without -fno-life + initial begin + byte byte_pkt[$]; + //---------------------- STREAM WITH INTERFACE ------------------- + //using this presumably forces verilator to put an AstSel Node into a Stream Node (TODO confirm) + #0 //make sure we don't optimize it all away in V3Life + pkt_if_init.s.extra = 8'hd; + byte_pkt = {>>{pkt_if_init.s.extra}}; + if(8'hd == {>>{byte_pkt}}) begin + $write("*-* All Finished *-*\n"); + $finish(); + end + end + +endmodule + +interface pkt_if ( + input wire clk +); + + typedef struct packed { + logic [31:0] extra; + logic [31:0] empty; + logic [31:0] data; + logic valid; + logic sop; + logic eop; + } avst_s; + + avst_s s; + logic ready; + + // Read-Only Helper Signals + logic sop_pulse; + logic eop_pulse; + + modport src ( + output s, + input ready, + input sop_pulse, eop_pulse + ); + + modport snoop ( + input s, + input ready, + input sop_pulse, eop_pulse + ); + + modport sink ( + input s, + output ready, + input sop_pulse, eop_pulse + ); + +endinterface diff --git a/test_regress/t/t_stream_trace.py b/test_regress/t/t_stream_trace.py index 5eeac50cf..3fbef9037 100755 --- a/test_regress/t/t_stream_trace.py +++ b/test_regress/t/t_stream_trace.py @@ -11,7 +11,7 @@ import vltest_bootstrap test.scenarios('simulator') -test.compile(verilator_flags2=['--cc --trace-vcd']) +test.compile(verilator_flags2=['--cc --trace-vcd -fno-life']) test.execute()