Support streaming into 2-D unpacked arrays (#7686) (#7687)

Fixes #7686.
2026-05-30 19:08:32 -07:00 · 2026-05-30 19:08:32 -07:00 · 1eb12685a7
parent e2063e7ac0
commit 1eb12685a7
2 changed files with 181 additions and 10 deletions
--- a/include/verilated_funcs.h
+++ b/include/verilated_funcs.h
@ -2134,6 +2134,16 @@ static inline IData VL_PACK_I_RI(int /*obits*/, int lbits, const VlQueue<IData>&
    return ret;
 }

+// Compile-time count of leaf elements held by a (possibly nested) VlUnpacked.
+// Base case: a type that is not a VlUnpacked counts as exactly one element.
+template <typename T_Elem>
+struct VlUnpackedElements final {
+    static constexpr std::size_t count{1};
+};
+
+// Recursive case: an unpacked array holds N_Depth copies of its element type,
+// so its leaf count is N_Depth times the leaf count of a single element.
+template <typename T_Elem, std::size_t N_Depth>
+struct VlUnpackedElements<VlUnpacked<T_Elem, N_Depth>> final {
+    static constexpr std::size_t count{VlUnpackedElements<T_Elem>::count * N_Depth};
+};
+
 template <std::size_t N_Depth>
 static inline IData VL_PACK_I_UI(int /*obits*/, int lbits, const VlUnpacked<CData, N_Depth>& q) {
    IData ret = 0;
@ -2157,6 +2167,18 @@ static inline IData VL_PACK_I_UI(int /*obits*/, int lbits, const VlUnpacked<IDat
    return ret;
 }

+// Pack a nested (multi-dimensional) unpacked array into an IData.
+// Every q[i] is packed by a further VL_PACK_I_UI call on that sub-array, and
+// the result is OR-ed into a field that is sub_bits wide: q[0] lands in the
+// most-significant field and q[N_Depth - 1] in the least-significant one.
+// lbits is the width in bits of one leaf element.  The obits argument is not
+// used; it is kept so the signature matches the other VL_PACK_I_UI overloads.
+template <typename T_Sub, std::size_t N_Sub, std::size_t N_Depth>
+static inline IData VL_PACK_I_UI(const int /*obits*/, const int lbits,
+                                 const VlUnpacked<VlUnpacked<T_Sub, N_Sub>, N_Depth>& q) {
+    // Bits occupied by one q[i]: its leaf-element count times the leaf width
+    const int sub_bits
+        = static_cast<int>(VlUnpackedElements<VlUnpacked<T_Sub, N_Sub>>::count) * lbits;
+    IData ret = 0;
+    for (size_t i = 0; i < N_Depth; ++i) {
+        // Walk q from its last entry so that field i (counted from the LSB) is q[N_Depth-1-i]
+        const IData sub_val = VL_PACK_I_UI(sub_bits, lbits, q[N_Depth - 1 - i]);
+        ret |= sub_val << (i * sub_bits);
+    }
+    return ret;
+}
+
 static inline QData VL_PACK_Q_RI(int /*obits*/, int lbits, const VlQueue<CData>& q) {
    QData ret = 0;
    for (size_t i = 0; i < q.size(); ++i)
@ -3145,6 +3167,17 @@ static inline void VL_UNPACK_UI_I(int lbits, int /*rbits*/, VlUnpacked<IData, N_
    for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask;
 }

+// Unpack an IData into a nested (multi-dimensional) unpacked array.
+// The source is split into N_Depth fields of sub_bits each; q[0] receives the
+// most-significant field and q[N_Depth - 1] the least-significant one.  Each
+// field is handed to a further VL_UNPACK_UI_I call on q[i], with sub_bits as
+// that call's source width.  lbits is the width in bits of one leaf element.
+// The rbits argument is not used; it is kept so the signature matches the
+// other VL_UNPACK_UI_I overloads.
+template <typename T_Sub, std::size_t N_Sub, std::size_t N_Depth>
+static inline void VL_UNPACK_UI_I(const int lbits, const int /*rbits*/,
+                                  VlUnpacked<VlUnpacked<T_Sub, N_Sub>, N_Depth>& q,
+                                  const IData from) {
+    // Bits occupied by one q[i]: its leaf-element count times the leaf width
+    const int sub_bits
+        = static_cast<int>(VlUnpackedElements<VlUnpacked<T_Sub, N_Sub>>::count) * lbits;
+    for (size_t i = 0; i < N_Depth; ++i) {
+        // Shift the field for q[i] down to bit 0; bits above it are left in
+        // place and not masked off in this function
+        const IData sub_from = (from >> ((N_Depth - 1 - i) * sub_bits));
+        VL_UNPACK_UI_I(lbits, sub_bits, q[i], sub_from);
+    }
+}
+
 template <std::size_t N_Depth>
 static inline void VL_UNPACK_UI_Q(int lbits, int /*rbits*/, VlUnpacked<CData, N_Depth>& q,
                                  QData from) {
@ -3166,6 +3199,17 @@ static inline void VL_UNPACK_UI_Q(int lbits, int /*rbits*/, VlUnpacked<IData, N_
    for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask;
 }

+// Unpack a QData into a nested (multi-dimensional) unpacked array.
+// The source is split into N_Depth fields of sub_bits each; q[0] receives the
+// most-significant field and q[N_Depth - 1] the least-significant one.  Each
+// field is handed to a further VL_UNPACK_UI_Q call on q[i], with sub_bits as
+// that call's source width.  lbits is the width in bits of one leaf element.
+// The rbits argument is not used; it is kept so the signature matches the
+// other VL_UNPACK_UI_Q overloads.
+template <typename T_Sub, std::size_t N_Sub, std::size_t N_Depth>
+static inline void VL_UNPACK_UI_Q(const int lbits, const int /*rbits*/,
+                                  VlUnpacked<VlUnpacked<T_Sub, N_Sub>, N_Depth>& q,
+                                  const QData from) {
+    // Bits occupied by one q[i]: its leaf-element count times the leaf width
+    const int sub_bits
+        = static_cast<int>(VlUnpackedElements<VlUnpacked<T_Sub, N_Sub>>::count) * lbits;
+    for (size_t i = 0; i < N_Depth; ++i) {
+        // Shift the field for q[i] down to bit 0; bits above it are left in
+        // place and not masked off in this function
+        const QData sub_from = (from >> ((N_Depth - 1 - i) * sub_bits));
+        VL_UNPACK_UI_Q(lbits, sub_bits, q[i], sub_from);
+    }
+}
+
 template <std::size_t N_Depth>
 static inline void VL_UNPACK_UQ_Q(int lbits, int /*rbits*/, VlUnpacked<QData, N_Depth>& q,
                                  QData from) {
@ -3173,43 +3217,73 @@ static inline void VL_UNPACK_UQ_Q(int lbits, int /*rbits*/, VlUnpacked<QData, N_
    for (size_t i = 0; i < N_Depth; ++i) q[i] = (from >> ((N_Depth - 1 - i) * lbits)) & mask;
 }

+// Unpack a wide source into a nested (multi-dimensional) unpacked array by
+// issuing a further VL_UNPACK_UI_W call for every sub-array q[i].
+// The call for q[i] is given the bit offset
+// bit_offset + (N_Depth - 1 - i) * (bits per sub-array), so q[0] gets the
+// highest offset and q[N_Depth - 1] gets bit_offset itself.
+// lbits is the width in bits of one leaf element; rbits and rwp are forwarded
+// unchanged.
+template <typename T_Sub, std::size_t N_Sub, std::size_t N_Depth>
+static inline void VL_UNPACK_UI_W(const int lbits, const int rbits,
+                                  VlUnpacked<VlUnpacked<T_Sub, N_Sub>, N_Depth>& q, WDataInP rwp,
+                                  const int bit_offset = 0) {
+    using SubArray = VlUnpacked<T_Sub, N_Sub>;
+    // Width of one q[i]: number of leaf elements it holds times the leaf width
+    const int bits_per_sub = VlUnpackedElements<SubArray>::count * lbits;
+    for (size_t idx = 0; idx < N_Depth; ++idx) {
+        // Number of sub-array fields that sit below q[idx] in the source
+        const size_t fields_below = N_Depth - 1 - idx;
+        VL_UNPACK_UI_W(lbits, rbits, q[idx], rwp, bit_offset + fields_below * bits_per_sub);
+    }
+}
+
+// Unpack a wide source into a nested (multi-dimensional) unpacked array by
+// issuing a further VL_UNPACK_UQ_W call for every sub-array q[i].
+// The call for q[i] is given the bit offset
+// bit_offset + (N_Depth - 1 - i) * (bits per sub-array), so q[0] gets the
+// highest offset and q[N_Depth - 1] gets bit_offset itself.
+// lbits is the width in bits of one leaf element; rbits and rwp are forwarded
+// unchanged.
+template <typename T_Sub, std::size_t N_Sub, std::size_t N_Depth>
+static inline void VL_UNPACK_UQ_W(const int lbits, const int rbits,
+                                  VlUnpacked<VlUnpacked<T_Sub, N_Sub>, N_Depth>& q, WDataInP rwp,
+                                  const int bit_offset = 0) {
+    using SubArray = VlUnpacked<T_Sub, N_Sub>;
+    // Width of one q[i]: number of leaf elements it holds times the leaf width
+    const int bits_per_sub = VlUnpackedElements<SubArray>::count * lbits;
+    for (size_t idx = 0; idx < N_Depth; ++idx) {
+        // Number of sub-array fields that sit below q[idx] in the source
+        const size_t fields_below = N_Depth - 1 - idx;
+        VL_UNPACK_UQ_W(lbits, rbits, q[idx], rwp, bit_offset + fields_below * bits_per_sub);
+    }
+}
+
+// Unpack a wide source into a nested (multi-dimensional) unpacked array by
+// issuing a further VL_UNPACK_UW_W call for every sub-array q[i].
+// The call for q[i] is given the bit offset
+// bit_offset + (N_Depth - 1 - i) * (bits per sub-array), so q[0] gets the
+// highest offset and q[N_Depth - 1] gets bit_offset itself.
+// lbits is the width in bits of one leaf element; rbits and rwp are forwarded
+// unchanged.
+template <typename T_Sub, std::size_t N_Sub, std::size_t N_Depth>
+static inline void VL_UNPACK_UW_W(const int lbits, const int rbits,
+                                  VlUnpacked<VlUnpacked<T_Sub, N_Sub>, N_Depth>& q, WDataInP rwp,
+                                  const int bit_offset = 0) {
+    using SubArray = VlUnpacked<T_Sub, N_Sub>;
+    // Width of one q[i]: number of leaf elements it holds times the leaf width
+    const int bits_per_sub = VlUnpackedElements<SubArray>::count * lbits;
+    for (size_t idx = 0; idx < N_Depth; ++idx) {
+        // Number of sub-array fields that sit below q[idx] in the source
+        const size_t fields_below = N_Depth - 1 - idx;
+        VL_UNPACK_UW_W(lbits, rbits, q[idx], rwp, bit_offset + fields_below * bits_per_sub);
+    }
+}
+
+// Unpack a wide source into an unpacked array of CData elements.
+// Element q[i] is assigned the result of VL_SEL_IWII called with position
+// bit_offset + (N_Depth - 1 - i) * lbits and width lbits, masked to lbits, so
+// q[0] uses the highest position and q[N_Depth - 1] uses bit_offset itself.
+// bit_offset defaults to 0, which reproduces the positions used before the
+// parameter was added.
 template <std::size_t N_Depth>
 static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked<CData, N_Depth>& q,
-                                  WDataInP rwp) {
+                                  WDataInP rwp, const int bit_offset = 0) {
    const IData mask = VL_MASK_I(lbits);
    for (size_t i = 0; i < N_Depth; ++i)
-        q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask;
+        q[i] = VL_SEL_IWII(rbits, rwp, bit_offset + (N_Depth - 1 - i) * lbits, lbits) & mask;
 }

+// Unpack a wide source into an unpacked array of SData elements.
+// Element q[i] is assigned the result of VL_SEL_IWII called with position
+// bit_offset + (N_Depth - 1 - i) * lbits and width lbits, masked to lbits, so
+// q[0] uses the highest position and q[N_Depth - 1] uses bit_offset itself.
+// bit_offset defaults to 0, which reproduces the positions used before the
+// parameter was added.
 template <std::size_t N_Depth>
 static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked<SData, N_Depth>& q,
-                                  WDataInP rwp) {
+                                  WDataInP rwp, const int bit_offset = 0) {
    const IData mask = VL_MASK_I(lbits);
    for (size_t i = 0; i < N_Depth; ++i)
-        q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask;
+        q[i] = VL_SEL_IWII(rbits, rwp, bit_offset + (N_Depth - 1 - i) * lbits, lbits) & mask;
 }

+// Unpack a wide source into an unpacked array of IData elements.
+// Element q[i] is assigned the result of VL_SEL_IWII called with position
+// bit_offset + (N_Depth - 1 - i) * lbits and width lbits, masked to lbits, so
+// q[0] uses the highest position and q[N_Depth - 1] uses bit_offset itself.
+// bit_offset defaults to 0, which reproduces the positions used before the
+// parameter was added.
 template <std::size_t N_Depth>
 static inline void VL_UNPACK_UI_W(int lbits, int rbits, VlUnpacked<IData, N_Depth>& q,
-                                  WDataInP rwp) {
+                                  WDataInP rwp, const int bit_offset = 0) {
    const IData mask = VL_MASK_I(lbits);
    for (size_t i = 0; i < N_Depth; ++i)
-        q[i] = VL_SEL_IWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask;
+        q[i] = VL_SEL_IWII(rbits, rwp, bit_offset + (N_Depth - 1 - i) * lbits, lbits) & mask;
 }

+// Unpack a wide source into an unpacked array of QData elements.
+// Element q[i] is assigned the result of VL_SEL_QWII called with position
+// bit_offset + (N_Depth - 1 - i) * lbits and width lbits, masked to lbits, so
+// q[0] uses the highest position and q[N_Depth - 1] uses bit_offset itself.
+// bit_offset defaults to 0, which reproduces the positions used before the
+// parameter was added.
 template <std::size_t N_Depth>
 static inline void VL_UNPACK_UQ_W(int lbits, int rbits, VlUnpacked<QData, N_Depth>& q,
-                                  WDataInP rwp) {
+                                  WDataInP rwp, const int bit_offset = 0) {
    const QData mask = VL_MASK_Q(lbits);
    for (size_t i = 0; i < N_Depth; ++i)
-        q[i] = VL_SEL_QWII(rbits, rwp, (N_Depth - 1 - i) * lbits, lbits) & mask;
+        q[i] = VL_SEL_QWII(rbits, rwp, bit_offset + (N_Depth - 1 - i) * lbits, lbits) & mask;
 }

+// Unpack a wide source into an unpacked array of VlWide elements.
+// For every q[i], VL_SEL_WWII is called with q[i] as its destination, position
+// bit_offset + (N_Depth - 1 - i) * lbits and width lbits, so q[0] uses the
+// highest position and q[N_Depth - 1] uses bit_offset itself.
+// bit_offset defaults to 0, which reproduces the positions used before the
+// parameter was added.
 template <std::size_t N_Depth, std::size_t N_Words>
 static inline void VL_UNPACK_UW_W(int lbits, int rbits, VlUnpacked<VlWide<N_Words>, N_Depth>& q,
-                                  WDataInP rwp) {
+                                  WDataInP rwp, const int bit_offset = 0) {
    for (size_t i = 0; i < N_Depth; ++i)
-        VL_SEL_WWII(lbits, rbits, q[i], rwp, (N_Depth - 1 - i) * lbits, lbits);
+        VL_SEL_WWII(lbits, rbits, q[i], rwp, bit_offset + (N_Depth - 1 - i) * lbits, lbits);
 }

 // Return QData from double (numeric)
--- a/test_regress/t/t_stream_unpack.v
+++ b/test_regress/t/t_stream_unpack.v
@ -285,6 +285,103 @@ module t;
    `checkh(p[0], 128'habcd0123456789abfadecafedeadbeef);
    `checkh(p[1], 128'hfadecafedeadbeefabcd0123456789ab);

+    // Streaming to and from multi-dimensional unpacked arrays
+    begin
+      automatic logic arr2d_1 [2][2];
+      automatic logic [3:0] packed_4;
+
+      // Stream on the right-hand side: unpack packed_4 into the 2-D array;
+      // the MSB is expected in [0][0] and the LSB in [1][1]
+      packed_4 = 4'b1100;
+      arr2d_1 = { >> {packed_4}};
+      `checkh(arr2d_1[0][0], 1'b1);
+      `checkh(arr2d_1[0][1], 1'b1);
+      `checkh(arr2d_1[1][0], 1'b0);
+      `checkh(arr2d_1[1][1], 1'b0);
+
+      // Stream on the left-hand side: pack the 2-D array into packed_4;
+      // [0][0] is expected to become the MSB
+      arr2d_1[0][0] = 1'b0;
+      arr2d_1[0][1] = 1'b1;
+      arr2d_1[1][0] = 1'b0;
+      arr2d_1[1][1] = 1'b1;
+      { >> {packed_4}} = arr2d_1;
+      `checkh(packed_4, 4'b0101);
+
+      // Same unpack as above, but from a constant instead of a variable
+      arr2d_1 = { >> {4'b1010}};
+      `checkh(arr2d_1[0][0], 1'b1);
+      `checkh(arr2d_1[0][1], 1'b0);
+      `checkh(arr2d_1[1][0], 1'b1);
+      `checkh(arr2d_1[1][1], 1'b0);
+
+      // 3-D array: unpack, then pack back and expect the original value again
+      begin
+        automatic logic arr3d_1 [2][2][2];
+        automatic logic [7:0] packed_8;
+
+        packed_8 = 8'b1100_1010;
+        arr3d_1 = { >> {packed_8}};
+        `checkh(arr3d_1[0][0][0], 1'b1);
+        `checkh(arr3d_1[0][0][1], 1'b1);
+        `checkh(arr3d_1[0][1][0], 1'b0);
+        `checkh(arr3d_1[0][1][1], 1'b0);
+        `checkh(arr3d_1[1][0][0], 1'b1);
+        `checkh(arr3d_1[1][0][1], 1'b0);
+        `checkh(arr3d_1[1][1][0], 1'b1);
+        `checkh(arr3d_1[1][1][1], 1'b0);
+
+        packed_8 = 8'h0;
+        { >> {packed_8}} = arr3d_1;
+        `checkh(packed_8, 8'b1100_1010);
+      end
+
+      // 48-bit source into a 2-D array of 12-bit elements (tests VL_UNPACK_UI_Q)
+      begin
+        automatic logic [47:0] packed_48;
+        automatic logic [11:0] arr2d_12 [2][2];
+
+        packed_48 = 48'habcdef_012345;
+        arr2d_12 = { >> {packed_48}};
+        `checkh(arr2d_12[0][0], 12'habc);
+        `checkh(arr2d_12[0][1], 12'hdef);
+        `checkh(arr2d_12[1][0], 12'h012);
+        `checkh(arr2d_12[1][1], 12'h345);
+      end
+
+      // 96-bit source into a 2-D array of 24-bit elements (tests VL_UNPACK_UI_W)
+      begin
+        automatic logic [95:0] packed_96;
+        automatic logic [23:0] arr2d_24 [2][2];
+
+        packed_96 = 96'h123456_789abc_def012_345678;
+        arr2d_24 = { >> {packed_96}};
+        `checkh(arr2d_24[0][0], 24'h123456);
+        `checkh(arr2d_24[0][1], 24'h789abc);
+        `checkh(arr2d_24[1][0], 24'hdef012);
+        `checkh(arr2d_24[1][1], 24'h345678);
+      end
+
+      // 128-bit source into a 2-D array of 64-bit (QData) elements
+      begin
+        automatic logic [127:0] packed_128;
+        automatic logic [63:0] arr2d_q [1][2];
+
+        packed_128 = 128'hfadecafedeadbeef_abcd0123456789ab;
+        arr2d_q = { >> {packed_128}};
+        `checkh(arr2d_q[0][0], 64'hfadecafedeadbeef);
+        `checkh(arr2d_q[0][1], 64'habcd0123456789ab);
+      end
+
+      // 192-bit source into a 2-D array of 96-bit (wide) elements
+      begin
+        automatic logic [191:0] packed_192;
+        automatic logic [95:0] arr2d_w [1][2];
+
+        packed_192 = 192'h123456789abcdef012345678_9abcdef01234567812345678;
+        arr2d_w = { >> {packed_192}};
+        `checkh(arr2d_w[0][0], 96'h123456789abcdef012345678);
+        `checkh(arr2d_w[0][1], 96'h9abcdef01234567812345678);
+      end
+    end
+
    $write("*-* All Finished *-*\n");
    $finish;
  end