diff --git a/include/verilated_force.h b/include/verilated_force.h index d50d27c18..5cbafdc62 100644 --- a/include/verilated_force.h +++ b/include/verilated_force.h @@ -101,12 +101,13 @@ using VlForceStorageType = typename VlForceStorageTypeOf>::ty class VlForceVec final { private: struct Entry final { - int m_lsb; // Inclusive lower bit - int m_msb; // Inclusive upper bit + int m_lsb; // Inclusive lower bit for scalar path or element index for unpacked + int m_msb; // Inclusive upper bit for scalar path or element index for unpacked int m_rhsLsb; // Destination index that maps to RHS index 0 const void* m_rhsDatap; // Pointer to RHS storage - - bool operator<(const Entry& other) const { return m_msb < other.m_msb; } + int m_bitLsb = 0; + int m_bitMsb = 0; + int m_elemWidth = 0; }; std::vector m_entries; // Sorted by msb, non-overlapping @@ -134,6 +135,41 @@ private: return it; } + std::size_t trimElementBitRange(int elem, int bitLsb, int bitMsb) { + auto it = std::lower_bound(m_entries.begin(), m_entries.end(), elem, + [](const Entry& e, int idx) { return e.m_msb < idx; }); + while (it != m_entries.end() && it->m_lsb <= elem) { + if (it->m_elemWidth == 0 || it->m_bitMsb < bitLsb || it->m_bitLsb > bitMsb) { + ++it; + continue; + } + if (it->m_bitLsb < bitLsb && it->m_bitMsb > bitMsb) { + Entry high = *it; + high.m_bitLsb = bitMsb + 1; + it->m_bitMsb = bitLsb - 1; + m_entries.insert(it + 1, high); + break; + } + if (it->m_bitLsb < bitLsb) { + it->m_bitMsb = bitLsb - 1; + ++it; + continue; + } + if (it->m_bitMsb > bitMsb) { + it->m_bitLsb = bitMsb + 1; + break; + } + it = m_entries.erase(it); + } + auto ins = std::lower_bound(m_entries.begin(), m_entries.end(), elem, + [](const Entry& e, int idx) { return e.m_msb < idx; }); + while (ins != m_entries.end() && ins->m_lsb <= elem + && (ins->m_elemWidth == 0 || ins->m_bitLsb <= bitLsb)) { + ++ins; + } + return static_cast(ins - m_entries.begin()); + } + static QData extractRhsChunk(const Entry& entry, int rhsLsb, int width) { assert(width > 0 && width <= VL_QUADSIZE); assert(rhsLsb >= 0); @@ -195,6 +231,22 @@ private: return *static_cast*>(entry.m_rhsDatap); } + template + static typename std::enable_if::value, Elem>::type + blendElem(Elem cur, const Entry& e) { + const Entry bitEntry{e.m_bitLsb, e.m_bitMsb, e.m_rhsLsb, e.m_rhsDatap, 0, 0, 0}; + return applyEntry(cur, bitEntry); + } + + template + static typename std::enable_if::value, Elem>::type blendElem(Elem cur, + const Entry& e) { + Elem res = cur; + const Entry bitEntry{e.m_bitLsb, e.m_bitMsb, e.m_rhsLsb, e.m_rhsDatap, 0, 0, 0}; + applyEntry(res, bitEntry, e.m_bitLsb, e.m_bitMsb, 0); + return res; + } + template typename std::enable_if::value>::type applyEntries(T& val) const { for (const auto& entry : m_entries) { @@ -229,15 +281,19 @@ public: using ElemRef = decltype(VlForceArrayIndexer::elem(result, static_cast(0))); using Elem = VlForceBaseType; - const int total = static_cast(VlForceArrayIndexer::size); for (const auto& entry : m_entries) { - const Elem* const rhsBasep = static_cast(entry.m_rhsDatap); const int startIdx = entry.m_lsb; const int endIdx = entry.m_msb; for (int idx = startIdx; idx <= endIdx; idx++) { - const int rhsIndex = idx - entry.m_rhsLsb; const std::size_t uidx = static_cast(idx); - VlForceArrayIndexer::elem(result, uidx) = rhsBasep[rhsIndex]; + Elem& dst = VlForceArrayIndexer::elem(result, uidx); + if (entry.m_elemWidth == 0) { + const Elem* const rhsBasep = static_cast(entry.m_rhsDatap); + const int rhsIndex = idx - entry.m_rhsLsb; + dst = rhsBasep[rhsIndex]; + } else { + dst = blendElem(dst, entry); + } } } return result; @@ -250,14 +306,18 @@ public: T readIndex(T origVal, int index) const { if (m_entries.empty()) return origVal; - const auto it = std::lower_bound(m_entries.begin(), m_entries.end(), index, - [](const Entry& e, int idx) { return e.m_msb < idx; }); - if (it != m_entries.end() && it->m_lsb <= index) { - const int rhsIndex = index - it->m_rhsLsb; - const T* const rhsBasep = static_cast(it->m_rhsDatap); - return rhsBasep[rhsIndex]; + T result = origVal; + for (auto it = std::lower_bound(m_entries.begin(), m_entries.end(), index, + [](const Entry& e, int idx) { return e.m_msb < idx; }); + it != m_entries.end() && it->m_lsb <= index; ++it) { + if (it->m_elemWidth == 0) { + const int rhsIndex = index - it->m_rhsLsb; + result = static_cast(it->m_rhsDatap)[rhsIndex]; + } else { + result = blendElem(result, *it); + } } - return origVal; + return result; } IData readSelI(int lbits, WDataInP valp, int lsb, int width) const { @@ -291,11 +351,28 @@ public: m_entries.insert(it, {lsb, msb, rhsLsb, rhsDatap}); } + void addForce(int lsb, int msb, const void* rhsDatap, int rhsLsb, int bitLsb, int bitMsb, + int elemWidth) { + assert(lsb == msb); + assert(rhsDatap); + assert(elemWidth > 0); + assert(0 <= bitLsb && bitLsb <= bitMsb && bitMsb < elemWidth); + const std::size_t at = trimElementBitRange(lsb, bitLsb, bitMsb); + m_entries.insert(m_entries.begin() + at, + Entry{lsb, msb, rhsLsb, rhsDatap, bitLsb, bitMsb, elemWidth}); + } + void release(int lsb, int msb) { assert(lsb <= msb); trimEntries(lsb, msb); } + void release(int lsb, int msb, int bitLsb, int bitMsb) { + assert(lsb == msb); + assert(bitLsb <= bitMsb); + trimElementBitRange(lsb, bitLsb, bitMsb); + } + void touch() {} }; diff --git a/src/V3Force.cpp b/src/V3Force.cpp index 75f3cf256..3a7b51ce9 100644 --- a/src/V3Force.cpp +++ b/src/V3Force.cpp @@ -870,30 +870,8 @@ class ForceDiscoveryVisitor final : public VNVisitorConst { ForceState::ForceRangeInfo rangeInfo = m_state.getForceRangeInfo(nodep->lhsp(), forcedVarp, true); - // Start from a cloned RHS expression; adjust below for partial bit selects. - const AstSel* const selLhsp = VN_CAST(nodep->lhsp(), Sel); - AstNodeExpr* rhsExprp = nodep->rhsp()->cloneTreePure(false); - - // For bitwise selects inside arrays, merge updated bits with preserved base bits. - if (rangeInfo.m_hasArraySel && rangeInfo.m_arrayInfo.m_hasBitSel && selLhsp - && ForceState::isBitwiseDType(selLhsp->fromp())) { - AstNodeExpr* const baseExprp = selLhsp->fromp()->cloneTreePure(false); - baseExprp->foreach( - [](AstVarRef* const refp) { ForceState::markNonReplaceable(refp); }); - - // Pad the selected value back to full base width before masking/or-ing. - rhsExprp = ForceState::zeroPadToBaseWidth(rhsExprp, selLhsp->fromp()->width(), - rangeInfo.m_padLsb, rangeInfo.m_padMsb); - - // Keep untouched base bits and insert the newly forced bit range. - // rhsExpr = (baseExpr & ~mask(range)) | (zeroPad(force_rhs) & mask(range)); - AstConst* const maskConstp = ForceState::makeRangeMaskConst( - nodep->lhsp(), selLhsp->fromp()->width(), rangeInfo.m_padLsb, rangeInfo.m_padMsb); - AstNodeExpr* const maskedOldp - = new AstAnd{nodep->lhsp()->fileline(), baseExprp, - new AstNot{nodep->lhsp()->fileline(), maskConstp}}; - rhsExprp = new AstOr{nodep->lhsp()->fileline(), maskedOldp, rhsExprp}; - } + // Keep narrow rhs, VlForceVec blends unpacked-array bit-select forces at read time + AstNodeExpr* const rhsExprp = nodep->rhsp()->cloneTreePure(false); m_state.addForceAssignment(forcedVarp, lhsVarRefp->varScopep(), rhsExprp, nodep, rangeInfo.m_rangeLsb, rangeInfo.m_rangeMsb, rangeInfo.m_padLsb, @@ -1114,6 +1092,11 @@ class ForceConvertVisitor final : public VNVisitor { // Verilog pseudocode: // forceVec.addForce(range_lsb, range_msb, &forceRHS[id], rhs_lsb); + const AstSel* const selLhsp = VN_CAST(lhsp, Sel); + const bool arrayBitSel + = info.m_hasArraySel && selLhsp && ForceState::getArraySelInfo(lhsp).m_hasBitSel + && ForceState::isBitwiseDType(selLhsp->fromp()) + && (info.m_padMsb - info.m_padLsb + 1) < selLhsp->fromp()->width(); AstNodeExpr* const rhsDatap = ForceState::buildRhsDataExpr(flp, info); AstCExpr* const rhsAddrp = new AstCExpr{flp}; rhsAddrp->add("&("); @@ -1124,7 +1107,13 @@ class ForceConvertVisitor final : public VNVisitor { ForceState::makeConst32(flp, info.m_rangeLsb)}; addForceCallp->addPinsp(ForceState::makeConst32(flp, info.m_rangeMsb)); addForceCallp->addPinsp(rhsAddrp); - addForceCallp->addPinsp(ForceState::makeConst32(flp, info.m_rangeLsb)); + addForceCallp->addPinsp( + ForceState::makeConst32(flp, arrayBitSel ? info.m_padLsb : info.m_rangeLsb)); + if (arrayBitSel) { + addForceCallp->addPinsp(ForceState::makeConst32(flp, info.m_padLsb)); + addForceCallp->addPinsp(ForceState::makeConst32(flp, info.m_padMsb)); + addForceCallp->addPinsp(ForceState::makeConst32(flp, selLhsp->fromp()->width())); + } addForceCallp->dtypeSetVoid(); AstNodeStmt* const stmtp = addForceCallp->makeStmt(); @@ -1164,12 +1153,21 @@ class ForceConvertVisitor final : public VNVisitor { const ForceState::ForceRangeInfo rangeInfo = m_state.getForceRangeInfo(lhsp, releasedVarp, false); + const AstSel* const selLhsp = VN_CAST(lhsp, Sel); + const bool arrayBitSel + = rangeInfo.m_hasArraySel && selLhsp && rangeInfo.m_arrayInfo.m_hasBitSel + && ForceState::isBitwiseDType(selLhsp->fromp()) + && (rangeInfo.m_padMsb - rangeInfo.m_padLsb + 1) < selLhsp->fromp()->width(); AstCMethodHard* const releaseCallp = new AstCMethodHard{ flp, new AstVarRef{flp, varInfo->m_forceVecVscp, VAccess::WRITE}, VCMethod::FORCE_RELEASE, ForceState::makeConst32(flp, rangeInfo.m_rangeLsb)}; releaseCallp->addPinsp(ForceState::makeConst32(flp, rangeInfo.m_rangeMsb)); + if (arrayBitSel) { + releaseCallp->addPinsp(ForceState::makeConst32(flp, rangeInfo.m_padLsb)); + releaseCallp->addPinsp(ForceState::makeConst32(flp, rangeInfo.m_padMsb)); + } releaseCallp->dtypeSetVoid(); - // forceVec.release(range_lsb, range_msb); + // forceVec.release(range_lsb, range_msb [, bit_lsb, bit_msb]); AstNodeStmt* const releasep = releaseCallp->makeStmt(); AstAssign* clearEnp = nullptr; diff --git a/test_regress/t/t_force_unpacked_bitsel.py b/test_regress/t/t_force_unpacked_bitsel.py new file mode 100755 index 000000000..8a938befd --- /dev/null +++ b/test_regress/t/t_force_unpacked_bitsel.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of either the GNU Lesser General Public License Version 3 +# or the Perl Artistic License Version 2.0. +# SPDX-FileCopyrightText: 2026 Wilson Snyder +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +import vltest_bootstrap + +test.scenarios('simulator') + +test.compile() + +test.execute() + +test.passes() diff --git a/test_regress/t/t_force_unpacked_bitsel.v b/test_regress/t/t_force_unpacked_bitsel.v new file mode 100644 index 000000000..7a98869d0 --- /dev/null +++ b/test_regress/t/t_force_unpacked_bitsel.v @@ -0,0 +1,131 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain. +// SPDX-FileCopyrightText: 2026 Nikolai Kumar +// SPDX-License-Identifier: CC0-1.0 + +`define checkh(g,e) do if ((g) !==(e)) begin $write("%%Error: %s:%0d: got=%b exp=%b\n", `__FILE__,`__LINE__, (g),(e)); $stop; end while(0) + + +module t ( + input clk +); + int cyc = 0; + always @(posedge clk) cyc <= cyc + 1; + + logic [4:1] var_arr [2]; + /* verilator lint_off ASCRANGE */ + logic [1:4] var_arr_a [2]; + /* verilator lint_on ASCRANGE */ + logic [72:1] wide_arr [2]; + + always @(posedge clk) begin + var_arr[0] <= 4'b0101; + var_arr[1] <= (cyc <= 3) ? 4'b1111 : (cyc <= 7) ? 4'b0000 : 4'b0001; + var_arr_a[0] <= 4'b1010; + var_arr_a[1] <= 4'b0000; + wide_arr[0] <= '0; + wide_arr[1] <= 72'hAB_CDEF0123_456789AB; + end + + always @(posedge clk) begin + if (cyc == 2) force var_arr[1][1] = 1'b0; + if (cyc == 6) force var_arr[1][4] = 1'b1; + if (cyc == 8) release var_arr[1][1]; + if (cyc == 10) force var_arr[1] = 4'b1010; + if (cyc == 12) release var_arr[1]; + + if (cyc == 2) force wide_arr[1][36:5] = 32'hffff_ffff; + if (cyc == 4) release wide_arr[1]; + + if (cyc == 2) force var_arr_a[1][2:4] = 3'b111; + if (cyc == 4) force var_arr_a[1][3]= 1'b0; + if (cyc == 6) release var_arr_a[1]; + if (cyc == 7) force var_arr_a[1][3:4] = 2'b11; + if (cyc == 9) force var_arr_a[1][2:3]= 2'b00; + if (cyc == 11) release var_arr_a[1]; + if (cyc == 12) force var_arr_a[1][1:2] = 2'b11; + if (cyc == 14) force var_arr_a[1][2:3]= 2'b00; + if (cyc == 16) release var_arr_a[1]; + if (cyc == 17) force var_arr_a[1][2:3] = 2'b11; + if (cyc == 19) force var_arr_a[1][1:3]= 3'b000; + if (cyc == 21) release var_arr_a[1]; + + if (cyc == 14) force var_arr[1][1] = 1'b0; + if (cyc == 14) force var_arr[0][1] = 1'b0; + if (cyc == 16) release var_arr[1][1]; + if (cyc == 16) release var_arr[0][1]; + if (cyc == 18) force var_arr[0] = 4'b1010; + if (cyc == 20) force var_arr[0][1] = 1'b1; + if (cyc == 22) release var_arr[0]; + end + + always @(posedge clk) case (cyc) + 1: begin + `checkh(var_arr[0], 4'b0101); + `checkh(var_arr[1], 4'b1111); + end + 3: begin + `checkh(var_arr[1], 4'b1110); + `checkh(wide_arr[1][36:5], 32'hffff_ffff); + `checkh(wide_arr[1][4:1], 4'hB); + `checkh(wide_arr[1][72:37], 36'hABCDEF012); + `checkh(var_arr_a[1], 4'b0111); + `checkh(var_arr_a[0], 4'b1010); + end + 5: begin + `checkh(var_arr[1], 4'b0000); + `checkh(wide_arr[1], 72'hAB_CDEF0123_456789AB); + `checkh(var_arr_a[1], 4'b0101); + end + 7: begin + `checkh(var_arr[1], 4'b1000); + end + 8: begin + `checkh(var_arr_a[1], 4'b0011); + end + 9: begin + `checkh(var_arr[1], 4'b1001); + end + 10: begin + `checkh(var_arr_a[1], 4'b0001); + end + 11: begin + `checkh(var_arr[1], 4'b1010); + `checkh(var_arr[0], 4'b0101); + end + 13: begin + `checkh(var_arr[1], 4'b0001); + `checkh(var_arr_a[1], 4'b1100); + end + 15: begin + `checkh(var_arr_a[1], 4'b1000); + `checkh(var_arr[1], 4'b0000); + `checkh(var_arr[0], 4'b0100); + end + 17: begin + `checkh(var_arr[1], 4'b0001); + `checkh(var_arr[0], 4'b0101); + end + 18: begin + `checkh(var_arr_a[1], 4'b0110); + end + 19: begin + `checkh(var_arr[0], 4'b1010); + end + 20: begin + `checkh(var_arr_a[1], 4'b0000); + end + 21: begin + `checkh(var_arr[0], 4'b1011); + end + 22: begin + `checkh(var_arr_a[1], 4'b0000); + end + 23: begin + `checkh(var_arr[0], 4'b0101); + $write("*-* All Finished *-*\n"); + $finish; + end + endcase +endmodule