diff --git a/passes/silimate/Makefile.inc b/passes/silimate/Makefile.inc
index 9ecc6128b..f58cf352b 100644
--- a/passes/silimate/Makefile.inc
+++ b/passes/silimate/Makefile.inc
@@ -17,6 +17,7 @@ OBJS += passes/silimate/splitnetlist.o
 OBJS += passes/silimate/opt_timing_balance.o
 OBJS += passes/silimate/cone_partition.o
 OBJS += passes/silimate/clkmerge.o
+OBJS += passes/silimate/opt_vps.o
 
 OBJS += passes/silimate/opt_expand.o
 GENFILES += passes/silimate/peepopt_expand.h
diff --git a/passes/silimate/opt_vps.cc b/passes/silimate/opt_vps.cc
new file mode 100644
index 000000000..6569bd8bd
--- /dev/null
+++ b/passes/silimate/opt_vps.cc
@@ -0,0 +1,615 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2012  Claire Xenia Wolf <claire@yosyshq.com>
+ *                2025  Silimate Inc.     <akash@silimate.com>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "kernel/yosys.h"
+#include "kernel/sigtools.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+struct OptVpsWorker
+{
+	struct PmuxInfo {                  // one candidate $pmux
+		Cell *cell;                // the WIDTH=1 $pmux cell
+		int window_start;          // decoder output position that feeds S[0]
+	};
+	// Per-bit cells of the feedback chain, recorded while probing for a full collapse.
+	struct FeedbackInfo {
+		Cell *feedback_mux;        // WIDTH=1 $mux whose B input is the $pmux output
+		Cell *and_gate;            // $and/$_AND_ driving that mux's S, else nullptr
+		SigBit q_bit;              // sigmapped A input of feedback_mux
+	};
+	// Netlist indices filled by the constructor, then statistics and the option value.
+	Module *module;
+	SigMap sigmap;
+	dict<SigBit, Cell *> bit_drivers;              // sigmapped bit -> cell with that bit on an output port
+	dict<SigBit, pool<Cell *>> bit_consumers;      // sigmapped wire bit -> cells reading it
+	int groups_optimized = 0;                      // groups fully processed by optimize_group
+	int pmux_replaced = 0;                         // $pmux cells replaced
+	int reduce_or_replaced = 0;                    // $reduce_or cells replaced
+	int feedback_collapsed = 0;                    // bits handled by the full feedback collapse
+	int min_stride;                                // smallest $pmux S width considered
+
+	// Index cell port bits by driver / consumer; module outputs count as a consumer too.
+	OptVpsWorker(Module *module, int min_stride)
+		: module(module), sigmap(module), min_stride(min_stride)
+	{
+		for (auto cell : module->cells())
+			for (auto &conn : cell->connections())
+				for (auto bit : sigmap(conn.second)) {
+					if (cell->output(conn.first))
+						bit_drivers[bit] = cell;
+					else if (bit.wire)
+						bit_consumers[bit].insert(cell);
+				}
+		for (auto wire : module->wires())	// nullptr marks "read outside the module", so such
+			if (wire->port_output)		// a net never appears to have one sole cell consumer
+				for (auto bit : sigmap(SigSpec(wire)))
+					if (bit.wire) bit_consumers[bit].insert(static_cast<Cell *>(nullptr));
+	}
+
+	// Returns the single recorded consumer of `bit` (after sigmap); nullptr if it has none or several.
+	Cell *find_sole_consumer(SigBit bit)
+	{
+		auto entry = bit_consumers.find(sigmap(bit));
+		bool unique = entry != bit_consumers.end() && entry->second.size() == 1;
+		return unique ? *(entry->second.begin()) : nullptr;
+	}
+
+	// True for a $shl whose A input is the constant 1, i.e. Y = 1 << B.
+	bool is_decoder_shl(Cell *cell)
+	{
+		if (cell->type != ID($shl))
+			return false;
+		SigSpec sig_a = cell->getPort(ID::A);
+		if (!sig_a.is_fully_const())
+			return false;
+		// Bit 0 must be 1 and every higher bit 0; any other state disqualifies.
+		Const value = sig_a.as_const();
+		for (int idx = 0; idx < GetSize(value); idx++)
+			if (value[idx] != (idx == 0 ? State::S1 : State::S0))
+				return false;
+		return GetSize(value) >= 1;
+	}
+
+	// Resolve an S-port bit to the index of the decoder output bit it comes
+	// from.  The bit either is a decoder output itself, or is produced by a
+	// single-bit $and / $_AND_ cell that has a decoder output on one input.
+	//
+	// Returns the index into decoder_y, or -1 when neither form matches.
+	// If overflow_cond is non-null it receives the other AND input (the
+	// overflow mask bit), or State::S1 when the bit is a direct decoder output.
+	int trace_to_decoder_pos(SigBit bit, SigSpec &decoder_y,
+				 SigBit *overflow_cond = nullptr)
+	{
+		const int y_width = GetSize(decoder_y);
+		const SigBit target = sigmap(bit);
+
+		// Case 1: the bit is one of the decoder outputs.
+		for (int pos = 0; pos < y_width; pos++) {
+			if (sigmap(decoder_y[pos]) != target)
+				continue;
+			if (overflow_cond)
+				*overflow_cond = State::S1;
+			return pos;
+		}
+
+		// Case 2: the bit is driven by an AND of a decoder output and a mask.
+		// Decoder positions are scanned in order; at each one input A is
+		// tried before input B, and the other input is reported as the mask.
+		auto match_and_inputs = [&](const SigBit &in_a, const SigBit &in_b) {
+			for (int pos = 0; pos < y_width; pos++) {
+				const SigBit dec_bit = sigmap(decoder_y[pos]);
+				const bool hit_a = (dec_bit == in_a);
+				if (!hit_a && dec_bit != in_b)
+					continue;
+				if (overflow_cond)
+					*overflow_cond = hit_a ? in_b : in_a;
+				return pos;
+			}
+			return -1;
+		};
+
+		Cell *driver = bit_drivers.at(target, nullptr);
+		if (driver == nullptr)
+			return -1;
+
+		if (driver->type == ID($and)) {
+			// A word-level AND only qualifies when both operands are one bit wide.
+			SigSpec port_a = driver->getPort(ID::A);
+			SigSpec port_b = driver->getPort(ID::B);
+			if (GetSize(port_a) != 1 || GetSize(port_b) != 1)
+				return -1;
+			return match_and_inputs(sigmap(port_a[0]), sigmap(port_b[0]));
+		}
+		if (driver->type == ID($_AND_)) {
+			SigBit a = sigmap(driver->getPort(ID::A));
+			SigBit b = sigmap(driver->getPort(ID::B));
+			return match_and_inputs(a, b);
+		}
+
+		return -1;
+	}
+
+	// Collect every selected decoder up front, then process them one by one.
+	void run()
+	{
+		std::vector<Cell *> found;
+		for (Cell *candidate : module->selected_cells())
+			if (is_decoder_shl(candidate))
+				found.push_back(candidate);
+		for (size_t idx = 0; idx < found.size(); idx++)
+			process_decoder(found[idx]);
+	}
+
+	void process_decoder(Cell *decoder)
+	{
+		SigSpec decoder_y = decoder->getPort(ID::Y);
+		// Finds the $pmux cells fed by `decoder`, groups them, and calls optimize_group() per group.
+		std::vector<PmuxInfo> candidates;
+		// Step 1: keep selected WIDTH=1 $pmux cells with A == 0 and at least min_stride S bits.
+		for (auto cell : module->selected_cells()) {
+			if (cell->type != ID($pmux))
+				continue;
+			if (cell->getParam(ID::WIDTH).as_int() != 1)
+				continue;
+			SigSpec sig_a = cell->getPort(ID::A);
+			if (!sig_a.is_fully_zero())
+				continue;
+
+			SigSpec sig_s = cell->getPort(ID::S);
+			int s_width = GetSize(sig_s);
+			if (s_width < min_stride)
+				continue;
+			// Every S bit must trace (directly or through one AND) to an output of this decoder.
+			std::vector<int> positions;
+			bool valid = true;
+
+			for (int i = 0; i < s_width; i++) {
+				int pos = trace_to_decoder_pos(sig_s[i], decoder_y);
+				if (pos < 0) { valid = false; break; }
+				positions.push_back(pos);
+			}
+			if (!valid)
+				continue;
+			// The traced positions must increase by exactly one from S[0] upward.
+			bool contiguous = true;
+			for (int i = 1; i < s_width; i++) {
+				if (positions[i] != positions[i - 1] + 1) {
+					contiguous = false;
+					break;
+				}
+			}
+			if (!contiguous)
+				continue;
+
+			candidates.push_back({cell, positions[0]});
+		}
+
+		if (candidates.empty())
+			return;
+
+		std::sort(candidates.begin(), candidates.end(),
+			  [](const PmuxInfo &a, const PmuxInfo &b) {
+				  return a.window_start < b.window_start;
+			  });
+
+		// Step 2: partition candidates by S width, then separate multiplexed
+		// VPS groups that share the same decoder positions.
+		dict<int, std::vector<PmuxInfo>> by_swidth;
+		for (auto &c : candidates)
+			by_swidth[GetSize(c.cell->getPort(ID::S))].push_back(c);
+
+		for (auto &[W, cells] : by_swidth) {
+			// Order this partition by window_start
+			std::sort(cells.begin(), cells.end(),
+				  [](const PmuxInfo &a, const PmuxInfo &b) {
+					  return a.window_start < b.window_start;
+				  });
+
+			// Build position buckets: window_start -> all cells starting there
+			dict<int, std::vector<PmuxInfo>> by_pos;
+			for (auto &c : cells)
+				by_pos[c.window_start].push_back(c);
+
+			// Sorted list of the distinct window starts
+			std::vector<int> positions;
+			for (auto &[pos, _] : by_pos)
+				positions.push_back(pos);
+			std::sort(positions.begin(), positions.end());
+
+			// Walk every maximal run of consecutive window starts: [run_start, run_end)
+			int run_start = 0;
+			while (run_start < (int)positions.size()) {
+				int run_end = run_start + 1;
+				while (run_end < (int)positions.size() &&
+				       positions[run_end] == positions[run_end - 1] + 1)
+					run_end++;
+
+				int N = run_end - run_start;	// number of window starts in this run
+				if (N >= W) {			// runs shorter than the S width are skipped
+					int base = positions[run_start];
+					int multiplicity = GetSize(by_pos[base]);	// smallest bucket size in the run
+					for (int pos_idx = run_start; pos_idx < run_end; pos_idx++)
+						multiplicity = std::min(multiplicity,
+							GetSize(by_pos[positions[pos_idx]]));
+					// Emit `multiplicity` groups; group g takes the g-th cell of every bucket.
+					for (int g = 0; g < multiplicity; g++) {
+						std::vector<PmuxInfo> group;
+						for (int pos_idx = run_start; pos_idx < run_end; pos_idx++)
+							group.push_back(by_pos[positions[pos_idx]][g]);
+
+						// Append the group to candidates so optimize_group can index it from gstart
+						int gstart = candidates.size();
+						for (auto &c : group)
+							candidates.push_back(c);
+						optimize_group(decoder, candidates, gstart,
+							       N, W);
+					}
+				}
+
+				run_start = run_end;
+			}
+		}
+	}
+
+	// Rewrite one VPS group: the N $pmux cells candidates[group_start .. group_start+N),
+	// with consecutive window starts, each selecting among W decoder bits.  Lane enables
+	// are built first; then either the whole $pmux -> $mux -> write-enable $mux chain is
+	// collapsed into one wide $mux per lane, or each $pmux becomes a 2:1 $mux.
+	void optimize_group(Cell *decoder, std::vector<PmuxInfo> &candidates,
+			    int group_start, int N, int W)
+	{
+		int base = candidates[group_start].window_start;
+		int lane_count = (N + W - 1) / W;
+
+		log("  VPS group: decoder %s, base=%d, %d bits, stride=%d, %d lanes\n",
+		    log_id(decoder->name), base, N, W, lane_count);
+
+		// Collect the S bit seen at each decoder position.  The windows of
+		// neighbouring cells overlap and must agree on the shared positions.
+		dict<int, SigBit> gated_bits;
+		for (int i = 0; i < N; i++) {
+			Cell *pmux_cell = candidates[group_start + i].cell;
+			SigSpec sig_s = pmux_cell->getPort(ID::S);
+			int ws = candidates[group_start + i].window_start;
+			for (int k = 0; k < W; k++) {
+				int pos = ws + k;
+				SigBit sb = sigmap(sig_s[k]);
+				if (!gated_bits.count(pos)) {
+					gated_bits[pos] = sb;
+				} else if (gated_bits[pos] != sb) {
+					log("    WARNING: inconsistent gated bit at decoder pos %d\n", pos);
+					return;
+				}
+			}
+		}
+
+		// Try binary-index lane enables: instead of OR-reducing W one-hot
+		// decoder bits per lane, compare the binary index directly.
+		// Requirements: W is a power of 2, base is W-aligned, and every
+		// lane lies inside the decoder output.
+		// NOTE(review): this path does not fold in the AND-gate overflow
+		// conditions; it relies on the index compare alone -- confirm.
+		bool use_binary = (W & (W - 1)) == 0 && (base % W) == 0 &&
+				  base + lane_count * W <= GetSize(decoder->getPort(ID::Y));
+
+		std::vector<SigBit> lane_en(lane_count);
+
+		if (use_binary) {
+			SigSpec binary_index = decoder->getPort(ID::B);
+			int log2_w = 0;
+			for (int tmp = W; tmp > 1; tmp >>= 1)
+				log2_w++;
+
+			int upper_width = GetSize(binary_index) - log2_w;
+			SigSpec upper_bits;
+			if (upper_width > 0)
+				upper_bits = binary_index.extract(log2_w, upper_width);
+
+			for (int L = 0; L < lane_count; L++) {
+				SigBit range_bit;
+
+				if (upper_width > 0) {
+					int lane_idx = base / W + L;
+					Wire *eq_w = module->addWire(NEW_ID_SUFFIX("vps_lane_eq"), 1);
+					module->addEq(NEW_ID_SUFFIX("vps_lane_cmp"),
+						      upper_bits, Const(lane_idx, upper_width), eq_w);
+					range_bit = SigBit(eq_w);
+				} else {
+					range_bit = State::S1;
+				}
+
+				lane_en[L] = range_bit;
+			}
+
+			log("    using binary-index lane enables (%d upper bits)\n",
+			    upper_width > 0 ? upper_width : 0);
+		} else {
+			for (int L = 0; L < lane_count; L++) {
+				SigSpec lane_bits;
+				for (int k = 0; k < W; k++) {
+					int pos = base + L * W + k;
+					if (gated_bits.count(pos))
+						lane_bits.append(gated_bits.at(pos));
+				}
+
+				if (GetSize(lane_bits) == 0) {
+					lane_en[L] = State::S0;
+				} else if (GetSize(lane_bits) == 1) {
+					lane_en[L] = lane_bits[0];
+				} else {
+					Wire *w = module->addWire(NEW_ID_SUFFIX("vps_lane_en"), 1);
+					module->addReduceOr(NEW_ID_SUFFIX("vps_lane_or"), lane_bits, w);
+					lane_en[L] = SigBit(w);
+				}
+			}
+		}
+
+		// Probe for the full feedback collapse pattern:
+		//   $pmux.Y -> $mux(Q[i], pmux_Y, gated_en).Y -> top_$mux(Q, {results}, wr_en)
+		// When detected, replace the entire chain with per-lane wide muxes.
+		bool full_collapse = use_binary && (N % W == 0);
+		bool wr_a_is_q = true;
+		Cell *top_wr_mux = nullptr;
+		SigBit wr_en_sig;
+		std::vector<FeedbackInfo> fb_info(N);
+
+		if (full_collapse) {
+			for (int i = 0; i < N; i++) {
+				Cell *pmux_cell = candidates[group_start + i].cell;
+				SigBit pmux_y = sigmap(pmux_cell->getPort(ID::Y)[0]);
+
+				Cell *fb_mux = find_sole_consumer(pmux_y);
+				if (!fb_mux || fb_mux->type != ID($mux) ||
+				    fb_mux->getParam(ID::WIDTH).as_int() != 1 ||
+				    sigmap(fb_mux->getPort(ID::B)[0]) != pmux_y) {
+					full_collapse = false;
+					break;
+				}
+
+				SigBit q_bit = sigmap(fb_mux->getPort(ID::A)[0]);
+				SigBit gated_en = sigmap(fb_mux->getPort(ID::S)[0]);
+
+				Cell *and_gate = bit_drivers.at(gated_en, nullptr);
+				if (and_gate &&
+				    and_gate->type != ID($and) &&
+				    and_gate->type != ID($_AND_))
+					and_gate = nullptr;
+
+				SigBit fb_y = sigmap(fb_mux->getPort(ID::Y)[0]);
+				Cell *wr_mux = find_sole_consumer(fb_y);
+				if (!wr_mux || wr_mux->type != ID($mux) ||
+				    wr_mux->getParam(ID::WIDTH).as_int() <= 1) {
+					full_collapse = false;
+					break;
+				}
+
+				SigSpec wr_b = wr_mux->getPort(ID::B);
+				bool in_b = false;
+				// Also record whether the mux's A input at that position is this bit's Q.
+				for (int j = 0; j < GetSize(wr_b) && !in_b; j++)
+					if (sigmap(wr_b[j]) == fb_y) {
+						in_b = true;
+						wr_a_is_q = wr_a_is_q && sigmap(wr_mux->getPort(ID::A)[j]) == q_bit;
+					}
+				if (!in_b) {
+					full_collapse = false;
+					break;
+				}
+
+				SigBit this_wr_en = sigmap(wr_mux->getPort(ID::S)[0]);
+				if (top_wr_mux == nullptr) {
+					top_wr_mux = wr_mux;
+					wr_en_sig = this_wr_en;
+				} else if (top_wr_mux != wr_mux) {
+					full_collapse = false;
+					break;
+				}
+
+				fb_info[i] = {fb_mux, and_gate, q_bit};
+			}
+		}
+
+		// Build lookup: S SigSpec (through sigmap) -> $reduce_or cell
+		dict<SigSpec, Cell *> reduce_or_map;
+		for (auto cell : module->cells()) {
+			if (cell->type != ID($reduce_or))
+				continue;
+			SigSpec a = sigmap(cell->getPort(ID::A));
+			reduce_or_map[a] = cell;
+		}
+
+		if (full_collapse) {
+			log("    full feedback collapse: %d lanes, wr_en mux %s\n",
+			    lane_count, log_id(top_wr_mux->name));
+
+			pool<Cell *> cells_to_remove;
+
+			for (int L = 0; L < lane_count; L++) {
+				SigSpec data_lane, q_lane, fb_y_lane;
+
+				for (int b = 0; b < W; b++) {
+					int i = L * W + b;
+					Cell *pmux_cell = candidates[group_start + i].cell;
+					SigSpec cell_b = pmux_cell->getPort(ID::B);
+					data_lane.append(cell_b[W - 1 - b]);
+					q_lane.append(fb_info[i].q_bit);
+					fb_y_lane.append(fb_info[i].feedback_mux->getPort(ID::Y));
+
+					cells_to_remove.insert(pmux_cell);
+					cells_to_remove.insert(fb_info[i].feedback_mux);
+					if (fb_info[i].and_gate) {
+						SigBit and_y = sigmap(fb_info[i].and_gate->getPort(ID::Y)[0]);
+						auto ac = bit_consumers.find(and_y);
+						if (ac != bit_consumers.end() && ac->second.size() == 1)
+							cells_to_remove.insert(fb_info[i].and_gate);
+					}
+
+					SigSpec pmux_s = sigmap(pmux_cell->getPort(ID::S));
+					auto it = reduce_or_map.find(pmux_s);
+					if (it != reduce_or_map.end()) {
+						// Keep the enable net driven for any reader that survives (as in the fallback).
+						module->connect(it->second->getPort(ID::Y), lane_en[L]);
+						cells_to_remove.insert(it->second);
+						reduce_or_map.erase(it);
+						reduce_or_replaced++;
+					}
+					pmux_replaced++;
+				}
+
+				Wire *gated_w = module->addWire(NEW_ID_SUFFIX("vps_wr_lane_en"), 1);
+				module->addAnd(NEW_ID_SUFFIX("vps_wr_lane_and"),
+					       SigSpec(wr_en_sig), SigSpec(lane_en[L]),
+					       SigSpec(gated_w));
+
+				Cell *lane_mux = module->addMux(
+					NEW_ID_SUFFIX("vps_lane_mux"),
+					q_lane, data_lane, SigBit(gated_w), fb_y_lane);
+				lane_mux->add_strpool_attribute(ID::src,
+					candidates[group_start + L * W].cell->get_strpool_attribute(ID::src));
+			}
+
+			for (auto c : cells_to_remove)
+				module->remove(c);
+
+			// Remove the top-level wr_en mux only if all of its B bits are now driven
+			// by the per-lane muxes and its A input is the Q value those muxes hold.
+			if (wr_a_is_q && N == top_wr_mux->getParam(ID::WIDTH).as_int()) {
+				SigSpec wr_y = top_wr_mux->getPort(ID::Y);
+				SigSpec wr_b = top_wr_mux->getPort(ID::B);
+				// Log before remove(): the cell must not be read once it is removed.
+				log("    removed redundant top-level wr_en mux %s\n",
+				    log_id(top_wr_mux->name));
+				module->connect(wr_y, wr_b);
+				module->remove(top_wr_mux);
+			}
+
+			feedback_collapsed += N;
+		} else {
+			// Fallback: per-bit $mux replacement
+			for (int i = 0; i < N; i++) {
+				Cell *pmux_cell = candidates[group_start + i].cell;
+				int L = i / W;
+				int b = i % W;
+
+				SigSpec cell_b = pmux_cell->getPort(ID::B);
+				SigBit data_bit = cell_b[W - 1 - b];
+				SigSpec sig_y = pmux_cell->getPort(ID::Y);
+
+				Cell *mux = module->addMux(NEW_ID_SUFFIX("vps_mux"),
+							   State::S0, data_bit, lane_en[L], sig_y);
+				mux->add_strpool_attribute(ID::src,
+							   pmux_cell->get_strpool_attribute(ID::src));
+
+				SigSpec pmux_s = sigmap(pmux_cell->getPort(ID::S));
+				auto it = reduce_or_map.find(pmux_s);
+				if (it != reduce_or_map.end()) {
+					Cell *ror = it->second;
+					module->connect(ror->getPort(ID::Y), lane_en[L]);
+					module->remove(ror);
+					reduce_or_map.erase(it);
+					reduce_or_replaced++;
+				}
+
+				module->remove(pmux_cell);
+				pmux_replaced++;
+			}
+		}
+
+		groups_optimized++;
+	}
+};
+
+// Command front-end: parses options, runs OptVpsWorker on each selected module, logs totals.
+struct OptVpsPass : public Pass {
+	OptVpsPass() : Pass("opt_vps", "optimize Verific variable-part-select patterns") {}
+	void help() override
+	{
+		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("\n");
+		log("    opt_vps [options] [selection]\n");
+		log("\n");
+		log("Detect variable-part-select (VPS) write patterns generated by Verific\n");
+		log("and replace the per-bit sliding-window $pmux cells with per-lane\n");
+		log("enable logic and direct data wiring.\n");
+		log("\n");
+		log("Verific lowers VPS writes like `reg[idx -: W] <= data` into a\n");
+		log("bit-granularity decoder ($shl with A=1) followed by overflow-gated\n");
+		log("AND gates and N sliding-window one-hot $pmux cells (one per output\n");
+		log("bit, each with S_WIDTH=W). This structure has O(N*W) gates after\n");
+		log("pmuxtree expansion.\n");
+		log("\n");
+		log("This pass recovers the lane structure and replaces each W-entry\n");
+		log("$pmux with a single 2:1 $mux gated by a shared per-lane enable,\n");
+		log("reducing the gate count to O(N + N/W).\n");
+		log("\n");
+		log("The pass also replaces per-bit $reduce_or enable cells with the\n");
+		log("shared lane enable signal.\n");
+		log("\n");
+		log("    -min_stride <n>\n");
+		log("        Minimum stride (S_WIDTH of the $pmux cells) to consider.\n");
+		log("        Default: 4.\n");
+		log("\n");
+	}
+	void execute(std::vector<std::string> args, RTLIL::Design *design) override
+	{
+		int min_stride = 4;
+		log_header(design, "Executing OPT_VPS pass (optimize Verific VPS patterns).\n");
+		size_t argidx;
+		for (argidx = 1; argidx < args.size(); argidx++) {
+			if (args[argidx] == "-min_stride" && argidx + 1 < args.size()) {
+				// std::stoi throws on non-numeric or out-of-range text and stops at
+				// trailing junk; report all of that, and strides below 1, as errors.
+				const std::string &text = args[++argidx];
+				size_t used = 0;
+				try { min_stride = std::stoi(text, &used); } catch (const std::exception &) { used = 0; }
+				if (text.empty() || used != text.size() || min_stride < 1)
+					log_cmd_error("Invalid -min_stride value '%s' (expected an integer >= 1).\n", text.c_str());
+				continue;
+			}
+			break;
+		}
+		extra_args(args, argidx, design);
+
+		int total_groups = 0, total_pmux = 0, total_ror = 0, total_fb = 0;
+		for (auto module : design->selected_modules()) {
+			if (module->has_processes_warn())
+				continue;
+			OptVpsWorker worker(module, min_stride);
+			worker.run();
+			if (worker.groups_optimized > 0)
+				log("  Module %s: %d VPS group(s), %d $pmux replaced, "
+				    "%d $reduce_or replaced, %d feedback collapsed.\n",
+				    log_id(module->name), worker.groups_optimized,
+				    worker.pmux_replaced, worker.reduce_or_replaced,
+				    worker.feedback_collapsed);
+			total_groups += worker.groups_optimized;
+			total_pmux += worker.pmux_replaced;
+			total_ror += worker.reduce_or_replaced;
+			total_fb += worker.feedback_collapsed;
+		}
+		log("Optimized %d VPS group(s), %d $pmux replaced, "
+		    "%d $reduce_or replaced, %d feedback collapsed.\n",
+		    total_groups, total_pmux, total_ror, total_fb);
+	}
+} OptVpsPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/tests/silimate/opt_vps.ys b/tests/silimate/opt_vps.ys
new file mode 100644
index 000000000..6f35f8808
--- /dev/null
+++ b/tests/silimate/opt_vps.ys
@@ -0,0 +1,159 @@
+# =============================================================================
+# Test 1: SAT equivalence — VPS byte-write vs case-statement reference
+# Proves opt_vps produces a logically equivalent circuit to hand-written
+# case statements for a 32-bit register with 4 byte lanes.
+# =============================================================================
+log -header "SAT equivalence: byte-write VPS vs case-statement ref"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+# Gate: the VPS source with opt_vps applied.
+read -sv opt_vps_byte_write.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+opt_vps; opt_clean
+rename opt_vps_byte_write gate
+# Gold: the case-statement reference (same module name, so it is renamed after import).
+read -sv opt_vps_byte_write_ref.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+rename opt_vps_byte_write gold
+# Miter gold against gate and prove the asserts by temporal induction from an all-zero state.
+miter -equiv -flatten -make_assert gold gate miter
+hierarchy -top miter
+proc; opt; memory; opt
+clk2fflogic
+sat -set-init-zero -tempinduct -prove-asserts -verify
+design -reset
+log -pop
+
+# =============================================================================
+# Test 2: SAT self-equivalence — byte-write before vs after opt_vps
+# Proves opt_vps does not change the functional behavior.
+# =============================================================================
+log -header "SAT self-equivalence: byte-write before vs after opt_vps"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+# Gold: the VPS source without opt_vps.
+read -sv opt_vps_byte_write.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+rename opt_vps_byte_write gold
+# Gate: the same source with opt_vps applied.
+read -sv opt_vps_byte_write.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+opt_vps; opt_clean
+rename opt_vps_byte_write gate
+# Miter gold against gate and prove the asserts by temporal induction from an all-zero state.
+miter -equiv -flatten -make_assert gold gate miter
+hierarchy -top miter
+proc; opt; memory; opt
+clk2fflogic
+sat -set-init-zero -tempinduct -prove-asserts -verify
+design -reset
+log -pop
+
+# =============================================================================
+# Test 3: SAT self-equivalence — wide (128-bit, 16-bit lanes)
+# Ensures opt_vps is correct on a larger design with 8 lanes.
+# =============================================================================
+log -header "SAT self-equivalence: wide 128-bit VPS"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+# Gold: the wide VPS source without opt_vps.
+read -sv opt_vps_wide.sv
+verific -import opt_vps_wide
+proc; opt_clean
+rename opt_vps_wide gold
+# Gate: the same source with opt_vps applied.
+read -sv opt_vps_wide.sv
+verific -import opt_vps_wide
+proc; opt_clean
+opt_vps; opt_clean
+rename opt_vps_wide gate
+# Miter gold against gate and prove the asserts by temporal induction from an all-zero state.
+miter -equiv -flatten -make_assert gold gate miter
+hierarchy -top miter
+proc; opt; memory; opt
+clk2fflogic
+sat -set-init-zero -tempinduct -prove-asserts -verify
+design -reset
+log -pop
+
+# =============================================================================
+# Test 4: Cell count verification — byte-write
+# After opt_vps, all $pmux and $reduce_or cells should be eliminated and
+# replaced with per-lane $eq/$and/$mux cells.
+# =============================================================================
+log -header "Cell counts: byte-write post-opt_vps"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+# Import the VPS source and run the pass under test.
+read -sv opt_vps_byte_write.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+opt_vps; opt_clean
+# 32 bits / 8-bit lanes = 4 lanes: expect one $eq, one $and and one $mux per lane.
+select -assert-none t:$pmux
+select -assert-none t:$reduce_or
+select -assert-count 4 t:$eq
+select -assert-count 4 t:$and
+select -assert-count 4 t:$mux
+select -assert-count 1 t:$dff
+design -reset
+log -pop
+
+# =============================================================================
+# Test 5: Cell count verification — wide
+# Same as above but for the wider 128-bit / 8-lane case.
+# =============================================================================
+log -header "Cell counts: wide post-opt_vps"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+# Import the wide VPS source and run the pass under test.
+read -sv opt_vps_wide.sv
+verific -import opt_vps_wide
+proc; opt_clean
+opt_vps; opt_clean
+# Only the removal of $pmux / $reduce_or and the single $dff are asserted here.
+select -assert-none t:$pmux
+select -assert-none t:$reduce_or
+select -assert-count 1 t:$dff
+design -reset
+log -pop
+
+# =============================================================================
+# Test 6: Negative case — no VPS pattern
+# A simple mux-based register should not trigger opt_vps.
+# =============================================================================
+log -header "Negative: non-VPS design unchanged"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+# Import the plain mux-register fixture.
+read -sv opt_vps_no_match.sv
+verific -import opt_vps_no_match
+proc; opt_clean
+# Print statistics before and after the pass so the log shows both cell counts.
+stat
+opt_vps
+stat
+# No $pmux, no $eq and no wire named *vps* may exist; one $mux and one $dff are expected.
+select -assert-none t:$pmux
+select -assert-none t:$eq w:*vps*
+select -assert-count 1 t:$mux
+select -assert-count 1 t:$dff
+design -reset
+log -pop
+
diff --git a/tests/silimate/opt_vps_byte_write.sv b/tests/silimate/opt_vps_byte_write.sv
new file mode 100644
index 000000000..d4ef154fb
--- /dev/null
+++ b/tests/silimate/opt_vps_byte_write.sv
@@ -0,0 +1,14 @@
+// 32-bit register with byte-lane writes indexed by a 2-bit selector (VPS).
+module opt_vps_byte_write (
+    input  logic        clk,
+    input  logic        wr_en,   // when high, the selected byte is written on the clock edge
+    input  logic [1:0]  lane,    // byte lane selector, 0..3
+    input  logic [7:0]  wdata,   // byte to store
+    output logic [31:0] q        // current register contents
+);
+    logic [31:0] reg_data;
+    always_ff @(posedge clk)
+        if (wr_en)
+            reg_data[((lane + 1) * 8) - 1 -: 8] <= wdata;  // bits [lane*8+7 : lane*8]
+    assign q = reg_data;
+endmodule
diff --git a/tests/silimate/opt_vps_byte_write_ref.sv b/tests/silimate/opt_vps_byte_write_ref.sv
new file mode 100644
index 000000000..60de54d00
--- /dev/null
+++ b/tests/silimate/opt_vps_byte_write_ref.sv
@@ -0,0 +1,19 @@
+// Reference: equivalent design WITHOUT variable-part-select.
+module opt_vps_byte_write (      // same module name and port list as the VPS version
+    input  logic        clk,
+    input  logic        wr_en,   // when high, the selected byte is written on the clock edge
+    input  logic [1:0]  lane,    // byte lane selector, 0..3
+    input  logic [7:0]  wdata,   // byte to store
+    output logic [31:0] q        // current register contents
+);
+    logic [31:0] reg_data;
+    always_ff @(posedge clk)
+        if (wr_en)
+            case (lane)          // one explicit constant slice per lane; all four values listed
+                2'd0: reg_data[ 7: 0] <= wdata;
+                2'd1: reg_data[15: 8] <= wdata;
+                2'd2: reg_data[23:16] <= wdata;
+                2'd3: reg_data[31:24] <= wdata;
+            endcase
+    assign q = reg_data;
+endmodule
diff --git a/tests/silimate/opt_vps_no_match.sv b/tests/silimate/opt_vps_no_match.sv
new file mode 100644
index 000000000..d2ec0be78
--- /dev/null
+++ b/tests/silimate/opt_vps_no_match.sv
@@ -0,0 +1,12 @@
+// Simple mux-based register -- no VPS pattern, opt_vps should not fire.
+module opt_vps_no_match (
+    input  logic        clk,
+    input  logic        sel,     // chooses which input is registered
+    input  logic [7:0]  a, b,
+    output logic [7:0]  q        // current register contents
+);
+    logic [7:0] reg_data;
+    always_ff @(posedge clk)
+        reg_data <= sel ? a : b;  // whole-register 2:1 select, no indexed part-select
+    assign q = reg_data;
+endmodule
diff --git a/tests/silimate/opt_vps_wide.sv b/tests/silimate/opt_vps_wide.sv
new file mode 100644
index 000000000..88ad76011
--- /dev/null
+++ b/tests/silimate/opt_vps_wide.sv
@@ -0,0 +1,14 @@
+// 128-bit register with 16-bit lane writes indexed by a 3-bit selector (VPS).
+module opt_vps_wide (
+    input  logic        clk,
+    input  logic        wr_en,   // when high, the selected lane is written on the clock edge
+    input  logic [2:0]  lane,    // 16-bit lane selector, 0..7
+    input  logic [15:0] wdata,   // value to store
+    output logic [127:0] q       // current register contents
+);
+    logic [127:0] reg_data;
+    always_ff @(posedge clk)
+        if (wr_en)
+            reg_data[((lane + 1) * 16) - 1 -: 16] <= wdata;  // bits [lane*16+15 : lane*16]
+    assign q = reg_data;
+endmodule