diff --git a/passes/silimate/Makefile.inc b/passes/silimate/Makefile.inc index 9ecc6128b..f58cf352b 100644 --- a/passes/silimate/Makefile.inc +++ b/passes/silimate/Makefile.inc @@ -17,6 +17,7 @@ OBJS += passes/silimate/splitnetlist.o OBJS += passes/silimate/opt_timing_balance.o OBJS += passes/silimate/cone_partition.o OBJS += passes/silimate/clkmerge.o +OBJS += passes/silimate/opt_vps.o OBJS += passes/silimate/opt_expand.o GENFILES += passes/silimate/peepopt_expand.h diff --git a/passes/silimate/opt_vps.cc b/passes/silimate/opt_vps.cc new file mode 100644 index 000000000..6569bd8bd --- /dev/null +++ b/passes/silimate/opt_vps.cc @@ -0,0 +1,615 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Claire Xenia Wolf + * 2025 Silimate Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ *
+ */
+
+#include "kernel/yosys.h"
+#include "kernel/sigtools.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+// Per-module worker for the opt_vps pass. It looks for $shl cells whose A
+// input is the constant 1 ("decoders") and for width-1 $pmux cells whose
+// select bits form a contiguous window of one decoder's outputs, then
+// replaces those $pmux cells with $mux cells gated by a per-lane enable.
+struct OptVpsWorker
+{
+	struct PmuxInfo {
+		Cell *cell;        // candidate $pmux cell
+		int window_start;  // decoder output position traced from S[0]
+	};
+
+	struct FeedbackInfo {
+		Cell *feedback_mux;  // $mux whose B input is the $pmux output
+		Cell *and_gate;      // $and/$_AND_ driving that mux's S, or nullptr
+		SigBit q_bit;        // A input of feedback_mux
+	};
+
+	Module *module;
+	SigMap sigmap;
+	// Driver/consumer indices over sigmapped bits. They are filled once in
+	// the constructor and are not updated when optimize_group() adds or
+	// removes cells.
+	// NOTE(review): entries may therefore point at removed cells when a
+	// later group is processed -- confirm this cannot be reached.
+	dict<SigBit, Cell*> bit_drivers;
+	dict<SigBit, pool<Cell*>> bit_consumers;
+	int groups_optimized = 0;
+	int pmux_replaced = 0;
+	int reduce_or_replaced = 0;
+	int feedback_collapsed = 0;
+	int min_stride;
+
+	// Index every cell port bit of the module: output ports go into
+	// bit_drivers, all other ports (wire bits only) into bit_consumers.
+	OptVpsWorker(Module *module, int min_stride)
+		: module(module), sigmap(module), min_stride(min_stride)
+	{
+		for (auto cell : module->cells())
+			for (auto &conn : cell->connections())
+				if (cell->output(conn.first))
+					for (int i = 0; i < GetSize(conn.second); i++) {
+						SigBit bit = sigmap(conn.second[i]);
+						bit_drivers[bit] = cell;
+					}
+				else
+					for (int i = 0; i < GetSize(conn.second); i++) {
+						SigBit bit = sigmap(conn.second[i]);
+						if (bit.wire)
+							bit_consumers[bit].insert(cell);
+					}
+	}
+
+	// Return the cell consuming `bit` if exactly one indexed cell does,
+	// otherwise nullptr. Module ports are not part of the index.
+	Cell *find_sole_consumer(SigBit bit)
+	{
+		auto it = bit_consumers.find(sigmap(bit));
+		if (it == bit_consumers.end() || it->second.size() != 1)
+			return nullptr;
+		return *(it->second.begin());
+	}
+
+	// True if `cell` is a $shl whose A port is a constant with bit 0 set
+	// and every higher bit cleared (i.e. it computes 1 << B).
+	bool is_decoder_shl(Cell *cell)
+	{
+		if (cell->type != ID($shl))
+			return false;
+		SigSpec a = cell->getPort(ID::A);
+		if (!a.is_fully_const())
+			return false;
+		Const a_val = a.as_const();
+		if (GetSize(a_val) < 1 || a_val[0] != State::S1)
+			return false;
+		for (int i = 1; i < GetSize(a_val); i++)
+			if (a_val[i] != State::S0)
+				return false;
+		return true;
+	}
+
+	// Trace an S-port bit back through an optional AND gate to find
+	// which decoder output position it comes from. Returns -1 on failure.
+	// If overflow_cond is non-null, stores the non-decoder input of the
+	// AND gate (the overflow mask bit), or State::S1 if direct.
+	int trace_to_decoder_pos(SigBit bit, SigSpec &decoder_y,
+			SigBit *overflow_cond = nullptr)
+	{
+		SigBit mapped = sigmap(bit);
+
+		for (int i = 0; i < GetSize(decoder_y); i++)
+			if (sigmap(decoder_y[i]) == mapped) {
+				if (overflow_cond)
+					*overflow_cond = State::S1;
+				return i;
+			}
+
+		Cell *driver = bit_drivers.at(mapped, nullptr);
+		if (!driver)
+			return -1;
+
+		if (driver->type == ID($and)) {
+			SigSpec port_a = driver->getPort(ID::A);
+			SigSpec port_b = driver->getPort(ID::B);
+			if (GetSize(port_a) == 1 && GetSize(port_b) == 1) {
+				SigBit a = sigmap(port_a[0]);
+				SigBit b = sigmap(port_b[0]);
+				for (int i = 0; i < GetSize(decoder_y); i++) {
+					SigBit dy = sigmap(decoder_y[i]);
+					if (dy == a) {
+						if (overflow_cond) *overflow_cond = b;
+						return i;
+					}
+					if (dy == b) {
+						if (overflow_cond) *overflow_cond = a;
+						return i;
+					}
+				}
+			}
+		}
+
+		if (driver->type == ID($_AND_)) {
+			SigBit a = sigmap(driver->getPort(ID::A));
+			SigBit b = sigmap(driver->getPort(ID::B));
+			for (int i = 0; i < GetSize(decoder_y); i++) {
+				SigBit dy = sigmap(decoder_y[i]);
+				if (dy == a) {
+					if (overflow_cond) *overflow_cond = b;
+					return i;
+				}
+				if (dy == b) {
+					if (overflow_cond) *overflow_cond = a;
+					return i;
+				}
+			}
+		}
+
+		return -1;
+	}
+
+	// Collect all selected decoder cells first, then process each one.
+	void run()
+	{
+		std::vector<Cell*> decoders;
+		for (auto cell : module->selected_cells())
+			if (is_decoder_shl(cell))
+				decoders.push_back(cell);
+
+		for (auto decoder : decoders)
+			process_decoder(decoder);
+	}
+
+	// Find the selected width-1 $pmux cells (A all zero, S_WIDTH >= min_stride)
+	// whose S bits trace to consecutive outputs of `decoder`, bucket them by
+	// S_WIDTH and window start, and hand every run of at least S_WIDTH
+	// consecutive window starts to optimize_group().
+	void process_decoder(Cell *decoder)
+	{
+		SigSpec decoder_y = decoder->getPort(ID::Y);
+
+		std::vector<PmuxInfo> candidates;
+
+		for (auto cell : module->selected_cells()) {
+			if (cell->type != ID($pmux))
+				continue;
+			if (cell->getParam(ID::WIDTH).as_int() != 1)
+				continue;
+			SigSpec sig_a = cell->getPort(ID::A);
+			if (!sig_a.is_fully_zero())
+				continue;
+
+			SigSpec sig_s = cell->getPort(ID::S);
+			int s_width = GetSize(sig_s);
+			if (s_width < min_stride)
+				continue;
+
+			std::vector<int> positions;
+			bool valid = true;
+
+			for (int i = 0; i < s_width; i++) {
+				int pos = trace_to_decoder_pos(sig_s[i], decoder_y);
+				if (pos < 0) { valid = false; break; }
+				positions.push_back(pos);
+			}
+			if (!valid)
+				continue;
+
+			bool contiguous = true;
+			for (int i = 1; i < s_width; i++) {
+				if (positions[i] != positions[i - 1] + 1) {
+					contiguous = false;
+					break;
+				}
+			}
+			if (!contiguous)
+				continue;
+
+			candidates.push_back({cell, positions[0]});
+		}
+
+		if (candidates.empty())
+			return;
+
+		std::sort(candidates.begin(), candidates.end(),
+			[](const PmuxInfo &a, const PmuxInfo &b) {
+				return a.window_start < b.window_start;
+			});
+
+		// Partition candidates by S_WIDTH, then separate multiplexed
+		// VPS groups that share the same decoder positions.
+		dict<int, std::vector<PmuxInfo>> by_swidth;
+		for (auto &c : candidates)
+			by_swidth[GetSize(c.cell->getPort(ID::S))].push_back(c);
+
+		for (auto &[W, cells] : by_swidth) {
+			// Sort by window_start
+			std::sort(cells.begin(), cells.end(),
+				[](const PmuxInfo &a, const PmuxInfo &b) {
+					return a.window_start < b.window_start;
+				});
+
+			// Build position buckets: window_start → list of cells
+			dict<int, std::vector<PmuxInfo>> by_pos;
+			for (auto &c : cells)
+				by_pos[c.window_start].push_back(c);
+
+			// Find longest contiguous run of positions
+			std::vector<int> positions;
+			for (auto &[pos, _] : by_pos)
+				positions.push_back(pos);
+			std::sort(positions.begin(), positions.end());
+
+			// Extract contiguous runs
+			int run_start = 0;
+			while (run_start < (int)positions.size()) {
+				int run_end = run_start + 1;
+				while (run_end < (int)positions.size() &&
+						positions[run_end] == positions[run_end - 1] + 1)
+					run_end++;
+
+				int N = run_end - run_start;
+				if (N >= W) {
+					int base = positions[run_start];
+					// Number of complete groups = smallest bucket in the run.
+					int multiplicity = GetSize(by_pos[base]);
+					for (int pos_idx = run_start; pos_idx < run_end; pos_idx++)
+						multiplicity = std::min(multiplicity,
+								GetSize(by_pos[positions[pos_idx]]));
+
+					for (int g = 0; g < multiplicity; g++) {
+						std::vector<PmuxInfo> group;
+						for (int pos_idx = run_start; pos_idx < run_end; pos_idx++)
+							group.push_back(by_pos[positions[pos_idx]][g]);
+
+						// Store group in candidates array for optimize_group
+						int gstart = candidates.size();
+						for (auto &c : group)
+							candidates.push_back(c);
+						optimize_group(decoder, candidates, gstart,
+								N, W);
+					}
+				}
+
+				run_start = run_end;
+			}
+		}
+	}
+
+	// Rewrite one group: candidates[group_start .. group_start+N) are N $pmux
+	// cells with consecutive window starts and S_WIDTH == W. Builds one enable
+	// per lane of W bits, then either collapses the whole feedback chain into
+	// per-lane $mux cells or replaces each $pmux with a single-bit $mux.
+	// NOTE(review): both paths take data bit B[W-1-b] for bit b of a lane no
+	// matter which select bit is active; this presumes the decoder index only
+	// ever selects lane-aligned windows -- confirm against the frontend.
+	void optimize_group(Cell *decoder, std::vector<PmuxInfo> &candidates,
+			int group_start, int N, int W)
+	{
+		int base = candidates[group_start].window_start;
+		int lane_count = (N + W - 1) / W;
+
+		log(" VPS group: decoder %s, base=%d, %d bits, stride=%d, %d lanes\n",
+				log_id(decoder->name), base, N, W, lane_count);
+
+		SigSpec decoder_y = decoder->getPort(ID::Y);
+
+		// Collect gated decoder bits and overflow conditions
+		// NOTE(review): overflow_bits is filled here but never read below.
+		dict<int, SigBit> gated_bits;
+		dict<int, SigBit> overflow_bits;
+
+		for (int i = 0; i < N; i++) {
+			Cell *pmux_cell = candidates[group_start + i].cell;
+			SigSpec sig_s = pmux_cell->getPort(ID::S);
+			int ws = candidates[group_start + i].window_start;
+			for (int k = 0; k < W; k++) {
+				int pos = ws + k;
+				SigBit sb = sigmap(sig_s[k]);
+				if (gated_bits.count(pos)) {
+					if (gated_bits[pos] != sb) {
+						log(" WARNING: inconsistent gated bit at decoder pos %d\n", pos);
+						return;
+					}
+				} else {
+					gated_bits[pos] = sb;
+					SigBit ov_cond;
+					trace_to_decoder_pos(sb, decoder_y, &ov_cond);
+					overflow_bits[pos] = ov_cond;
+				}
+			}
+		}
+
+		// Try binary-index lane enables: instead of OR-reducing W one-hot
+		// decoder bits per lane, compare the binary index directly.
+		// Requirements: W is a power of 2, base is W-aligned.
+		bool use_binary = (W & (W - 1)) == 0 && (base % W) == 0;
+
+		SigSpec binary_index;
+		int log2_w = 0;
+
+		if (use_binary) {
+			binary_index = decoder->getPort(ID::B);
+			for (int tmp = W; tmp > 1; tmp >>= 1)
+				log2_w++;
+
+			// All lanes must fit inside the decoder output.
+			int decoder_y_width = GetSize(decoder->getPort(ID::Y));
+			if (base + lane_count * W > decoder_y_width)
+				use_binary = false;
+		}
+
+		std::vector<SigBit> lane_en(lane_count);
+
+		if (use_binary) {
+			int upper_width = GetSize(binary_index) - log2_w;
+			SigSpec upper_bits;
+			if (upper_width > 0)
+				upper_bits = binary_index.extract(log2_w, upper_width);
+
+			for (int L = 0; L < lane_count; L++) {
+				SigBit range_bit;
+
+				if (upper_width > 0) {
+					int lane_idx = base / W + L;
+					Wire *eq_w = module->addWire(NEW_ID_SUFFIX("vps_lane_eq"), 1);
+					module->addEq(NEW_ID_SUFFIX("vps_lane_cmp"),
+							upper_bits, Const(lane_idx, upper_width), eq_w);
+					range_bit = SigBit(eq_w);
+				} else {
+					range_bit = State::S1;
+				}
+
+				lane_en[L] = range_bit;
+			}
+
+			log(" using binary-index lane enables (%d upper bits)\n",
+					upper_width > 0 ? upper_width : 0);
+		} else {
+			for (int L = 0; L < lane_count; L++) {
+				SigSpec lane_bits;
+				for (int k = 0; k < W; k++) {
+					int pos = base + L * W + k;
+					if (gated_bits.count(pos))
+						lane_bits.append(gated_bits.at(pos));
+				}
+
+				if (GetSize(lane_bits) == 0) {
+					lane_en[L] = State::S0;
+				} else if (GetSize(lane_bits) == 1) {
+					lane_en[L] = lane_bits[0];
+				} else {
+					Wire *w = module->addWire(NEW_ID_SUFFIX("vps_lane_en"), 1);
+					module->addReduceOr(NEW_ID_SUFFIX("vps_lane_or"), lane_bits, w);
+					lane_en[L] = SigBit(w);
+				}
+			}
+		}
+
+		// Probe for the full feedback collapse pattern:
+		// $pmux.Y -> $mux(Q[i], pmux_Y, gated_en).Y -> top_$mux(Q, {results}, wr_en)
+		// When detected, replace the entire chain with per-lane wide muxes.
+		bool full_collapse = use_binary && (N % W == 0);
+		Cell *top_wr_mux = nullptr;
+		SigBit wr_en_sig;
+		std::vector<FeedbackInfo> fb_info(N);
+
+		if (full_collapse) {
+			for (int i = 0; i < N; i++) {
+				Cell *pmux_cell = candidates[group_start + i].cell;
+				SigBit pmux_y = sigmap(pmux_cell->getPort(ID::Y)[0]);
+
+				Cell *fb_mux = find_sole_consumer(pmux_y);
+				if (!fb_mux || fb_mux->type != ID($mux) ||
+						fb_mux->getParam(ID::WIDTH).as_int() != 1 ||
+						sigmap(fb_mux->getPort(ID::B)[0]) != pmux_y) {
+					full_collapse = false;
+					break;
+				}
+
+				SigBit q_bit = sigmap(fb_mux->getPort(ID::A)[0]);
+				SigBit gated_en = sigmap(fb_mux->getPort(ID::S)[0]);
+
+				// Only remember the select driver when it is an AND gate.
+				Cell *and_gate = bit_drivers.at(gated_en, nullptr);
+				if (and_gate &&
+						and_gate->type != ID($and) &&
+						and_gate->type != ID($_AND_))
+					and_gate = nullptr;
+
+				SigBit fb_y = sigmap(fb_mux->getPort(ID::Y)[0]);
+				Cell *wr_mux = find_sole_consumer(fb_y);
+				if (!wr_mux || wr_mux->type != ID($mux) ||
+						wr_mux->getParam(ID::WIDTH).as_int() <= 1) {
+					full_collapse = false;
+					break;
+				}
+
+				SigSpec wr_b = wr_mux->getPort(ID::B);
+				bool in_b = false;
+				for (int j = 0; j < GetSize(wr_b); j++)
+					if (sigmap(wr_b[j]) == fb_y) { in_b = true; break; }
+				if (!in_b) {
+					full_collapse = false;
+					break;
+				}
+
+				// Every bit of the group must feed the same top-level mux.
+				SigBit this_wr_en = sigmap(wr_mux->getPort(ID::S)[0]);
+				if (top_wr_mux == nullptr) {
+					top_wr_mux = wr_mux;
+					wr_en_sig = this_wr_en;
+				} else if (top_wr_mux != wr_mux) {
+					full_collapse = false;
+					break;
+				}
+
+				fb_info[i] = {fb_mux, and_gate, q_bit};
+			}
+		}
+
+		// Build lookup: S SigSpec (through sigmap) -> $reduce_or cell
+		dict<SigSpec, Cell*> reduce_or_map;
+		for (auto cell : module->cells()) {
+			if (cell->type != ID($reduce_or))
+				continue;
+			SigSpec a = sigmap(cell->getPort(ID::A));
+			reduce_or_map[a] = cell;
+		}
+
+		if (full_collapse) {
+			log(" full feedback collapse: %d lanes, wr_en mux %s\n",
+					lane_count, log_id(top_wr_mux->name));
+
+			pool<Cell*> cells_to_remove;
+
+			for (int L = 0; L < lane_count; L++) {
+				SigSpec data_lane, q_lane, fb_y_lane;
+
+				for (int b = 0; b < W; b++) {
+					int i = L * W + b;
+					Cell *pmux_cell = candidates[group_start + i].cell;
+					SigSpec cell_b = pmux_cell->getPort(ID::B);
+					data_lane.append(cell_b[W - 1 - b]);
+					q_lane.append(fb_info[i].q_bit);
+					fb_y_lane.append(fb_info[i].feedback_mux->getPort(ID::Y));
+
+					cells_to_remove.insert(pmux_cell);
+					cells_to_remove.insert(fb_info[i].feedback_mux);
+					if (fb_info[i].and_gate) {
+						// Drop the AND gate only if nothing else reads it.
+						SigBit and_y = sigmap(fb_info[i].and_gate->getPort(ID::Y)[0]);
+						auto ac = bit_consumers.find(and_y);
+						if (ac != bit_consumers.end() && ac->second.size() == 1)
+							cells_to_remove.insert(fb_info[i].and_gate);
+					}
+
+					SigSpec pmux_s = sigmap(pmux_cell->getPort(ID::S));
+					auto it = reduce_or_map.find(pmux_s);
+					if (it != reduce_or_map.end()) {
+						cells_to_remove.insert(it->second);
+						reduce_or_map.erase(it);
+						reduce_or_replaced++;
+					}
+					pmux_replaced++;
+				}
+
+				Wire *gated_w = module->addWire(NEW_ID_SUFFIX("vps_wr_lane_en"), 1);
+				module->addAnd(NEW_ID_SUFFIX("vps_wr_lane_and"),
+						SigSpec(wr_en_sig), SigSpec(lane_en[L]),
+						SigSpec(gated_w));
+
+				Cell *lane_mux = module->addMux(
+						NEW_ID_SUFFIX("vps_lane_mux"),
+						q_lane, data_lane, SigBit(gated_w), fb_y_lane);
+				lane_mux->add_strpool_attribute(ID::src,
+						candidates[group_start + L * W].cell->get_strpool_attribute(ID::src));
+			}
+
+			for (auto c : cells_to_remove)
+				module->remove(c);
+
+			// Remove redundant top-level wr_en mux if all its B-port
+			// bits are now driven by the per-lane muxes.
+			if (N == top_wr_mux->getParam(ID::WIDTH).as_int()) {
+				SigSpec wr_y = top_wr_mux->getPort(ID::Y);
+				SigSpec wr_b = top_wr_mux->getPort(ID::B);
+				module->connect(wr_y, wr_b);
+				// Log before removing: Module::remove() destroys the cell,
+				// so top_wr_mux must not be dereferenced afterwards.
+				log(" removed redundant top-level wr_en mux %s\n",
+						log_id(top_wr_mux->name));
+				module->remove(top_wr_mux);
+			}
+
+			feedback_collapsed += N;
+		} else {
+			// Fallback: per-bit $mux replacement
+			for (int i = 0; i < N; i++) {
+				Cell *pmux_cell = candidates[group_start + i].cell;
+				int L = i / W;
+				int b = i % W;
+
+				SigSpec cell_b = pmux_cell->getPort(ID::B);
+				SigBit data_bit = cell_b[W - 1 - b];
+				SigSpec sig_y = pmux_cell->getPort(ID::Y);
+
+				Cell *mux = module->addMux(NEW_ID_SUFFIX("vps_mux"),
+						State::S0, data_bit, lane_en[L], sig_y);
+				mux->add_strpool_attribute(ID::src,
+						pmux_cell->get_strpool_attribute(ID::src));
+
+				// A $reduce_or over exactly this S vector is replaced by
+				// the shared lane enable.
+				SigSpec pmux_s = sigmap(pmux_cell->getPort(ID::S));
+				auto it = reduce_or_map.find(pmux_s);
+				if (it != reduce_or_map.end()) {
+					Cell *ror = it->second;
+					module->connect(ror->getPort(ID::Y), lane_en[L]);
+					module->remove(ror);
+					reduce_or_map.erase(it);
+					reduce_or_replaced++;
+				}
+
+				module->remove(pmux_cell);
+				pmux_replaced++;
+			}
+		}
+
+		groups_optimized++;
+	}
+};
+
+// Command-line front end: parses -min_stride, runs one OptVpsWorker per
+// selected module and prints per-module and total statistics.
+struct OptVpsPass : public Pass {
+	OptVpsPass() : Pass("opt_vps", "optimize Verific variable-part-select patterns") {}
+	void help() override
+	{
+		// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("\n");
+		log(" opt_vps [options] [selection]\n");
+		log("\n");
+		log("Detect variable-part-select (VPS) write patterns generated by Verific\n");
+		log("and replace the per-bit sliding-window $pmux cells with per-lane\n");
+		log("enable logic and direct data wiring.\n");
+		log("\n");
+		log("Verific lowers VPS writes like `reg[idx -: W] <= data` into a\n");
+		log("bit-granularity decoder ($shl with A=1) followed by overflow-gated\n");
+		log("AND gates and N sliding-window one-hot $pmux cells (one per output\n");
+		log("bit, each with S_WIDTH=W). This structure has O(N*W) gates after\n");
+		log("pmuxtree expansion.\n");
+		log("\n");
+		log("This pass recovers the lane structure and replaces each W-entry\n");
+		log("$pmux with a single 2:1 $mux gated by a shared per-lane enable,\n");
+		log("reducing the gate count to O(N + N/W).\n");
+		log("\n");
+		log("The pass also replaces per-bit $reduce_or enable cells with the\n");
+		log("shared lane enable signal.\n");
+		log("\n");
+		log(" -min_stride <n>\n");
+		log(" Minimum stride (S_WIDTH of the $pmux cells) to consider.\n");
+		log(" Default: 4.\n");
+		log("\n");
+	}
+	void execute(std::vector<std::string> args, RTLIL::Design *design) override
+	{
+		int min_stride = 4;
+
+		log_header(design, "Executing OPT_VPS pass (optimize Verific VPS patterns).\n");
+
+		size_t argidx;
+		for (argidx = 1; argidx < args.size(); argidx++) {
+			// -min_stride consumes the following argument as its value.
+			if (args[argidx] == "-min_stride" && argidx + 1 < args.size()) {
+				min_stride = std::stoi(args[++argidx]);
+				continue;
+			}
+			break;
+		}
+		extra_args(args, argidx, design);
+
+		int total_groups = 0, total_pmux = 0, total_ror = 0, total_fb = 0;
+
+		for (auto module : design->selected_modules()) {
+			if (module->has_processes_warn())
+				continue;
+
+			OptVpsWorker worker(module, min_stride);
+			worker.run();
+
+			if (worker.groups_optimized > 0)
+				log(" Module %s: %d VPS group(s), %d $pmux replaced, "
+						"%d $reduce_or replaced, %d feedback collapsed.\n",
+						log_id(module->name), worker.groups_optimized,
+						worker.pmux_replaced, worker.reduce_or_replaced,
+						worker.feedback_collapsed);
+
+			total_groups += worker.groups_optimized;
+			total_pmux += worker.pmux_replaced;
+			total_ror += worker.reduce_or_replaced;
+			total_fb += worker.feedback_collapsed;
+		}
+
+		log("Optimized %d VPS group(s), %d $pmux replaced, "
+				"%d $reduce_or replaced, %d feedback collapsed.\n",
+				total_groups, total_pmux, total_ror, total_fb);
+	}
+} OptVpsPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/tests/silimate/opt_vps.ys b/tests/silimate/opt_vps.ys
new file mode 100644
index 000000000..6f35f8808
--- 
/dev/null
+++ b/tests/silimate/opt_vps.ys
@@ -0,0 +1,159 @@
+# =============================================================================
+# Test 1: SAT equivalence — VPS byte-write vs case-statement reference
+# Proves opt_vps produces a logically equivalent circuit to hand-written
+# case statements for a 32-bit register with 4 byte lanes.
+# =============================================================================
+log -header "SAT equivalence: byte-write VPS vs case-statement ref"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+
+read -sv opt_vps_byte_write.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+opt_vps; opt_clean  # pass under test, then sweep unused cells and wires
+rename opt_vps_byte_write gate  # optimized netlist is the "gate" side of the miter
+
+read -sv opt_vps_byte_write_ref.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+rename opt_vps_byte_write gold  # reference netlist is the "gold" side of the miter
+
+miter -equiv -flatten -make_assert gold gate miter
+hierarchy -top miter
+proc; opt; memory; opt
+clk2fflogic  # model the flip-flops as logic so sat can reason across clock edges
+sat -set-init-zero -tempinduct -prove-asserts -verify  # -verify aborts the script if the proof fails
+design -reset
+log -pop
+
+# =============================================================================
+# Test 2: SAT self-equivalence — byte-write before vs after opt_vps
+# Proves opt_vps does not change the functional behavior.
+# =============================================================================
+log -header "SAT self-equivalence: byte-write before vs after opt_vps"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+
+read -sv opt_vps_byte_write.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+rename opt_vps_byte_write gold  # same source as gate, but without opt_vps
+
+read -sv opt_vps_byte_write.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+opt_vps; opt_clean
+rename opt_vps_byte_write gate
+
+miter -equiv -flatten -make_assert gold gate miter
+hierarchy -top miter
+proc; opt; memory; opt
+clk2fflogic
+sat -set-init-zero -tempinduct -prove-asserts -verify
+design -reset
+log -pop
+
+# =============================================================================
+# Test 3: SAT self-equivalence — wide (128-bit, 16-bit lanes)
+# Ensures opt_vps is correct on a larger design with 8 lanes.
+# =============================================================================
+log -header "SAT self-equivalence: wide 128-bit VPS"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+
+read -sv opt_vps_wide.sv
+verific -import opt_vps_wide
+proc; opt_clean
+rename opt_vps_wide gold
+
+read -sv opt_vps_wide.sv
+verific -import opt_vps_wide
+proc; opt_clean
+opt_vps; opt_clean
+rename opt_vps_wide gate
+
+miter -equiv -flatten -make_assert gold gate miter
+hierarchy -top miter
+proc; opt; memory; opt
+clk2fflogic
+sat -set-init-zero -tempinduct -prove-asserts -verify
+design -reset
+log -pop
+
+# =============================================================================
+# Test 4: Cell count verification — byte-write
+# After opt_vps, all $pmux and $reduce_or cells should be eliminated and
+# replaced with per-lane $eq/$and/$mux cells.
+# =============================================================================
+log -header "Cell counts: byte-write post-opt_vps"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+
+read -sv opt_vps_byte_write.sv
+verific -import opt_vps_byte_write
+proc; opt_clean
+opt_vps; opt_clean
+
+select -assert-none t:$pmux
+select -assert-none t:$reduce_or
+select -assert-count 4 t:$eq  # one per byte lane (4 lanes)
+select -assert-count 4 t:$and  # one per byte lane
+select -assert-count 4 t:$mux  # one per byte lane
+select -assert-count 1 t:$dff
+design -reset
+log -pop
+
+# =============================================================================
+# Test 5: Cell count verification — wide
+# Same as above but for the wider 128-bit / 8-lane case.
+# =============================================================================
+log -header "Cell counts: wide post-opt_vps"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+
+read -sv opt_vps_wide.sv
+verific -import opt_vps_wide
+proc; opt_clean
+opt_vps; opt_clean
+
+select -assert-none t:$pmux  # unlike Test 4, per-lane cell counts are not asserted here
+select -assert-none t:$reduce_or
+select -assert-count 1 t:$dff
+design -reset
+log -pop
+
+# =============================================================================
+# Test 6: Negative case — no VPS pattern
+# A simple mux-based register should not trigger opt_vps.
+# =============================================================================
+log -header "Negative: non-VPS design unchanged"
+log -push
+design -reset
+verific -cfg veri_optimize_wide_selector 1
+verific -cfg db_infer_wide_muxes_post_elaboration 0
+
+read -sv opt_vps_no_match.sv
+verific -import opt_vps_no_match
+proc; opt_clean
+
+stat
+opt_vps
+stat
+
+select -assert-none t:$pmux
+select -assert-none t:$eq w:*vps*  # no $eq cells and no wires with "vps" in the name
+select -assert-count 1 t:$mux
+select -assert-count 1 t:$dff
+design -reset
+log -pop
+
diff --git a/tests/silimate/opt_vps_byte_write.sv b/tests/silimate/opt_vps_byte_write.sv
new file mode 100644
index 000000000..d4ef154fb
--- /dev/null
+++ b/tests/silimate/opt_vps_byte_write.sv
@@ -0,0 +1,14 @@
+// 32-bit register with byte-lane writes indexed by a 2-bit selector (VPS).
+module opt_vps_byte_write (
+    input logic clk,
+    input logic wr_en,
+    input logic [1:0] lane,
+    input logic [7:0] wdata,
+    output logic [31:0] q
+);
+    logic [31:0] reg_data;
+    always_ff @(posedge clk)
+        if (wr_en)
+            reg_data[((lane + 1) * 8) - 1 -: 8] <= wdata;
+    assign q = reg_data;
+endmodule
diff --git a/tests/silimate/opt_vps_byte_write_ref.sv b/tests/silimate/opt_vps_byte_write_ref.sv
new file mode 100644
index 000000000..60de54d00
--- /dev/null
+++ b/tests/silimate/opt_vps_byte_write_ref.sv
@@ -0,0 +1,19 @@
+// Reference: equivalent design WITHOUT variable-part-select.
+module opt_vps_byte_write (
+    input logic clk,
+    input logic wr_en,
+    input logic [1:0] lane,
+    input logic [7:0] wdata,
+    output logic [31:0] q
+);
+    logic [31:0] reg_data;
+    always_ff @(posedge clk)
+        if (wr_en)
+            case (lane)
+                2'd0: reg_data[ 7: 0] <= wdata;
+                2'd1: reg_data[15: 8] <= wdata;
+                2'd2: reg_data[23:16] <= wdata;
+                2'd3: reg_data[31:24] <= wdata;
+            endcase
+    assign q = reg_data;
+endmodule
diff --git a/tests/silimate/opt_vps_no_match.sv b/tests/silimate/opt_vps_no_match.sv
new file mode 100644
index 000000000..d2ec0be78
--- /dev/null
+++ b/tests/silimate/opt_vps_no_match.sv
@@ -0,0 +1,12 @@
+// Simple mux-based register -- no VPS pattern, opt_vps should not fire.
+module opt_vps_no_match (
+    input logic clk,
+    input logic sel,
+    input logic [7:0] a, b,
+    output logic [7:0] q
+);
+    logic [7:0] reg_data;
+    always_ff @(posedge clk)
+        reg_data <= sel ? a : b;
+    assign q = reg_data;
+endmodule
diff --git a/tests/silimate/opt_vps_wide.sv b/tests/silimate/opt_vps_wide.sv
new file mode 100644
index 000000000..88ad76011
--- /dev/null
+++ b/tests/silimate/opt_vps_wide.sv
@@ -0,0 +1,14 @@
+// 128-bit register with 16-bit lane writes indexed by a 3-bit selector (VPS).
+module opt_vps_wide (
+    input logic clk,
+    input logic wr_en,
+    input logic [2:0] lane,
+    input logic [15:0] wdata,
+    output logic [127:0] q
+);
+    logic [127:0] reg_data;
+    always_ff @(posedge clk)
+        if (wr_en)
+            reg_data[((lane + 1) * 16) - 1 -: 16] <= wdata;
+    assign q = reg_data;
+endmodule