mirror of https://github.com/YosysHQ/yosys.git
opt_vps
This commit is contained in:
parent
9a099f73b3
commit
1820526a9a
|
|
@ -17,6 +17,7 @@ OBJS += passes/silimate/splitnetlist.o
|
|||
# Object files for passes living under passes/silimate/.
OBJS += passes/silimate/opt_timing_balance.o
OBJS += passes/silimate/cone_partition.o
OBJS += passes/silimate/clkmerge.o
OBJS += passes/silimate/opt_vps.o

# opt_expand together with the header listed in GENFILES for it.
OBJS += passes/silimate/opt_expand.o
GENFILES += passes/silimate/peepopt_expand.h
|
||||
|
|
|
|||
|
|
@ -0,0 +1,615 @@
|
|||
/*
|
||||
* yosys -- Yosys Open SYnthesis Suite
|
||||
*
|
||||
* Copyright (C) 2012 Claire Xenia Wolf <claire@yosyshq.com>
|
||||
* 2025 Silimate Inc. <akash@silimate.com>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/yosys.h"
|
||||
#include "kernel/sigtools.h"
|
||||
|
||||
USING_YOSYS_NAMESPACE
|
||||
PRIVATE_NAMESPACE_BEGIN
|
||||
|
||||
struct OptVpsWorker
|
||||
{
|
||||
struct PmuxInfo {
|
||||
Cell *cell;
|
||||
int window_start;
|
||||
};
|
||||
|
||||
struct FeedbackInfo {
|
||||
Cell *feedback_mux;
|
||||
Cell *and_gate;
|
||||
SigBit q_bit;
|
||||
};
|
||||
|
||||
Module *module;
|
||||
SigMap sigmap;
|
||||
dict<SigBit, Cell *> bit_drivers;
|
||||
dict<SigBit, pool<Cell *>> bit_consumers;
|
||||
int groups_optimized = 0;
|
||||
int pmux_replaced = 0;
|
||||
int reduce_or_replaced = 0;
|
||||
int feedback_collapsed = 0;
|
||||
int min_stride;
|
||||
|
||||
// Build the driver and consumer indices for `module`.
//
//   module      module to analyse and later rewrite
//   min_stride  smallest $pmux S width that process_decoder() will accept
//
// Every bit on a cell output port is recorded in bit_drivers; every
// non-constant bit on any other cell port is recorded in bit_consumers.
OptVpsWorker(Module *module, int min_stride)
	: module(module), sigmap(module), min_stride(min_stride)
{
	for (auto cell : module->cells())
		for (auto &conn : cell->connections()) {
			const bool is_output = cell->output(conn.first);
			for (auto raw_bit : conn.second) {
				SigBit bit = sigmap(raw_bit);
				if (is_output)
					bit_drivers[bit] = cell;
				else if (bit.wire)
					bit_consumers[bit].insert(cell);
			}
		}

	// A module output port observes its bits just like a cell input does.
	// Without this, find_sole_consumer() would report a cell as the only
	// reader of a bit that also leaves the module, and the collapse logic
	// could delete the logic driving that port. A nullptr entry acts as the
	// "external observer" sentinel: it bumps the consumer count, and callers
	// already treat a nullptr result as "no usable sole consumer".
	for (auto wire : module->wires()) {
		if (!wire->port_output)
			continue;
		for (auto bit : sigmap(wire))
			if (bit.wire)
				bit_consumers[bit].insert(nullptr);
	}
}
|
||||
|
||||
// Return the single cell recorded as consuming `bit` (after sigmap), or
// nullptr when the bit has no recorded consumer or more than one.
Cell *find_sole_consumer(SigBit bit)
{
	const auto entry = bit_consumers.find(sigmap(bit));
	if (entry != bit_consumers.end()) {
		const pool<Cell *> &users = entry->second;
		if (users.size() == 1)
			return *users.begin();
	}
	return nullptr;
}
|
||||
|
||||
// True when `cell` is a $shl whose A input is the constant 1 (bit 0 set, all
// other bits clear), i.e. a shift that produces a one-hot decode of B.
bool is_decoder_shl(Cell *cell)
{
	if (cell->type != ID($shl))
		return false;

	SigSpec shifted = cell->getPort(ID::A);
	if (!shifted.is_fully_const())
		return false;

	Const value = shifted.as_const();
	const int width = GetSize(value);
	if (width < 1)
		return false;

	// Bit 0 must be 1, every higher bit must be 0.
	for (int idx = 0; idx < width; idx++) {
		const State expected = (idx == 0) ? State::S1 : State::S0;
		if (value[idx] != expected)
			return false;
	}
	return true;
}
|
||||
|
||||
// Trace an S-port bit back through an optional AND gate to find
|
||||
// which decoder output position it comes from. Returns -1 on failure.
|
||||
// If overflow_cond is non-null, stores the non-decoder input of the
|
||||
// AND gate (the overflow mask bit), or State::S1 if direct.
|
||||
int trace_to_decoder_pos(SigBit bit, SigSpec &decoder_y,
|
||||
SigBit *overflow_cond = nullptr)
|
||||
{
|
||||
SigBit mapped = sigmap(bit);
|
||||
|
||||
for (int i = 0; i < GetSize(decoder_y); i++)
|
||||
if (sigmap(decoder_y[i]) == mapped) {
|
||||
if (overflow_cond)
|
||||
*overflow_cond = State::S1;
|
||||
return i;
|
||||
}
|
||||
|
||||
Cell *driver = bit_drivers.at(mapped, nullptr);
|
||||
if (!driver)
|
||||
return -1;
|
||||
|
||||
if (driver->type == ID($and)) {
|
||||
SigSpec port_a = driver->getPort(ID::A);
|
||||
SigSpec port_b = driver->getPort(ID::B);
|
||||
if (GetSize(port_a) == 1 && GetSize(port_b) == 1) {
|
||||
SigBit a = sigmap(port_a[0]);
|
||||
SigBit b = sigmap(port_b[0]);
|
||||
for (int i = 0; i < GetSize(decoder_y); i++) {
|
||||
SigBit dy = sigmap(decoder_y[i]);
|
||||
if (dy == a) {
|
||||
if (overflow_cond) *overflow_cond = b;
|
||||
return i;
|
||||
}
|
||||
if (dy == b) {
|
||||
if (overflow_cond) *overflow_cond = a;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (driver->type == ID($_AND_)) {
|
||||
SigBit a = sigmap(driver->getPort(ID::A));
|
||||
SigBit b = sigmap(driver->getPort(ID::B));
|
||||
for (int i = 0; i < GetSize(decoder_y); i++) {
|
||||
SigBit dy = sigmap(decoder_y[i]);
|
||||
if (dy == a) {
|
||||
if (overflow_cond) *overflow_cond = b;
|
||||
return i;
|
||||
}
|
||||
if (dy == b) {
|
||||
if (overflow_cond) *overflow_cond = a;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
void run()
|
||||
{
|
||||
std::vector<Cell *> decoders;
|
||||
for (auto cell : module->selected_cells())
|
||||
if (is_decoder_shl(cell))
|
||||
decoders.push_back(cell);
|
||||
|
||||
for (auto decoder : decoders)
|
||||
process_decoder(decoder);
|
||||
}
|
||||
|
||||
// Find the sliding-window $pmux cells fed by `decoder`, partition them into
// VPS groups and hand each group to optimize_group().
//
// A candidate is a selected $pmux with WIDTH == 1, an all-zero A input and an
// S port of at least min_stride bits whose bits trace back (directly or via a
// single AND gate) to consecutive decoder output positions.
//
// Candidates are bucketed by S width W, then by window start. Each maximal run
// of consecutive window starts that is at least W long forms a group; when
// several candidates share every start position of a run, one group is formed
// per shared "layer" (the g-th candidate of each position).
// NOTE(review): layers are paired purely by bucket order; nothing here checks
// that the g-th candidates actually belong to the same register.
void process_decoder(Cell *decoder)
{
	SigSpec decoder_y = decoder->getPort(ID::Y);

	std::vector<PmuxInfo> candidates;

	for (auto cell : module->selected_cells()) {
		if (cell->type != ID($pmux))
			continue;
		if (cell->getParam(ID::WIDTH).as_int() != 1)
			continue;
		if (!cell->getPort(ID::A).is_fully_zero())
			continue;

		SigSpec sig_s = cell->getPort(ID::S);
		const int s_width = GetSize(sig_s);
		// An empty select port has no window start at all; reject it
		// explicitly so a non-positive min_stride cannot let it through.
		if (s_width < 1 || s_width < min_stride)
			continue;

		// Every select bit must trace to the decoder, and bit i must land
		// exactly i positions after bit 0.
		int window_start = -1;
		bool windowed = true;
		for (int i = 0; i < s_width; i++) {
			const int pos = trace_to_decoder_pos(sig_s[i], decoder_y);
			if (pos < 0 || (i > 0 && pos != window_start + i)) {
				windowed = false;
				break;
			}
			if (i == 0)
				window_start = pos;
		}
		if (!windowed)
			continue;

		candidates.push_back({cell, window_start});
	}

	if (candidates.empty())
		return;

	auto by_window = [](const PmuxInfo &a, const PmuxInfo &b) {
		return a.window_start < b.window_start;
	};

	std::sort(candidates.begin(), candidates.end(), by_window);

	// Partition candidates by S_WIDTH, then separate multiplexed
	// VPS groups that share the same decoder positions.
	dict<int, std::vector<PmuxInfo>> by_swidth;
	for (auto &c : candidates)
		by_swidth[GetSize(c.cell->getPort(ID::S))].push_back(c);

	for (auto &[W, cells] : by_swidth) {
		std::sort(cells.begin(), cells.end(), by_window);

		// Position buckets: window_start -> candidates starting there.
		dict<int, std::vector<PmuxInfo>> by_pos;
		for (auto &c : cells)
			by_pos[c.window_start].push_back(c);

		std::vector<int> positions;
		for (auto &bucket : by_pos)
			positions.push_back(bucket.first);
		std::sort(positions.begin(), positions.end());

		// Walk the maximal runs of consecutive positions.
		const int pos_count = GetSize(positions);
		int run_start = 0;
		while (run_start < pos_count) {
			int run_end = run_start + 1;
			while (run_end < pos_count &&
					positions[run_end] == positions[run_end - 1] + 1)
				run_end++;

			const int N = run_end - run_start;
			if (N >= W) {
				// Number of complete layers = smallest bucket in the run.
				int multiplicity = GetSize(by_pos.at(positions[run_start]));
				for (int pos_idx = run_start; pos_idx < run_end; pos_idx++)
					multiplicity = std::min(multiplicity,
							GetSize(by_pos.at(positions[pos_idx])));

				for (int g = 0; g < multiplicity; g++) {
					// optimize_group() reads its group from a slice of
					// `candidates`, so append the layer there.
					const int gstart = candidates.size();
					for (int pos_idx = run_start; pos_idx < run_end; pos_idx++)
						candidates.push_back(by_pos.at(positions[pos_idx])[g]);
					optimize_group(decoder, candidates, gstart, N, W);
				}
			}

			run_start = run_end;
		}
	}
}
|
||||
|
||||
// Rewrite one VPS group.
//
//   decoder      the $shl decoder the group hangs off
//   candidates   storage holding the group; entries
//                [group_start, group_start + N) are the group's $pmux cells,
//                ordered by consecutive window_start
//   group_start  index of the group's first entry in `candidates`
//   N            number of $pmux cells (register bits) in the group
//   W            S width of every $pmux in the group (the lane stride)
//
// Steps:
//   1. Check that overlapping windows agree on the select bit used for each
//      decoder position; bail out (with a log line) if they do not.
//   2. Build one enable per lane: an $eq on the upper bits of the decoder's
//      binary index when W is a power of two and the group is W-aligned and
//      fits in the decoder output, otherwise a $reduce_or of the lane's
//      select bits.
//   3. If every bit shows the chain $pmux -> 1-bit $mux -> shared wide $mux,
//      replace the chain with one W-bit $mux per lane ("full collapse");
//      otherwise replace each $pmux with a 2:1 $mux gated by its lane enable.
//   4. Re-drive any $reduce_or over a replaced $pmux's S port with the lane
//      enable and remove it.
//
// NOTE(review): the rewrite picks data bit B[W-1-b] for bit b of a lane, which
// appears to assume the decoder index always points at the top bit of a lane.
// Nothing in this function verifies that -- confirm against the lowering that
// produces these netlists.
void optimize_group(Cell *decoder, std::vector<PmuxInfo> &candidates,
		int group_start, int N, int W)
{
	// The lane arithmetic below divides by W and indexes the first entry.
	if (N < 1 || W < 1)
		return;

	const int base = candidates[group_start].window_start;
	const int lane_count = (N + W - 1) / W;

	log(" VPS group: decoder %s, base=%d, %d bits, stride=%d, %d lanes\n",
			log_id(decoder->name), base, N, W, lane_count);

	// Unregister a cell from the driver/consumer indices, then delete it.
	// Module::remove() destroys the cell, so leaving its pointer in the
	// indices would hand later groups a dangling (or recycled) address.
	auto retire_cell = [this](Cell *dead) {
		for (auto &conn : dead->connections()) {
			const bool is_output = dead->output(conn.first);
			for (auto raw_bit : conn.second) {
				SigBit bit = sigmap(raw_bit);
				if (is_output) {
					auto drv = bit_drivers.find(bit);
					if (drv != bit_drivers.end() && drv->second == dead)
						bit_drivers.erase(drv);
				} else {
					auto use = bit_consumers.find(bit);
					if (use != bit_consumers.end())
						use->second.erase(dead);
				}
			}
		}
		module->remove(dead);
	};

	// Drive the output of a per-bit $reduce_or enable from the shared lane
	// enable instead (zero-extended in case Y is wider than one bit). The
	// caller removes the cell afterwards.
	auto redrive_reduce_or = [this](Cell *ror, SigBit enable) {
		SigSpec ror_y = ror->getPort(ID::Y);
		SigSpec replacement(enable);
		replacement.extend_u0(GetSize(ror_y));
		module->connect(ror_y, replacement);
	};

	// Select bit observed at each decoder position. Neighbouring windows
	// overlap, so every position is seen by up to W cells and they must agree.
	dict<int, SigBit> gated_bits;
	for (int i = 0; i < N; i++) {
		const PmuxInfo &info = candidates[group_start + i];
		SigSpec sig_s = info.cell->getPort(ID::S);
		for (int k = 0; k < W; k++) {
			const int pos = info.window_start + k;
			SigBit sb = sigmap(sig_s[k]);
			auto seen = gated_bits.find(pos);
			if (seen == gated_bits.end()) {
				gated_bits[pos] = sb;
			} else if (seen->second != sb) {
				log(" WARNING: inconsistent gated bit at decoder pos %d\n", pos);
				return;
			}
		}
	}

	// Try binary-index lane enables: instead of OR-reducing W one-hot
	// decoder bits per lane, compare the binary index directly.
	// Requirements: W is a power of 2, base is W-aligned, and all lanes lie
	// inside the decoder output.
	bool use_binary = (W & (W - 1)) == 0 && (base % W) == 0;

	SigSpec binary_index;
	int log2_w = 0;

	if (use_binary) {
		binary_index = decoder->getPort(ID::B);
		for (int tmp = W; tmp > 1; tmp >>= 1)
			log2_w++;

		const int decoder_y_width = GetSize(decoder->getPort(ID::Y));
		if (base + lane_count * W > decoder_y_width)
			use_binary = false;
	}

	std::vector<SigBit> lane_en(lane_count);

	if (use_binary) {
		// The index bits above the in-lane offset identify the lane.
		const int upper_width = GetSize(binary_index) - log2_w;
		SigSpec upper_bits;
		if (upper_width > 0)
			upper_bits = binary_index.extract(log2_w, upper_width);

		for (int L = 0; L < lane_count; L++) {
			SigBit range_bit;

			if (upper_width > 0) {
				const int lane_idx = base / W + L;
				Wire *eq_w = module->addWire(NEW_ID_SUFFIX("vps_lane_eq"), 1);
				module->addEq(NEW_ID_SUFFIX("vps_lane_cmp"),
						upper_bits, Const(lane_idx, upper_width), eq_w);
				range_bit = SigBit(eq_w);
			} else {
				// No upper bits: there is only one possible lane.
				range_bit = State::S1;
			}

			lane_en[L] = range_bit;
		}

		log(" using binary-index lane enables (%d upper bits)\n",
				upper_width > 0 ? upper_width : 0);
	} else {
		for (int L = 0; L < lane_count; L++) {
			SigSpec lane_bits;
			for (int k = 0; k < W; k++) {
				const int pos = base + L * W + k;
				if (gated_bits.count(pos))
					lane_bits.append(gated_bits.at(pos));
			}

			if (GetSize(lane_bits) == 0) {
				lane_en[L] = State::S0;
			} else if (GetSize(lane_bits) == 1) {
				lane_en[L] = lane_bits[0];
			} else {
				Wire *w = module->addWire(NEW_ID_SUFFIX("vps_lane_en"), 1);
				module->addReduceOr(NEW_ID_SUFFIX("vps_lane_or"), lane_bits, w);
				lane_en[L] = SigBit(w);
			}
		}
	}

	// Probe for the full feedback collapse pattern:
	//   $pmux.Y -> $mux(Q[i], pmux_Y, gated_en).Y -> top_$mux(Q, {results}, wr_en)
	// When detected, replace the entire chain with per-lane wide muxes.
	bool full_collapse = use_binary && (N % W == 0);
	Cell *top_wr_mux = nullptr;
	SigBit wr_en_sig;
	std::vector<FeedbackInfo> fb_info(N);

	if (full_collapse) {
		for (int i = 0; i < N; i++) {
			Cell *pmux_cell = candidates[group_start + i].cell;
			SigBit pmux_y = sigmap(pmux_cell->getPort(ID::Y)[0]);

			// The $pmux must feed only the B input of a 1-bit $mux.
			Cell *fb_mux = find_sole_consumer(pmux_y);
			if (!fb_mux || fb_mux->type != ID($mux) ||
					fb_mux->getParam(ID::WIDTH).as_int() != 1 ||
					sigmap(fb_mux->getPort(ID::B)[0]) != pmux_y) {
				full_collapse = false;
				break;
			}

			SigBit q_bit = sigmap(fb_mux->getPort(ID::A)[0]);
			SigBit gated_en = sigmap(fb_mux->getPort(ID::S)[0]);

			// Remember the gate behind the per-bit enable, if it is an AND.
			Cell *and_gate = bit_drivers.at(gated_en, nullptr);
			if (and_gate &&
					and_gate->type != ID($and) &&
					and_gate->type != ID($_AND_))
				and_gate = nullptr;

			// That 1-bit $mux must in turn feed only the B input of a
			// wider $mux ...
			SigBit fb_y = sigmap(fb_mux->getPort(ID::Y)[0]);
			Cell *wr_mux = find_sole_consumer(fb_y);
			if (!wr_mux || wr_mux->type != ID($mux) ||
					wr_mux->getParam(ID::WIDTH).as_int() <= 1) {
				full_collapse = false;
				break;
			}

			SigSpec wr_b = wr_mux->getPort(ID::B);
			bool in_b = false;
			for (int j = 0; j < GetSize(wr_b); j++)
				if (sigmap(wr_b[j]) == fb_y) { in_b = true; break; }
			if (!in_b) {
				full_collapse = false;
				break;
			}

			// ... and it must be the same wide $mux for every bit.
			SigBit this_wr_en = sigmap(wr_mux->getPort(ID::S)[0]);
			if (top_wr_mux == nullptr) {
				top_wr_mux = wr_mux;
				wr_en_sig = this_wr_en;
			} else if (top_wr_mux != wr_mux) {
				full_collapse = false;
				break;
			}

			fb_info[i] = {fb_mux, and_gate, q_bit};
		}
	}

	// Lookup: sigmapped A port -> $reduce_or cell. Rebuilt from the live cell
	// list on every call, so it never refers to cells removed earlier.
	dict<SigSpec, Cell *> reduce_or_map;
	for (auto cell : module->cells()) {
		if (cell->type != ID($reduce_or))
			continue;
		SigSpec a = sigmap(cell->getPort(ID::A));
		reduce_or_map[a] = cell;
	}

	if (full_collapse) {
		log(" full feedback collapse: %d lanes, wr_en mux %s\n",
				lane_count, log_id(top_wr_mux->name));

		// Removal is deferred until all lanes are built so the indices stay
		// valid while the consumer counts below are inspected.
		pool<Cell *> cells_to_remove;

		for (int L = 0; L < lane_count; L++) {
			SigSpec data_lane, q_lane, fb_y_lane;

			for (int b = 0; b < W; b++) {
				const int i = L * W + b;
				Cell *pmux_cell = candidates[group_start + i].cell;
				SigSpec cell_b = pmux_cell->getPort(ID::B);
				data_lane.append(cell_b[W - 1 - b]);
				q_lane.append(fb_info[i].q_bit);
				fb_y_lane.append(fb_info[i].feedback_mux->getPort(ID::Y));

				cells_to_remove.insert(pmux_cell);
				cells_to_remove.insert(fb_info[i].feedback_mux);
				if (fb_info[i].and_gate) {
					// Only drop the enable gate when the feedback mux was
					// its single reader.
					SigBit and_y = sigmap(fb_info[i].and_gate->getPort(ID::Y)[0]);
					auto ac = bit_consumers.find(and_y);
					if (ac != bit_consumers.end() && ac->second.size() == 1)
						cells_to_remove.insert(fb_info[i].and_gate);
				}

				SigSpec pmux_s = sigmap(pmux_cell->getPort(ID::S));
				auto it = reduce_or_map.find(pmux_s);
				if (it != reduce_or_map.end()) {
					// Keep the enable's output driven for any reader that
					// survives the collapse, exactly as the fallback does.
					redrive_reduce_or(it->second, lane_en[L]);
					cells_to_remove.insert(it->second);
					reduce_or_map.erase(it);
					reduce_or_replaced++;
				}
				pmux_replaced++;
			}

			Wire *gated_w = module->addWire(NEW_ID_SUFFIX("vps_wr_lane_en"), 1);
			module->addAnd(NEW_ID_SUFFIX("vps_wr_lane_and"),
					SigSpec(wr_en_sig), SigSpec(lane_en[L]),
					SigSpec(gated_w));

			Cell *lane_mux = module->addMux(
					NEW_ID_SUFFIX("vps_lane_mux"),
					q_lane, data_lane, SigBit(gated_w), fb_y_lane);
			lane_mux->add_strpool_attribute(ID::src,
					candidates[group_start + L * W].cell->get_strpool_attribute(ID::src));
		}

		for (auto c : cells_to_remove)
			retire_cell(c);

		// Remove redundant top-level wr_en mux if all its B-port
		// bits are now driven by the per-lane muxes.
		if (N == top_wr_mux->getParam(ID::WIDTH).as_int()) {
			SigSpec wr_y = top_wr_mux->getPort(ID::Y);
			SigSpec wr_b = top_wr_mux->getPort(ID::B);
			module->connect(wr_y, wr_b);
			// Log first: the cell (and its name) is gone after removal.
			log(" removed redundant top-level wr_en mux %s\n",
					log_id(top_wr_mux->name));
			retire_cell(top_wr_mux);
		}

		feedback_collapsed += N;
	} else {
		// Fallback: per-bit $mux replacement
		for (int i = 0; i < N; i++) {
			Cell *pmux_cell = candidates[group_start + i].cell;
			const int L = i / W;
			const int b = i % W;

			SigSpec cell_b = pmux_cell->getPort(ID::B);
			SigBit data_bit = cell_b[W - 1 - b];
			SigSpec sig_y = pmux_cell->getPort(ID::Y);

			Cell *mux = module->addMux(NEW_ID_SUFFIX("vps_mux"),
					State::S0, data_bit, lane_en[L], sig_y);
			mux->add_strpool_attribute(ID::src,
					pmux_cell->get_strpool_attribute(ID::src));

			SigSpec pmux_s = sigmap(pmux_cell->getPort(ID::S));
			auto it = reduce_or_map.find(pmux_s);
			if (it != reduce_or_map.end()) {
				Cell *ror = it->second;
				redrive_reduce_or(ror, lane_en[L]);
				reduce_or_map.erase(it);
				retire_cell(ror);
				reduce_or_replaced++;
			}

			retire_cell(pmux_cell);
			pmux_replaced++;
		}
	}

	groups_optimized++;
}
|
||||
};
|
||||
|
||||
struct OptVpsPass : public Pass {
|
||||
OptVpsPass() : Pass("opt_vps", "optimize Verific variable-part-select patterns") {}
|
||||
void help() override
|
||||
{
|
||||
// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
|
||||
log("\n");
|
||||
log(" opt_vps [options] [selection]\n");
|
||||
log("\n");
|
||||
log("Detect variable-part-select (VPS) write patterns generated by Verific\n");
|
||||
log("and replace the per-bit sliding-window $pmux cells with per-lane\n");
|
||||
log("enable logic and direct data wiring.\n");
|
||||
log("\n");
|
||||
log("Verific lowers VPS writes like `reg[idx -: W] <= data` into a\n");
|
||||
log("bit-granularity decoder ($shl with A=1) followed by overflow-gated\n");
|
||||
log("AND gates and N sliding-window one-hot $pmux cells (one per output\n");
|
||||
log("bit, each with S_WIDTH=W). This structure has O(N*W) gates after\n");
|
||||
log("pmuxtree expansion.\n");
|
||||
log("\n");
|
||||
log("This pass recovers the lane structure and replaces each W-entry\n");
|
||||
log("$pmux with a single 2:1 $mux gated by a shared per-lane enable,\n");
|
||||
log("reducing the gate count to O(N + N/W).\n");
|
||||
log("\n");
|
||||
log("The pass also replaces per-bit $reduce_or enable cells with the\n");
|
||||
log("shared lane enable signal.\n");
|
||||
log("\n");
|
||||
log(" -min_stride <n>\n");
|
||||
log(" Minimum stride (S_WIDTH of the $pmux cells) to consider.\n");
|
||||
log(" Default: 4.\n");
|
||||
log("\n");
|
||||
}
|
||||
void execute(std::vector<std::string> args, RTLIL::Design *design) override
|
||||
{
|
||||
int min_stride = 4;
|
||||
|
||||
log_header(design, "Executing OPT_VPS pass (optimize Verific VPS patterns).\n");
|
||||
|
||||
size_t argidx;
|
||||
for (argidx = 1; argidx < args.size(); argidx++) {
|
||||
if (args[argidx] == "-min_stride" && argidx + 1 < args.size()) {
|
||||
min_stride = std::stoi(args[++argidx]);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
extra_args(args, argidx, design);
|
||||
|
||||
int total_groups = 0, total_pmux = 0, total_ror = 0, total_fb = 0;
|
||||
|
||||
for (auto module : design->selected_modules()) {
|
||||
if (module->has_processes_warn())
|
||||
continue;
|
||||
|
||||
OptVpsWorker worker(module, min_stride);
|
||||
worker.run();
|
||||
|
||||
if (worker.groups_optimized > 0)
|
||||
log(" Module %s: %d VPS group(s), %d $pmux replaced, "
|
||||
"%d $reduce_or replaced, %d feedback collapsed.\n",
|
||||
log_id(module->name), worker.groups_optimized,
|
||||
worker.pmux_replaced, worker.reduce_or_replaced,
|
||||
worker.feedback_collapsed);
|
||||
|
||||
total_groups += worker.groups_optimized;
|
||||
total_pmux += worker.pmux_replaced;
|
||||
total_ror += worker.reduce_or_replaced;
|
||||
total_fb += worker.feedback_collapsed;
|
||||
}
|
||||
|
||||
log("Optimized %d VPS group(s), %d $pmux replaced, "
|
||||
"%d $reduce_or replaced, %d feedback collapsed.\n",
|
||||
total_groups, total_pmux, total_ror, total_fb);
|
||||
}
|
||||
} OptVpsPass;
|
||||
|
||||
PRIVATE_NAMESPACE_END
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
# =============================================================================
# Test 1: SAT equivalence -- VPS byte-write vs case-statement reference
# Checks that the opt_vps result is logically equivalent to a hand-written
# case-statement version of the same 32-bit register with 4 byte lanes.
# =============================================================================
log -header "SAT equivalence: byte-write VPS vs case-statement ref"
log -push
design -reset
verific -cfg veri_optimize_wide_selector 1
verific -cfg db_infer_wide_muxes_post_elaboration 0

# gate: the VPS design after opt_vps
read -sv opt_vps_byte_write.sv
verific -import opt_vps_byte_write
proc
opt_clean
opt_vps
opt_clean
rename opt_vps_byte_write gate

# gold: the reference design (same module name, hence the renames)
read -sv opt_vps_byte_write_ref.sv
verific -import opt_vps_byte_write
proc
opt_clean
rename opt_vps_byte_write gold

# Prove gold == gate
miter -equiv -flatten -make_assert gold gate miter
hierarchy -top miter
proc
opt
memory
opt
clk2fflogic
sat -set-init-zero -tempinduct -prove-asserts -verify
design -reset
log -pop
|
||||
|
||||
# =============================================================================
# Test 2: SAT self-equivalence -- byte-write before vs after opt_vps
# Checks that opt_vps does not change the functional behavior.
# =============================================================================
log -header "SAT self-equivalence: byte-write before vs after opt_vps"
log -push
design -reset
verific -cfg veri_optimize_wide_selector 1
verific -cfg db_infer_wide_muxes_post_elaboration 0

# gold: the design without opt_vps
read -sv opt_vps_byte_write.sv
verific -import opt_vps_byte_write
proc
opt_clean
rename opt_vps_byte_write gold

# gate: the same design with opt_vps applied
read -sv opt_vps_byte_write.sv
verific -import opt_vps_byte_write
proc
opt_clean
opt_vps
opt_clean
rename opt_vps_byte_write gate

# Prove gold == gate
miter -equiv -flatten -make_assert gold gate miter
hierarchy -top miter
proc
opt
memory
opt
clk2fflogic
sat -set-init-zero -tempinduct -prove-asserts -verify
design -reset
log -pop
|
||||
|
||||
# =============================================================================
# Test 3: SAT self-equivalence -- wide (128-bit, 16-bit lanes)
# Same check as the byte-write self-equivalence test, on a larger design
# with 8 lanes.
# =============================================================================
log -header "SAT self-equivalence: wide 128-bit VPS"
log -push
design -reset
verific -cfg veri_optimize_wide_selector 1
verific -cfg db_infer_wide_muxes_post_elaboration 0

# gold: the design without opt_vps
read -sv opt_vps_wide.sv
verific -import opt_vps_wide
proc
opt_clean
rename opt_vps_wide gold

# gate: the same design with opt_vps applied
read -sv opt_vps_wide.sv
verific -import opt_vps_wide
proc
opt_clean
opt_vps
opt_clean
rename opt_vps_wide gate

# Prove gold == gate
miter -equiv -flatten -make_assert gold gate miter
hierarchy -top miter
proc
opt
memory
opt
clk2fflogic
sat -set-init-zero -tempinduct -prove-asserts -verify
design -reset
log -pop
|
||||
|
||||
# =============================================================================
# Test 4: Cell count verification -- byte-write
# After opt_vps no $pmux or $reduce_or cell may remain; the design must
# consist of four $eq, four $and and four $mux cells (one of each per lane)
# plus the single $dff.
# =============================================================================
log -header "Cell counts: byte-write post-opt_vps"
log -push
design -reset
verific -cfg veri_optimize_wide_selector 1
verific -cfg db_infer_wide_muxes_post_elaboration 0

read -sv opt_vps_byte_write.sv
verific -import opt_vps_byte_write
proc
opt_clean
opt_vps
opt_clean

# Old structure gone
select -assert-none t:$pmux
select -assert-none t:$reduce_or
# New per-lane structure present
select -assert-count 4 t:$eq
select -assert-count 4 t:$and
select -assert-count 4 t:$mux
select -assert-count 1 t:$dff
design -reset
log -pop
|
||||
|
||||
# =============================================================================
# Test 5: Cell count verification -- wide
# As the byte-write cell count test, for the 128-bit / 8-lane design; only the
# absence of $pmux / $reduce_or and the single $dff are asserted here.
# =============================================================================
log -header "Cell counts: wide post-opt_vps"
log -push
design -reset
verific -cfg veri_optimize_wide_selector 1
verific -cfg db_infer_wide_muxes_post_elaboration 0

read -sv opt_vps_wide.sv
verific -import opt_vps_wide
proc
opt_clean
opt_vps
opt_clean

select -assert-none t:$pmux
select -assert-none t:$reduce_or
select -assert-count 1 t:$dff
design -reset
log -pop
|
||||
|
||||
# =============================================================================
# Test 6: Negative case -- no VPS pattern
# A plain mux-based register must be left alone by opt_vps.
# =============================================================================
log -header "Negative: non-VPS design unchanged"
log -push
design -reset
verific -cfg veri_optimize_wide_selector 1
verific -cfg db_infer_wide_muxes_post_elaboration 0

read -sv opt_vps_no_match.sv
verific -import opt_vps_no_match
proc
opt_clean

# Statistics before and after, for the log
stat
opt_vps
stat

select -assert-none t:$pmux
# Neither an $eq cell nor any wire with "vps" in its name may exist
select -assert-none t:$eq w:*vps*
select -assert-count 1 t:$mux
select -assert-count 1 t:$dff
design -reset
log -pop
|
||||
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
// Test fixture: 32-bit register written one byte lane at a time.
// The lane is chosen by a 2-bit selector through a variable part-select
// (VPS), which is the construct opt_vps is meant to recognise.
module opt_vps_byte_write (
    input  logic        clk,
    input  logic        wr_en,
    input  logic [1:0]  lane,
    input  logic [7:0]  wdata,
    output logic [31:0] q
);
    logic [31:0] reg_data;

    // On a write, replace the 8 bits ending at the top bit of the lane.
    always_ff @(posedge clk) begin
        if (wr_en)
            reg_data[((lane + 1) * 8) - 1 -: 8] <= wdata;
    end

    assign q = reg_data;
endmodule
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
// Reference fixture: the byte-lane register written WITHOUT a variable
// part-select; each lane is spelled out as its own case item.
// The module name matches the VPS fixture on purpose: the test script
// imports it under that name and then renames it.
module opt_vps_byte_write (
    input  logic        clk,
    input  logic        wr_en,
    input  logic [1:0]  lane,
    input  logic [7:0]  wdata,
    output logic [31:0] q
);
    logic [31:0] reg_data;

    always_ff @(posedge clk) begin
        if (wr_en)
            case (lane)
                2'd0: reg_data[ 7: 0] <= wdata;
                2'd1: reg_data[15: 8] <= wdata;
                2'd2: reg_data[23:16] <= wdata;
                2'd3: reg_data[31:24] <= wdata;
            endcase
    end

    assign q = reg_data;
endmodule
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
// Negative fixture: an 8-bit register fed by a plain 2:1 selection.
// There is no variable part-select here, so opt_vps must not fire.
module opt_vps_no_match (
    input  logic       clk,
    input  logic       sel,
    input  logic [7:0] a, b,
    output logic [7:0] q
);
    logic [7:0] reg_data;

    always_ff @(posedge clk) begin
        reg_data <= sel ? a : b;
    end

    assign q = reg_data;
endmodule
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
// Test fixture: 128-bit register written one 16-bit lane at a time.
// The lane is chosen by a 3-bit selector through a variable part-select
// (VPS); this is the wider counterpart of the byte-write fixture.
module opt_vps_wide (
    input  logic         clk,
    input  logic         wr_en,
    input  logic [2:0]   lane,
    input  logic [15:0]  wdata,
    output logic [127:0] q
);
    logic [127:0] reg_data;

    // On a write, replace the 16 bits ending at the top bit of the lane.
    always_ff @(posedge clk) begin
        if (wr_en)
            reg_data[((lane + 1) * 16) - 1 -: 16] <= wdata;
    end

    assign q = reg_data;
endmodule
|
||||
Loading…
Reference in New Issue