yosys/passes/silimate/opt_timing_balance.cc

1467 lines
44 KiB
C++

/*
* yosys -- Yosys Open SYnthesis Suite
*
* Copyright (C) 2012 Claire Xenia Wolf <claire@yosyshq.com>
* 2026 Abhinav Tondapu <abhinav@silimate.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include "kernel/yosys.h"
#include "kernel/sigtools.h"
#include "kernel/celltypes.h"
#include "kernel/utils.h"
#include <algorithm>
#include <cmath>
#include <cstring>
#include <deque>
#include <limits>
#include <queue>
#include <tuple>
#include <vector>
USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
/* Invariants:
* - Operates on internal word cells ($add/$and/$or/$xor) pre-techmap
* - Connectivity and timing keys use sigmap-mapped signals
* - Rewiring uses original head Y bits to avoid alias drift
* - Disjoint clusters are rewritten per sweep, clean/rebuild happens per iteration
*/
// -----------------------------------------------------------------------------
// Shared constants, helpers, and traits
// -----------------------------------------------------------------------------
// Fallback per-cell delay; also the fixed delay assigned to $xor, $xnor and $mux
static constexpr double kDelayDefault = 1.0;
// Fixed delay assigned to $and, $or, $logic_and and $logic_or
static constexpr double kDelayLogic = 0.5;
// Minimum drop in summed arrival that objective_improved() accepts as progress
static constexpr double kMinIterationDelta = 1e-3;
// NOTE(review): presumably the cap on outer pass iterations - the loop using it is not in this part of the file
static constexpr int kMaxPassIterations = 10;
// Initial capacity reserved for the explicit DFS stacks
static constexpr int kTraversalStackReserve = 256;
// Boolean attribute set on every cell created by TreeEmitter
static const IdString kAttrTimingBalanceGenerated = "\\timing_balance_generated";
// Returns the identifier produced by NEW_ID2_SUFFIX(suffix) with `anchor` bound as the macro's `cell`
// NOTE(review): presumably a fresh name derived from the anchor's name - the macro is defined outside this file
static IdString make_id(Cell *anchor, const char *suffix)
{
// NEW_ID2_SUFFIX relies on a local variable named `cell`
Cell *cell = anchor;
return NEW_ID2_SUFFIX(suffix);
}
// log2(n + 1), evaluated in double precision
static inline double log2p1_int(int n)
{
	const double shifted = static_cast<double>(n) + 1.0;
	return std::log2(shifted);
}
// Output width of `cell`, never less than 1.
// Preference order: Y_WIDTH parameter, then size of port Y, then the widest output port.
static int cell_y_width(const Cell *cell)
{
	log_assert(cell != nullptr);
	int width = 0;
	if (cell->hasParam(ID::Y_WIDTH)) {
		width = cell->getParam(ID::Y_WIDTH).as_int();
	} else if (cell->hasPort(ID::Y)) {
		width = GetSize(cell->getPort(ID::Y));
	} else {
		// TimingOracle may ask about drivers that are not balance targets and have no Y port
		for (const auto &conn : cell->connections())
			if (cell->output(conn.first))
				width = std::max(width, GetSize(conn.second));
	}
	return std::max(1, width);
}
// Group a supported cell type belongs to; collect_target_cell_ids() enables each group separately
enum class BalanceCategory {
Logic,
Arith
};
// How natural_output_width() derives a result width from the two input widths
enum class WidthRule {
// max(a_width, b_width)
MaxInput,
// max(a_width, b_width) + 1
AddCarry
};
// Delay model selected by estimate_cell_delay() for a registered cell type
enum class DelayHeuristicKind {
// Use SupportedCellSpec::fixed_delay
Fixed,
// Use log2(width + 1)
AddLike
};
// Node colouring used by the explicit-stack DFS traversals in this file
enum class TraversalState : int {
// Not expanded yet
Unseen = 0,
// Expanded, finalize step still pending
Active = 1,
// Finalized
Done = 2
};
// Per-cell balancing traits and delay heuristic policy
struct SupportedCellSpec
{
// Cell type this entry describes
IdString type;
// Logic or Arith group, used to enable or disable the type
BalanceCategory category;
// When set, collect_cluster() only merges drivers whose Y width equals the head's
bool requires_strict_width_match = false;
// When set, cells need A_SIGNED/B_SIGNED and collect_cluster() requires them to agree with the head
bool requires_matching_signedness = false;
// Rule passed to natural_output_width()
WidthRule width_rule = WidthRule::MaxInput;
// Delay model used by estimate_cell_delay()
DelayHeuristicKind delay_kind = DelayHeuristicKind::Fixed;
// Delay returned when delay_kind is Fixed
double fixed_delay = 0.0;
};
// Registry for balance targets and their delay/width behavior
// Adding a new associative target should only require editing this table
// Returns the function-local static table of supported cell types.
// Column order follows SupportedCellSpec: type, category, strict width, matching signedness,
// width rule, delay kind, fixed delay.
static const std::vector<SupportedCellSpec> &supported_cell_registry()
{
static const std::vector<SupportedCellSpec> specs = {
{ID($and), BalanceCategory::Logic, false, false, WidthRule::MaxInput, DelayHeuristicKind::Fixed, kDelayLogic},
{ID($or), BalanceCategory::Logic, false, false, WidthRule::MaxInput, DelayHeuristicKind::Fixed, kDelayLogic},
{ID($xor), BalanceCategory::Logic, false, false, WidthRule::MaxInput, DelayHeuristicKind::Fixed, kDelayDefault},
// fixed_delay is unused for AddLike entries
{ID($add), BalanceCategory::Arith, true, true, WidthRule::AddCarry, DelayHeuristicKind::AddLike, 0.0},
};
return specs;
}
// Type -> registry entry index, built once on first use.
// Values point into the static vector owned by supported_cell_registry().
static const dict<IdString, const SupportedCellSpec*> &supported_cell_registry_map()
{
	static const dict<IdString, const SupportedCellSpec*> index = [] {
		dict<IdString, const SupportedCellSpec*> lookup;
		const std::vector<SupportedCellSpec> &specs = supported_cell_registry();
		for (size_t i = 0; i < specs.size(); ++i)
			lookup[specs[i].type] = &specs[i];
		return lookup;
	}();
	return index;
}
// Registry entry for `type`, or nullptr when the type is not a balance target
static const SupportedCellSpec *get_supported_cell_spec(IdString type)
{
	const auto &index = supported_cell_registry_map();
	auto hit = index.find(type);
	return hit != index.end() ? hit->second : nullptr;
}
static std::vector<IdString> collect_target_cell_ids(bool enable_logic, bool enable_arith)
{
std::vector<IdString> ids;
for (const auto &spec : supported_cell_registry())
{
bool enabled_category = (spec.category == BalanceCategory::Logic) ? enable_logic : enable_arith;
if (!enabled_category)
continue;
ids.push_back(spec.type);
}
return ids;
}
// Ordering for SigBits: constants first (by state value), then wire bits by wire name and offset
static bool less_sigbit_key(const SigBit &a, const SigBit &b)
{
	const bool a_is_wire = a.wire != nullptr;
	const bool b_is_wire = b.wire != nullptr;
	// Exactly one side is a constant: the constant sorts first
	if (a_is_wire != b_is_wire)
		return b_is_wire;
	if (!a_is_wire)
		return static_cast<int>(a.data) < static_cast<int>(b.data);
	if (a.wire->name == b.wire->name)
		return a.offset < b.offset;
	return std::strcmp(a.wire->name.c_str(), b.wire->name.c_str()) < 0;
}
static bool less_sigspec_key(const SigSpec &a, const SigSpec &b)
{
if (GetSize(a) != GetSize(b))
return GetSize(a) < GetSize(b);
int n = GetSize(a);
for (int i = 0; i < n; i++) {
const SigBit &ab = a[i];
const SigBit &bb = b[i];
if (ab == bb)
continue;
return less_sigbit_key(ab, bb);
}
return false;
}
// For supported ops here, result signedness is true only when both inputs are signed
static constexpr bool yosys_binary_result_signed(bool a_signed, bool b_signed)
{
	// Signed only when both operands are signed
	return a_signed ? b_signed : false;
}
// Fixed delays for cell types that are not covered by an earlier rule in estimate_cell_delay().
// The table is a function-local static, built on first call.
static const dict<IdString, double> &fixed_delay_table()
{
static const auto table = dict<IdString, double>{
// Inverters and buffers are modelled as free
{ID($not), 0.0},
{ID($pos), 0.0},
{ID($logic_not), 0.0},
{ID($and), kDelayLogic},
{ID($or), kDelayLogic},
{ID($xor), kDelayDefault},
{ID($xnor), kDelayDefault},
{ID($logic_and), kDelayLogic},
{ID($logic_or), kDelayLogic},
{ID($mux), kDelayDefault},
};
return table;
}
// True when timing traversal must stop at `cell`: null cells, keep/blackbox cells, flip-flops,
// latches, memories, formal/simulation cells, and any type unknown to `cell_types`.
static bool is_timing_boundary_cell(Cell *cell, const CellTypes &cell_types)
{
	if (cell == nullptr)
		return true;
	// User-pinned cells and flip-flops
	const bool user_pinned = cell->get_bool_attribute(ID::keep) || cell->get_bool_attribute(ID::blackbox);
	if (user_pinned || cell->is_builtin_ff())
		return true;
	// Latches, memories, and formal/simulation cells
	const bool stateful_or_formal = cell->type.in(
		ID($dlatch), ID($adlatch), ID($dlatchsr),
		ID($mem), ID($mem_v2), ID($memrd), ID($memrd_v2), ID($memwr), ID($memwr_v2), ID($meminit), ID($meminit_v2),
		ID($anyconst), ID($anyseq), ID($allconst), ID($allseq), ID($equiv),
		ID($assert), ID($assume), ID($cover), ID($check), ID($print)
	);
	// Anything else is a boundary only when it is a macro or unknown cell
	return stateful_or_formal || !cell_types.cell_known(cell->type);
}
static double estimate_cell_delay(const Cell *cell, int out_width)
{
if (cell == nullptr)
return kDelayDefault;
IdString type = cell->type;
int width = out_width;
const auto &by_type = supported_cell_registry_map();
auto reg_it = by_type.find(type);
if (reg_it != by_type.end()) {
const SupportedCellSpec *spec = reg_it->second;
switch (spec->delay_kind)
{
case DelayHeuristicKind::Fixed:
return spec->fixed_delay;
case DelayHeuristicKind::AddLike:
return log2p1_int(width);
}
}
if (type == ID($pmux)) {
int s_width = 1;
if (cell->hasParam(ID::S_WIDTH))
s_width = cell->getParam(ID::S_WIDTH).as_int();
return log2p1_int(s_width);
}
if (type.in(ID($add), ID($sub), ID($neg), ID($alu)))
return log2p1_int(width);
if (type.in(ID($mul), ID($div), ID($mod)))
return width;
if (type.in(ID($shl), ID($shr), ID($sshl), ID($sshr)))
return log2p1_int(width);
const auto &fixed = fixed_delay_table();
auto it = fixed.find(type);
if (it != fixed.end())
return it->second;
return kDelayDefault;
}
// -----------------------------------------------------------------------------
// Analysis: connectivity and timing oracle
// -----------------------------------------------------------------------------
struct ConnectivitySnapshot
{
// One-sweep structural connectivity view
dict<SigBit, Cell*> unique_driver_by_bit;
SigSet<Cell*> sinks_by_bit;
pool<SigBit> output_port_bits;
ConnectivitySnapshot() = default;
ConnectivitySnapshot(Module *module, SigMap &sigmap) { build(module, sigmap); }
void build(Module *module, SigMap &sigmap)
{
unique_driver_by_bit.clear();
sinks_by_bit.clear();
output_port_bits.clear();
// Full-module view keeps fanout checks selection-safe
for (Cell *cell : module->cells()) {
for (const auto &[port_id, sig] : cell->connections()) {
SigSpec mapped = sigmap(sig);
if (cell->output(port_id)) {
for (auto bit : mapped) {
if (!bit.wire)
continue;
auto [it, inserted] = unique_driver_by_bit.emplace(bit, cell);
if (!inserted && it->second != cell)
it->second = nullptr;
}
}
if (cell->input(port_id))
sinks_by_bit.insert(mapped, cell);
}
}
// Output ports mark head boundaries. Input boundaries are handled in TimingOracle
for (auto wire : module->wires()) {
if (wire->port_output) {
for (auto bit : sigmap(wire))
output_port_bits.insert(bit);
}
}
}
Cell *get_unique_driver_mapped(const SigSpec &sig) const
{
// Caller passes sigmap-mapped signal slices
Cell *driver = nullptr;
for (auto bit : sig)
{
if (!bit.wire)
return nullptr;
auto it = unique_driver_by_bit.find(bit);
if (it == unique_driver_by_bit.end() || it->second == nullptr)
return nullptr;
if (driver == nullptr)
driver = it->second;
else if (driver != it->second)
return nullptr;
}
return driver;
}
void collect_sinks_mapped(const SigSpec &mapped_sig, pool<Cell*> &sinks)
{
// SigSet::find() is non-const in current Yosys API
sinks.clear();
sinks_by_bit.find(mapped_sig, sinks);
}
};
struct TimingOracle
{
// Lazy backward arrival estimator over the current connectivity snapshot
// Unknown or boundary drivers return 0.0, combinational cycles return +inf
const CellTypes &cell_types;
SigMap &sigmap;
// Non-owning pointer to the snapshot's driver map; replaced by rebind_driver_map()
const dict<SigBit, Cell*> *driver_map;
// Memoized arrival per mapped bit
dict<SigBit, double> arrival_cache;
// DFS colouring per mapped bit; bits absent from the map count as Unseen
dict<SigBit, TraversalState> visit_state;
struct StackEntry {
SigBit bit;
// false: expand dependencies, true: finalize after children
bool finalize_phase = false;
};
// Set when a traversal meets an input that is still Active or unresolved; reset by get_arrival()
bool cycle_detected = false;
TimingOracle(const CellTypes &cell_types, SigMap &sigmap,
const dict<SigBit, Cell*> &driver_map) :
cell_types(cell_types), sigmap(sigmap), driver_map(&driver_map) { }
// Drops every memoized arrival and visit state
void clear_timing_cache()
{
arrival_cache.clear();
visit_state.clear();
cycle_detected = false;
}
// Points the oracle at a new driver map and invalidates all cached results
void rebind_driver_map(const dict<SigBit, Cell*> &new_driver_map)
{
driver_map = &new_driver_map;
clear_timing_cache();
}
// Stores the final arrival for a wire bit and marks it Done; constant bits are ignored
void cache_final_value(SigBit bit, double arrival)
{
if (!bit.wire)
return;
bit = sigmap(bit);
arrival_cache[bit] = arrival;
visit_state[bit] = TraversalState::Done;
}
// Visit state of `bit`, Unseen when it has no entry
TraversalState get_visit_state(SigBit bit) const
{
if (auto it = visit_state.find(bit); it != visit_state.end())
return it->second;
return TraversalState::Unseen;
}
void set_visit_state(SigBit bit, TraversalState state)
{
visit_state[bit] = state;
}
// Latest arrival over all bits of `sig`, at least 0.0; clears cycle_detected before evaluating
double get_arrival(const SigSpec &sig)
{
cycle_detected = false;
double t = 0.0;
for (auto bit : sigmap(sig))
t = std::max(t, get_arrival_noguard(bit));
return t;
}
private:
/*
* Two-phase DFS avoids recursion,
* finalize_phase = false expands inputs, true computes and caches node arrival
* Active marks the current path, unresolved inputs during finalize are treated as cycles with +inf
*/
double get_arrival_noguard(SigBit bit)
{
SigBit start = sigmap(bit);
// Constants arrive at time zero
if (!start.wire)
return 0.0;
if (auto it = arrival_cache.find(start); it != arrival_cache.end())
return it->second;
// Local stack keeps traversal state scoped to one query
std::vector<StackEntry> eval_stack;
eval_stack.reserve(kTraversalStackReserve);
eval_stack.push_back({start, false});
while (!eval_stack.empty())
{
StackEntry e = std::move(eval_stack.back());
eval_stack.pop_back();
SigBit curr = e.bit;
if (!curr.wire)
continue;
// A bit may be queued more than once; later copies find it already cached
if (arrival_cache.count(curr))
continue;
// Bits on module input wires start at time zero
if (curr.wire->port_input) {
cache_final_value(curr, 0.0);
continue;
}
Cell *driver = nullptr;
if (auto it_drv = driver_map->find(curr); it_drv != driver_map->end())
driver = it_drv->second;
// Undriven or multiply driven bits and boundary cells also start at time zero
if (driver == nullptr || is_timing_boundary_cell(driver, cell_types)) {
cache_final_value(curr, 0.0);
continue;
}
TraversalState state = get_visit_state(curr);
if (!e.finalize_phase)
{
if (state == TraversalState::Done)
continue;
if (state == TraversalState::Active) {
// Node already on current path, skip duplicate expansion
continue;
}
set_visit_state(curr, TraversalState::Active);
// The finalize entry is pushed first so it pops after all inputs pushed below
eval_stack.push_back({curr, true});
for (const auto &[port_id, sig] : driver->connections()) {
if (!driver->input(port_id))
continue;
for (auto in_bit : sigmap(sig)) {
if (!in_bit.wire || arrival_cache.count(in_bit))
continue;
// An input that is still Active closes a loop; it is not expanded again
if (get_visit_state(in_bit) == TraversalState::Active) {
cycle_detected = true;
continue;
}
eval_stack.push_back({in_bit, false});
}
}
continue;
}
// Finalize: arrival is the latest input arrival plus the driver's estimated delay
double max_input = 0.0;
for (const auto &[port_id, sig] : driver->connections()) {
if (!driver->input(port_id))
continue;
for (auto in_bit : sigmap(sig)) {
double in_arrival = 0.0;
if (in_bit.wire) {
auto it = arrival_cache.find(in_bit);
if (it != arrival_cache.end())
in_arrival = it->second;
else {
// Missing child arrival at finalize implies combinational cycle
cycle_detected = true;
in_arrival = std::numeric_limits<double>::infinity();
}
}
max_input = std::max(max_input, in_arrival);
}
}
double cell_delay = estimate_cell_delay(driver, cell_y_width(driver));
double t = max_input + cell_delay;
cache_final_value(curr, t);
}
auto it = arrival_cache.find(start);
// Falls back to 0.0 if the start bit was never finalized
return it != arrival_cache.end() ? it->second : 0.0;
}
};
// -----------------------------------------------------------------------------
// Rewrite planning and emission
// -----------------------------------------------------------------------------
// Full-precision result width: the widest input, plus one carry bit under AddCarry
static int natural_output_width(WidthRule width_rule, int a_width, int b_width)
{
	const int widest_input = std::max(a_width, b_width);
	return width_rule == WidthRule::AddCarry ? widest_input + 1 : widest_input;
}
// Smallest Y width a cell may have and still be accepted as a balance target
static int minimum_y_width_for_reassociation(WidthRule width_rule, int a_width, int b_width)
{
	// Adds are validated without the carry bit: reassociation is modulo 2^N
	const bool is_add = width_rule == WidthRule::AddCarry;
	return is_add ? std::max(a_width, b_width)
	              : natural_output_width(width_rule, a_width, b_width);
}
// One input of a tree to be rebuilt
struct TreeLeaf
{
// Signal feeding the tree
SigSpec signal;
// Estimated arrival time of the signal
double arrival_time = 0.0;
int width = 0;
// Signedness with which this use is consumed
bool is_signed = false;
// Tie-breaker that keeps planner ordering deterministic
int stable_id = 0;
};
// Width and signedness chosen for one two-input merge cell
struct MergeShape
{
int out_width = 1;
bool a_signed = false;
bool b_signed = false;
// Signedness of the merge result, as computed by yosys_binary_result_signed()
bool out_signed = false;
};
// One planned two-input cell; operands are node ids in the numbering described in TreePlan
struct PlannedMerge
{
// -1 means unset
int lhs_node = -1;
int rhs_node = -1;
MergeShape shape;
};
// Immutable plan produced by HuffmanPlanner and consumed by TreeEmitter
struct TreePlan
{
// Node ids are dense:
// - [0, leaves) are leaf nodes
// - [leaves, leaves+merges) are merge nodes in emission order
std::vector<TreeLeaf> leaves;
std::vector<PlannedMerge> merges;
// Node id of the tree output, -1 while the plan is empty or invalid
int root_node = -1;
// Estimated arrival time at the root
double output_arrival = 0.0;
// A plan is usable once a root has been assigned
bool valid() const { return root_node >= 0; }
int node_count() const { return GetSize(leaves) + GetSize(merges); }
};
// Computes merge order and expected arrival, does not mutate RTLIL
struct HuffmanPlanner
{
	// Queue element: a leaf or an already planned merge result
	struct PlanNode
	{
		int node_id = -1;
		double arrival_time = 0.0;
		int width = 0;
		bool is_signed = false;
		int stable_id = 0;
	};

	// Orders the priority queue so the earliest (arrival, width, stable_id) is on top
	struct PlanNodeCmp
	{
		bool operator()(const PlanNode &a, const PlanNode &b) const
		{
			// std::priority_queue is a max-heap, so "a sorts below b" means a is the later node
			return std::tie(b.arrival_time, b.width, b.stable_id) <
			       std::tie(a.arrival_time, a.width, a.stable_id);
		}
	};

	// Picks output width and signedness for merging `a` with `b`.
	// Only non-root AddCarry merges may be narrower than the target width.
	MergeShape compute_merge_shape(const TreeLeaf &a, const TreeLeaf &b,
		const SupportedCellSpec &spec, int target_out_width, bool force_root_width) const
	{
		MergeShape shape;
		shape.out_width = std::max(1, target_out_width);
		const bool may_narrow = !force_root_width && spec.width_rule == WidthRule::AddCarry;
		if (may_narrow) {
			const int natural = natural_output_width(spec.width_rule, a.width, b.width);
			shape.out_width = std::min(shape.out_width, natural);
		}
		shape.a_signed = a.is_signed;
		shape.b_signed = b.is_signed;
		shape.out_signed = yosys_binary_result_signed(shape.a_signed, shape.b_signed);
		return shape;
	}

	// Arrival of a merge output: the later input plus the estimated cell delay
	double compute_merge_arrival(double a_arrival, double b_arrival, int out_width, const Cell *delay_ref_cell) const
	{
		const double latest_input = std::max(a_arrival, b_arrival);
		return latest_input + estimate_cell_delay(delay_ref_cell, out_width);
	}

	// Plans the merge order for `leaves`; an empty leaf list or an unsupported cell type yields an
	// invalid plan. The caller is expected to pass leaves in a deterministic order.
	TreePlan plan(const std::vector<TreeLeaf> &leaves, IdString cell_type, Cell *reference_cell) const
	{
		TreePlan result;
		const int leaf_count = GetSize(leaves);
		if (leaf_count == 0)
			return result;
		result.leaves = leaves;
		if (leaf_count == 1) {
			// A single leaf is its own root, no cell needed
			result.root_node = 0;
			result.output_arrival = leaves.front().arrival_time;
			return result;
		}
		const SupportedCellSpec *spec = get_supported_cell_spec(cell_type);
		if (spec == nullptr)
			return TreePlan();

		const int target_out_width = std::max(1, cell_y_width(reference_cell));
		std::priority_queue<PlanNode, std::vector<PlanNode>, PlanNodeCmp> ready;
		for (int idx = 0; idx < leaf_count; ++idx) {
			const TreeLeaf &leaf = leaves[idx];
			ready.push({idx, leaf.arrival_time, leaf.width, leaf.is_signed, leaf.stable_id});
		}

		const auto as_leaf = [](const PlanNode &node) {
			TreeLeaf leaf;
			leaf.arrival_time = node.arrival_time;
			leaf.width = node.width;
			leaf.is_signed = node.is_signed;
			leaf.stable_id = node.stable_id;
			return leaf;
		};

		// Greedy merge of the two earliest nodes until one remains; the last merge is the root
		// and is forced to the target width so the head's output width is preserved
		while (ready.size() > 1) {
			const PlanNode first = ready.top();
			ready.pop();
			const PlanNode second = ready.top();
			ready.pop();
			const bool is_root_merge = ready.empty();
			const MergeShape shape = compute_merge_shape(as_leaf(first), as_leaf(second),
				*spec, target_out_width, is_root_merge);
			const double merged_arrival = compute_merge_arrival(first.arrival_time, second.arrival_time,
				shape.out_width, reference_cell);
			// Merge node ids and stable ids both continue densely after the leaves
			const int merged_id = leaf_count + GetSize(result.merges);
			result.merges.push_back({first.node_id, second.node_id, shape});
			ready.push({merged_id, merged_arrival, shape.out_width, shape.out_signed, merged_id});
		}

		log_assert(!ready.empty());
		result.root_node = ready.top().node_id;
		result.output_arrival = ready.top().arrival_time;
		return result;
	}
};
// TreeEmitter materializes a precomputed plan into RTLIL cells and wires
struct TreeEmitter
{
	Module *module;
	// Per-type count of emitted cells, owned by the caller
	dict<IdString, int> &cell_count;

	TreeEmitter(Module *module, dict<IdString, int> &cell_count) :
		module(module), cell_count(cell_count) { }

	// Creates one `cell_type` cell and one output wire per planned merge, in plan order, and
	// returns the signal of the plan's root node.
	// Returns an empty SigSpec for an invalid or empty plan, and the leaf signal itself for a
	// single-leaf plan (no cell is created in either case).
	// New cells carry kAttrTimingBalanceGenerated and the src attribute of `reference_cell`.
	SigSpec apply(const TreePlan &plan, IdString cell_type, Cell *reference_cell)
	{
		if (!plan.valid() || plan.leaves.empty())
			return {};
		if (GetSize(plan.leaves) == 1)
			return plan.leaves.front().signal;

		const int leaf_count = GetSize(plan.leaves);
		const int total_nodes = plan.node_count();
		std::vector<SigSpec> node_signals(total_nodes);
		for (int i = 0; i < leaf_count; i++)
			node_signals[i] = plan.leaves[i].signal;

		for (int merge_idx = 0; merge_idx < GetSize(plan.merges); merge_idx++)
		{
			const PlannedMerge &m = plan.merges[merge_idx];
			log_assert(m.lhs_node >= 0 && m.lhs_node < total_nodes);
			log_assert(m.rhs_node >= 0 && m.rhs_node < total_nodes);
			SigSpec a_sig = node_signals[m.lhs_node];
			SigSpec b_sig = node_signals[m.rhs_node];
			// Operands must already be materialized: leaves or earlier merges
			log_assert(GetSize(a_sig) > 0 && GetSize(b_sig) > 0);

			IdString new_cell_name = make_id(reference_cell, "timing_balance");
			Cell *new_cell = module->addCell(new_cell_name, cell_type);
			new_cell->set_bool_attribute(kAttrTimingBalanceGenerated);
			new_cell->set_src_attribute(reference_cell->get_src_attribute());

			IdString out_wire_name = make_id(reference_cell, "timing_balance_y");
			Wire *out_wire = module->addWire(out_wire_name, m.shape.out_width);
			new_cell->setPort(ID::A, a_sig);
			new_cell->setPort(ID::B, b_sig);
			new_cell->setPort(ID::Y, out_wire);

			// A cell fresh from addCell() has no parameters, so guarding on hasParam() would
			// skip these writes and fixup_parameters() would default both flags to unsigned.
			// Signedness is therefore written unconditionally from the plan.
			new_cell->setParam(ID::A_SIGNED, m.shape.a_signed);
			new_cell->setParam(ID::B_SIGNED, m.shape.b_signed);
			// Derives A_WIDTH, B_WIDTH and Y_WIDTH from the connected port sizes
			new_cell->fixup_parameters();

			node_signals[leaf_count + merge_idx] = SigSpec(out_wire);
			cell_count[cell_type]++;
		}

		log_assert(plan.root_node >= 0 && plan.root_node < total_nodes);
		return node_signals[plan.root_node];
	}
};
// -----------------------------------------------------------------------------
// Rewrite engine: cluster harvest, evaluation, and commit loop
// -----------------------------------------------------------------------------
// Harvested cluster plus external source multiset for one candidate head
// Result of collect_cluster() for one head cell
struct ClusterHarvest
{
// Track source multiplicity by signedness to preserve per-use semantics
// Number of times each external signal is read through a signed port
dict<SigSpec, int> signed_source_uses;
// Number of times each external signal is read through an unsigned port
dict<SigSpec, int> unsigned_source_uses;
// Head cell plus every driver merged into the cluster
pool<Cell*> cluster_cells;
};
// Worker contract:
// Finds heads for each target type, harvests and evaluates clusters, commits
// beneficial disjoint rewrites in-sweep, and rebuilds views between iterations
struct OptTimingBalanceWorker
{
// Counters for one sweep
// NOTE(review): only `rewrites` is updated in the visible part of the file (rewrite_one_head)
struct RewriteStats
{
int candidates = 0;
int trees = 0;
int rewrites = 0;
};
// Output of evaluate_rewrite(), consumed by commit_rewrite()
struct RewriteDecision
{
// NOTE(review): presumably the head's original Y signal to be rewired - confirm in evaluate_rewrite()
SigSpec head_output;
TreePlan plan;
};
// Scalar objective compared by objective_improved(); lower is better
struct ObjectiveScore
{
double sum_arrival = 0.0;
};
// Mutable state shared by all heads processed in one sweep
struct SweepContext
{
// Cells allowed to be merged into a cluster
pool<Cell*> candidate_cells;
// Cells belonging to clusters already rewritten in this sweep
pool<Cell*> consumed_cells;
RewriteStats stats;
// Memo for is_target_cell_type_cached()
dict<Cell*, bool> target_cache;
// Memo for mapped_y()
dict<Cell*, SigSpec> y_cache;
};
Design *design;
Module *module;
// Signal alias map of `module`, rebuilt by rebuild_views()
SigMap sigmap;
// Known cell types of `design`, used to recognise timing boundaries
CellTypes cell_types;
std::vector<IdString> target_cell_ids;
// Per-type count of emitted cells; declared before `emitter`, which keeps a reference to it
dict<IdString, int> cell_count;
HuffmanPlanner planner;
TreeEmitter emitter;
// Per cell type bitmask of warning codes already reported
dict<IdString, int> warned_contract_issues;
// Warning codes are distinct bits so they can be OR-ed into the mask above
static constexpr int warnRequiredPortsErrCode = 1;
static constexpr int warnRequiredWidthParamsErrCode = 2;
// Binds the worker to one module: builds the sigmap from `module`, the cell-type table from
// `design`, and an emitter that writes into this worker's `cell_count`
OptTimingBalanceWorker(Design *design, Module *module, const std::vector<IdString> &target_cell_ids) :
design(design), module(module), sigmap(module), cell_types(design), target_cell_ids(target_cell_ids),
planner(), emitter(module, cell_count)
{ }
// View lifecycle
// Refreshes every derived view after the module changed.
// Order matters: the snapshot is built from the new sigmap, and the timer is rebound to the
// rebuilt driver map, which also clears its cached arrivals.
void rebuild_views(ConnectivitySnapshot &graph, TimingOracle &timer)
{
sigmap = SigMap(module);
graph.build(module, sigmap);
timer.rebind_driver_map(graph.unique_driver_by_bit);
}
// Warnings and objective gate
// Emits a contract warning at most once per (cell type, error code) pair.
// Any code other than warnRequiredPortsErrCode produces the width-parameter message.
void warn_contract_once(IdString cell_type, int err_code)
{
	int &seen_mask = warned_contract_issues[cell_type];
	const bool already_reported = (seen_mask & err_code) != 0;
	if (already_reported)
		return;
	seen_mask |= err_code;

	if (err_code != warnRequiredPortsErrCode) {
		log_warning("opt_timing_balance: skipping %s cells without width parameters in module %s. "
			"Pass expects word-level RTL cells (run before gate-level techmapping).\n",
			log_id(cell_type), log_id(module));
		return;
	}
	log_warning("opt_timing_balance: skipping %s cells without A/B/Y ports in module %s.\n",
		log_id(cell_type), log_id(module));
}
// True when the summed arrival dropped by more than kMinIterationDelta.
// A non-finite "after" never counts as better; a finite "after" always beats a non-finite "before".
bool objective_improved(const ObjectiveScore &objective_before, const ObjectiveScore &objective_after) const
{
	const double before = objective_before.sum_arrival;
	const double after = objective_after.sum_arrival;
	if (!std::isfinite(after))
		return false;
	if (!std::isfinite(before))
		return true;
	// Gating on the sum alone can let a single worst path regress
	return after < before - kMinIterationDelta;
}
// Candidate and head predicates
// Checks that `cell` is a well-formed instance of `cell_type` that may take part in balancing.
// Missing A/B/Y ports or width parameters are reported once per type via warn_contract_once().
bool is_target_cell_type(Cell *cell, IdString cell_type, bool exclude_generated)
{
	if (cell == nullptr || cell->type != cell_type)
		return false;
	if (exclude_generated && cell->get_bool_attribute(kAttrTimingBalanceGenerated))
		return false;
	const SupportedCellSpec *spec = get_supported_cell_spec(cell_type);
	if (spec == nullptr)
		return false;

	const bool has_ports = cell->hasPort(ID::A) && cell->hasPort(ID::B) && cell->hasPort(ID::Y);
	if (!has_ports) {
		warn_contract_once(cell_type, warnRequiredPortsErrCode);
		return false;
	}
	const bool has_width_params =
		cell->hasParam(ID::Y_WIDTH) && cell->hasParam(ID::A_WIDTH) && cell->hasParam(ID::B_WIDTH);
	if (!has_width_params) {
		warn_contract_once(cell_type, warnRequiredWidthParamsErrCode);
		return false;
	}

	const int y_width = cell->getParam(ID::Y_WIDTH).as_int();
	const int a_width = cell->getParam(ID::A_WIDTH).as_int();
	const int b_width = cell->getParam(ID::B_WIDTH).as_int();
	if (std::min({y_width, a_width, b_width}) <= 0)
		return false;

	// Width parameters must agree with the connected port sizes
	const bool ports_match_params =
		GetSize(cell->getPort(ID::A)) == a_width &&
		GetSize(cell->getPort(ID::B)) == b_width &&
		GetSize(cell->getPort(ID::Y)) == y_width;
	if (!ports_match_params)
		return false;

	if (spec->requires_matching_signedness &&
			!(cell->hasParam(ID::A_SIGNED) && cell->hasParam(ID::B_SIGNED)))
		return false;

	return y_width >= minimum_y_width_for_reassociation(spec->width_rule, a_width, b_width);
}
// Memoized form of is_target_cell_type().
// `target_cache` is keyed by cell only, while the answer also depends on `cell_type` and
// `exclude_generated`. Both query-dependent rejections are therefore decided before the cache is
// consulted, and the cache stores only the query-independent part (the check with
// exclude_generated = false against the cell's own type). A cache shared between calls with
// different arguments can then never return a stale answer.
bool is_target_cell_type_cached(Cell *cell, IdString cell_type,
	bool exclude_generated, dict<Cell*, bool> &target_cache)
{
	if (cell == nullptr)
		return false;
	// Type mismatch is a cheap comparison and is not worth a cache entry
	if (cell->type != cell_type)
		return false;
	if (exclude_generated && cell->get_bool_attribute(kAttrTimingBalanceGenerated))
		return false;

	auto it = target_cache.find(cell);
	if (it != target_cache.end())
		return it->second;

	const bool is_target = is_target_cell_type(cell, cell_type, false);
	target_cache[cell] = is_target;
	return is_target;
}
// Sigmap-mapped Y signal of `cell`, memoized in `y_cache`.
// The returned reference points into `y_cache`, so use it before inserting further entries.
const SigSpec &mapped_y(Cell *cell, dict<Cell*, SigSpec> &y_cache)
{
	auto cached = y_cache.find(cell);
	if (cached != y_cache.end())
		return cached->second;
	SigSpec &slot = y_cache[cell];
	slot = sigmap(cell->getPort(ID::Y));
	return slot;
}
// Backward cluster extraction
// A head is a cell whose output leaves the same-type chain: it drives a module output port,
// has no consumers at all, or has at least one consumer that is not a balance target.
bool is_head_cell(Cell *cell, IdString cell_type, bool exclude_generated,
	ConnectivitySnapshot &graph, dict<Cell*, bool> &target_cache, dict<Cell*, SigSpec> &y_cache)
{
	if (cell == nullptr)
		return false;
	const SigSpec &y = mapped_y(cell, y_cache);

	bool drives_output_port = false;
	for (auto bit : y) {
		if (graph.output_port_bits.count(bit)) {
			drives_output_port = true;
			break;
		}
	}
	if (drives_output_port)
		return true;

	pool<Cell*> consumers;
	graph.collect_sinks_mapped(y, consumers);
	// An unread output ends the chain as well
	if (consumers.empty())
		return true;

	for (Cell *consumer : consumers) {
		const bool same_chain = is_target_cell_type_cached(consumer, cell_type, exclude_generated, target_cache);
		if (!same_chain)
			return true;
	}
	return false;
}
/*
 * BFS over same-type unique drivers from head_cell.
 * A driver is merged only when its Y exactly matches the consumed mapped bits, to avoid
 * semantic drift, and only on its first use inside the cluster.
 * Every other operand, including a repeated use of an already merged driver, is recorded as an
 * external source with the signedness of the consuming port.
 * Returns false for an unsupported type, a null head, a head failing the signedness contract,
 * or a cluster that contains only the head.
 */
bool collect_cluster(IdString cell_type, Cell *head_cell, const pool<Cell*> &candidate_cells,
	ConnectivitySnapshot &graph, dict<Cell*, bool> &target_cache, dict<Cell*, SigSpec> &y_cache,
	ClusterHarvest &harvest)
{
	const SupportedCellSpec *spec = get_supported_cell_spec(cell_type);
	if (spec == nullptr || head_cell == nullptr)
		return false;

	bool enforce_strict_width_match = spec->requires_strict_width_match;
	int target_width = 0;
	if (enforce_strict_width_match) {
		// Strict width preserves truncation points
		target_width = cell_y_width(head_cell);
	}

	bool enforce_matching_signedness = spec->requires_matching_signedness;
	bool target_add_signed = false;
	if (enforce_matching_signedness) {
		if (!head_cell->hasParam(ID::A_SIGNED) || !head_cell->hasParam(ID::B_SIGNED))
			return false;
		bool head_a_signed = head_cell->getParam(ID::A_SIGNED).as_bool();
		bool head_b_signed = head_cell->getParam(ID::B_SIGNED).as_bool();
		if (head_a_signed != head_b_signed)
			return false;
		target_add_signed = head_a_signed;
	}

	harvest = ClusterHarvest();
	harvest.cluster_cells.insert(head_cell);
	std::deque<Cell*> queue = {head_cell};
	while (!queue.empty())
	{
		Cell *cell = queue.front();
		queue.pop_front();
		for (IdString port : {ID::A, ID::B}) {
			SigSpec sig = sigmap(cell->getPort(port));
			Cell *driver = graph.get_unique_driver_mapped(sig);

			bool can_merge = true;
			if (driver == nullptr || driver == cell || !candidate_cells.count(driver))
				can_merge = false;
			if (can_merge && !is_target_cell_type_cached(driver, cell_type, true, target_cache))
				can_merge = false;
			if (can_merge) {
				const SigSpec &drv_y = mapped_y(driver, y_cache);
				// Require exact Y coverage for safe reassociation
				if (GetSize(drv_y) != GetSize(sig) || drv_y != sig)
					can_merge = false;
			}
			if (can_merge && enforce_strict_width_match &&
					cell_y_width(driver) != target_width)
				can_merge = false;
			if (can_merge && enforce_matching_signedness) {
				if (!driver->hasParam(ID::A_SIGNED) || !driver->hasParam(ID::B_SIGNED))
					can_merge = false;
				else {
					bool a_signed = driver->getParam(ID::A_SIGNED).as_bool();
					bool b_signed = driver->getParam(ID::B_SIGNED).as_bool();
					if (a_signed != b_signed || a_signed != target_add_signed)
						can_merge = false;
				}
			}
			// A driver's operands are harvested once. A second use of the same driver inside
			// the cluster must still contribute its value (d + d is not d, d ^ d is not d),
			// so it is kept as an external source instead of being dropped.
			if (can_merge && harvest.cluster_cells.count(driver))
				can_merge = false;

			if (can_merge) {
				harvest.cluster_cells.insert(driver);
				queue.push_back(driver);
				continue;
			}

			IdString signed_param = port == ID::A ? ID::A_SIGNED : ID::B_SIGNED;
			bool signed_port = cell->hasParam(signed_param) && cell->getParam(signed_param).as_bool();
			if (signed_port)
				harvest.signed_source_uses[sig]++;
			else
				harvest.unsigned_source_uses[sig]++;
		}
	}
	// Single-cell cluster is a no-op
	return GetSize(harvest.cluster_cells) > 1;
}
// Selected cells of the module that qualify as balance targets, sorted by cell name
std::vector<Cell*> collect_candidates(IdString cell_type, bool exclude_generated, dict<Cell*, bool> &target_cache)
{
	std::vector<Cell*> picked;
	for (Cell *cell : module->selected_cells()) {
		if (!is_target_cell_type_cached(cell, cell_type, exclude_generated, target_cache))
			continue;
		picked.push_back(cell);
	}
	// Name order keeps the candidate sequence identical across runs
	const auto by_name = [](Cell *lhs, Cell *rhs) {
		return std::strcmp(lhs->name.c_str(), rhs->name.c_str()) < 0;
	};
	std::sort(picked.begin(), picked.end(), by_name);
	return picked;
}
// Rewrite evaluation and commit
// Tries to rebalance the cluster rooted at `head` within the current sweep.
// The connectivity snapshot is not rebuilt between heads, so any head whose cluster or sources
// touch cells already rewritten in this sweep is left alone here.
void rewrite_one_head(IdString cell_type, Cell *head, SweepContext &sweep,
	ConnectivitySnapshot &graph, TimingOracle &timer)
{
	if (sweep.consumed_cells.count(head))
		return;
	if (!is_head_cell(head, cell_type, true, graph, sweep.target_cache, sweep.y_cache))
		return;

	ClusterHarvest harvest;
	if (!collect_cluster(cell_type, head, sweep.candidate_cells, graph, sweep.target_cache, sweep.y_cache, harvest))
		return;

	// Only clusters disjoint from earlier rewrites are batched in one sweep
	for (Cell *member : harvest.cluster_cells)
		if (member != nullptr && sweep.consumed_cells.count(member))
			return;

	// Stale snapshot guard: true when a used source bit is driven by an already consumed cell
	const auto driven_by_consumed = [&](const dict<SigSpec, int> &uses) -> bool {
		for (const auto &use : uses) {
			if (use.second <= 0)
				continue;
			for (auto bit : use.first) {
				if (bit.wire == nullptr)
					continue;
				auto entry = graph.unique_driver_by_bit.find(bit);
				if (entry == graph.unique_driver_by_bit.end())
					continue;
				Cell *source_driver = entry->second;
				if (source_driver != nullptr && sweep.consumed_cells.count(source_driver))
					return true;
			}
		}
		return false;
	};
	if (driven_by_consumed(harvest.signed_source_uses) ||
			driven_by_consumed(harvest.unsigned_source_uses))
		return;

	RewriteDecision decision;
	if (!evaluate_rewrite(cell_type, head, harvest, timer, decision))
		return;
	if (!commit_rewrite(cell_type, head, decision))
		return;

	for (Cell *member : harvest.cluster_cells) {
		if (member == nullptr)
			continue;
		sweep.consumed_cells.insert(member);
	}
	sweep.stats.rewrites++;

	// Cached Y signals of rewritten cells are no longer trustworthy
	for (Cell *member : harvest.cluster_cells) {
		if (member == nullptr)
			continue;
		sweep.y_cache.erase(member);
	}
	sweep.y_cache.erase(head);
}
// Returns `heads` reordered so that heads found upstream (on the driver side) come first.
// `saw_cycle` is set when the backward traversal meets a cell that is still Active.
// Every input head appears exactly once in the result; heads not emitted by the traversal are
// appended in their original order.
std::vector<Cell*> order_heads_by_dependency(const std::vector<Cell*> &heads, ConnectivitySnapshot &graph, bool &saw_cycle)
{
saw_cycle = false;
if (heads.empty())
return {};
/*
* Backward DFS over driver links,
* postorder emits upstream-first head order,
* cycles fall back to conservative skip in this sweep
*/
pool<Cell*> head_cells;
for (auto head : heads)
head_cells.insert(head);
// Colouring is shared across all roots, so each cell is expanded at most once
dict<Cell*, TraversalState> state;
std::vector<Cell*> postorder_heads;
struct DfsEntry {
Cell *cell;
// false: expand drivers, true: finalize after drivers were processed
bool postorder;
};
std::vector<DfsEntry> stack;
stack.reserve(kTraversalStackReserve);
for (auto root : heads)
{
if (root == nullptr)
continue;
stack.clear();
stack.push_back({root, false});
while (!stack.empty())
{
DfsEntry e = stack.back();
stack.pop_back();
Cell *cell = e.cell;
// Timing boundaries are not traversed through
if (cell == nullptr || is_timing_boundary_cell(cell, cell_types))
continue;
TraversalState st = TraversalState::Unseen;
if (auto it = state.find(cell); it != state.end())
st = it->second;
if (e.postorder) {
if (st != TraversalState::Done) {
state[cell] = TraversalState::Done;
// Only heads are emitted; other cells just carry the ordering
if (head_cells.count(cell))
postorder_heads.push_back(cell);
}
continue;
}
if (st == TraversalState::Done)
continue;
// Reaching a cell whose finalize entry is still pending means a loop
if (st == TraversalState::Active) {
saw_cycle = true;
continue;
}
state[cell] = TraversalState::Active;
// Finalize entry goes below the drivers so it pops after them
stack.push_back({cell, true});
for (const auto &[port_id, sig] : cell->connections()) {
if (!cell->input(port_id))
continue;
for (auto bit : sigmap(sig)) {
if (!bit.wire)
continue;
auto drv_it = graph.unique_driver_by_bit.find(bit);
if (drv_it == graph.unique_driver_by_bit.end())
continue;
Cell *driver = drv_it->second;
// Skip multiply driven bits and direct self-feedback
if (driver == nullptr || driver == cell)
continue;
// The same driver may be pushed once per bit; repeats are skipped when popped as Done
stack.push_back({driver, false});
}
}
}
}
if (saw_cycle)
log_warning("opt_timing_balance: cycle detected in head ordering in module %s, using conservative order.\n",
log_id(module));
// Preserve deterministic order for disconnected heads
pool<Cell*> seen_heads;
std::vector<Cell*> ordered_heads;
ordered_heads.reserve(GetSize(heads));
for (auto head : postorder_heads) {
if (!seen_heads.count(head)) {
seen_heads.insert(head);
ordered_heads.push_back(head);
}
}
// Heads never emitted by the traversal keep their input order at the end
for (auto head : heads) {
if (!seen_heads.count(head))
ordered_heads.push_back(head);
}
return ordered_heads;
}
bool build_tree_leaves(const ClusterHarvest &harvest, TimingOracle &timer, std::vector<TreeLeaf> &leaves)
{
struct SourceUse {
SigSpec sig;
bool is_signed;
int count;
};
leaves.clear();
int stable_id = 0;
// Deterministic source-use ordering for stable tree shape
std::vector<SourceUse> uses;
uses.reserve(GetSize(harvest.signed_source_uses) + GetSize(harvest.unsigned_source_uses));
for (const auto &[sig, count] : harvest.signed_source_uses)
uses.push_back({sig, true, count});
for (const auto &[sig, count] : harvest.unsigned_source_uses)
uses.push_back({sig, false, count});
std::sort(uses.begin(), uses.end(), [](const SourceUse &a, const SourceUse &b) {
if (a.sig != b.sig)
return less_sigspec_key(a.sig, b.sig);
if (a.is_signed != b.is_signed)
return a.is_signed > b.is_signed;
return a.count < b.count;
});
for (const auto &use : uses)
{
if (use.count <= 0)
continue;
double src_arrival = timer.get_arrival(use.sig);
if (!std::isfinite(src_arrival))
return false;
for (int i = 0; i < use.count; i++)
leaves.push_back({use.sig, src_arrival, GetSize(use.sig), use.is_signed, stable_id++});
}
return !leaves.empty() && !timer.cycle_detected;
}
// Decides whether rebuilding the cluster rooted at `head_cell` is worthwhile.
//
// `decision` is reset, then filled with the head's Y port signal and the
// planner's plan. Returns true only when the planned output arrival is finite
// and beats the current head arrival by more than kMinIterationDelta.
bool evaluate_rewrite(IdString cell_type, Cell *head_cell, const ClusterHarvest &harvest,
		TimingOracle &timer, RewriteDecision &decision)
{
	decision = RewriteDecision();

	// Record the head's Y port exactly as connected; mapping here can rewire the wrong alias
	decision.head_output = head_cell->getPort(ID::Y);

	std::vector<TreeLeaf> operands;
	const bool have_operands = build_tree_leaves(harvest, timer, operands);
	if (!have_operands)
		return false;

	const double baseline_arrival = timer.get_arrival(decision.head_output);
	if (timer.cycle_detected || !std::isfinite(baseline_arrival))
		return false;

	decision.plan = planner.plan(operands, cell_type, head_cell);
	if (!decision.plan.valid())
		return false;

	// Accept only a finite estimate that improves on the baseline by more than the minimum delta
	const double projected_arrival = decision.plan.output_arrival;
	return std::isfinite(projected_arrival) &&
			projected_arrival < baseline_arrival - kMinIterationDelta;
}
// Applies an accepted rewrite: emits the planned tree, moves the old head's
// Y port onto a fresh wire, and connects the original head output bits to the
// tree output.
//
// Returns false, without touching the head cell, when either signal is empty
// or the widths differ.
// NOTE(review): the tree is emitted before the width checks, so a rejected
// commit leaves whatever emitter.apply() created in the module; confirm that
// a later cleanup removes it.
bool commit_rewrite(IdString cell_type, Cell *head_cell,
		const RewriteDecision &decision)
{
	const SigSpec original_y = decision.head_output;
	const SigSpec balanced_y = emitter.apply(decision.plan, cell_type, head_cell);

	const int original_width = GetSize(original_y);
	const int balanced_width = GetSize(balanced_y);
	if (original_width <= 0 || balanced_width <= 0)
		return false;
	if (original_width != balanced_width)
		return false;

	// Move the old driver onto a new wire first so the original bits never have two drivers
	IdString parking_name = make_id(head_cell, "timing_balance_detach");
	Wire *parking_wire = module->addWire(parking_name, std::max(1, original_width));
	head_cell->setPort(ID::Y, SigSpec(parking_wire));
	if (head_cell->hasParam(ID::Y_WIDTH))
		head_cell->setParam(ID::Y_WIDTH, original_width);
	head_cell->fixup_parameters();

	module->connect(original_y, balanced_y);
	return true;
}
// Objective and per-type sweep
// Sums the Y-port arrival time of every head cell across all target cell types.
//
// Returns an ObjectiveScore initialized from +infinity as soon as the timer
// reports a cycle or any head arrival is non-finite.
ObjectiveScore compute_delay_objective(const std::vector<IdString> &target_cell_ids, ConnectivitySnapshot &graph, TimingOracle &timer)
{
	ObjectiveScore total;

	for (IdString type_id : target_cell_ids)
	{
		// Caches are local to one cell type
		dict<Cell*, bool> is_target_memo;
		dict<Cell*, SigSpec> y_memo;

		std::vector<Cell*> type_cells = collect_candidates(type_id, false, is_target_memo);

		// First select the heads, then time them
		std::vector<Cell*> roots;
		for (Cell *candidate : type_cells) {
			if (is_head_cell(candidate, type_id, false, graph, is_target_memo, y_memo))
				roots.push_back(candidate);
		}

		for (Cell *root : roots) {
			const double root_arrival = timer.get_arrival(root->getPort(ID::Y));
			const bool unusable = timer.cycle_detected || !std::isfinite(root_arrival);
			if (unusable)
				return {std::numeric_limits<double>::infinity()};
			total.sum_arrival += root_arrival;
		}
	}

	return total;
}
// Runs one rewrite sweep for a single cell type and returns its statistics
// (candidate count, head count, and whatever rewrite_one_head records).
//
// If head ordering reports a cycle, no head is rewritten in this sweep.
RewriteStats process_cell_type_once(IdString cell_type, ConnectivitySnapshot &graph, TimingOracle &timer)
{
	SweepContext sweep;

	const std::vector<Cell*> candidates = collect_candidates(cell_type, true, sweep.target_cache);
	sweep.stats.candidates = GetSize(candidates);

	// Register every candidate and pick out the heads in a single pass
	std::vector<Cell*> heads;
	for (Cell *candidate : candidates) {
		sweep.candidate_cells.insert(candidate);
		if (is_head_cell(candidate, cell_type, true, graph, sweep.target_cache, sweep.y_cache))
			heads.push_back(candidate);
	}
	sweep.stats.trees = GetSize(heads);

	bool cyclic = false;
	const std::vector<Cell*> ordered_heads = order_heads_by_dependency(heads, graph, cyclic);

	// Cyclic cones are rejected conservatively for this sweep
	if (!cyclic) {
		for (Cell *head : ordered_heads)
			rewrite_one_head(cell_type, head, sweep, graph, timer);
	}

	return sweep.stats;
}
// Top-level worker loop
void run()
{
if (target_cell_ids.empty())
return;
ConnectivitySnapshot graph(module, sigmap);
TimingOracle timer(cell_types, sigmap, graph.unique_driver_by_bit);
ObjectiveScore objective_before = compute_delay_objective(target_cell_ids, graph, timer);
bool stopped_early = false;
log(" processing module %s\n", log_id(module));
log_flush();
for (int iter = 0; iter < kMaxPassIterations; iter++) {
ObjectiveScore iter_before = objective_before;
ObjectiveScore iter_after = iter_before;
bool improved = false;
int generated_before = 0;
for (IdString cell_type : target_cell_ids)
generated_before += cell_count[cell_type];
log(" iteration %d/%d begin\n", iter + 1, kMaxPassIterations);
int total_rewrites = 0;
for (IdString cell_type : target_cell_ids) {
RewriteStats stats = process_cell_type_once(cell_type, graph, timer);
total_rewrites += stats.rewrites;
log(" %s trees=%d candidates=%d rewrites=%d\n",
log_id(cell_type), stats.trees, stats.candidates, stats.rewrites);
}
int generated_after = 0;
for (IdString cell_type : target_cell_ids)
generated_after += cell_count[cell_type];
int generated_delta = generated_after - generated_before;
log(" rewrote_trees=%d generated_cells=%d\n", total_rewrites, generated_delta);
if (total_rewrites > 0) {
log(" clean -purge begin\n");
Pass::call_on_module(design, module, "clean -purge");
log(" clean -purge end\n");
rebuild_views(graph, timer);
iter_after = compute_delay_objective(target_cell_ids, graph, timer);
improved = objective_improved(iter_before, iter_after);
}
log(" before = %.3f after = %.3f, %s\n",
iter_before.sum_arrival, iter_after.sum_arrival,
improved ? "timing estimation improved, continuing" : "timing estimation did not improve, stopping");
log(" iteration %d/%d end\n", iter + 1, kMaxPassIterations);
log_flush();
if (!improved) {
stopped_early = true;
break;
}
objective_before = iter_after;
}
if (!stopped_early) {
log(" reached iteration cap %d stopping\n", kMaxPassIterations);
log_flush();
}
}
};
// -----------------------------------------------------------------------------
// Pass wrapper
// -----------------------------------------------------------------------------
struct OptTimingBalancePass : public Pass
{
OptTimingBalancePass() : Pass("opt_timing_balance", "timing-aware balancing of associative trees") { }
void help() override
{
log("\n");
log(" opt_timing_balance [options] [selection]\n");
log("\n");
log("Iterative timing-aware balancing for cascaded associative cells.\n");
log("Uses lazy backward arrival estimation plus DAG-ordered Huffman rebuilding.\n");
log("\n");
log(" -arith\n");
log(" only convert arithmetic cells ($add).\n");
log("\n");
log(" -logic\n");
log(" only convert logic cells ($and/$or/$xor).\n");
log("\n");
}
void execute(std::vector<std::string> args, RTLIL::Design *design) override
{
log_header(design, "Executing OPT_TIMING_BALANCE pass (iterative timing-aware tree rewrite).\n");
size_t argidx;
bool saw_type_flag = false;
bool enable_arith = false;
bool enable_logic = false;
for (argidx = 1; argidx < (size_t)GetSize(args); argidx++) {
if (args[argidx] == "-arith") {
saw_type_flag = true;
enable_arith = true;
continue;
}
if (args[argidx] == "-logic") {
saw_type_flag = true;
enable_logic = true;
continue;
}
// Remaining args are selection filters
break;
}
extra_args(args, argidx, design);
if (!saw_type_flag) {
enable_arith = true;
enable_logic = true;
}
std::vector<IdString> target_cell_ids = collect_target_cell_ids(enable_logic, enable_arith);
dict<IdString, int> cell_count;
for (auto module : design->selected_modules()) {
OptTimingBalanceWorker worker(design, module, target_cell_ids);
worker.run();
for (const auto &[type, count] : worker.cell_count)
cell_count[type] += count;
}
for (auto cell_type : target_cell_ids) {
log(" Converted %d %s cells into timing-balanced trees.\n", cell_count[cell_type], log_id(cell_type));
}
}
} OptTimingBalancePass;
PRIVATE_NAMESPACE_END