/*
 *  yosys -- Yosys Open SYnthesis Suite
 *
 *  Copyright (C) 2012  Claire Xenia Wolf
 *                2026  Abhinav Tondapu
 *
 *  Permission to use, copy, modify, and/or distribute this software for any
 *  purpose with or without fee is hereby granted, provided that the above
 *  copyright notice and this permission notice appear in all copies.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

#include "kernel/yosys.h"
#include "kernel/sigtools.h"
#include "kernel/celltypes.h"
#include "kernel/utils.h"

// NOTE(review): the standard header names were stripped in the recovered
// source; this list is reconstructed from usage (std::log2, std::strcmp,
// std::deque, std::numeric_limits, std::priority_queue, std::tie,
// std::sort/std::max, std::vector) -- confirm against the original file.
#include <algorithm>
#include <cmath>
#include <cstring>
#include <deque>
#include <limits>
#include <queue>
#include <tuple>
#include <vector>

USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN

/* Invariants:
 * - Operates on internal word cells ($add/$and/$or/$xor) pre-techmap
 * - Connectivity and timing keys use sigmap-mapped signals
 * - Rewiring uses original head Y bits to avoid alias drift
 * - Disjoint clusters are rewritten per sweep, clean/rebuild happens per iteration
 */

// -----------------------------------------------------------------------------
// Shared constants, helpers, and traits
// -----------------------------------------------------------------------------

static constexpr double kDelayDefault = 1.0;
static constexpr double kDelayLogic = 0.5;
static constexpr double kMinIterationDelta = 1e-3;
static constexpr int kMaxPassIterations = 10;
static constexpr int kTraversalStackReserve = 256;

static const IdString kAttrTimingBalanceGenerated = "\\timing_balance_generated";

// Fresh private name anchored at `anchor`'s location, with the given suffix.
static IdString make_id(Cell *anchor, const char *suffix)
{
	// NEW_ID2_SUFFIX relies on a local variable named `cell`
	Cell *cell = anchor;
	return NEW_ID2_SUFFIX(suffix);
}

// log2(n + 1): smooth tree-depth / carry-chain delay proxy that is 0 at n == 0.
static inline double log2p1_int(int n)
{
	return std::log2(static_cast<double>(n) + 1.0);
}

// Best-effort output width of `cell`, never less than 1.
static int cell_y_width(const Cell *cell)
{
	log_assert(cell != nullptr);
	if (cell->hasParam(ID::Y_WIDTH))
		return std::max(1, cell->getParam(ID::Y_WIDTH).as_int());
	if (cell->hasPort(ID::Y))
		return std::max(1, GetSize(cell->getPort(ID::Y)));
	// TimingOracle can query non-target drivers, fall back to widest output port
	int width = 0;
	for (const auto &[port_id, sig] : cell->connections())
		if (cell->output(port_id))
			width = std::max(width, GetSize(sig));
	return std::max(1, width);
}

enum class BalanceCategory { Logic, Arith };
enum class WidthRule { MaxInput, AddCarry };
enum class DelayHeuristicKind { Fixed, AddLike };
enum class TraversalState : int { Unseen = 0, Active = 1, Done = 2 };

// Per-cell balancing traits and delay heuristic policy
struct SupportedCellSpec {
	IdString type;
	BalanceCategory category;
	bool requires_strict_width_match = false;
	bool requires_matching_signedness = false;
	WidthRule width_rule = WidthRule::MaxInput;
	DelayHeuristicKind delay_kind = DelayHeuristicKind::Fixed;
	double fixed_delay = 0.0;
};

// Registry for balance targets and their delay/width behavior
// Adding a new associative target should only require editing this table
static const std::vector<SupportedCellSpec> &supported_cell_registry()
{
	static const std::vector<SupportedCellSpec> specs = {
		{ID($and), BalanceCategory::Logic, false, false, WidthRule::MaxInput, DelayHeuristicKind::Fixed, kDelayLogic},
		{ID($or), BalanceCategory::Logic, false, false, WidthRule::MaxInput, DelayHeuristicKind::Fixed, kDelayLogic},
		{ID($xor), BalanceCategory::Logic, false, false, WidthRule::MaxInput, DelayHeuristicKind::Fixed, kDelayDefault},
		{ID($add), BalanceCategory::Arith, true, true, WidthRule::AddCarry, DelayHeuristicKind::AddLike, 0.0},
	};
	return specs;
}

// Type-indexed view of the registry, built once on first use.
static const dict<IdString, const SupportedCellSpec *> &supported_cell_registry_map()
{
	static const dict<IdString, const SupportedCellSpec *> by_type = []() {
		dict<IdString, const SupportedCellSpec *> m;
		for (const auto &spec : supported_cell_registry())
			m[spec.type] = &spec;
		return m;
	}();
	return by_type;
}

static const SupportedCellSpec *get_supported_cell_spec(IdString type)
{
	const auto &by_type = supported_cell_registry_map();
	auto it = by_type.find(type);
	if (it == by_type.end())
		return nullptr;
	return it->second;
}

// Registry types filtered by the pass's category enables, in registry order.
static std::vector<IdString> collect_target_cell_ids(bool enable_logic, bool enable_arith)
{
	std::vector<IdString> ids;
	for (const auto &spec : supported_cell_registry()) {
		bool enabled_category = (spec.category == BalanceCategory::Logic) ? enable_logic : enable_arith;
		if (!enabled_category)
			continue;
		ids.push_back(spec.type);
	}
	return ids;
}

// Deterministic strict-weak order on bits: constants first (by state value),
// then by wire name (strcmp, run-independent) and bit offset.
static bool less_sigbit_key(const SigBit &a, const SigBit &b)
{
	bool a_const = a.wire == nullptr;
	bool b_const = b.wire == nullptr;
	if (a_const != b_const)
		return a_const;
	if (a_const) {
		int ad = static_cast<int>(a.data);
		int bd = static_cast<int>(b.data);
		return ad < bd;
	}
	if (a.wire->name != b.wire->name)
		return std::strcmp(a.wire->name.c_str(), b.wire->name.c_str()) < 0;
	return a.offset < b.offset;
}

// Lexicographic extension of less_sigbit_key, shorter signals first.
static bool less_sigspec_key(const SigSpec &a, const SigSpec &b)
{
	if (GetSize(a) != GetSize(b))
		return GetSize(a) < GetSize(b);
	int n = GetSize(a);
	for (int i = 0; i < n; i++) {
		const SigBit &ab = a[i];
		const SigBit &bb = b[i];
		if (ab == bb)
			continue;
		return less_sigbit_key(ab, bb);
	}
	return false;
}

// For supported ops here, result signedness is true only when both inputs are signed
static constexpr bool yosys_binary_result_signed(bool a_signed, bool b_signed)
{
	return a_signed && b_signed;
}

// Unit-delay table for common single-level word ops used by the estimator.
static const dict<IdString, double> &fixed_delay_table()
{
	static const auto table = dict<IdString, double>{
		{ID($not), 0.0},
		{ID($pos), 0.0},
		{ID($logic_not), 0.0},
		{ID($and), kDelayLogic},
		{ID($or), kDelayLogic},
		{ID($xor), kDelayDefault},
		{ID($xnor), kDelayDefault},
		{ID($logic_and), kDelayLogic},
		{ID($logic_or), kDelayLogic},
		{ID($mux), kDelayDefault},
	};
	return table;
}

// True when timing traversal must stop at `cell` (arrival treated as 0.0).
static bool is_timing_boundary_cell(Cell *cell, const CellTypes &cell_types)
{
	if (cell == nullptr)
		return true;
	// Explicit user attributes
	if (cell->get_bool_attribute(ID::keep) || cell->get_bool_attribute(ID::blackbox))
		return true;
	// Flip-flops
	if (cell->is_builtin_ff())
		return true;
	// Latches, memories, and formal/simulation cells
	if (cell->type.in(
			ID($dlatch), ID($adlatch), ID($dlatchsr),
			ID($mem), ID($mem_v2),
			ID($memrd), ID($memrd_v2), ID($memwr), ID($memwr_v2),
			ID($meminit), ID($meminit_v2),
			ID($anyconst), ID($anyseq), ID($allconst), ID($allseq),
			ID($equiv), ID($assert), ID($assume), ID($cover),
			ID($check), ID($print)))
		return true;
	// Macro or unknown cell
	return !cell_types.cell_known(cell->type);
}

// Heuristic propagation delay of one cell level at the given output width.
static double estimate_cell_delay(const Cell *cell, int out_width)
{
	if (cell == nullptr)
		return kDelayDefault;

	IdString type = cell->type;
	int width = out_width;

	const auto &by_type = supported_cell_registry_map();
	auto reg_it = by_type.find(type);
	if (reg_it != by_type.end()) {
		const SupportedCellSpec *spec = reg_it->second;
		switch (spec->delay_kind) {
		case DelayHeuristicKind::Fixed:
			return spec->fixed_delay;
		case DelayHeuristicKind::AddLike:
			return log2p1_int(width);
		}
	}

	if (type == ID($pmux)) {
		int s_width = 1;
		if (cell->hasParam(ID::S_WIDTH))
			s_width = cell->getParam(ID::S_WIDTH).as_int();
		return log2p1_int(s_width);
	}
	if (type.in(ID($add), ID($sub), ID($neg), ID($alu)))
		return log2p1_int(width);
	if (type.in(ID($mul), ID($div), ID($mod)))
		return width;
	if (type.in(ID($shl), ID($shr), ID($sshl), ID($sshr)))
		return log2p1_int(width);

	const auto &fixed = fixed_delay_table();
	auto it = fixed.find(type);
	if (it != fixed.end())
		return it->second;

	return kDelayDefault;
}

// -----------------------------------------------------------------------------
// Analysis: connectivity and timing oracle
// -----------------------------------------------------------------------------

struct ConnectivitySnapshot {
	// One-sweep structural connectivity view
	dict<SigBit, Cell *> unique_driver_by_bit;
	SigSet<Cell *> sinks_by_bit;
	pool<SigBit> output_port_bits;

	ConnectivitySnapshot() = default;

	ConnectivitySnapshot(Module *module, SigMap &sigmap)
	{
		build(module, sigmap);
	}

	// Rebuilds all three indexes from the module's current netlist state.
	// A bit with multiple drivers is recorded as nullptr (ambiguous).
	void build(Module *module, SigMap &sigmap)
	{
		unique_driver_by_bit.clear();
		sinks_by_bit.clear();
		output_port_bits.clear();

		// Full-module view keeps fanout checks selection-safe
		for (Cell *cell : module->cells()) {
			for (const auto &[port_id, sig] : cell->connections()) {
				SigSpec mapped = sigmap(sig);
				if (cell->output(port_id)) {
					for (auto bit : mapped) {
						if (!bit.wire)
							continue;
						auto [it, inserted] = unique_driver_by_bit.emplace(bit, cell);
						if (!inserted && it->second != cell)
							it->second = nullptr;
					}
				}
				if (cell->input(port_id))
					sinks_by_bit.insert(mapped, cell);
			}
		}

		// Output ports mark head boundaries. Input boundaries are handled in TimingOracle
		for (auto wire : module->wires()) {
			if (wire->port_output) {
				for (auto bit : sigmap(wire))
					output_port_bits.insert(bit);
			}
		}
	}

	// Single cell driving every bit of `sig`, or nullptr (constant bits,
	// missing/ambiguous drivers, or mixed drivers all disqualify).
	Cell *get_unique_driver_mapped(const SigSpec &sig) const
	{
		// Caller passes sigmap-mapped signal slices
		Cell *driver = nullptr;
		for (auto bit : sig) {
			if (!bit.wire)
				return nullptr;
			auto it = unique_driver_by_bit.find(bit);
			if (it == unique_driver_by_bit.end() || it->second == nullptr)
				return nullptr;
			if (driver == nullptr)
				driver = it->second;
			else if (driver != it->second)
				return nullptr;
		}
		return driver;
	}

	void collect_sinks_mapped(const SigSpec &mapped_sig, pool<Cell *> &sinks)
	{
		// SigSet::find() is non-const in current Yosys API
		sinks.clear();
		sinks_by_bit.find(mapped_sig, sinks);
	}
};

struct TimingOracle {
	// Lazy backward arrival estimator over the current connectivity snapshot
	// Unknown or boundary drivers return 0.0, combinational cycles return +inf
	const CellTypes &cell_types;
	SigMap &sigmap;
	const dict<SigBit, Cell *> *driver_map;
	dict<SigBit, double> arrival_cache;
	dict<SigBit, TraversalState> visit_state;

	struct StackEntry {
		SigBit bit;
		// false: expand dependencies, true: finalize after children
		bool finalize_phase = false;
	};

	bool cycle_detected = false;

	TimingOracle(const CellTypes &cell_types, SigMap &sigmap, const dict<SigBit, Cell *> &driver_map)
		: cell_types(cell_types), sigmap(sigmap), driver_map(&driver_map)
	{
	}

	void clear_timing_cache()
	{
		arrival_cache.clear();
		visit_state.clear();
		cycle_detected = false;
	}

	// Swap in a new driver index (after a netlist rebuild) and drop stale results.
	void rebind_driver_map(const dict<SigBit, Cell *> &new_driver_map)
	{
		driver_map = &new_driver_map;
		clear_timing_cache();
	}

	void cache_final_value(SigBit bit, double arrival)
	{
		if (!bit.wire)
			return;
		bit = sigmap(bit);
		arrival_cache[bit] = arrival;
		visit_state[bit] = TraversalState::Done;
	}

	TraversalState get_visit_state(SigBit bit) const
	{
		if (auto it = visit_state.find(bit); it != visit_state.end())
			return it->second;
		return TraversalState::Unseen;
	}

	void set_visit_state(SigBit bit, TraversalState state)
	{
		visit_state[bit] = state;
	}

	// Worst (max) arrival over all bits of `sig`. Resets cycle_detected first,
	// so the flag reports cycles seen during *this* query only.
	double get_arrival(const SigSpec &sig)
	{
		cycle_detected = false;
		double t = 0.0;
		for (auto bit : sigmap(sig))
			t = std::max(t, get_arrival_noguard(bit));
		return t;
	}

private:
	/*
	 * Two-phase DFS avoids recursion,
	 * finalize_phase = false expands inputs, true computes and caches node arrival
	 * Active marks the current path, unresolved inputs during finalize are treated as cycles with +inf
	 */
	double get_arrival_noguard(SigBit bit)
	{
		SigBit start = sigmap(bit);
		if (!start.wire)
			return 0.0;
		if (auto it = arrival_cache.find(start); it != arrival_cache.end())
			return it->second;

		// Local stack keeps traversal state scoped to one query
		std::vector<StackEntry> eval_stack;
		eval_stack.reserve(kTraversalStackReserve);
		eval_stack.push_back({start, false});

		while (!eval_stack.empty()) {
			StackEntry e = std::move(eval_stack.back());
			eval_stack.pop_back();
			SigBit curr = e.bit;
			if (!curr.wire)
				continue;
			if (arrival_cache.count(curr))
				continue;
			if (curr.wire->port_input) {
				cache_final_value(curr, 0.0);
				continue;
			}

			Cell *driver = nullptr;
			if (auto it_drv = driver_map->find(curr); it_drv != driver_map->end())
				driver = it_drv->second;
			if (driver == nullptr || is_timing_boundary_cell(driver, cell_types)) {
				cache_final_value(curr, 0.0);
				continue;
			}

			TraversalState state = get_visit_state(curr);
			if (!e.finalize_phase) {
				if (state == TraversalState::Done)
					continue;
				if (state == TraversalState::Active) {
					// Node already on current path, skip duplicate expansion
					continue;
				}
				set_visit_state(curr, TraversalState::Active);
				eval_stack.push_back({curr, true});
				for (const auto &[port_id, sig] : driver->connections()) {
					if (!driver->input(port_id))
						continue;
					for (auto in_bit : sigmap(sig)) {
						if (!in_bit.wire || arrival_cache.count(in_bit))
							continue;
						if (get_visit_state(in_bit) == TraversalState::Active) {
							cycle_detected = true;
							continue;
						}
						eval_stack.push_back({in_bit, false});
					}
				}
				continue;
			}

			double max_input = 0.0;
			for (const auto &[port_id, sig] : driver->connections()) {
				if (!driver->input(port_id))
					continue;
				for (auto in_bit : sigmap(sig)) {
					double in_arrival = 0.0;
					if (in_bit.wire) {
						auto it = arrival_cache.find(in_bit);
						if (it != arrival_cache.end())
							in_arrival = it->second;
						else {
							// Missing child arrival at finalize implies combinational cycle
							cycle_detected = true;
							in_arrival = std::numeric_limits<double>::infinity();
						}
					}
					max_input = std::max(max_input, in_arrival);
				}
			}

			double cell_delay = estimate_cell_delay(driver, cell_y_width(driver));
			double t = max_input + cell_delay;
			cache_final_value(curr, t);
		}

		auto it = arrival_cache.find(start);
		return it != arrival_cache.end() ? it->second : 0.0;
	}
};

// -----------------------------------------------------------------------------
// Rewrite planning and emission
// -----------------------------------------------------------------------------

static int natural_output_width(WidthRule width_rule, int a_width, int b_width)
{
	switch (width_rule) {
	case WidthRule::AddCarry:
		return std::max(a_width, b_width) + 1;
	case WidthRule::MaxInput:
	default:
		return std::max(a_width, b_width);
	}
}

static int minimum_y_width_for_reassociation(WidthRule width_rule, int a_width, int b_width)
{
	if (width_rule == WidthRule::AddCarry)
		// Validation-only relaxation for modulo 2^N add reassociation
		return std::max(a_width, b_width);
	return natural_output_width(width_rule, a_width, b_width);
}

struct TreeLeaf {
	SigSpec signal;
	double arrival_time = 0.0;
	int width = 0;
	bool is_signed = false;
	int stable_id = 0;
};

struct MergeShape {
	int out_width = 1;
	bool a_signed = false;
	bool b_signed = false;
	bool out_signed = false;
};

struct PlannedMerge {
	int lhs_node = -1;
	int rhs_node = -1;
	MergeShape shape;
};

// Immutable plan produced by HuffmanPlanner and consumed by TreeEmitter
struct TreePlan {
	// Node ids are dense:
	// - [0, leaves) are leaf nodes
	// - [leaves, leaves+merges) are merge nodes in emission order
	std::vector<TreeLeaf> leaves;
	std::vector<PlannedMerge> merges;
	int root_node = -1;
	double output_arrival = 0.0;

	bool valid() const { return root_node >= 0; }
	int node_count() const { return GetSize(leaves) + GetSize(merges); }
};

// Computes merge order and expected arrival, does not mutate RTLIL
struct HuffmanPlanner {
	struct PlanNode {
		int node_id = -1;
		double arrival_time = 0.0;
		int width = 0;
		bool is_signed = false;
		int stable_id = 0;
	};

	struct PlanNodeCmp {
		bool operator()(const PlanNode &a, const PlanNode &b) const
		{
			// Use a min-heap by inverting comparator for std::priority_queue
			return std::tie(a.arrival_time, a.width, a.stable_id) > std::tie(b.arrival_time, b.width, b.stable_id);
		}
	};

	// Output shape for one merge: root merges keep the head's full width,
	// interior AddCarry merges are clipped to their natural carry width.
	MergeShape compute_merge_shape(const TreeLeaf &a, const TreeLeaf &b, const SupportedCellSpec &spec,
			int target_out_width, bool force_root_width) const
	{
		int out_width = std::max(1, target_out_width);
		if (!force_root_width && spec.width_rule == WidthRule::AddCarry)
			out_width = std::min(out_width, natural_output_width(spec.width_rule, a.width, b.width));
		bool a_signed = a.is_signed;
		bool b_signed = b.is_signed;
		bool out_signed = yosys_binary_result_signed(a_signed, b_signed);
		return {out_width, a_signed, b_signed, out_signed};
	}

	double compute_merge_arrival(double a_arrival, double b_arrival, int out_width, const Cell *delay_ref_cell) const
	{
		return std::max(a_arrival, b_arrival) + estimate_cell_delay(delay_ref_cell, out_width);
	}

	TreePlan plan(const std::vector<TreeLeaf> &leaves, IdString cell_type, Cell *reference_cell) const
	{
		// Deterministic leaf ordering is provided by build_tree_leaves()
		TreePlan plan;
		if (leaves.empty())
			return plan;
		plan.leaves = leaves;
		if (GetSize(leaves) == 1) {
			plan.root_node = 0;
			plan.output_arrival = leaves.front().arrival_time;
			return plan;
		}

		const SupportedCellSpec *spec = get_supported_cell_spec(cell_type);
		if (spec == nullptr)
			return {};

		int target_out_width = std::max(1, cell_y_width(reference_cell));

		std::priority_queue<PlanNode, std::vector<PlanNode>, PlanNodeCmp> pq;
		for (int i = 0; i < GetSize(leaves); i++) {
			const auto &leaf = leaves[i];
			pq.push({i, leaf.arrival_time, leaf.width, leaf.is_signed, leaf.stable_id});
		}

		int next_internal_id = GetSize(leaves);
		int next_stable_id = GetSize(leaves);

		/* Greedy Huffman merge always pops the two best nodes first,
		 * stable_id makes tie breaks deterministic for equal arrival and width,
		 * root merge forces target width to preserve the head output contract
		 */
		while (GetSize(pq) > 1) {
			PlanNode a = pq.top();
			pq.pop();
			PlanNode b = pq.top();
			pq.pop();
			bool force_root_width = pq.empty();

			TreeLeaf a_leaf = {SigSpec(), a.arrival_time, a.width, a.is_signed, a.stable_id};
			TreeLeaf b_leaf = {SigSpec(), b.arrival_time, b.width, b.is_signed, b.stable_id};
			MergeShape shape = compute_merge_shape(a_leaf, b_leaf, *spec, target_out_width, force_root_width);
			int out_width = shape.out_width;
			double new_arrival = compute_merge_arrival(a.arrival_time, b.arrival_time, out_width, reference_cell);

			int node_id = next_internal_id++;
			plan.merges.push_back({a.node_id, b.node_id, shape});
			pq.push({node_id, new_arrival, out_width, shape.out_signed, next_stable_id++});
		}

		log_assert(!pq.empty());
		plan.root_node = pq.top().node_id;
		plan.output_arrival = pq.top().arrival_time;
		return plan;
	}
};

// TreeEmitter materializes a precomputed plan into RTLIL cells and wires
struct TreeEmitter {
	Module *module;
	dict<IdString, int> &cell_count;

	TreeEmitter(Module *module, dict<IdString, int> &cell_count)
		: module(module), cell_count(cell_count)
	{
	}

	// Emits the plan bottom-up and returns the root's output signal
	// (empty SigSpec on an invalid plan).
	SigSpec apply(const TreePlan &plan, IdString cell_type, Cell *reference_cell)
	{
		if (!plan.valid() || plan.leaves.empty())
			return {};
		if (GetSize(plan.leaves) == 1)
			return plan.leaves.front().signal;

		int total_nodes = plan.node_count();
		std::vector<SigSpec> node_signals(total_nodes);
		for (int i = 0; i < GetSize(plan.leaves); i++)
			node_signals[i] = plan.leaves[i].signal;

		for (int merge_idx = 0; merge_idx < GetSize(plan.merges); merge_idx++) {
			const PlannedMerge &m = plan.merges[merge_idx];
			log_assert(m.lhs_node >= 0 && m.lhs_node < total_nodes);
			log_assert(m.rhs_node >= 0 && m.rhs_node < total_nodes);

			SigSpec a_sig = node_signals[m.lhs_node];
			SigSpec b_sig = node_signals[m.rhs_node];
			log_assert(GetSize(a_sig) > 0 && GetSize(b_sig) > 0);

			IdString new_cell_name = make_id(reference_cell, "timing_balance");
			Cell *new_cell = module->addCell(new_cell_name, cell_type);
			new_cell->set_bool_attribute(kAttrTimingBalanceGenerated);
			new_cell->set_src_attribute(reference_cell->get_src_attribute());

			IdString out_wire_name = make_id(reference_cell, "timing_balance_y");
			Wire *out_wire = module->addWire(out_wire_name, m.shape.out_width);

			new_cell->setPort(ID::A, a_sig);
			new_cell->setPort(ID::B, b_sig);
			new_cell->setPort(ID::Y, out_wire);
			if (new_cell->hasParam(ID::A_SIGNED))
				new_cell->setParam(ID::A_SIGNED, m.shape.a_signed);
			if (new_cell->hasParam(ID::B_SIGNED))
				new_cell->setParam(ID::B_SIGNED, m.shape.b_signed);
			if (new_cell->hasParam(ID::A_WIDTH))
				new_cell->setParam(ID::A_WIDTH, GetSize(a_sig));
			if (new_cell->hasParam(ID::B_WIDTH))
				new_cell->setParam(ID::B_WIDTH, GetSize(b_sig));
			if (new_cell->hasParam(ID::Y_WIDTH))
				new_cell->setParam(ID::Y_WIDTH, m.shape.out_width);
			new_cell->fixup_parameters();

			int node_id = GetSize(plan.leaves) + merge_idx;
			node_signals[node_id] = SigSpec(out_wire);
			cell_count[cell_type]++;
		}

		log_assert(plan.root_node >= 0 && plan.root_node < total_nodes);
		return node_signals[plan.root_node];
	}
};

// -----------------------------------------------------------------------------
// Rewrite engine: cluster harvest, evaluation, and commit loop
// -----------------------------------------------------------------------------

// Harvested cluster plus external source multiset for one candidate head
struct ClusterHarvest {
	// Track source multiplicity by signedness to preserve per-use semantics
	dict<SigSpec, int> signed_source_uses;
	dict<SigSpec, int> unsigned_source_uses;
	pool<Cell *> cluster_cells;
};

// Worker contract:
// Finds heads for each target type, harvests and evaluates clusters, commits
// beneficial disjoint rewrites in-sweep, and rebuilds views between iterations
struct OptTimingBalanceWorker {
	struct RewriteStats {
		int candidates = 0;
		int trees = 0;
		int rewrites = 0;
	};

	struct RewriteDecision {
		SigSpec head_output;
		TreePlan plan;
	};

	struct ObjectiveScore {
		double sum_arrival = 0.0;
	};

	struct SweepContext {
		pool<Cell *> candidate_cells;
		pool<Cell *> consumed_cells;
		RewriteStats stats;
		dict<Cell *, bool> target_cache;
		dict<Cell *, SigSpec> y_cache;
	};

	Design *design;
	Module *module;
	SigMap sigmap;
	CellTypes cell_types;
	std::vector<IdString> target_cell_ids;
	dict<IdString, int> cell_count;
	HuffmanPlanner planner;
	TreeEmitter emitter;
	dict<IdString, int> warned_contract_issues;

	// Bit flags accumulated per cell type in warned_contract_issues
	static constexpr int warnRequiredPortsErrCode = 1;
	static constexpr int warnRequiredWidthParamsErrCode = 2;

	OptTimingBalanceWorker(Design *design, Module *module, const std::vector<IdString> &target_cell_ids)
		: design(design), module(module), sigmap(module), cell_types(design),
		  target_cell_ids(target_cell_ids), planner(), emitter(module, cell_count)
	{
	}

	// View lifecycle
	void rebuild_views(ConnectivitySnapshot &graph, TimingOracle &timer)
	{
		sigmap = SigMap(module);
		graph.build(module, sigmap);
		timer.rebind_driver_map(graph.unique_driver_by_bit);
	}

	// Warnings and objective gate
	void warn_contract_once(IdString cell_type, int err_code)
	{
		int &mask = warned_contract_issues[cell_type];
		if (mask & err_code)
			return;
		mask |= err_code;
		if (err_code == warnRequiredPortsErrCode) {
			log_warning("opt_timing_balance: skipping %s cells without A/B/Y ports in module %s.\n",
					log_id(cell_type), log_id(module));
		} else {
			log_warning("opt_timing_balance: skipping %s cells without width parameters in module %s. "
					"Pass expects word-level RTL cells (run before gate-level techmapping).\n",
					log_id(cell_type), log_id(module));
		}
	}

	bool objective_improved(const ObjectiveScore &objective_before, const ObjectiveScore &objective_after) const
	{
		if (!std::isfinite(objective_after.sum_arrival))
			return false;
		if (!std::isfinite(objective_before.sum_arrival))
			return true;
		// Sum-only gating can regress the worst single path, but may unlock deferred global gains in later iterations
		return objective_after.sum_arrival < objective_before.sum_arrival - kMinIterationDelta;
	}

	// Candidate and head predicates
	bool is_target_cell_type(Cell *cell, IdString cell_type, bool exclude_generated)
	{
		if (cell == nullptr || cell->type != cell_type)
			return false;
		if (exclude_generated && cell->get_bool_attribute(kAttrTimingBalanceGenerated))
			return false;

		const SupportedCellSpec *spec = get_supported_cell_spec(cell_type);
		if (spec == nullptr)
			return false;

		if (!cell->hasPort(ID::A) || !cell->hasPort(ID::B) || !cell->hasPort(ID::Y)) {
			warn_contract_once(cell_type, warnRequiredPortsErrCode);
			return false;
		}
		if (!cell->hasParam(ID::Y_WIDTH) || !cell->hasParam(ID::A_WIDTH) || !cell->hasParam(ID::B_WIDTH)) {
			warn_contract_once(cell_type, warnRequiredWidthParamsErrCode);
			return false;
		}

		int y_width = cell->getParam(ID::Y_WIDTH).as_int();
		int a_width = cell->getParam(ID::A_WIDTH).as_int();
		int b_width = cell->getParam(ID::B_WIDTH).as_int();
		if (y_width <= 0 || a_width <= 0 || b_width <= 0)
			return false;
		if (GetSize(cell->getPort(ID::A)) != a_width)
			return false;
		if (GetSize(cell->getPort(ID::B)) != b_width)
			return false;
		if (GetSize(cell->getPort(ID::Y)) != y_width)
			return false;

		if (spec->requires_matching_signedness) {
			if (!cell->hasParam(ID::A_SIGNED) || !cell->hasParam(ID::B_SIGNED))
				return false;
		}

		int required_width = minimum_y_width_for_reassociation(spec->width_rule, a_width, b_width);
		return y_width >= required_width;
	}

	bool is_target_cell_type_cached(Cell *cell, IdString cell_type, bool exclude_generated, dict<Cell *, bool> &target_cache)
	{
		if (cell == nullptr)
			return false;
		auto it = target_cache.find(cell);
		if (it != target_cache.end())
			return it->second;
		bool is_target = is_target_cell_type(cell, cell_type, exclude_generated);
		target_cache[cell] = is_target;
		return is_target;
	}

	const SigSpec &mapped_y(Cell *cell, dict<Cell *, SigSpec> &y_cache)
	{
		auto it = y_cache.find(cell);
		if (it != y_cache.end())
			return it->second;
		y_cache[cell] = sigmap(cell->getPort(ID::Y));
		return y_cache[cell];
	}

	// Backward cluster extraction
	bool is_head_cell(Cell *cell, IdString cell_type, bool exclude_generated, ConnectivitySnapshot &graph,
			dict<Cell *, bool> &target_cache, dict<Cell *, SigSpec> &y_cache)
	{
		if (cell == nullptr)
			return false;
		const SigSpec &y = mapped_y(cell, y_cache);

		// Output-port drivers are always heads
		for (auto bit : y)
			if (graph.output_port_bits.count(bit))
				return true;

		pool<Cell *> sinks;
		graph.collect_sinks_mapped(y, sinks);
		// Leaf drivers are heads
		if (sinks.empty())
			return true;
		// Any non-target consumer terminates same-type chain growth
		for (Cell *sink : sinks) {
			if (!is_target_cell_type_cached(sink, cell_type, exclude_generated, target_cache))
				return true;
		}
		return false;
	}

	/*
	 * BFS over same-type unique drivers from head_cell,
	 * merge only when driver Y exactly matches consumed mapped bits to avoid semantic drift,
	 * when merge stops, record source use count with per-port signedness
	 */
	bool collect_cluster(IdString cell_type, Cell *head_cell, const pool<Cell *> &candidate_cells,
			ConnectivitySnapshot &graph, dict<Cell *, bool> &target_cache,
			dict<Cell *, SigSpec> &y_cache, ClusterHarvest &harvest)
	{
		const SupportedCellSpec *spec = get_supported_cell_spec(cell_type);
		if (spec == nullptr || head_cell == nullptr)
			return false;

		bool enforce_strict_width_match = spec->requires_strict_width_match;
		int target_width = 0;
		if (enforce_strict_width_match) {
			// Strict width preserves truncation points
			target_width = cell_y_width(head_cell);
		}

		bool enforce_matching_signedness = spec->requires_matching_signedness;
		bool target_add_signed = false;
		if (enforce_matching_signedness) {
			if (!head_cell->hasParam(ID::A_SIGNED) || !head_cell->hasParam(ID::B_SIGNED))
				return false;
			bool head_a_signed = head_cell->getParam(ID::A_SIGNED).as_bool();
			bool head_b_signed = head_cell->getParam(ID::B_SIGNED).as_bool();
			if (head_a_signed != head_b_signed)
				return false;
			target_add_signed = head_a_signed;
		}

		harvest = ClusterHarvest();
		harvest.cluster_cells.insert(head_cell);

		std::deque<Cell *> queue = {head_cell};
		while (!queue.empty()) {
			Cell *cell = queue.front();
			queue.pop_front();
			for (IdString port : {ID::A, ID::B}) {
				SigSpec sig = sigmap(cell->getPort(port));
				Cell *driver = graph.get_unique_driver_mapped(sig);

				bool can_merge = true;
				if (driver == nullptr || driver == cell || !candidate_cells.count(driver))
					can_merge = false;
				if (can_merge && !is_target_cell_type_cached(driver, cell_type, true, target_cache))
					can_merge = false;
				if (can_merge) {
					const SigSpec &drv_y = mapped_y(driver, y_cache);
					// Require exact Y coverage for safe reassociation
					if (GetSize(drv_y) != GetSize(sig) || drv_y != sig)
						can_merge = false;
				}
				if (can_merge && enforce_strict_width_match && cell_y_width(driver) != target_width)
					can_merge = false;
				if (can_merge && enforce_matching_signedness) {
					if (!driver->hasParam(ID::A_SIGNED) || !driver->hasParam(ID::B_SIGNED))
						can_merge = false;
					else {
						bool a_signed = driver->getParam(ID::A_SIGNED).as_bool();
						bool b_signed = driver->getParam(ID::B_SIGNED).as_bool();
						if (a_signed != b_signed || a_signed != target_add_signed)
							can_merge = false;
					}
				}

				if (can_merge) {
					if (!harvest.cluster_cells.count(driver)) {
						harvest.cluster_cells.insert(driver);
						queue.push_back(driver);
					}
					continue;
				}

				IdString signed_param = port == ID::A ? ID::A_SIGNED : ID::B_SIGNED;
				bool signed_port = cell->hasParam(signed_param) && cell->getParam(signed_param).as_bool();
				if (signed_port)
					harvest.signed_source_uses[sig]++;
				else
					harvest.unsigned_source_uses[sig]++;
			}
		}

		// Single-cell cluster is a no-op
		return GetSize(harvest.cluster_cells) > 1;
	}

	std::vector<Cell *> collect_candidates(IdString cell_type, bool exclude_generated, dict<Cell *, bool> &target_cache)
	{
		std::vector<Cell *> cells;
		for (Cell *cell : module->selected_cells())
			if (is_target_cell_type_cached(cell, cell_type, exclude_generated, target_cache))
				cells.push_back(cell);
		// Sort lexically for cross-run deterministic candidate order
		std::sort(cells.begin(), cells.end(), [](Cell *a, Cell *b) {
			return std::strcmp(a->name.c_str(), b->name.c_str()) < 0;
		});
		return cells;
	}

	// Rewrite evaluation and commit
	void rewrite_one_head(IdString cell_type, Cell *head, SweepContext &sweep, ConnectivitySnapshot &graph, TimingOracle &timer)
	{
		// No per-head rebuild in this sweep, defer heads that read already consumed drivers
		auto source_uses_consumed_driver = [&](const dict<SigSpec, int> &uses) -> bool {
			// Stale snapshot guard: skip heads fed by already rewritten clusters
			for (const auto &[sig, use_count] : uses) {
				if (use_count <= 0)
					continue;
				for (auto bit : sig) {
					if (!bit.wire)
						continue;
					auto drv_it = graph.unique_driver_by_bit.find(bit);
					if (drv_it == graph.unique_driver_by_bit.end())
						continue;
					Cell *driver = drv_it->second;
					if (driver != nullptr && sweep.consumed_cells.count(driver))
						return true;
				}
			}
			return false;
		};

		if (sweep.consumed_cells.count(head))
			return;
		if (!is_head_cell(head, cell_type, true, graph, sweep.target_cache, sweep.y_cache))
			return;

		ClusterHarvest harvest;
		if (!collect_cluster(cell_type, head, sweep.candidate_cells, graph, sweep.target_cache, sweep.y_cache, harvest))
			return;

		// Batch only disjoint clusters in one sweep
		for (Cell *cell : harvest.cluster_cells)
			if (cell != nullptr && sweep.consumed_cells.count(cell))
				return;

		// Defer heads that depend on already rewritten snapshot drivers
		if (source_uses_consumed_driver(harvest.signed_source_uses) ||
				source_uses_consumed_driver(harvest.unsigned_source_uses))
			return;

		RewriteDecision decision;
		if (!evaluate_rewrite(cell_type, head, harvest, timer, decision))
			return;
		if (!commit_rewrite(cell_type, head, decision))
			return;

		for (Cell *cell : harvest.cluster_cells)
			if (cell != nullptr)
				sweep.consumed_cells.insert(cell);
		sweep.stats.rewrites++;

		// No per-head rebuild, invalidate rewritten Y-cache entries only
		for (Cell *cell : harvest.cluster_cells)
			if (cell != nullptr)
				sweep.y_cache.erase(cell);
		sweep.y_cache.erase(head);
	}

	std::vector<Cell *> order_heads_by_dependency(const std::vector<Cell *> &heads, ConnectivitySnapshot &graph, bool &saw_cycle)
	{
		saw_cycle = false;
		if (heads.empty())
			return {};

		/*
		 * Backward DFS over driver links,
		 * postorder emits upstream-first head order,
		 * cycles fall back to conservative skip in this sweep
		 */
		pool<Cell *> head_cells;
		for (auto head : heads)
			head_cells.insert(head);

		dict<Cell *, TraversalState> state;
		std::vector<Cell *> postorder_heads;

		struct DfsEntry {
			Cell *cell;
			bool postorder;
		};
		std::vector<DfsEntry> stack;
		stack.reserve(kTraversalStackReserve);

		for (auto root : heads) {
			if (root == nullptr)
				continue;
			stack.clear();
			stack.push_back({root, false});
			while (!stack.empty()) {
				DfsEntry e = stack.back();
				stack.pop_back();
				Cell *cell = e.cell;
				if (cell == nullptr || is_timing_boundary_cell(cell, cell_types))
					continue;

				TraversalState st = TraversalState::Unseen;
				if (auto it = state.find(cell); it != state.end())
					st = it->second;

				if (e.postorder) {
					if (st != TraversalState::Done) {
						state[cell] = TraversalState::Done;
						if (head_cells.count(cell))
							postorder_heads.push_back(cell);
					}
					continue;
				}

				if (st == TraversalState::Done)
					continue;
				if (st == TraversalState::Active) {
					saw_cycle = true;
					continue;
				}

				state[cell] = TraversalState::Active;
				stack.push_back({cell, true});
				for (const auto &[port_id, sig] : cell->connections()) {
					if (!cell->input(port_id))
						continue;
					for (auto bit : sigmap(sig)) {
						if (!bit.wire)
							continue;
						auto drv_it = graph.unique_driver_by_bit.find(bit);
						if (drv_it == graph.unique_driver_by_bit.end())
							continue;
						Cell *driver = drv_it->second;
						if (driver == nullptr || driver == cell)
							continue;
						stack.push_back({driver, false});
					}
				}
			}
		}

		if (saw_cycle)
			log_warning("opt_timing_balance: cycle detected in head ordering in module %s, using conservative order.\n",
					log_id(module));

		// Preserve deterministic order for disconnected heads
		pool<Cell *> seen_heads;
		std::vector<Cell *> ordered_heads;
		ordered_heads.reserve(GetSize(heads));
		for (auto head : postorder_heads) {
			if (!seen_heads.count(head)) {
				seen_heads.insert(head);
				ordered_heads.push_back(head);
			}
		}
		for (auto head : heads) {
			if (!seen_heads.count(head))
				ordered_heads.push_back(head);
		}
		return ordered_heads;
	}

	bool build_tree_leaves(const ClusterHarvest &harvest, TimingOracle &timer, std::vector<TreeLeaf> &leaves)
	{
		struct SourceUse {
			SigSpec sig;
			bool is_signed;
			int count;
		};

		leaves.clear();
		int stable_id = 0;

		// Deterministic source-use ordering for stable tree shape
		std::vector<SourceUse> uses;
		uses.reserve(GetSize(harvest.signed_source_uses) + GetSize(harvest.unsigned_source_uses));
		for (const auto &[sig, count] : harvest.signed_source_uses)
			uses.push_back({sig, true, count});
		for (const auto &[sig, count] : harvest.unsigned_source_uses)
			uses.push_back({sig, false, count});
		std::sort(uses.begin(), uses.end(), [](const SourceUse &a, const SourceUse &b) {
			if (a.sig != b.sig)
				return less_sigspec_key(a.sig, b.sig);
			if (a.is_signed != b.is_signed)
				return a.is_signed > b.is_signed;
			return a.count < b.count;
		});

		for (const auto &use : uses) {
			if (use.count <= 0)
				continue;
			double src_arrival = timer.get_arrival(use.sig);
			if (!std::isfinite(src_arrival))
				return false;
			for (int i = 0; i < use.count; i++)
				leaves.push_back({use.sig, src_arrival, GetSize(use.sig), use.is_signed, stable_id++});
		}

		return !leaves.empty() && !timer.cycle_detected;
	}

	bool evaluate_rewrite(IdString cell_type, Cell *head_cell, const ClusterHarvest &harvest,
			TimingOracle &timer, RewriteDecision &decision)
	{
		decision = RewriteDecision();
		// Keep exact head output bits. Mapping here can rewire the wrong alias
		decision.head_output = head_cell->getPort(ID::Y);

		std::vector<TreeLeaf> leaves;
		if (!build_tree_leaves(harvest, timer, leaves))
			return false;

		double old_arrival = timer.get_arrival(decision.head_output);
		if (timer.cycle_detected || !std::isfinite(old_arrival))
			return false;

		decision.plan = planner.plan(leaves, cell_type, head_cell);
		if (!decision.plan.valid())
			return false;

		double estimated_new_arrival = decision.plan.output_arrival;
		if (!std::isfinite(estimated_new_arrival) || estimated_new_arrival >= old_arrival - kMinIterationDelta)
			return false;
		return true;
	}

	bool commit_rewrite(IdString cell_type, Cell *head_cell, const RewriteDecision &decision)
	{
		SigSpec head_output = decision.head_output;
		SigSpec tree_output = emitter.apply(decision.plan, cell_type, head_cell);
		if (GetSize(head_output) <= 0 || GetSize(tree_output) <= 0)
			return false;
		if (GetSize(head_output) != GetSize(tree_output))
			return false;

		// Detach old driver first to avoid transient multi-driver aliasing
		IdString detached_name = make_id(head_cell, "timing_balance_detach");
		Wire *detached = module->addWire(detached_name, std::max(1, GetSize(head_output)));
		head_cell->setPort(ID::Y, SigSpec(detached));
		if (head_cell->hasParam(ID::Y_WIDTH))
			head_cell->setParam(ID::Y_WIDTH, GetSize(head_output));
		head_cell->fixup_parameters();

		module->connect(head_output, tree_output);
		return true;
	}

	// Objective and per-type sweep
	ObjectiveScore compute_delay_objective(const std::vector<IdString> &target_cell_ids, ConnectivitySnapshot &graph, TimingOracle &timer)
	{
		ObjectiveScore objective;
		for (auto cell_type : target_cell_ids) {
			dict<Cell *, bool> target_cache;
			dict<Cell *, SigSpec> y_cache;
			std::vector<Cell *> candidates = collect_candidates(cell_type, false, target_cache);
			std::vector<Cell *> heads;
			for (Cell *cell : candidates) {
				if (is_head_cell(cell, cell_type, false, graph, target_cache, y_cache))
					heads.push_back(cell);
			}
			for (Cell *cell : heads) {
				double arrival = timer.get_arrival(cell->getPort(ID::Y));
				if (timer.cycle_detected || !std::isfinite(arrival))
					return {std::numeric_limits<double>::infinity()};
				objective.sum_arrival += arrival;
			}
		}
		return objective;
	}

	RewriteStats process_cell_type_once(IdString cell_type, ConnectivitySnapshot &graph, TimingOracle &timer)
	{
		SweepContext sweep;
		std::vector<Cell *> candidates = collect_candidates(cell_type, true, sweep.target_cache);
		for (Cell *cell : candidates)
			sweep.candidate_cells.insert(cell);
		sweep.stats.candidates = GetSize(candidates);

		std::vector<Cell *> heads;
		for (Cell *cell : candidates)
			if (is_head_cell(cell, cell_type, true, graph, sweep.target_cache, sweep.y_cache))
				heads.push_back(cell);
		sweep.stats.trees = GetSize(heads);

		bool saw_cycle = false;
		std::vector<Cell *> ordered_heads = order_heads_by_dependency(heads, graph, saw_cycle);
		if (saw_cycle) {
			// Cyclic cones are rejected conservatively for this sweep
			return sweep.stats;
		}

		for (Cell *head : ordered_heads)
			rewrite_one_head(cell_type, head, sweep, graph, timer);
		return sweep.stats;
	}

	// Top-level worker loop
	void run()
	{
		if (target_cell_ids.empty())
			return;

		ConnectivitySnapshot graph(module, sigmap);
		TimingOracle timer(cell_types, sigmap, graph.unique_driver_by_bit);
		ObjectiveScore objective_before = compute_delay_objective(target_cell_ids, graph, timer);

		bool stopped_early = false;
		log(" processing module %s\n", log_id(module));
		log_flush();

		for (int iter = 0; iter < kMaxPassIterations; iter++) {
			ObjectiveScore iter_before =
objective_before; ObjectiveScore iter_after = iter_before; bool improved = false; int generated_before = 0; for (IdString cell_type : target_cell_ids) generated_before += cell_count[cell_type]; log(" iteration %d/%d begin\n", iter + 1, kMaxPassIterations); int total_rewrites = 0; for (IdString cell_type : target_cell_ids) { RewriteStats stats = process_cell_type_once(cell_type, graph, timer); total_rewrites += stats.rewrites; log(" %s trees=%d candidates=%d rewrites=%d\n", log_id(cell_type), stats.trees, stats.candidates, stats.rewrites); } int generated_after = 0; for (IdString cell_type : target_cell_ids) generated_after += cell_count[cell_type]; int generated_delta = generated_after - generated_before; log(" rewrote_trees=%d generated_cells=%d\n", total_rewrites, generated_delta); if (total_rewrites > 0) { log(" clean -purge begin\n"); Pass::call_on_module(design, module, "clean -purge"); log(" clean -purge end\n"); rebuild_views(graph, timer); iter_after = compute_delay_objective(target_cell_ids, graph, timer); improved = objective_improved(iter_before, iter_after); } log(" before = %.3f after = %.3f, %s\n", iter_before.sum_arrival, iter_after.sum_arrival, improved ? 
"timing estimation improved, continuing" : "timing estimation did not improve, stopping"); log(" iteration %d/%d end\n", iter + 1, kMaxPassIterations); log_flush(); if (!improved) { stopped_early = true; break; } objective_before = iter_after; } if (!stopped_early) { log(" reached iteration cap %d stopping\n", kMaxPassIterations); log_flush(); } } }; // ----------------------------------------------------------------------------- // Pass wrapper // ----------------------------------------------------------------------------- struct OptTimingBalancePass : public Pass { OptTimingBalancePass() : Pass("opt_timing_balance", "timing-aware balancing of associative trees") { } void help() override { log("\n"); log(" opt_timing_balance [options] [selection]\n"); log("\n"); log("Iterative timing-aware balancing for cascaded associative cells.\n"); log("Uses lazy backward arrival estimation plus DAG-ordered Huffman rebuilding.\n"); log("\n"); log(" -arith\n"); log(" only convert arithmetic cells ($add).\n"); log("\n"); log(" -logic\n"); log(" only convert logic cells ($and/$or/$xor).\n"); log("\n"); } void execute(std::vector args, RTLIL::Design *design) override { log_header(design, "Executing OPT_TIMING_BALANCE pass (iterative timing-aware tree rewrite).\n"); size_t argidx; bool saw_type_flag = false; bool enable_arith = false; bool enable_logic = false; for (argidx = 1; argidx < (size_t)GetSize(args); argidx++) { if (args[argidx] == "-arith") { saw_type_flag = true; enable_arith = true; continue; } if (args[argidx] == "-logic") { saw_type_flag = true; enable_logic = true; continue; } // Remaining args are selection filters break; } extra_args(args, argidx, design); if (!saw_type_flag) { enable_arith = true; enable_logic = true; } std::vector target_cell_ids = collect_target_cell_ids(enable_logic, enable_arith); dict cell_count; for (auto module : design->selected_modules()) { OptTimingBalanceWorker worker(design, module, target_cell_ids); worker.run(); for (const auto 
&[type, count] : worker.cell_count) cell_count[type] += count; } for (auto cell_type : target_cell_ids) { log(" Converted %d %s cells into timing-balanced trees.\n", cell_count[cell_type], log_id(cell_type)); } } } OptTimingBalancePass; PRIVATE_NAMESPACE_END