diff --git a/passes/silimate/Makefile.inc b/passes/silimate/Makefile.inc index 4f480ba05..f57fcab15 100644 --- a/passes/silimate/Makefile.inc +++ b/passes/silimate/Makefile.inc @@ -10,6 +10,7 @@ OBJS += passes/silimate/mux_push.o OBJS += passes/silimate/obs_clean.o OBJS += passes/silimate/segv.o OBJS += passes/silimate/reg_rename.o +OBJS += passes/silimate/infer_ce.o OBJS += passes/silimate/splitfanout.o OBJS += passes/silimate/splitlarge.o OBJS += passes/silimate/splitnetlist.o diff --git a/passes/silimate/infer_ce.cc b/passes/silimate/infer_ce.cc new file mode 100644 index 000000000..04034cf99 --- /dev/null +++ b/passes/silimate/infer_ce.cc @@ -0,0 +1,551 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2024 Silimate Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "kernel/yosys.h" +#include "kernel/sigtools.h" +#include "kernel/ff.h" +#include "kernel/satgen.h" +#include +#include + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +// Configuration +static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider +static const int DEFAULT_MIN_NET_SIZE = 10; // Min registers per clock gate + +struct InferCeWorker +{ + Module *module; + SigMap sigmap; + + // Configuration + int max_cover; + int min_net_size; + + // Maps output signal bits to their driver cells + dict sig_to_driver; + + // Maps cell input pins to their source signals + dict> sig_to_sinks; + + // Pre-computed list of combinational cells (for SAT import) + std::vector comb_cells; + + // Statistics + int accepted_count = 0; + int rejected_sat_count = 0; + int sat_solves = 0; + + InferCeWorker(Module *module, int max_cover, int min_net_size) + : module(module), sigmap(module), + max_cover(max_cover), min_net_size(min_net_size) + { + // Build driver and sink maps + for (auto cell : module->cells()) { + for (auto &conn : cell->connections()) { + if (cell->output(conn.first)) { + for (auto bit : sigmap(conn.second)) + if (bit.wire) + sig_to_driver[bit] = cell; + } + if (cell->input(conn.first)) { + for (auto bit : sigmap(conn.second)) + if (bit.wire) + sig_to_sinks[bit].insert(cell); + } + } + + // Collect combinational cells for SAT + if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) { + comb_cells.push_back(cell); + } + } + } + + + // Get upstream signals feeding into given signals (BFS backward) + pool getUpstreamSignals(const pool &start_signals, int limit) + { + pool visited; + std::queue worklist; + + for (auto bit : start_signals) { + worklist.push(bit); + visited.insert(bit); + } + + while (!worklist.empty() && (int)visited.size() < limit) { + SigBit bit = worklist.front(); + worklist.pop(); + + if (!sig_to_driver.count(bit)) + continue; + + Cell *driver = sig_to_driver[bit]; + + if (driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + continue; + + for (auto &conn : driver->connections()) + if (driver->input(conn.first)) + for (auto in_bit : sigmap(conn.second)) + if (in_bit.wire && !visited.count(in_bit)) { + visited.insert(in_bit); + worklist.push(in_bit); + } + } + return visited; + } + + // Get cells in the transitive fanin cone of given signals (for SAT import) + // This is much faster than importing ALL cells + pool getConeOfLogic(SigSpec sig) + { + pool cone_cells; + pool visited; + std::queue worklist; + + // Start from all bits in sig + for (auto bit : sigmap(sig)) { + if (bit.wire && !visited.count(bit)) { + visited.insert(bit); + worklist.push(bit); + } + } + + // BFS backward through drivers + while (!worklist.empty()) { + SigBit bit = worklist.front(); + worklist.pop(); + + if (!sig_to_driver.count(bit)) + continue; + + Cell *driver = sig_to_driver[bit]; + + // Skip registers + if (driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + continue; + + // Add this cell to cone + if (cone_cells.count(driver)) + continue; // Already processed + cone_cells.insert(driver); + + // Add inputs of driver to worklist + for (auto &conn : driver->connections()) { + if (driver->input(conn.first)) { + for (auto in_bit : sigmap(conn.second)) { + if (in_bit.wire && !visited.count(in_bit)) { + visited.insert(in_bit); + worklist.push(in_bit); + } + } + } + } + } + + return cone_cells; + } + + // Check if OR/AND of signals forms a valid gating condition using SAT + // Uses a PRE-CREATED SAT solver (passed in) to avoid recreating for each check + bool isValidGatingSetWithSolver(ezSatPtr &ez, SatGen &satgen, + const std::vector &conds, + SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + if (conds.empty()) + return false; + + sat_solves++; + + std::vector d_vec = satgen.importSigSpec(sig_d); + std::vector q_vec = satgen.importSigSpec(sig_q); + + // Build OR (for enable) or AND (for disable) of condition signals + std::vector cond_vars; + for (auto bit : conds) + cond_vars.push_back(satgen.importSigSpec(SigSpec(bit))[0]); + + int combined_cond; + if (as_enable) { + // Clock enable: OR of signals (any signal high = enable) + combined_cond = ez->expression(ezSAT::OpOr, cond_vars); + } else { + // Clock disable: AND of signals (all signals high = disable) + combined_cond = ez->expression(ezSAT::OpAnd, cond_vars); + } + + int d_ne_q = ez->vec_ne(d_vec, q_vec); + + // Safe gating: when gating is active (enable=0 or disable=1), D must equal Q + int gating_active = as_enable ? ez->NOT(combined_cond) : combined_cond; + int query = ez->AND(gating_active, d_ne_q); + + std::vector assumptions = {query}; + std::vector dummy_exprs; + std::vector dummy_vals; + + bool is_valid = !ez->solve(dummy_exprs, dummy_vals, assumptions); + if (!is_valid) + rejected_sat_count++; + return is_valid; + } + + // Wrapper that creates a fresh SAT solver (used for standalone checks) + bool isValidGatingSet(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + if (conds.empty()) + return false; + + pool cone = getConeOfLogic(sig_d); + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + for (auto cell : cone) + satgen.importCell(cell); + + return isValidGatingSetWithSolver(ez, satgen, conds, sig_d, sig_q, as_enable); + } + + // Binary search to minimize the gating condition set + // Tries to remove half of the signals at a time + // Uses pre-created SAT solver to avoid recreating for each check + void minimizeGatingConditionWithSolver( + ezSatPtr &ez, SatGen &satgen, + std::vector &good_conds, + std::vector::iterator begin, + std::vector::iterator end, + SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + int half_len = (end - begin) / 2; + + if (half_len == 0) + return; + + auto mid = begin + half_len; + + // Try removing [mid, end) from the condition + std::vector test_conds; + test_conds.insert(test_conds.end(), good_conds.begin(), begin); + test_conds.insert(test_conds.end(), begin, mid); + test_conds.insert(test_conds.end(), end, good_conds.end()); + + if (!test_conds.empty() && isValidGatingSetWithSolver(ez, satgen, test_conds, sig_d, sig_q, as_enable)) { + // Can remove [mid, end) + good_conds.erase(mid, end); + // Recurse on remaining half + minimizeGatingConditionWithSolver(ez, satgen, good_conds, begin, begin + half_len, sig_d, sig_q, as_enable); + } else { + // Cannot remove all of [mid, end), try to minimize each half + if (end - mid > 1) + minimizeGatingConditionWithSolver(ez, satgen, good_conds, mid, end, sig_d, sig_q, as_enable); + minimizeGatingConditionWithSolver(ez, satgen, good_conds, begin, mid, sig_d, sig_q, as_enable); + } + } + + // Wrapper for standalone use (creates fresh solver) + void minimizeGatingCondition( + std::vector &good_conds, + std::vector::iterator begin, + std::vector::iterator end, + SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + pool cone = getConeOfLogic(sig_d); + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + for (auto cell : cone) + satgen.importCell(cell); + + minimizeGatingConditionWithSolver(ez, satgen, good_conds, begin, end, sig_d, sig_q, as_enable); + } + + // Find gating condition for a register + // Returns: {gating_conds, is_enable, cone_size} + std::tuple, bool, int> findGatingCondition(Cell *reg) + { + FfData ff(nullptr, reg); + + pool d_inputs; + for (auto bit : sigmap(ff.sig_d)) + if (bit.wire) + d_inputs.insert(bit); + + pool upstream = getUpstreamSignals(d_inputs, max_cover); + + std::vector candidates; + for (auto bit : upstream) + candidates.push_back(bit); + + if ((int)candidates.size() > max_cover) + candidates.resize(max_cover); + + if (candidates.empty()) + return {{}, false, 0}; + + // Create SAT solver ONCE for this register + pool cone = getConeOfLogic(ff.sig_d); + int cone_size = (int)cone.size(); + + // Skip registers with trivial cones (not worth gating) or huge cones (too expensive) + const int MIN_CONE_SIZE = 2; + const int MAX_CONE_SIZE = 500; + if (cone_size < MIN_CONE_SIZE || cone_size > MAX_CONE_SIZE) + return {{}, false, cone_size}; + + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + for (auto cell : cone) + satgen.importCell(cell); + + // Try as clock enable first + if (isValidGatingSetWithSolver(ez, satgen, candidates, ff.sig_d, ff.sig_q, true)) { + minimizeGatingConditionWithSolver(ez, satgen, candidates, candidates.begin(), candidates.end(), + ff.sig_d, ff.sig_q, true); + if (!candidates.empty()) + return {candidates, true, cone_size}; + } + + // Try as clock disable + if (isValidGatingSetWithSolver(ez, satgen, candidates, ff.sig_d, ff.sig_q, false)) { + minimizeGatingConditionWithSolver(ez, satgen, candidates, candidates.begin(), candidates.end(), + ff.sig_d, ff.sig_q, false); + if (!candidates.empty()) + return {candidates, false, cone_size}; + } + + return {{}, false, cone_size}; + } + + // Insert clock gating logic for a group of registers + void insertClockGate(const std::vector ®s, + const std::vector &gating_conds, + bool as_enable) + { + if (regs.empty() || gating_conds.empty()) + return; + + // Build gating condition: OR for enable, AND for disable + SigBit gating_signal; + if (gating_conds.size() == 1) { + gating_signal = gating_conds[0]; + } else { + SigSpec cond_inputs; + for (auto bit : gating_conds) + cond_inputs.append(bit); + + Wire *cond_wire = module->addWire(NEW_ID); + if (as_enable) + module->addReduceOr(NEW_ID, cond_inputs, cond_wire); + else + module->addReduceAnd(NEW_ID, cond_inputs, cond_wire); + gating_signal = cond_wire; + } + + // If disable signal, invert to get enable + if (!as_enable) { + Wire *inv_wire = module->addWire(NEW_ID); + module->addNot(NEW_ID, gating_signal, inv_wire); + gating_signal = inv_wire; + } + + // Add CE to each register + for (auto reg : regs) { + FfData ff(nullptr, reg); + + if (ff.has_ce) { + Wire *combined_ce = module->addWire(NEW_ID); + module->addAnd(NEW_ID, ff.sig_ce, gating_signal, combined_ce); + ff.sig_ce = combined_ce; + } else { + ff.has_ce = true; + ff.sig_ce = gating_signal; + ff.pol_ce = true; + } + + ff.emit(); + } + } + + // Check if register can be added to an existing gate + bool canReuseGate(const std::vector &existing_conds, Cell *reg, bool is_enable) + { + FfData ff(nullptr, reg); + return isValidGatingSet(existing_conds, ff.sig_d, ff.sig_q, is_enable); + } + + // Main processing function + void run() + { + std::vector registers; + for (auto cell : module->cells()) { + if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + continue; + + FfData ff(nullptr, cell); + if (ff.has_ce || !ff.has_clk) + continue; + + registers.push_back(cell); + } + + log("Processing module %s: %zu cells, %zu flip-flops, %zu wires\n", + log_id(module), module->cells().size(), registers.size(), module->wires().size()); + + if (registers.empty()) + return; + + struct AcceptedGate { + std::vector conds; + pool cond_set; + std::vector regs; + bool is_enable; + }; + std::vector accepted_gates; + dict> net_to_accepted; + + int reg_idx = 0; + for (auto reg : registers) { + auto [gating_conds, is_enable, cone_size] = findGatingCondition(reg); + log("Processing register %d/%zu: %s (cone=%d)\n", ++reg_idx, registers.size(), log_id(reg), cone_size); + + if (gating_conds.empty()) + continue; + + pool cond_set; + for (auto bit : gating_conds) + cond_set.insert(bit); + + // Find candidate gates sharing any net + pool candidate_gates; + for (auto bit : gating_conds) + if (net_to_accepted.count(bit)) + for (auto idx : net_to_accepted[bit]) + candidate_gates.insert(idx); + + // HEURISTIC: Only check limited gates for reuse + const int MAX_REUSE_CHECKS = 20; + + bool found_match = false; + int checked = 0; + for (auto idx : candidate_gates) { + if (checked >= MAX_REUSE_CHECKS) + break; + + auto &gate = accepted_gates[idx]; + if (gate.is_enable != is_enable) + continue; + + checked++; + if (canReuseGate(gate.conds, reg, is_enable)) { + gate.regs.push_back(reg); + found_match = true; + break; + } + } + + if (!found_match) { + size_t new_idx = accepted_gates.size(); + accepted_gates.push_back({gating_conds, cond_set, {reg}, is_enable}); + for (auto bit : gating_conds) + net_to_accepted[bit].push_back(new_idx); + } + } + + // Insert clock gates for groups meeting threshold + for (auto &gate : accepted_gates) { + if ((int)gate.regs.size() >= min_net_size) { + insertClockGate(gate.regs, gate.conds, gate.is_enable); + accepted_count += gate.regs.size(); + } + } + } +}; + +struct InferCePass : public Pass { + InferCePass() : Pass("infer_ce", "Infer clock enable signals from conditional logic") { } + + void help() override + { + log("\n"); + log(" infer_ce [options] [selection]\n"); + log("\n"); + log("This command infers clock enable (CE) signals from conditional logic.\n"); + log("It analyzes registers and uses SAT solving to find signals that can\n"); + log("serve as clock enable conditions (when the signal is low, D==Q).\n"); + log("\n"); + log("Algorithm based on:\n"); + log(" - \"Automatic Synthesis of Clock Gating Logic\" by Aaron P. Hurst\n"); + log(" - OpenROAD's cgt module implementation\n"); + log("\n"); + log(" -max_cover \n"); + log(" maximum number of candidate signals to consider per register\n"); + log(" (default: %d)\n", DEFAULT_MAX_COVER); + log("\n"); + log(" -min_net_size \n"); + log(" minimum number of registers that must share a gating condition\n"); + log(" for a clock gate to be inserted (default: %d)\n", DEFAULT_MIN_NET_SIZE); + log("\n"); + } + + void execute(std::vector args, RTLIL::Design *design) override + { + log_header(design, "Executing INFER_CE pass.\n"); + + int max_cover = DEFAULT_MAX_COVER; + int min_net_size = DEFAULT_MIN_NET_SIZE; + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) { + if (args[argidx] == "-max_cover" && argidx+1 < args.size()) { + max_cover = std::stoi(args[++argidx]); + continue; + } + if (args[argidx] == "-min_net_size" && argidx+1 < args.size()) { + min_net_size = std::stoi(args[++argidx]); + continue; + } + break; + } + extra_args(args, argidx, design); + + int total_gates = 0; + for (auto module : design->selected_modules()) { + InferCeWorker worker(module, max_cover, min_net_size); + worker.run(); + total_gates += worker.accepted_count; + } + + log("Inserted clock enables for %d registers.\n", total_gates); + } +} InferCePass; + +PRIVATE_NAMESPACE_END diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc index 6a5f95d5b..50d557c08 100644 --- a/passes/techmap/clockgate.cc +++ b/passes/techmap/clockgate.cc @@ -224,6 +224,8 @@ struct ClockgatePass : public Pass { log(" Only transform sets of at least eligible FFs.\n"); log(" -max_src \n"); log(" Maximum number of src attributes to copy to ICG cells (default: unlimited).\n"); + log(" -word\n"); + log(" Use word-level $not cell for CE inversion instead of gate-level $_NOT_.\n"); log(" \n"); } @@ -279,6 +281,7 @@ struct ClockgatePass : public Pass { std::vector dont_use_cells; int min_net_size = 0; int max_src = -1; + bool word_level = false; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { @@ -314,6 +317,10 @@ struct ClockgatePass : public Pass { max_src = atoi(args[++argidx].c_str()); continue; } + if (args[argidx] == "-word") { + word_level = true; + continue; + } break; } @@ -395,9 +402,19 @@ struct ClockgatePass : public Pass { // Fix CE polarity if needed if (!clk.pol_ce) { Wire *ce_not_wire = module->addWire(NEW_ID2_SUFFIX("ce_not_w")); - Cell *ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($_NOT_)); - ce_not->setPort(ID::A, clk.ce_bit); - ce_not->setPort(ID::Y, ce_not_wire); + Cell *ce_not; + if (word_level) { + ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($not)); + ce_not->setParam(ID::A_SIGNED, 0); + ce_not->setParam(ID::A_WIDTH, 1); + ce_not->setParam(ID::Y_WIDTH, 1); + ce_not->setPort(ID::A, clk.ce_bit); + ce_not->setPort(ID::Y, ce_not_wire); + } else { + ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($_NOT_)); + ce_not->setPort(ID::A, clk.ce_bit); + ce_not->setPort(ID::Y, ce_not_wire); + } gclk.ce_not_cell = ce_not; icg->setPort(matching_icg_desc->ce_pin, ce_not_wire); } diff --git a/tests/silimate/infer_ce.ys b/tests/silimate/infer_ce.ys new file mode 100644 index 000000000..68b6bf628 --- /dev/null +++ b/tests/silimate/infer_ce.ys @@ -0,0 +1,217 @@ +# ============================================================================= +# Test 1: Basic enable inference with non-trivial cone +# infer_ce needs cone_size >= 2, so we add combinational logic before the mux. +# We use proc; opt_expr; opt_clean (NOT full opt) to avoid opt_dff stealing +# the mux-feedback pattern before infer_ce gets a chance. +# ============================================================================= +log -header "Basic enable inference" +log -push +design -reset +read_verilog <