diff --git a/kernel/fstdata.cc b/kernel/fstdata.cc index 6b771d4f6..2e3c429a5 100644 --- a/kernel/fstdata.cc +++ b/kernel/fstdata.cc @@ -375,41 +375,33 @@ std::string FstData::valueOf(fstHandle signal) return past_data[signal]; } -// Auto-discover scope from FST by finding the top module -std::string FstData::autoScope(Module *topmod) { - - log("Auto-discovering scope from file...\n"); - std::string top = RTLIL::unescape_id(topmod->name); - - log("Available scopes:\n"); - std::set unique_scopes; - for (const auto& var : vars) { - unique_scopes.insert(var.scope); - } - for (const auto& scope : unique_scopes) { - log(" %s\n", scope.c_str()); - } - - // Option 1 - Instance based scope matching - // Will fail if the DUT instance name != the top module name - log("Trying instance-based scope matching...\n"); - for (const auto& var : vars) { - // Check if this scope ends with our top module - log_debug("Checking scope: %s\n", var.scope.c_str()); - if (var.scope == top || - var.scope.find("." + top) != std::string::npos) { - // Extract the full path up to (and including) the top module - size_t pos = var.scope.find(top); - if (pos != std::string::npos) { - std::string scope = var.scope.substr(0, pos + top.length()); - return scope; - } +int FstData::getWidth(fstHandle signal) +{ + // Check if signal is a fork scope (struct) + if (fork_scope_members.count(signal)) { + // Sum the widths of all members of the fork scope, which may be forks themselves + int width = 0; + for (fstHandle member : fork_scope_members[signal]) { + width += getWidth(member); } + return width; } - // Option 2 - Port based scope matching - // Matches based on exact port name matching of the top module - log("Trying port-based scope matching...\n"); + if (handle_to_var.count(signal)) { + return handle_to_var[signal].width; + } + + // Signal not found + log_warning("Signal %d was not extracted from file...\n", signal); + return 0; +} + +// Auto-discover scope from FST by finding the top module +std::string FstData::autoScope(Module *topmod) { + + log("Auto-discovering scopes from %d candidates...\n", GetSize(name_to_handle)); + std::string top = RTLIL::unescape_id(topmod->name); + std::string scope = ""; // Map top module port name to their bit widths (RTL reference point) dict top2widths; @@ -418,43 +410,56 @@ std::string FstData::autoScope(Module *topmod) { top2widths[RTLIL::unescape_id(wire->name)] = wire->width; } } - log("Extracted %d ports from top module\n", GetSize(top2widths)); + log("Extracted %d ports from module '%s'\n", GetSize(top2widths), top.c_str()); // For each scope, track the number of matching ports dict scopes2matches; - for (const auto& var : vars) { - // Strip array '[]' notation from variable name - std::string var_name = var.name; - size_t bracket = var_name.find('['); - if (bracket != std::string::npos) { - var_name = var_name.substr(0, bracket); - } + // Use name_to_handle to get all signals from the FST file + for (auto entry : name_to_handle) { + std::string name = entry.first; + fstHandle handle = entry.second; - // Check if this variable name matches one of our top module port names and width - if (top2widths.count(var_name) && top2widths[var_name] == var.width) { - scopes2matches[var.scope] += 1; + // Extract signal name and scope using '.' + // Signal names of form '{scope}.signal_name' with scope potentially + // having zero to multiple '.' + size_t last_dot = name.find_last_of('.'); + if (last_dot != std::string::npos) { // no '.' means no scope/signal extraction is possible + std::string scope = name.substr(0, last_dot); + std::string signal_name = name.substr(last_dot + 1); + + // Check that signal is in the top module and width matches + if (top2widths.count(signal_name)) { + int signal_width = getWidth(handle); + if (signal_width == top2widths[signal_name]) { + scopes2matches[scope]++; + } + } } } - // Find scopes with exact matches - // If there is a tie, return the longest scope - std::string result = ""; + // Find scopes with exact matches and add to array + std::vector results; for (const auto& entry : scopes2matches) { int num_matches = entry.second; if (num_matches == GetSize(top2widths)) { std::string scope = entry.first; - if (result.empty() || scope.length() > result.length()) { - result = scope; - } + results.push_back(scope); } } - if (!result.empty()) { - return result; + if (results.empty()) { + log_warning("Could not auto-discover scope for module '%s'...\n", + top.c_str()); + return ""; + } else { + log("Found %d scopes for module '%s':\n", GetSize(results), top.c_str()); + for (const auto& scope : results) { + log(" %s\n", scope.c_str()); + } + if (results.size() > 1) { + log_warning("Multiple scopes found for module '%s'. Using the first one.\n", + top.c_str()); + } + return results[0]; } - - // No match found - log_warning("Could not auto-discover scope for module '%s'...\n", - RTLIL::unescape_id(topmod->name).c_str()); - return ""; } diff --git a/kernel/fstdata.h b/kernel/fstdata.h index 80d0c19b2..f95806cca 100644 --- a/kernel/fstdata.h +++ b/kernel/fstdata.h @@ -57,6 +57,7 @@ class FstData dict getMemoryHandles(std::string name); double getTimescale() { return timescale; } const char *getTimescaleString() { return timescale_str.c_str(); } + int getWidth(fstHandle signal); std::string autoScope(Module *topmod); private: void extractVarNames(); diff --git a/kernel/rtlil.cc b/kernel/rtlil.cc index 266aedfe0..9cf8715f4 100644 --- a/kernel/rtlil.cc +++ b/kernel/rtlil.cc @@ -4564,6 +4564,32 @@ const RTLIL::Const &RTLIL::Cell::getParam(RTLIL::IdString paramname) const throw std::out_of_range("Cell::getParam()"); } +// NOTE: as_int() silently truncates >32-bit values and reinterprets string-typed Const values +std::map RTLIL::Cell::getParamsAsInts() const +{ + std::map result; + for (auto ¶m : parameters) { + std::string key = param.first.str(); + if (key.size() > 0 && key[0] == '\\') + key = key.substr(1); + result[key] = param.second.as_int(); + } + return result; +} + +double RTLIL::Cell::maxInputConstRatio() const +{ + double max_ratio = 0.0; + for (auto &conn : connections_) { + if (input(conn.first)) { + double ratio = conn.second.const_ratio(); + if (ratio > max_ratio) + max_ratio = ratio; + } + } + return max_ratio; +} + void RTLIL::Cell::sort() { connections_.sort(sort_by_id_str()); @@ -5605,13 +5631,18 @@ bool RTLIL::SigSpec::is_chunk() const return ++it == cs.end(); } -bool RTLIL::SigSpec::is_mostly_const() const +double RTLIL::SigSpec::const_ratio() const { int constbits = 0; for (auto &chunk : chunks()) if (chunk.width > 0 && chunk.wire == NULL) constbits += chunk.width; - return (constbits > size()/2); + return empty() ? 0.0 : static_cast(constbits) / size(); +} + +bool RTLIL::SigSpec::is_mostly_const(double const_ratio_threshold) const +{ + return (const_ratio() > const_ratio_threshold); } bool RTLIL::SigSpec::known_driver() const diff --git a/kernel/rtlil.h b/kernel/rtlil.h index 9dfe0c570..3d2e54a1a 100644 --- a/kernel/rtlil.h +++ b/kernel/rtlil.h @@ -1684,7 +1684,10 @@ public: bool known_driver() const; - bool is_mostly_const() const; + // Constant bit ratio helpers: const_ratio() returns [0.0, 1.0], + // is_mostly_const() returns true if const_ratio() > threshold + double const_ratio() const; + bool is_mostly_const(double const_ratio_threshold = 0.5) const; bool is_fully_const() const; bool is_fully_zero() const; bool is_fully_ones() const; @@ -2529,6 +2532,13 @@ public: void setParam(RTLIL::IdString paramname, RTLIL::Const value); const RTLIL::Const &getParam(RTLIL::IdString paramname) const; + // Primitive-type parameter accessors for efficient Python interop. + // NOTE: silently truncates wide (>32-bit) parameters and reinterprets string-typed Const values + std::map getParamsAsInts() const; + + // Returns the maximum const_ratio() across all input ports, 0.0 if no input ports + double maxInputConstRatio() const; + void sort(); void check(); void fixup_parameters(bool set_a_signed = false, bool set_b_signed = false); diff --git a/passes/silimate/Makefile.inc b/passes/silimate/Makefile.inc index 4f480ba05..f57fcab15 100644 --- a/passes/silimate/Makefile.inc +++ b/passes/silimate/Makefile.inc @@ -10,6 +10,7 @@ OBJS += passes/silimate/mux_push.o OBJS += passes/silimate/obs_clean.o OBJS += passes/silimate/segv.o OBJS += passes/silimate/reg_rename.o +OBJS += passes/silimate/infer_ce.o OBJS += passes/silimate/splitfanout.o OBJS += passes/silimate/splitlarge.o OBJS += passes/silimate/splitnetlist.o diff --git a/passes/silimate/infer_ce.cc b/passes/silimate/infer_ce.cc new file mode 100644 index 000000000..04034cf99 --- /dev/null +++ b/passes/silimate/infer_ce.cc @@ -0,0 +1,551 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2024 Silimate Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "kernel/yosys.h" +#include "kernel/sigtools.h" +#include "kernel/ff.h" +#include "kernel/satgen.h" +#include +#include + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +// Configuration +static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider +static const int DEFAULT_MIN_NET_SIZE = 10; // Min registers per clock gate + +struct InferCeWorker +{ + Module *module; + SigMap sigmap; + + // Configuration + int max_cover; + int min_net_size; + + // Maps output signal bits to their driver cells + dict sig_to_driver; + + // Maps cell input pins to their source signals + dict> sig_to_sinks; + + // Pre-computed list of combinational cells (for SAT import) + std::vector comb_cells; + + // Statistics + int accepted_count = 0; + int rejected_sat_count = 0; + int sat_solves = 0; + + InferCeWorker(Module *module, int max_cover, int min_net_size) + : module(module), sigmap(module), + max_cover(max_cover), min_net_size(min_net_size) + { + // Build driver and sink maps + for (auto cell : module->cells()) { + for (auto &conn : cell->connections()) { + if (cell->output(conn.first)) { + for (auto bit : sigmap(conn.second)) + if (bit.wire) + sig_to_driver[bit] = cell; + } + if (cell->input(conn.first)) { + for (auto bit : sigmap(conn.second)) + if (bit.wire) + sig_to_sinks[bit].insert(cell); + } + } + + // Collect combinational cells for SAT + if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) { + comb_cells.push_back(cell); + } + } + } + + + // Get upstream signals feeding into given signals (BFS backward) + pool getUpstreamSignals(const pool &start_signals, int limit) + { + pool visited; + std::queue worklist; + + for (auto bit : start_signals) { + worklist.push(bit); + visited.insert(bit); + } + + while (!worklist.empty() && (int)visited.size() < limit) { + SigBit bit = worklist.front(); + worklist.pop(); + + if (!sig_to_driver.count(bit)) + continue; + + Cell *driver = sig_to_driver[bit]; + + if (driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + continue; + + for (auto &conn : driver->connections()) + if (driver->input(conn.first)) + for (auto in_bit : sigmap(conn.second)) + if (in_bit.wire && !visited.count(in_bit)) { + visited.insert(in_bit); + worklist.push(in_bit); + } + } + return visited; + } + + // Get cells in the transitive fanin cone of given signals (for SAT import) + // This is much faster than importing ALL cells + pool getConeOfLogic(SigSpec sig) + { + pool cone_cells; + pool visited; + std::queue worklist; + + // Start from all bits in sig + for (auto bit : sigmap(sig)) { + if (bit.wire && !visited.count(bit)) { + visited.insert(bit); + worklist.push(bit); + } + } + + // BFS backward through drivers + while (!worklist.empty()) { + SigBit bit = worklist.front(); + worklist.pop(); + + if (!sig_to_driver.count(bit)) + continue; + + Cell *driver = sig_to_driver[bit]; + + // Skip registers + if (driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + continue; + + // Add this cell to cone + if (cone_cells.count(driver)) + continue; // Already processed + cone_cells.insert(driver); + + // Add inputs of driver to worklist + for (auto &conn : driver->connections()) { + if (driver->input(conn.first)) { + for (auto in_bit : sigmap(conn.second)) { + if (in_bit.wire && !visited.count(in_bit)) { + visited.insert(in_bit); + worklist.push(in_bit); + } + } + } + } + } + + return cone_cells; + } + + // Check if OR/AND of signals forms a valid gating condition using SAT + // Uses a PRE-CREATED SAT solver (passed in) to avoid recreating for each check + bool isValidGatingSetWithSolver(ezSatPtr &ez, SatGen &satgen, + const std::vector &conds, + SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + if (conds.empty()) + return false; + + sat_solves++; + + std::vector d_vec = satgen.importSigSpec(sig_d); + std::vector q_vec = satgen.importSigSpec(sig_q); + + // Build OR (for enable) or AND (for disable) of condition signals + std::vector cond_vars; + for (auto bit : conds) + cond_vars.push_back(satgen.importSigSpec(SigSpec(bit))[0]); + + int combined_cond; + if (as_enable) { + // Clock enable: OR of signals (any signal high = enable) + combined_cond = ez->expression(ezSAT::OpOr, cond_vars); + } else { + // Clock disable: AND of signals (all signals high = disable) + combined_cond = ez->expression(ezSAT::OpAnd, cond_vars); + } + + int d_ne_q = ez->vec_ne(d_vec, q_vec); + + // Safe gating: when gating is active (enable=0 or disable=1), D must equal Q + int gating_active = as_enable ? ez->NOT(combined_cond) : combined_cond; + int query = ez->AND(gating_active, d_ne_q); + + std::vector assumptions = {query}; + std::vector dummy_exprs; + std::vector dummy_vals; + + bool is_valid = !ez->solve(dummy_exprs, dummy_vals, assumptions); + if (!is_valid) + rejected_sat_count++; + return is_valid; + } + + // Wrapper that creates a fresh SAT solver (used for standalone checks) + bool isValidGatingSet(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + if (conds.empty()) + return false; + + pool cone = getConeOfLogic(sig_d); + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + for (auto cell : cone) + satgen.importCell(cell); + + return isValidGatingSetWithSolver(ez, satgen, conds, sig_d, sig_q, as_enable); + } + + // Binary search to minimize the gating condition set + // Tries to remove half of the signals at a time + // Uses pre-created SAT solver to avoid recreating for each check + void minimizeGatingConditionWithSolver( + ezSatPtr &ez, SatGen &satgen, + std::vector &good_conds, + std::vector::iterator begin, + std::vector::iterator end, + SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + int half_len = (end - begin) / 2; + + if (half_len == 0) + return; + + auto mid = begin + half_len; + + // Try removing [mid, end) from the condition + std::vector test_conds; + test_conds.insert(test_conds.end(), good_conds.begin(), begin); + test_conds.insert(test_conds.end(), begin, mid); + test_conds.insert(test_conds.end(), end, good_conds.end()); + + if (!test_conds.empty() && isValidGatingSetWithSolver(ez, satgen, test_conds, sig_d, sig_q, as_enable)) { + // Can remove [mid, end) + good_conds.erase(mid, end); + // Recurse on remaining half + minimizeGatingConditionWithSolver(ez, satgen, good_conds, begin, begin + half_len, sig_d, sig_q, as_enable); + } else { + // Cannot remove all of [mid, end), try to minimize each half + if (end - mid > 1) + minimizeGatingConditionWithSolver(ez, satgen, good_conds, mid, end, sig_d, sig_q, as_enable); + minimizeGatingConditionWithSolver(ez, satgen, good_conds, begin, mid, sig_d, sig_q, as_enable); + } + } + + // Wrapper for standalone use (creates fresh solver) + void minimizeGatingCondition( + std::vector &good_conds, + std::vector::iterator begin, + std::vector::iterator end, + SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + pool cone = getConeOfLogic(sig_d); + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + for (auto cell : cone) + satgen.importCell(cell); + + minimizeGatingConditionWithSolver(ez, satgen, good_conds, begin, end, sig_d, sig_q, as_enable); + } + + // Find gating condition for a register + // Returns: {gating_conds, is_enable, cone_size} + std::tuple, bool, int> findGatingCondition(Cell *reg) + { + FfData ff(nullptr, reg); + + pool d_inputs; + for (auto bit : sigmap(ff.sig_d)) + if (bit.wire) + d_inputs.insert(bit); + + pool upstream = getUpstreamSignals(d_inputs, max_cover); + + std::vector candidates; + for (auto bit : upstream) + candidates.push_back(bit); + + if ((int)candidates.size() > max_cover) + candidates.resize(max_cover); + + if (candidates.empty()) + return {{}, false, 0}; + + // Create SAT solver ONCE for this register + pool cone = getConeOfLogic(ff.sig_d); + int cone_size = (int)cone.size(); + + // Skip registers with trivial cones (not worth gating) or huge cones (too expensive) + const int MIN_CONE_SIZE = 2; + const int MAX_CONE_SIZE = 500; + if (cone_size < MIN_CONE_SIZE || cone_size > MAX_CONE_SIZE) + return {{}, false, cone_size}; + + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + for (auto cell : cone) + satgen.importCell(cell); + + // Try as clock enable first + if (isValidGatingSetWithSolver(ez, satgen, candidates, ff.sig_d, ff.sig_q, true)) { + minimizeGatingConditionWithSolver(ez, satgen, candidates, candidates.begin(), candidates.end(), + ff.sig_d, ff.sig_q, true); + if (!candidates.empty()) + return {candidates, true, cone_size}; + } + + // Try as clock disable + if (isValidGatingSetWithSolver(ez, satgen, candidates, ff.sig_d, ff.sig_q, false)) { + minimizeGatingConditionWithSolver(ez, satgen, candidates, candidates.begin(), candidates.end(), + ff.sig_d, ff.sig_q, false); + if (!candidates.empty()) + return {candidates, false, cone_size}; + } + + return {{}, false, cone_size}; + } + + // Insert clock gating logic for a group of registers + void insertClockGate(const std::vector ®s, + const std::vector &gating_conds, + bool as_enable) + { + if (regs.empty() || gating_conds.empty()) + return; + + // Build gating condition: OR for enable, AND for disable + SigBit gating_signal; + if (gating_conds.size() == 1) { + gating_signal = gating_conds[0]; + } else { + SigSpec cond_inputs; + for (auto bit : gating_conds) + cond_inputs.append(bit); + + Wire *cond_wire = module->addWire(NEW_ID); + if (as_enable) + module->addReduceOr(NEW_ID, cond_inputs, cond_wire); + else + module->addReduceAnd(NEW_ID, cond_inputs, cond_wire); + gating_signal = cond_wire; + } + + // If disable signal, invert to get enable + if (!as_enable) { + Wire *inv_wire = module->addWire(NEW_ID); + module->addNot(NEW_ID, gating_signal, inv_wire); + gating_signal = inv_wire; + } + + // Add CE to each register + for (auto reg : regs) { + FfData ff(nullptr, reg); + + if (ff.has_ce) { + Wire *combined_ce = module->addWire(NEW_ID); + module->addAnd(NEW_ID, ff.sig_ce, gating_signal, combined_ce); + ff.sig_ce = combined_ce; + } else { + ff.has_ce = true; + ff.sig_ce = gating_signal; + ff.pol_ce = true; + } + + ff.emit(); + } + } + + // Check if register can be added to an existing gate + bool canReuseGate(const std::vector &existing_conds, Cell *reg, bool is_enable) + { + FfData ff(nullptr, reg); + return isValidGatingSet(existing_conds, ff.sig_d, ff.sig_q, is_enable); + } + + // Main processing function + void run() + { + std::vector registers; + for (auto cell : module->cells()) { + if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + continue; + + FfData ff(nullptr, cell); + if (ff.has_ce || !ff.has_clk) + continue; + + registers.push_back(cell); + } + + log("Processing module %s: %zu cells, %zu flip-flops, %zu wires\n", + log_id(module), module->cells().size(), registers.size(), module->wires().size()); + + if (registers.empty()) + return; + + struct AcceptedGate { + std::vector conds; + pool cond_set; + std::vector regs; + bool is_enable; + }; + std::vector accepted_gates; + dict> net_to_accepted; + + int reg_idx = 0; + for (auto reg : registers) { + auto [gating_conds, is_enable, cone_size] = findGatingCondition(reg); + log("Processing register %d/%zu: %s (cone=%d)\n", ++reg_idx, registers.size(), log_id(reg), cone_size); + + if (gating_conds.empty()) + continue; + + pool cond_set; + for (auto bit : gating_conds) + cond_set.insert(bit); + + // Find candidate gates sharing any net + pool candidate_gates; + for (auto bit : gating_conds) + if (net_to_accepted.count(bit)) + for (auto idx : net_to_accepted[bit]) + candidate_gates.insert(idx); + + // HEURISTIC: Only check limited gates for reuse + const int MAX_REUSE_CHECKS = 20; + + bool found_match = false; + int checked = 0; + for (auto idx : candidate_gates) { + if (checked >= MAX_REUSE_CHECKS) + break; + + auto &gate = accepted_gates[idx]; + if (gate.is_enable != is_enable) + continue; + + checked++; + if (canReuseGate(gate.conds, reg, is_enable)) { + gate.regs.push_back(reg); + found_match = true; + break; + } + } + + if (!found_match) { + size_t new_idx = accepted_gates.size(); + accepted_gates.push_back({gating_conds, cond_set, {reg}, is_enable}); + for (auto bit : gating_conds) + net_to_accepted[bit].push_back(new_idx); + } + } + + // Insert clock gates for groups meeting threshold + for (auto &gate : accepted_gates) { + if ((int)gate.regs.size() >= min_net_size) { + insertClockGate(gate.regs, gate.conds, gate.is_enable); + accepted_count += gate.regs.size(); + } + } + } +}; + +struct InferCePass : public Pass { + InferCePass() : Pass("infer_ce", "Infer clock enable signals from conditional logic") { } + + void help() override + { + log("\n"); + log(" infer_ce [options] [selection]\n"); + log("\n"); + log("This command infers clock enable (CE) signals from conditional logic.\n"); + log("It analyzes registers and uses SAT solving to find signals that can\n"); + log("serve as clock enable conditions (when the signal is low, D==Q).\n"); + log("\n"); + log("Algorithm based on:\n"); + log(" - \"Automatic Synthesis of Clock Gating Logic\" by Aaron P. Hurst\n"); + log(" - OpenROAD's cgt module implementation\n"); + log("\n"); + log(" -max_cover \n"); + log(" maximum number of candidate signals to consider per register\n"); + log(" (default: %d)\n", DEFAULT_MAX_COVER); + log("\n"); + log(" -min_net_size \n"); + log(" minimum number of registers that must share a gating condition\n"); + log(" for a clock gate to be inserted (default: %d)\n", DEFAULT_MIN_NET_SIZE); + log("\n"); + } + + void execute(std::vector args, RTLIL::Design *design) override + { + log_header(design, "Executing INFER_CE pass.\n"); + + int max_cover = DEFAULT_MAX_COVER; + int min_net_size = DEFAULT_MIN_NET_SIZE; + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) { + if (args[argidx] == "-max_cover" && argidx+1 < args.size()) { + max_cover = std::stoi(args[++argidx]); + continue; + } + if (args[argidx] == "-min_net_size" && argidx+1 < args.size()) { + min_net_size = std::stoi(args[++argidx]); + continue; + } + break; + } + extra_args(args, argidx, design); + + int total_gates = 0; + for (auto module : design->selected_modules()) { + InferCeWorker worker(module, max_cover, min_net_size); + worker.run(); + total_gates += worker.accepted_count; + } + + log("Inserted clock enables for %d registers.\n", total_gates); + } +} InferCePass; + +PRIVATE_NAMESPACE_END diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc index 6a5f95d5b..50d557c08 100644 --- a/passes/techmap/clockgate.cc +++ b/passes/techmap/clockgate.cc @@ -224,6 +224,8 @@ struct ClockgatePass : public Pass { log(" Only transform sets of at least eligible FFs.\n"); log(" -max_src \n"); log(" Maximum number of src attributes to copy to ICG cells (default: unlimited).\n"); + log(" -word\n"); + log(" Use word-level $not cell for CE inversion instead of gate-level $_NOT_.\n"); log(" \n"); } @@ -279,6 +281,7 @@ struct ClockgatePass : public Pass { std::vector dont_use_cells; int min_net_size = 0; int max_src = -1; + bool word_level = false; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { @@ -314,6 +317,10 @@ struct ClockgatePass : public Pass { max_src = atoi(args[++argidx].c_str()); continue; } + if (args[argidx] == "-word") { + word_level = true; + continue; + } break; } @@ -395,9 +402,19 @@ struct ClockgatePass : public Pass { // Fix CE polarity if needed if (!clk.pol_ce) { Wire *ce_not_wire = module->addWire(NEW_ID2_SUFFIX("ce_not_w")); - Cell *ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($_NOT_)); - ce_not->setPort(ID::A, clk.ce_bit); - ce_not->setPort(ID::Y, ce_not_wire); + Cell *ce_not; + if (word_level) { + ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($not)); + ce_not->setParam(ID::A_SIGNED, 0); + ce_not->setParam(ID::A_WIDTH, 1); + ce_not->setParam(ID::Y_WIDTH, 1); + ce_not->setPort(ID::A, clk.ce_bit); + ce_not->setPort(ID::Y, ce_not_wire); + } else { + ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($_NOT_)); + ce_not->setPort(ID::A, clk.ce_bit); + ce_not->setPort(ID::Y, ce_not_wire); + } gclk.ce_not_cell = ce_not; icg->setPort(matching_icg_desc->ce_pin, ce_not_wire); } diff --git a/tests/silimate/infer_ce.ys b/tests/silimate/infer_ce.ys new file mode 100644 index 000000000..68b6bf628 --- /dev/null +++ b/tests/silimate/infer_ce.ys @@ -0,0 +1,217 @@ +# ============================================================================= +# Test 1: Basic enable inference with non-trivial cone +# infer_ce needs cone_size >= 2, so we add combinational logic before the mux. +# We use proc; opt_expr; opt_clean (NOT full opt) to avoid opt_dff stealing +# the mux-feedback pattern before infer_ce gets a chance. +# ============================================================================= +log -header "Basic enable inference" +log -push +design -reset +read_verilog <