From b4ef420c3f2eb883b9c1ed3cabd9f5b9924235d2 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 10 Feb 2026 14:02:15 -0800 Subject: [PATCH 01/55] Added inital SAT based clock gating file --- passes/silimate/sat_clockgate.cc | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 passes/silimate/sat_clockgate.cc diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc new file mode 100644 index 000000000..e69de29bb From b53acb0ff05c88eafcded46512dae15c97699069 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 10 Feb 2026 14:33:17 -0800 Subject: [PATCH 02/55] Added pass in Makefile.inc --- passes/silimate/Makefile.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/passes/silimate/Makefile.inc b/passes/silimate/Makefile.inc index 60e03c5a8..dfb1a4ffe 100644 --- a/passes/silimate/Makefile.inc +++ b/passes/silimate/Makefile.inc @@ -10,6 +10,7 @@ OBJS += passes/silimate/mux_push.o OBJS += passes/silimate/obs_clean.o OBJS += passes/silimate/segv.o OBJS += passes/silimate/reg_rename.o +OBJS += passes/silimate/sat_clockgate.o OBJS += passes/silimate/splitfanout.o OBJS += passes/silimate/splitlarge.o OBJS += passes/silimate/splitnetlist.o From 6ad01fa850283fdcd9daa3612bd5fb32eb8e96d1 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 10 Feb 2026 14:33:37 -0800 Subject: [PATCH 03/55] Added initial pass structure --- passes/silimate/sat_clockgate.cc | 56 ++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index e69de29bb..49e03e3a1 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -0,0 +1,56 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2024 Silimate Inc. 
+ * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/yosys.h" +#include "kernel/sigtools.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +struct SatClockgatePass : public Pass { + SatClockgatePass() : Pass("sat_clockgate", "SAT-based clock gating analysis") { } + + void help() override + { + log("\n"); + log(" sat_clockgate [selection]\n"); + log("\n"); + log("This command performs SAT-based clock gating analysis.\n"); + log("\n"); + } + + void execute(std::vector args, RTLIL::Design *design) override + { + log_header(design, "Executing SAT_CLOCKGATE pass.\n"); + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) { + // Parse options here + break; + } + extra_args(args, argidx, design); + + for (auto module : design->selected_modules()) { + log("Processing module %s...\n", log_id(module)); + // TODO: Implement SAT-based clock gating logic + } + } +} SatClockgatePass; + +PRIVATE_NAMESPACE_END From e4f69cba30da7969b12cb9af17fbbdb775ad41f4 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 09:53:03 -0800 Subject: [PATCH 04/55] Initialized notes --- notes.txt | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 notes.txt diff --git a/notes.txt b/notes.txt new file mode 100644 
index 000000000..24311adbe --- /dev/null +++ b/notes.txt @@ -0,0 +1,42 @@ +Clock gating + + + +need to determine when the D == Q + +Make sure that the flip flop has a clock but not ce: !ff.has_ce && ff.has_clk +check if + +USE sat to determine when the Q is the same as D + +Q is the feedback, and then there's also the D which does in and select for the mux +becomes the enable + + +D = f(Q, other_inputs) + +!(D ^ f(Q, other_inputs)) + + +Look somewhat like this: + Q is always going to be D + But D itself has an enable built into it + + + So what D really is is: + Mux (D_r, Q, en) where en is the enable signal + + Our goal is to find en + + Equation: + Q_next = (en ∧ D_r) ∨ (¬en ∧ Q) + + Equality question: + !((Q_next) ^ ((en ∧ D_r) ∨ (¬en ∧ Q))) + + Universal Quantization + FA(Q_next, D_r, Q) !((Q_next) ^ ((en ∧ D_r) ∨ (¬en ∧ Q))) + + SAT Equation + FA(Q_next, D_r, Q) !((Q_next) ^ ((en ∧ D_r) ∨ (¬en ∧ Q))) + From 5aeb19fb6678522997707bc3a8eef673f59881d5 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 10:55:43 -0800 Subject: [PATCH 05/55] Added initial version 1 pseudocode --- notes.txt | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/notes.txt b/notes.txt index 24311adbe..64a59ef57 100644 --- a/notes.txt +++ b/notes.txt @@ -40,3 +40,65 @@ Look somewhat like this: SAT Equation FA(Q_next, D_r, Q) !((Q_next) ^ ((en ∧ D_r) ∨ (¬en ∧ Q))) +Now the issue is to determine what the en is and what the D_r in. In order +to continue using this approach, a way of differentiating that would be needed. + +Need a simpler approach which just considers all of the inputs and then performs +SAT. Here's the approach: + Consider the flip flops which go into D. Consider all of those inputs and seen +if D != Q is UNSAT. Meaning for that set of inputs into D, D is going to be Q. +Then try to minimize this set (optimization phase). 
+ + +1) Find the input's into D and determine if there's any level at which you can +determine if (Exists(x1, x2, x3, ..., xn) | D != Q) == UNSAT (menaing for that +combination of inputs of x1, x2, x3, ..., xn, D is always = Q. + +Algorithm version one: +This version doesn't take into accound the threshold (doesn't try and insert) +the same CE into multiple different clocks, it also doesn't do any pre SAT simulation +optimization. Fruthermore, it also doesn't try and find the minimal set, just a set. + +// determines if the input set serves as an enable +input_set_is_en(input_set, D, Q): + return (Exists(x1, x2, x3, ..., xn) | D != Q) == UNSAT + +// determines the input set +determine_en_rec (input_set&, D, Q): + if (count(input_set) > N): + return false: + if input_set_is_en(input_set, D, Q): + // for now this returns. Later, when optimizing, this will try and find a smaller subset within the set + return true + else: + // Detemine set of inputs + input_set_new = Do a BFS on the Data pin in the clock and add those pins to set + determine_en_rec(input_set_new, D, Q) + +// create the CE based on the input set +// adds the CE into the clock +create_ce_logic(input_set, D, Q, ffData): + // TODO: fill in this pseudocode please + +set_ff_ces(design): + for module in design: + for cell in module: + if cell is_builtin_ff: + ffFata ff = cell; + if (!ff.has_ce && ff.has_clk && ff.has_d && ff.has_Q): + input_set = {} + if (determine_en_rec(input_set, D, Q)): + create_ce_logic(input_set, D, Q, ffData) + + // insert the ICG gates based on the new CEs inserted + pass::call("clockgate", design); + + + + + + + + + + From b4cd82bacf231c58241beff9bed232935f7a9444 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 10:56:07 -0800 Subject: [PATCH 06/55] Added initial printing of the clocks with dump_flipflops_to_file --- passes/silimate/sat_clockgate.cc | 52 +++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git 
a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 49e03e3a1..640f3caa9 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -19,19 +19,50 @@ #include "kernel/yosys.h" #include "kernel/sigtools.h" +#include "kernel/ff.h" +#include USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN +void dump_flipflops_to_file(RTLIL::Design *design, const std::string &filename) +{ + std::ofstream outfile(filename); + if (!outfile.is_open()) { + log_error("Cannot open file %s for writing\n", filename.c_str()); + return; + } + + for (auto module : design->selected_modules()) { + outfile << "Module: " << log_id(module) << "\n"; + log("Module: %s\n", log_id(module)); + + for (auto cell : module->cells()) { + if (cell->is_builtin_ff()) { + outfile << " FF: " << log_id(cell) << " (type: " << log_id(cell->type) << ")\n"; + log(" FF: %s (type: %s)\n", log_id(cell), log_id(cell->type)); + } + } + outfile << "\n"; + } + + outfile.close(); + log("Wrote flip-flop list to %s\n", filename.c_str()); +} + struct SatClockgatePass : public Pass { - SatClockgatePass() : Pass("sat_clockgate", "SAT-based clock gating analysis") { } + SatClockgatePass() : Pass("sat_clockgate", "SAT-based inferred clock gating") { } void help() override { log("\n"); - log(" sat_clockgate [selection]\n"); + log(" sat_clockgate [options] [selection]\n"); log("\n"); - log("This command performs SAT-based clock gating analysis.\n"); + log("This command performs SAT-based inferred clock gating insertion.\n"); + log("\n"); + log(" -threshold \n"); + log(" minimum number of clock cycles that must match for clock gating\n"); + log(" to be inserted (default: 1)\n"); log("\n"); } @@ -39,16 +70,29 @@ struct SatClockgatePass : public Pass { { log_header(design, "Executing SAT_CLOCKGATE pass.\n"); + int threshold = 1; + size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { - // Parse options here + if (args[argidx] == "-threshold" && argidx+1 < args.size()) { + 
threshold = std::stoi(args[++argidx]); + continue; + } break; } extra_args(args, argidx, design); + log("Using threshold: %d\n", threshold); + + // Dump all flip-flops to file + dump_flipflops_to_file(design, "flip_flops.txt"); + for (auto module : design->selected_modules()) { log("Processing module %s...\n", log_id(module)); // TODO: Implement SAT-based clock gating logic + + // Example: calling SAT pass + // Pass::call(design, stringf("sat -verify -prove 1 %s", log_id(module))); } } } SatClockgatePass; From 9e544aa95cf8e124f8eb5feb67f9dc7680a4a7c5 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 11:01:49 -0800 Subject: [PATCH 07/55] Added pseudocode for create_ce_logic --- notes.txt | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/notes.txt b/notes.txt index 64a59ef57..135924ad6 100644 --- a/notes.txt +++ b/notes.txt @@ -78,7 +78,21 @@ determine_en_rec (input_set&, D, Q): // create the CE based on the input set // adds the CE into the clock create_ce_logic(input_set, D, Q, ffData): - // TODO: fill in this pseudocode please + // CE = OR(all signals in input_set) + // When any input is 1 → CE=1 (update register) + // When all inputs are 0 → CE=0 (hold, since SAT proved D==Q) + + if input_set.size() == 1: + ce_signal = input_set[0] + else: + ce_wire = module.addWire(NEW_ID) + module.addReduceOr(NEW_ID, input_set, ce_wire) + ce_signal = ce_wire + + ffData.has_ce = true + ffData.sig_ce = ce_signal + ffData.pol_ce = true // active high + ffData.emit() // rebuild the FF with CE set_ff_ces(design): for module in design: From 5b384511f297ac64e971c0a4a1cec4fe7adde20d Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 11:02:15 -0800 Subject: [PATCH 08/55] Added initial SatClockgateWorker --- passes/silimate/sat_clockgate.cc | 274 ++++++++++++++++++++++++++++++- 1 file changed, 268 insertions(+), 6 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 
640f3caa9..296a54be8 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -20,11 +20,245 @@ #include "kernel/yosys.h" #include "kernel/sigtools.h" #include "kernel/ff.h" +#include "kernel/satgen.h" #include USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN +// Maximum depth for BFS exploration of input cone +static const int MAX_INPUT_DEPTH = 10; + +struct SatClockgateWorker +{ + Module *module; + SigMap sigmap; + + // Maps output signal bits to their driver cells + dict sig_to_driver; + + SatClockgateWorker(Module *module) : module(module), sigmap(module) + { + // Build driver map: for each signal bit, find which cell drives it + for (auto cell : module->cells()) { + for (auto &conn : cell->connections()) { + if (cell->output(conn.first)) { + for (auto bit : sigmap(conn.second)) + sig_to_driver[bit] = cell; + } + } + } + } + + // Get the set of input signals feeding into a given signal (one level back) + pool get_input_signals(SigBit bit) + { + pool inputs; + bit = sigmap(bit); + + if (!sig_to_driver.count(bit)) + return inputs; // Primary input or constant + + Cell *driver = sig_to_driver[bit]; + for (auto &conn : driver->connections()) { + if (driver->input(conn.first)) { + for (auto input_bit : sigmap(conn.second)) { + if (input_bit.wire != nullptr) + inputs.insert(input_bit); + } + } + } + return inputs; + } + + // BFS to collect input cone up to a certain depth + pool get_input_cone(SigSpec sig, int max_depth) + { + pool visited; + pool frontier; + + for (auto bit : sigmap(sig)) + if (bit.wire != nullptr) + frontier.insert(bit); + + for (int depth = 0; depth < max_depth && !frontier.empty(); depth++) { + pool next_frontier; + for (auto bit : frontier) { + if (visited.count(bit)) + continue; + visited.insert(bit); + + for (auto input_bit : get_input_signals(bit)) { + if (!visited.count(input_bit)) + next_frontier.insert(input_bit); + } + } + frontier = next_frontier; + } + + return visited; + } + + // Check if fixing the 
input_set to specific values makes D == Q always true + // Returns true if input_set can serve as an enable (when all bits are 0, D == Q) + bool input_set_is_enable(const pool &input_set, SigSpec sig_d, SigSpec sig_q) + { + if (input_set.empty()) + return false; + + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + + // Import the logic cone for D + for (auto cell : module->cells()) + satgen.importCell(cell); + + // Create SAT variables for D and Q + std::vector d_vec = satgen.importSigSpec(sig_d); + std::vector q_vec = satgen.importSigSpec(sig_q); + + // Assert: all signals in input_set are 0 + for (auto bit : input_set) { + std::vector bit_vec = satgen.importSigSpec(SigSpec(bit)); + if (!bit_vec.empty()) + ez->assume(ez->NOT(bit_vec[0])); + } + + // Assert: D != Q (we want this to be UNSAT) + std::vector neq_bits; + for (size_t i = 0; i < d_vec.size() && i < q_vec.size(); i++) { + neq_bits.push_back(ez->XOR(d_vec[i], q_vec[i])); + } + ez->assume(ez->expression(ezSAT::OpOr, neq_bits)); + + // If UNSAT, then input_set=0 guarantees D == Q + bool sat = ez->solve(); + return !sat; + } + + // Recursively determine the enable input set via BFS expansion + bool determine_enable_recursive(pool &input_set, SigSpec sig_d, SigSpec sig_q, int depth) + { + if (depth > MAX_INPUT_DEPTH) { + log_debug(" Max depth reached, giving up\n"); + return false; + } + + // Check if current input set works as enable + if (input_set_is_enable(input_set, sig_d, sig_q)) { + log_debug(" Found enable at depth %d with %zu signals\n", depth, input_set.size()); + return true; + } + + // Expand input set via BFS (one more level) + pool new_inputs; + for (auto bit : input_set) { + for (auto input_bit : get_input_signals(bit)) { + if (!input_set.count(input_bit)) + new_inputs.insert(input_bit); + } + } + + if (new_inputs.empty()) { + log_debug(" No more inputs to explore at depth %d\n", depth); + return false; + } + + // Add new inputs and recurse + for (auto bit : new_inputs) + 
input_set.insert(bit); + + return determine_enable_recursive(input_set, sig_d, sig_q, depth + 1); + } + + // Create CE logic based on the input set and modify the FF + // The enable condition is: when all input_set bits are 0, D == Q (hold) + // So CE should be: OR of all input_set bits (active high: CE=1 means update) + void create_ce_logic(const pool &input_set, FfData &ff) + { + if (input_set.empty()) + return; + + log(" Creating CE from %zu input signals\n", input_set.size()); + + // Build CE as OR of all input signals + // CE = 1 when any input is 1 (meaning: update the register) + // CE = 0 when all inputs are 0 (meaning: hold, since D == Q) + SigSpec ce_inputs; + for (auto bit : input_set) + ce_inputs.append(bit); + + SigBit ce_signal; + if (GetSize(ce_inputs) == 1) { + ce_signal = ce_inputs[0]; + } else { + // Create OR gate: CE = |input_set + Wire *ce_wire = module->addWire(NEW_ID); + module->addReduceOr(NEW_ID, ce_inputs, ce_wire); + ce_signal = ce_wire; + } + + // Set the CE on the FF + ff.has_ce = true; + ff.sig_ce = ce_signal; + ff.pol_ce = true; // Active high + + log(" CE signal: %s\n", log_signal(ce_signal)); + } + + // Process a single FF to find and insert CE + bool process_ff(Cell *cell) + { + FfData ff(nullptr, cell); + + // Skip if already has CE, or doesn't have clock/data + if (ff.has_ce) { + log_debug(" Skipping %s: already has CE\n", log_id(cell)); + return false; + } + if (!ff.has_clk) { + log_debug(" Skipping %s: no clock\n", log_id(cell)); + return false; + } + if (GetSize(ff.sig_d) == 0 || GetSize(ff.sig_q) == 0) { + log_debug(" Skipping %s: no D or Q\n", log_id(cell)); + return false; + } + + log("Processing FF: %s\n", log_id(cell)); + + // Start with direct inputs of D + pool input_set = get_input_cone(ff.sig_d, 1); + + // Remove Q from input set (it's the feedback, not a control signal) + for (auto bit : sigmap(ff.sig_q)) + input_set.erase(bit); + + if (input_set.empty()) { + log_debug(" No inputs to D (besides Q)\n"); + return 
false; + } + + log_debug(" Initial input set has %zu signals\n", input_set.size()); + + // Try to find enable + if (determine_enable_recursive(input_set, ff.sig_d, ff.sig_q, 1)) { + // Remove Q bits again (in case BFS added them back) + for (auto bit : sigmap(ff.sig_q)) + input_set.erase(bit); + + create_ce_logic(input_set, ff); + + // Emit the modified FF + ff.emit(); + return true; + } + + log_debug(" Could not find enable for %s\n", log_id(cell)); + return false; + } +}; + void dump_flipflops_to_file(RTLIL::Design *design, const std::string &filename) { std::ofstream outfile(filename); @@ -59,16 +293,19 @@ struct SatClockgatePass : public Pass { log(" sat_clockgate [options] [selection]\n"); log("\n"); log("This command performs SAT-based inferred clock gating insertion.\n"); + log("It analyzes flip-flops without explicit clock enables and uses SAT\n"); + log("to find input conditions under which D == Q (register holds value).\n"); + log("These conditions become the inferred clock enable.\n"); log("\n"); log(" -threshold \n"); - log(" minimum number of clock cycles that must match for clock gating\n"); + log(" minimum number of FFs that must share an enable for clock gating\n"); log(" to be inserted (default: 1)\n"); log("\n"); } void execute(std::vector args, RTLIL::Design *design) override { - log_header(design, "Executing SAT_CLOCKGATE pass.\n"); + log_header(design, "Executing SAT_CLOCKGATE pass (SAT-based inferred clock gating).\n"); int threshold = 1; @@ -84,16 +321,41 @@ struct SatClockgatePass : public Pass { log("Using threshold: %d\n", threshold); - // Dump all flip-flops to file + // Dump all flip-flops to file (debug) dump_flipflops_to_file(design, "flip_flops.txt"); + int total_converted = 0; + for (auto module : design->selected_modules()) { log("Processing module %s...\n", log_id(module)); - // TODO: Implement SAT-based clock gating logic - // Example: calling SAT pass - // Pass::call(design, stringf("sat -verify -prove 1 %s", log_id(module))); + 
SatClockgateWorker worker(module); + + // Collect FFs to process (can't modify while iterating) + std::vector ffs_to_process; + for (auto cell : module->cells()) { + if (cell->is_builtin_ff()) + ffs_to_process.push_back(cell); + } + + int converted = 0; + for (auto cell : ffs_to_process) { + if (worker.process_ff(cell)) + converted++; + } + + if (converted > 0) { + log("Converted %d FFs in module %s\n", converted, log_id(module)); + total_converted += converted; + } } + + log("Total FFs with inferred CE: %d\n", total_converted); + + // TODO: Call clockgate pass to convert CEs to ICG cells + // if (total_converted >= threshold) { + // Pass::call(design, "clockgate"); + // } } } SatClockgatePass; From dd3f2e370cb0c64ad02282d4db6d99410a9c3e57 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 12:31:13 -0800 Subject: [PATCH 09/55] Fixed naming for bfs_find_potential_enable_inputs --- passes/silimate/sat_clockgate.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 296a54be8..7d2f1b70a 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -71,8 +71,8 @@ struct SatClockgateWorker return inputs; } - // BFS to collect input cone up to a certain depth - pool get_input_cone(SigSpec sig, int max_depth) + // BFS to find potential enable signals up to a certain depth + pool bfs_find_potential_enable_inputs(SigSpec sig, int max_depth) { pool visited; pool frontier; @@ -228,7 +228,7 @@ struct SatClockgateWorker log("Processing FF: %s\n", log_id(cell)); // Start with direct inputs of D - pool input_set = get_input_cone(ff.sig_d, 1); + pool input_set = bfs_find_potential_enable_inputs(ff.sig_d, 1); // Remove Q from input set (it's the feedback, not a control signal) for (auto bit : sigmap(ff.sig_q)) From d2300b2a9f880d5d8122c3c0f51264efdc12283c Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 14:19:29 -0800 Subject: 
[PATCH 10/55] Added nodes for the MITER --- notes.txt | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/notes.txt b/notes.txt index 135924ad6..6cc6d6f0f 100644 --- a/notes.txt +++ b/notes.txt @@ -109,10 +109,48 @@ set_ff_ces(design): +(input_set=0) AND (D≠Q) == UNSAT + -> if one of them is 1 and D = Q +Scenario What happens OK? +CE=0, D==Q Gate clock, hold value Correct (power saved) +CE=0, D≠Q Gate clock, lose data BUG +CE=1, D==Q Clock passes, write same value Correct (wasted power) +CE=1, D≠Q Clock passes, update register Correct +(input_set=0) AND (D≠Q) == UNSAT -> +Existential Quantization of input_set such that +To ensure that if (CE=0) then D==Q: + ((combination of inputs) AND (D≠Q)) = UNSAT + - This is functionally accurate but there might be cases when CE=1 but D==Q which + is a waste of power +To ensure that if (CE=1) then D!=Q: + ((combination of inputs) AND (D==Q)) = UNSAT + - This alone is risky since there might be combinations such that CE=0 but D!=Q + which is incorrect behaviour + +Need to ensure CE=1 <-> D!=Q: + BOTH conditions must hold: + 1) ((CE=0) AND (D≠Q)) = UNSAT // CE=0 → D==Q (safe to gate) + 2) ((CE=1) AND (D==Q)) = UNSAT // CE=1 → D≠Q (no wasted power) + + Combined: CE must be the exact boolean function where CE = (D ≠ Q) + For MUX pattern D = sel ? new_val : Q, CE = sel satisfies both. 
+In order words, we need +Exist a combination of inputs such that UNSAT((combination of inputs) ^ (D==Q)) +Which means +((!COI) && (D!=Q)) && ((COI) && (D==Q)) + +Exit a set of inputs such that COI <-> D != Q +(COI && (D != Q)) && (!COI && (D == Q)) + + + +The final equation for the UNSAT is: + ((D != Q) != (COI)) -> UNSAT => COI = (D != Q) + Exists COI such that ((D ^ Q) ^ (COI)) -> UNSAT \ No newline at end of file From da8febc3b736bb3f02ece0f623c45043498f7f85 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 14:22:26 -0800 Subject: [PATCH 11/55] Added to notes.txt --- notes.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/notes.txt b/notes.txt index 6cc6d6f0f..ab8f88381 100644 --- a/notes.txt +++ b/notes.txt @@ -58,6 +58,9 @@ Algorithm version one: This version doesn't take into accound the threshold (doesn't try and insert) the same CE into multiple different clocks, it also doesn't do any pre SAT simulation optimization. Fruthermore, it also doesn't try and find the minimal set, just a set. +Lastly, this also does a form of safe clock gating which means that: + input_set == 0 -> D == Q +Which means there's cases when input_set may be 1 but D != Q // determines if the input set serves as an enable input_set_is_en(input_set, D, Q): @@ -153,4 +156,8 @@ Exit a set of inputs such that COI <-> D != Q The final equation for the UNSAT is: ((D != Q) != (COI)) -> UNSAT => COI = (D != Q) - Exists COI such that ((D ^ Q) ^ (COI)) -> UNSAT \ No newline at end of file + Exists COI such that ((D ^ Q) ^ (COI)) -> UNSAT + + +So in the new pseudo code algorithm, once the pool is populated (the input_set), +we create this miter with the exestential quantization with the input_set. 
From 143a860673e05c3e7d2d3d49f2cd1db322cdc357 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 14:39:47 -0800 Subject: [PATCH 12/55] Added future TODOs --- notes.txt | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/notes.txt b/notes.txt index ab8f88381..4f6e589c5 100644 --- a/notes.txt +++ b/notes.txt @@ -161,3 +161,31 @@ The final equation for the UNSAT is: So in the new pseudo code algorithm, once the pool is populated (the input_set), we create this miter with the exestential quantization with the input_set. + +The issue is that since the input set isn't a boolean function (is a BFS traversal), +we need to manually create a boolean function out of the input set. + +Another issue is that we must ensure that these somehow impact the D and/or the Q. This + + + + + + + + + + + + + + + + + +Future TODOs: +1) Recursively minimize the set which is actually needed +2) Add threshold (how many flops depend on the same enable signal) +3) Add different setting for type of mitter (maybe add just the !COI -> D == Q) + for a weaker clock gate (still consumes power if COI and D == Q but might be faster) +4) Deal with posede vs negedge of the clocks \ No newline at end of file From 19060eeee73f1047782bacb38aad3830d0d56e16 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 14:40:32 -0800 Subject: [PATCH 13/55] Added TODO for how to add the COI set --- notes.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/notes.txt b/notes.txt index 4f6e589c5..dabc89cf0 100644 --- a/notes.txt +++ b/notes.txt @@ -188,4 +188,6 @@ Future TODOs: 2) Add threshold (how many flops depend on the same enable signal) 3) Add different setting for type of mitter (maybe add just the !COI -> D == Q) for a weaker clock gate (still consumes power if COI and D == Q but might be faster) -4) Deal with posede vs negedge of the clocks \ No newline at end of file +4) Deal with posede vs negedge of the clocks +5) Experiment with 
different logical combinations of the COI set (rather than just + or-ing them all together) \ No newline at end of file From 4ca4392e9b135f23c556884fd4db487aedc254b7 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 14:56:46 -0800 Subject: [PATCH 14/55] Simplied recursion in sat_clockgate pass --- passes/silimate/sat_clockgate.cc | 68 +++++++++++--------------------- 1 file changed, 24 insertions(+), 44 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 7d2f1b70a..b8c61f88f 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -71,34 +71,6 @@ struct SatClockgateWorker return inputs; } - // BFS to find potential enable signals up to a certain depth - pool bfs_find_potential_enable_inputs(SigSpec sig, int max_depth) - { - pool visited; - pool frontier; - - for (auto bit : sigmap(sig)) - if (bit.wire != nullptr) - frontier.insert(bit); - - for (int depth = 0; depth < max_depth && !frontier.empty(); depth++) { - pool next_frontier; - for (auto bit : frontier) { - if (visited.count(bit)) - continue; - visited.insert(bit); - - for (auto input_bit : get_input_signals(bit)) { - if (!visited.count(input_bit)) - next_frontier.insert(input_bit); - } - } - frontier = next_frontier; - } - - return visited; - } - // Check if fixing the input_set to specific values makes D == Q always true // Returns true if input_set can serve as an enable (when all bits are 0, D == Q) bool input_set_is_enable(const pool &input_set, SigSpec sig_d, SigSpec sig_q) @@ -137,6 +109,7 @@ struct SatClockgateWorker } // Recursively determine the enable input set via BFS expansion + // Seeds initial input set from sig_d, excludes sig_q bits bool determine_enable_recursive(pool &input_set, SigSpec sig_d, SigSpec sig_q, int depth) { if (depth > MAX_INPUT_DEPTH) { @@ -144,6 +117,26 @@ struct SatClockgateWorker return false; } + // Seed initial input set from sig_d on first call + if (depth == 1 && 
input_set.empty()) { + for (auto bit : sigmap(sig_d)) { + if (bit.wire != nullptr) { + for (auto input_bit : get_input_signals(bit)) { + input_set.insert(input_bit); + } + } + } + // Remove Q bits (feedback, not control) + for (auto bit : sigmap(sig_q)) + input_set.erase(bit); + + if (input_set.empty()) { + log_debug(" No inputs to D (besides Q)\n"); + return false; + } + log_debug(" Initial input set has %zu signals\n", input_set.size()); + } + // Check if current input set works as enable if (input_set_is_enable(input_set, sig_d, sig_q)) { log_debug(" Found enable at depth %d with %zu signals\n", depth, input_set.size()); @@ -227,23 +220,10 @@ struct SatClockgateWorker log("Processing FF: %s\n", log_id(cell)); - // Start with direct inputs of D - pool input_set = bfs_find_potential_enable_inputs(ff.sig_d, 1); - - // Remove Q from input set (it's the feedback, not a control signal) - for (auto bit : sigmap(ff.sig_q)) - input_set.erase(bit); - - if (input_set.empty()) { - log_debug(" No inputs to D (besides Q)\n"); - return false; - } - - log_debug(" Initial input set has %zu signals\n", input_set.size()); - - // Try to find enable + // Find enable via recursive BFS + SAT validation + pool input_set; if (determine_enable_recursive(input_set, ff.sig_d, ff.sig_q, 1)) { - // Remove Q bits again (in case BFS added them back) + // Remove Q bits (in case BFS added them back) for (auto bit : sigmap(ff.sig_q)) input_set.erase(bit); From 532d1d45a8eeab96faaf871757ac48d04ecb05ec Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 15:08:49 -0800 Subject: [PATCH 15/55] Resolved adding SigBits from Q using static EXCLUDE_Q_FROM_ENABLE knob --- passes/silimate/sat_clockgate.cc | 33 +++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index b8c61f88f..d8a78cbc8 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -29,6 
+29,9 @@ PRIVATE_NAMESPACE_BEGIN // Maximum depth for BFS exploration of input cone static const int MAX_INPUT_DEPTH = 10; +// If true, exclude Q (feedback) bits from enable input set +static const bool EXCLUDE_Q_FROM_ENABLE = true; + struct SatClockgateWorker { Module *module; @@ -37,6 +40,9 @@ struct SatClockgateWorker // Maps output signal bits to their driver cells dict sig_to_driver; + // Q bits to exclude from enable input set (set per-FF) + pool q_bits; + SatClockgateWorker(Module *module) : module(module), sigmap(module) { // Build driver map: for each signal bit, find which cell drives it @@ -50,6 +56,16 @@ struct SatClockgateWorker } } + // Set Q bits to exclude for current FF + void set_excluded_q_bits(SigSpec sig_q) + { + q_bits.clear(); + if (EXCLUDE_Q_FROM_ENABLE) { + for (auto bit : sigmap(sig_q)) + q_bits.insert(bit); + } + } + // Get the set of input signals feeding into a given signal (one level back) pool get_input_signals(SigBit bit) { @@ -63,7 +79,7 @@ struct SatClockgateWorker for (auto &conn : driver->connections()) { if (driver->input(conn.first)) { for (auto input_bit : sigmap(conn.second)) { - if (input_bit.wire != nullptr) + if (input_bit.wire != nullptr && !q_bits.count(input_bit)) inputs.insert(input_bit); } } @@ -109,7 +125,7 @@ struct SatClockgateWorker } // Recursively determine the enable input set via BFS expansion - // Seeds initial input set from sig_d, excludes sig_q bits + // Seeds initial input set from sig_d, Q bits filtered via get_input_signals bool determine_enable_recursive(pool &input_set, SigSpec sig_d, SigSpec sig_q, int depth) { if (depth > MAX_INPUT_DEPTH) { @@ -121,14 +137,10 @@ struct SatClockgateWorker if (depth == 1 && input_set.empty()) { for (auto bit : sigmap(sig_d)) { if (bit.wire != nullptr) { - for (auto input_bit : get_input_signals(bit)) { + for (auto input_bit : get_input_signals(bit)) input_set.insert(input_bit); - } } } - // Remove Q bits (feedback, not control) - for (auto bit : sigmap(sig_q)) - 
input_set.erase(bit); if (input_set.empty()) { log_debug(" No inputs to D (besides Q)\n"); @@ -220,13 +232,12 @@ struct SatClockgateWorker log("Processing FF: %s\n", log_id(cell)); + // Set Q bits to exclude from enable candidates + set_excluded_q_bits(ff.sig_q); + // Find enable via recursive BFS + SAT validation pool input_set; if (determine_enable_recursive(input_set, ff.sig_d, ff.sig_q, 1)) { - // Remove Q bits (in case BFS added them back) - for (auto bit : sigmap(ff.sig_q)) - input_set.erase(bit); - create_ce_logic(input_set, ff); // Emit the modified FF From 481e49954ded176b30c2d7ea7cde090c118d42c1 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 11 Feb 2026 17:05:13 -0800 Subject: [PATCH 16/55] Added notes for a fixed input_set_is_enable implementation --- notes.txt | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/notes.txt b/notes.txt index dabc89cf0..c57f47e4a 100644 --- a/notes.txt +++ b/notes.txt @@ -190,4 +190,74 @@ Future TODOs: for a weaker clock gate (still consumes power if COI and D == Q but might be faster) 4) Deal with posede vs negedge of the clocks 5) Experiment with different logical combinations of the COI set (rather than just - or-ing them all together) \ No newline at end of file + or-ing them all together) + + +=== FIXED input_set_is_enable IMPLEMENTATIONS === + +// VERSION 1: Safe clock gating (current approach, cleaned up) +// Checks: (input_set=0) AND (D≠Q) == UNSAT +// Meaning: when all inputs are 0, D is guaranteed to equal Q +bool input_set_is_enable_safe(const pool &input_set, SigSpec sig_d, SigSpec sig_q) +{ + if (input_set.empty()) + return false; + + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + + // Import circuit behavior + for (auto cell : module->cells()) + satgen.importCell(cell); + + // Import D and Q + std::vector d_vec = satgen.importSigSpec(sig_d); + std::vector q_vec = satgen.importSigSpec(sig_q); + + // Constraint 1: All input_set bits 
= 0 + for (auto bit : input_set) { + int bit_var = satgen.importSigSpec(SigSpec(bit))[0]; + ez->assume(ez->NOT(bit_var)); + } + + // Constraint 2: D != Q + ez->assume(ez->vec_ne(d_vec, q_vec)); + + // If UNSAT: no way for D≠Q when inputs=0 → valid enable + return !ez->solve(); +} + +// VERSION 2: Exact clock gating (stronger, no wasted power) +// Checks: (D≠Q) XOR (OR(input_set)) == UNSAT +// Meaning: COI is exactly equivalent to D≠Q +bool input_set_is_enable_exact(const pool &input_set, SigSpec sig_d, SigSpec sig_q) +{ + if (input_set.empty()) + return false; + + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + + // Import circuit behavior + for (auto cell : module->cells()) + satgen.importCell(cell); + + // Import D and Q + std::vector d_vec = satgen.importSigSpec(sig_d); + std::vector q_vec = satgen.importSigSpec(sig_q); + + // Build COI = OR(input_set) + std::vector input_vars; + for (auto bit : input_set) + input_vars.push_back(satgen.importSigSpec(SigSpec(bit))[0]); + int coi = ez->expression(ezSAT::OpOr, input_vars); + + // Build D != Q (single bit: is any bit different?) 
+ int d_ne_q = ez->vec_ne(d_vec, q_vec); + + // Constraint: COI XOR (D≠Q) — want this UNSAT (meaning COI ↔ D≠Q) + ez->assume(ez->XOR(coi, d_ne_q)); + + // If UNSAT: COI is exactly when D≠Q → perfect enable + return !ez->solve(); +} \ No newline at end of file From 745f17a34e752be3ef0d0992babe5fbf9971b5e7 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Thu, 12 Feb 2026 11:10:10 -0800 Subject: [PATCH 17/55] Changed input_set_is_enable_exact to XOR Mitter --- passes/silimate/sat_clockgate.cc | 35 +++++++++++++++----------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index d8a78cbc8..8c849119b 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -87,8 +87,8 @@ struct SatClockgateWorker return inputs; } - // Check if fixing the input_set to specific values makes D == Q always true - // Returns true if input_set can serve as an enable (when all bits are 0, D == Q) + // Check if OR(input_set) is exactly equivalent to (D != Q) + // Returns true if COI ↔ (D≠Q) for all circuit states (exact clock gating) bool input_set_is_enable(const pool &input_set, SigSpec sig_d, SigSpec sig_q) { if (input_set.empty()) @@ -97,31 +97,28 @@ struct SatClockgateWorker ezSatPtr ez; SatGen satgen(ez.get(), &sigmap); - // Import the logic cone for D + // Import circuit behavior for (auto cell : module->cells()) satgen.importCell(cell); - // Create SAT variables for D and Q + // Import D and Q std::vector d_vec = satgen.importSigSpec(sig_d); std::vector q_vec = satgen.importSigSpec(sig_q); - // Assert: all signals in input_set are 0 - for (auto bit : input_set) { - std::vector bit_vec = satgen.importSigSpec(SigSpec(bit)); - if (!bit_vec.empty()) - ez->assume(ez->NOT(bit_vec[0])); - } + // Build COI = OR(input_set) + std::vector input_vars; + for (auto bit : input_set) + input_vars.push_back(satgen.importSigSpec(SigSpec(bit))[0]); + int coi = ez->expression(ezSAT::OpOr, 
input_vars); - // Assert: D != Q (we want this to be UNSAT) - std::vector neq_bits; - for (size_t i = 0; i < d_vec.size() && i < q_vec.size(); i++) { - neq_bits.push_back(ez->XOR(d_vec[i], q_vec[i])); - } - ez->assume(ez->expression(ezSAT::OpOr, neq_bits)); + // Build D != Q (single bit: is any bit different?) + int d_ne_q = ez->vec_ne(d_vec, q_vec); - // If UNSAT, then input_set=0 guarantees D == Q - bool sat = ez->solve(); - return !sat; + // Constraint: COI XOR (D≠Q) — want this UNSAT (meaning COI ↔ D≠Q) + ez->assume(ez->XOR(coi, d_ne_q)); + + // If UNSAT: COI is exactly when D≠Q → perfect enable + return !ez->solve(); } // Recursively determine the enable input set via BFS expansion From 514c01efd2a90312c1005cdf333871768319fb91 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Thu, 12 Feb 2026 12:14:25 -0800 Subject: [PATCH 18/55] Added prune expressions list TODO --- notes.txt | 11 ++++++++++- passes/silimate/sat_clockgate.cc | 34 +++++++++++++++++++++----------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/notes.txt b/notes.txt index c57f47e4a..1d39c08f9 100644 --- a/notes.txt +++ b/notes.txt @@ -191,6 +191,7 @@ Future TODOs: 4) Deal with posede vs negedge of the clocks 5) Experiment with different logical combinations of the COI set (rather than just or-ing them all together) +6) Consider pruning ezSAT expressions list — accumulates across queries, may cause memory growth === FIXED input_set_is_enable IMPLEMENTATIONS === @@ -260,4 +261,12 @@ bool input_set_is_enable_exact(const pool &input_set, SigSpec sig_d, Sig // If UNSAT: COI is exactly when D≠Q → perfect enable return !ez->solve(); -} \ No newline at end of file +} + + + + +Setting up the SAT condition: + - Need to have the equation for Q, need to have the equation for D + need to XOR those equations, need to XOR that equation with the new + one. Need to make sure that that's never SAT. 
\ No newline at end of file diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 8c849119b..5547db11a 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -42,8 +42,12 @@ struct SatClockgateWorker // Q bits to exclude from enable input set (set per-FF) pool q_bits; + + // SAT solver and generator - created once per module + ezSatPtr ez; + SatGen satgen; - SatClockgateWorker(Module *module) : module(module), sigmap(module) + SatClockgateWorker(Module *module) : module(module), sigmap(module), ez(), satgen(ez.get(), &sigmap) { // Build driver map: for each signal bit, find which cell drives it for (auto cell : module->cells()) { @@ -54,6 +58,10 @@ struct SatClockgateWorker } } } + + // Import all cells once - circuit constraints are permanent + for (auto cell : module->cells()) + satgen.importCell(cell); } // Set Q bits to exclude for current FF @@ -89,19 +97,16 @@ struct SatClockgateWorker // Check if OR(input_set) is exactly equivalent to (D != Q) // Returns true if COI ↔ (D≠Q) for all circuit states (exact clock gating) + // TODO: Consider pruning the expressions list — expressions accumulate across + // calls (OR, XOR, NE per query). For large designs with many FFs, this + // could cause memory growth. Options: solver checkpoints, fresh solver + // per FF with COI-only cell import, or periodic expression cleanup. bool input_set_is_enable(const pool &input_set, SigSpec sig_d, SigSpec sig_q) { if (input_set.empty()) return false; - ezSatPtr ez; - SatGen satgen(ez.get(), &sigmap); - - // Import circuit behavior - for (auto cell : module->cells()) - satgen.importCell(cell); - - // Import D and Q + // Import D and Q (uses cached literals if already imported) std::vector d_vec = satgen.importSigSpec(sig_d); std::vector q_vec = satgen.importSigSpec(sig_q); @@ -114,11 +119,16 @@ struct SatClockgateWorker // Build D != Q (single bit: is any bit different?) 
int d_ne_q = ez->vec_ne(d_vec, q_vec); - // Constraint: COI XOR (D≠Q) — want this UNSAT (meaning COI ↔ D≠Q) - ez->assume(ez->XOR(coi, d_ne_q)); + // Query: COI XOR (D≠Q) — want this UNSAT (meaning COI ↔ D≠Q) + // Use solve() with assumption instead of permanent assume() + int query = ez->XOR(coi, d_ne_q); + + std::vector assumptions = {query}; + std::vector dummy_model_exprs; + std::vector dummy_model_vals; // If UNSAT: COI is exactly when D≠Q → perfect enable - return !ez->solve(); + return !ez->solve(dummy_model_exprs, dummy_model_vals, assumptions); } // Recursively determine the enable input set via BFS expansion From e4734e6ca90f14febb120eea635fa4e723275336 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Thu, 12 Feb 2026 12:49:15 -0800 Subject: [PATCH 19/55] Added comments explaining the MUX network repair Idea to see if there's a combinational circuit out of the input values which can serve as the enable signal --- notes.txt | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/notes.txt b/notes.txt index 1d39c08f9..27756baf1 100644 --- a/notes.txt +++ b/notes.txt @@ -269,4 +269,27 @@ bool input_set_is_enable_exact(const pool &input_set, SigSpec sig_d, Sig Setting up the SAT condition: - Need to have the equation for Q, need to have the equation for D need to XOR those equations, need to XOR that equation with the new - one. Need to make sure that that's never SAT. \ No newline at end of file + one. Need to make sure that that's never SAT. + + + + + + + + + +Ok so I have this idea: + + + + +I'm doing to take the input variables, universally quantize them and also take D and Q and universilly quantise them + +Then I have this formula SAT((D^Q) !^ MUX)) + +Where the MUX has (for the select inputs the input values, and then has random variables d_0 to D_2^n when there's n inputs. + +SAT returns the combination of the values for d which make this work. 
+ +But in this case, how can I go from determining the values of the Ds to determine the gates and converting that to combinational logic? And also, how can I universally quantify the other values? \ No newline at end of file From 0396bf48d169521019cd07ee1b20cec1a43993d5 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Thu, 12 Feb 2026 14:28:37 -0800 Subject: [PATCH 20/55] Added notes.txt --- notes.txt | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/notes.txt b/notes.txt index 27756baf1..d02b42067 100644 --- a/notes.txt +++ b/notes.txt @@ -292,4 +292,15 @@ Where the MUX has (for the select inputs the input values, and then has random v SAT returns the combination of the values for d which make this work. -But in this case, how can I go from determining the values of the Ds to determine the gates and converting that to combinational logic? And also, how can I universally quantify the other values? \ No newline at end of file +But in this case, how can I go from determining the values of the Ds to determine the gates and converting that to combinational logic? And also, how can I universally quantify the other values? + + + +This is difficult due to QBF (Quantified Boolean Formula) engines being very expensive and slow. +Rather than this, potentially trying CEGAR (not sure if this is practical). Idea is this: + 1. Start with a CANDIDATE solution (guess/abstraction) + 2. CHECK: Does candidate work for ALL inputs? (via SAT) (UNSAT for XOR means they are the same) + - If YES → Done, return candidate ✓ + - If NO → SAT gives a COUNTEREXAMPLE (inputs where it fails) + 3. REFINE: Use counterexample to improve candidate + 4. 
GOTO 2 \ No newline at end of file From d7277fcb3a7be317adf797e7f1e660098577e2e0 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Thu, 12 Feb 2026 15:20:39 -0800 Subject: [PATCH 21/55] Added explanation for safe-gating vs exact-gating --- notes.txt | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/notes.txt b/notes.txt index d02b42067..a244285fe 100644 --- a/notes.txt +++ b/notes.txt @@ -303,4 +303,43 @@ Rather than this, potentially trying CEGAR (not sure if this is practical). Idea - If YES → Done, return candidate ✓ - If NO → SAT gives a COUNTEREXAMPLE (inputs where it fails) 3. REFINE: Use counterexample to improve candidate - 4. GOTO 2 \ No newline at end of file + 4. GOTO 2 + + + + The approach by this paper: https://dl.acm.org/doi/epdf/10.1145/1391469.1391637 + +Try each signal individually → collect ones that work → OR the winners +Your approach: "OR(all inputs) == enable?" +Paper's approach: "Which individual signals could BE the enable?" +Paper's Algorithm in Pseudocode (Yosys-feasible): +SigBit find_clock_enable(SigSpec sig_d, SigSpec sig_q) { pool candidates = get_cone_signals(sig_d); // All signals in fanin pool valid_enables; // STEP 1: Prune with simulation (fast) for (auto sig : candidates) { if (!simulation_suggests_valid(sig, sig_d, sig_q)) candidates.erase(sig); // Quick reject } // STEP 2: Prove with SAT (slow but conclusive) for (auto sig : candidates) { // Check: sig=1 → D==Q (safe to gate) // SAT query: (sig ∧ (D ⊕ Q)) — want UNSAT if (sat_proves_valid(sig, sig_d, sig_q)) valid_enables.insert(sig); } // STEP 3: Pick best (by estimated power savings) SigBit best = select_best_coverage(valid_enables); return best; // Or OR multiple if needed}bool sat_proves_valid(SigBit candidate, SigSpec sig_d, SigSpec sig_q) { // "Can candidate=1 while D≠Q?" 
— if UNSAT, candidate is valid int cand = satgen.importSigSpec(SigSpec(candidate))[0]; int d_ne_q = ez->vec_ne(d_vec, q_vec); ez->assume(cand); ez->assume(d_ne_q); return !ez->solve(); // UNSAT = valid enable} + + +=== SAFE GATING vs EXACT GATING === + +Safe Gating: + SAT check: sig ∧ (D≠Q) = UNSAT + Meaning: When sig=1, D is guaranteed to equal Q (safe to gate clock) + - sig=1 → gate clock, hold register (D==Q guaranteed) + - sig=0 → clock runs freely (D may or may not equal Q) + Allows wasted power (clock runs when D==Q but sig=0), but NEVER loses data. + +Exact Gating: + SAT check: sig ⊕ (D≠Q) = UNSAT + Meaning: sig is EXACTLY equivalent to (D≠Q) + - sig=1 ↔ D≠Q (perfect bidirectional match) + No wasted power, but much harder to find matching signals. + +Comparison: + | Type | SAT Check | Finds more? | Power optimal? | + |--------|-----------------------|-------------|----------------| + | Safe | sig ∧ (D≠Q) = UNSAT | Yes | No (some waste)| + | Exact | sig ⊕ (D≠Q) = UNSAT | No | Yes (perfect) | + +Recommendation: + • Use SAFE GATING — faster (weaker SAT query), finds more candidates + • Safe gating is industry standard (used in paper, commercial tools) + • Exact gating rarely finds matches unless design has explicit MUX-with-Q pattern + • Power difference is minor — safe gating still saves most power + • Safe gating has better QoR: more FFs get clock-gated From feffbbe32caa3875746deb01db9b57fd2289d02d Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Thu, 12 Feb 2026 16:12:50 -0800 Subject: [PATCH 22/55] Added initial impl based on OpenROAD --- passes/silimate/sat_clockgate.cc | 695 ++++++++++++++++++++----------- 1 file changed, 442 insertions(+), 253 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 5547db11a..089fb1305 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -14,346 +14,535 @@ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF 
CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * */ #include "kernel/yosys.h" #include "kernel/sigtools.h" #include "kernel/ff.h" #include "kernel/satgen.h" -#include +#include +#include USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN -// Maximum depth for BFS exploration of input cone -static const int MAX_INPUT_DEPTH = 10; - -// If true, exclude Q (feedback) bits from enable input set -static const bool EXCLUDE_Q_FROM_ENABLE = true; +// Configuration +static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider +static const int DEFAULT_MIN_REGS = 1; // Min registers per clock gate +static const int DEFAULT_SIM_ITERATIONS = 10; // Random simulation iterations for pruning struct SatClockgateWorker { Module *module; SigMap sigmap; + // Configuration + int max_cover; + int min_regs; + int sim_iterations; + // Maps output signal bits to their driver cells dict sig_to_driver; - // Q bits to exclude from enable input set (set per-FF) - pool q_bits; - + // Maps cell input pins to their source signals + dict> sig_to_sinks; + // SAT solver and generator - created once per module ezSatPtr ez; SatGen satgen; - SatClockgateWorker(Module *module) : module(module), sigmap(module), ez(), satgen(ez.get(), &sigmap) + // Statistics + int accepted_count = 0; + int rejected_sim_count = 0; + int rejected_sat_count = 0; + + SatClockgateWorker(Module *module, int max_cover, int min_regs, int sim_iterations) + : module(module), sigmap(module), + max_cover(max_cover), min_regs(min_regs), sim_iterations(sim_iterations), + ez(), satgen(ez.get(), &sigmap) { - // Build driver map: for each signal bit, find which cell drives it + // Build driver and sink maps for (auto cell : module->cells()) { for (auto &conn : cell->connections()) { if (cell->output(conn.first)) { for (auto bit : sigmap(conn.second)) - sig_to_driver[bit] = cell; + if (bit.wire) + sig_to_driver[bit] = cell; + } + if 
(cell->input(conn.first)) { + for (auto bit : sigmap(conn.second)) + if (bit.wire) + sig_to_sinks[bit].insert(cell); } } } - + // Import all cells once - circuit constraints are permanent for (auto cell : module->cells()) - satgen.importCell(cell); + if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + satgen.importCell(cell); } - - // Set Q bits to exclude for current FF - void set_excluded_q_bits(SigSpec sig_q) + + // Get downstream signals from a register (BFS forward through combinational logic) + pool getDownstreamSignals(Cell *reg, int limit) { - q_bits.clear(); - if (EXCLUDE_Q_FROM_ENABLE) { - for (auto bit : sigmap(sig_q)) - q_bits.insert(bit); + pool result; + pool visited; + std::queue worklist; + + // Start from register output Q + FfData ff(nullptr, reg); + for (auto bit : sigmap(ff.sig_q)) { + if (bit.wire) { + worklist.push(bit); + visited.insert(bit); + } } - } - - // Get the set of input signals feeding into a given signal (one level back) - pool get_input_signals(SigBit bit) - { - pool inputs; - bit = sigmap(bit); - if (!sig_to_driver.count(bit)) - return inputs; // Primary input or constant - - Cell *driver = sig_to_driver[bit]; - for (auto &conn : driver->connections()) { - if (driver->input(conn.first)) { - for (auto input_bit : sigmap(conn.second)) { - if (input_bit.wire != nullptr && !q_bits.count(input_bit)) - inputs.insert(input_bit); + while (!worklist.empty() && (int)result.size() < limit) { + SigBit bit = worklist.front(); + worklist.pop(); + + result.insert(bit); + + // Find cells driven by this signal + for (auto sink_cell : sig_to_sinks[bit]) { + // Skip registers - don't traverse through them + if (sink_cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), 
ID($_DFF_N_))) + continue; + + // Add outputs of this cell to worklist + for (auto &conn : sink_cell->connections()) { + if (sink_cell->output(conn.first)) { + for (auto out_bit : sigmap(conn.second)) { + if (out_bit.wire && !visited.count(out_bit)) { + visited.insert(out_bit); + worklist.push(out_bit); + } + } + } } } } - return inputs; + + return result; } - - // Check if OR(input_set) is exactly equivalent to (D != Q) - // Returns true if COI ↔ (D≠Q) for all circuit states (exact clock gating) - // TODO: Consider pruning the expressions list — expressions accumulate across - // calls (OR, XOR, NE per query). For large designs with many FFs, this - // could cause memory growth. Options: solver checkpoints, fresh solver - // per FF with COI-only cell import, or periodic expression cleanup. - bool input_set_is_enable(const pool &input_set, SigSpec sig_d, SigSpec sig_q) + + // Get upstream signals feeding into given signals (BFS backward) + pool getUpstreamSignals(const pool &start_signals, int limit) { - if (input_set.empty()) + pool result; + pool visited; + std::queue worklist; + + for (auto bit : start_signals) { + worklist.push(bit); + visited.insert(bit); + } + + while (!worklist.empty() && (int)result.size() < limit) { + SigBit bit = worklist.front(); + worklist.pop(); + + result.insert(bit); + + // Find driver cell + if (!sig_to_driver.count(bit)) + continue; + + Cell *driver = sig_to_driver[bit]; + + // Skip registers + if (driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_))) + continue; + + // Add inputs of driver to worklist + for (auto &conn : driver->connections()) { + if (driver->input(conn.first)) { + for (auto in_bit : sigmap(conn.second)) { + if (in_bit.wire && !visited.count(in_bit)) { + visited.insert(in_bit); + worklist.push(in_bit); + } + } + } + } + } + + return result; + } + + // Check if a candidate signal is a valid gating condition 
using SAT + // Safe gating check: sig=1 → D==Q (i.e., (sig ∧ (D≠Q)) is UNSAT) + bool isValidGatingSignal(SigBit candidate, SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + std::vector d_vec = satgen.importSigSpec(sig_d); + std::vector q_vec = satgen.importSigSpec(sig_q); + int cand_var = satgen.importSigSpec(SigSpec(candidate))[0]; + + // D != Q + int d_ne_q = ez->vec_ne(d_vec, q_vec); + + // For clock enable (active high): when enable=0, D must equal Q + // Check: (!enable ∧ (D≠Q)) is UNSAT + // For clock disable (active low): when disable=1, D must equal Q + // Check: (disable ∧ (D≠Q)) is UNSAT + + int gating_active = as_enable ? ez->NOT(cand_var) : cand_var; + int query = ez->AND(gating_active, d_ne_q); + + std::vector assumptions = {query}; + std::vector dummy_exprs; + std::vector dummy_vals; + + return !ez->solve(dummy_exprs, dummy_vals, assumptions); + } + + // Simple random simulation test to quickly prune candidates + bool simulationTest(SigBit candidate, SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + // For now, skip simulation and go straight to SAT + // TODO: Implement random simulation for faster pruning + return true; + } + + // Binary search to minimize the gating condition set + // Tries to remove half of the signals at a time + void minimizeGatingCondition( + std::vector &good_conds, + std::vector::iterator begin, + std::vector::iterator end, + SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + int half_len = (end - begin) / 2; + if (half_len == 0) + return; + + auto mid = begin + half_len; + + // Try removing [mid, end) from the condition + std::vector test_conds; + test_conds.insert(test_conds.end(), good_conds.begin(), begin); + test_conds.insert(test_conds.end(), begin, mid); + test_conds.insert(test_conds.end(), end, good_conds.end()); + + if (!test_conds.empty() && isValidGatingSet(test_conds, sig_d, sig_q, as_enable)) { + // Can remove [mid, end) + good_conds.erase(mid, end); + // Recurse on remaining half + 
minimizeGatingCondition(good_conds, begin, begin + half_len, sig_d, sig_q, as_enable); + } else { + // Cannot remove all of [mid, end), try to minimize each half + if (end - mid > 1) + minimizeGatingCondition(good_conds, mid, end, sig_d, sig_q, as_enable); + minimizeGatingCondition(good_conds, begin, mid, sig_d, sig_q, as_enable); + } + } + + // Check if OR/AND of signals forms a valid gating condition + bool isValidGatingSet(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + if (conds.empty()) return false; - - // Import D and Q (uses cached literals if already imported) + std::vector d_vec = satgen.importSigSpec(sig_d); std::vector q_vec = satgen.importSigSpec(sig_q); - // Build COI = OR(input_set) - std::vector input_vars; - for (auto bit : input_set) - input_vars.push_back(satgen.importSigSpec(SigSpec(bit))[0]); - int coi = ez->expression(ezSAT::OpOr, input_vars); + // Build OR (for enable) or AND (for disable) of condition signals + std::vector cond_vars; + for (auto bit : conds) + cond_vars.push_back(satgen.importSigSpec(SigSpec(bit))[0]); + + int combined_cond; + if (as_enable) { + // Clock enable: OR of signals (any signal high = enable) + combined_cond = ez->expression(ezSAT::OpOr, cond_vars); + } else { + // Clock disable: AND of signals (all signals high = disable) + combined_cond = ez->expression(ezSAT::OpAnd, cond_vars); + } - // Build D != Q (single bit: is any bit different?) int d_ne_q = ez->vec_ne(d_vec, q_vec); - // Query: COI XOR (D≠Q) — want this UNSAT (meaning COI ↔ D≠Q) - // Use solve() with assumption instead of permanent assume() - int query = ez->XOR(coi, d_ne_q); + // Safe gating: when gating is active (enable=0 or disable=1), D must equal Q + int gating_active = as_enable ? 
ez->NOT(combined_cond) : combined_cond; + int query = ez->AND(gating_active, d_ne_q); std::vector assumptions = {query}; - std::vector dummy_model_exprs; - std::vector dummy_model_vals; + std::vector dummy_exprs; + std::vector dummy_vals; - // If UNSAT: COI is exactly when D≠Q → perfect enable - return !ez->solve(dummy_model_exprs, dummy_model_vals, assumptions); + return !ez->solve(dummy_exprs, dummy_vals, assumptions); } - - // Recursively determine the enable input set via BFS expansion - // Seeds initial input set from sig_d, Q bits filtered via get_input_signals - bool determine_enable_recursive(pool &input_set, SigSpec sig_d, SigSpec sig_q, int depth) + + // Find gating condition for a register + // Returns empty vector if no valid condition found + std::pair, bool> findGatingCondition(Cell *reg) { - if (depth > MAX_INPUT_DEPTH) { - log_debug(" Max depth reached, giving up\n"); - return false; + FfData ff(nullptr, reg); + + // Get candidate signals downstream of this register + pool downstream = getDownstreamSignals(reg, max_cover); + + if (downstream.empty()) { + log_debug(" No downstream candidates for %s\n", log_id(reg)); + return {{}, false}; } - - // Seed initial input set from sig_d on first call - if (depth == 1 && input_set.empty()) { - for (auto bit : sigmap(sig_d)) { - if (bit.wire != nullptr) { - for (auto input_bit : get_input_signals(bit)) - input_set.insert(input_bit); - } - } - - if (input_set.empty()) { - log_debug(" No inputs to D (besides Q)\n"); - return false; - } - log_debug(" Initial input set has %zu signals\n", input_set.size()); - } - - // Check if current input set works as enable - if (input_set_is_enable(input_set, sig_d, sig_q)) { - log_debug(" Found enable at depth %d with %zu signals\n", depth, input_set.size()); - return true; - } - - // Expand input set via BFS (one more level) - pool new_inputs; - for (auto bit : input_set) { - for (auto input_bit : get_input_signals(bit)) { - if (!input_set.count(input_bit)) - 
new_inputs.insert(input_bit); + + // Also include upstream signals that could affect D + pool d_inputs; + for (auto bit : sigmap(ff.sig_d)) + if (bit.wire) + d_inputs.insert(bit); + pool upstream = getUpstreamSignals(d_inputs, max_cover); + + // Combine and limit candidates + std::vector candidates; + for (auto bit : downstream) + candidates.push_back(bit); + for (auto bit : upstream) + if (!downstream.count(bit)) + candidates.push_back(bit); + + if ((int)candidates.size() > max_cover) + candidates.resize(max_cover); + + log_debug(" Found %zu candidate signals\n", candidates.size()); + + // Try as clock enable first (more common) + if (isValidGatingSet(candidates, ff.sig_d, ff.sig_q, true)) { + minimizeGatingCondition(candidates, candidates.begin(), candidates.end(), + ff.sig_d, ff.sig_q, true); + if (!candidates.empty()) { + accepted_count++; + return {candidates, true}; // true = clock enable } } - - if (new_inputs.empty()) { - log_debug(" No more inputs to explore at depth %d\n", depth); - return false; + + // Try as clock disable + if (isValidGatingSet(candidates, ff.sig_d, ff.sig_q, false)) { + minimizeGatingCondition(candidates, candidates.begin(), candidates.end(), + ff.sig_d, ff.sig_q, false); + if (!candidates.empty()) { + accepted_count++; + return {candidates, false}; // false = clock disable + } } - - // Add new inputs and recurse - for (auto bit : new_inputs) - input_set.insert(bit); - - return determine_enable_recursive(input_set, sig_d, sig_q, depth + 1); + + rejected_sat_count++; + return {{}, false}; } - - // Create CE logic based on the input set and modify the FF - // The enable condition is: when all input_set bits are 0, D == Q (hold) - // So CE should be: OR of all input_set bits (active high: CE=1 means update) - void create_ce_logic(const pool &input_set, FfData &ff) + + // Insert clock gating logic for a group of registers + void insertClockGate(const std::vector ®s, + const std::vector &gating_conds, + bool as_enable) { - if 
(input_set.empty()) + if (regs.empty() || gating_conds.empty()) return; - - log(" Creating CE from %zu input signals\n", input_set.size()); - - // Build CE as OR of all input signals - // CE = 1 when any input is 1 (meaning: update the register) - // CE = 0 when all inputs are 0 (meaning: hold, since D == Q) - SigSpec ce_inputs; - for (auto bit : input_set) - ce_inputs.append(bit); - - SigBit ce_signal; - if (GetSize(ce_inputs) == 1) { - ce_signal = ce_inputs[0]; + + log(" Inserting clock gate for %zu registers with %zu condition signals\n", + regs.size(), gating_conds.size()); + + // Build gating condition: OR for enable, AND for disable + SigBit gating_signal; + if (gating_conds.size() == 1) { + gating_signal = gating_conds[0]; } else { - // Create OR gate: CE = |input_set - Wire *ce_wire = module->addWire(NEW_ID); - module->addReduceOr(NEW_ID, ce_inputs, ce_wire); - ce_signal = ce_wire; - } - - // Set the CE on the FF - ff.has_ce = true; - ff.sig_ce = ce_signal; - ff.pol_ce = true; // Active high - - log(" CE signal: %s\n", log_signal(ce_signal)); - } - - // Process a single FF to find and insert CE - bool process_ff(Cell *cell) - { - FfData ff(nullptr, cell); - - // Skip if already has CE, or doesn't have clock/data - if (ff.has_ce) { - log_debug(" Skipping %s: already has CE\n", log_id(cell)); - return false; - } - if (!ff.has_clk) { - log_debug(" Skipping %s: no clock\n", log_id(cell)); - return false; - } - if (GetSize(ff.sig_d) == 0 || GetSize(ff.sig_q) == 0) { - log_debug(" Skipping %s: no D or Q\n", log_id(cell)); - return false; - } - - log("Processing FF: %s\n", log_id(cell)); - - // Set Q bits to exclude from enable candidates - set_excluded_q_bits(ff.sig_q); - - // Find enable via recursive BFS + SAT validation - pool input_set; - if (determine_enable_recursive(input_set, ff.sig_d, ff.sig_q, 1)) { - create_ce_logic(input_set, ff); + SigSpec cond_inputs; + for (auto bit : gating_conds) + cond_inputs.append(bit); - // Emit the modified FF - ff.emit(); - 
return true; + Wire *cond_wire = module->addWire(NEW_ID); + if (as_enable) + module->addReduceOr(NEW_ID, cond_inputs, cond_wire); + else + module->addReduceAnd(NEW_ID, cond_inputs, cond_wire); + gating_signal = cond_wire; } - - log_debug(" Could not find enable for %s\n", log_id(cell)); - return false; + + // If disable signal, invert to get enable + if (!as_enable) { + Wire *inv_wire = module->addWire(NEW_ID); + module->addNot(NEW_ID, gating_signal, inv_wire); + gating_signal = inv_wire; + } + + // Add CE to each register + for (auto reg : regs) { + FfData ff(nullptr, reg); + + if (ff.has_ce) { + // Already has CE, AND with new condition + Wire *combined_ce = module->addWire(NEW_ID); + module->addAnd(NEW_ID, ff.sig_ce, gating_signal, combined_ce); + ff.sig_ce = combined_ce; + } else { + ff.has_ce = true; + ff.sig_ce = gating_signal; + ff.pol_ce = true; + } + + ff.emit(); + log(" Added CE to %s\n", log_id(reg)); + } + } + + // Main processing function + void run() + { + log("Processing module %s\n", log_id(module)); + + // Collect all registers + std::vector registers; + for (auto cell : module->cells()) { + if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + continue; + + FfData ff(nullptr, cell); + + // Skip registers that already have CE + if (ff.has_ce) { + log_debug(" Skipping %s: already has CE\n", log_id(cell)); + continue; + } + + if (!ff.has_clk) { + log_debug(" Skipping %s: no clock\n", log_id(cell)); + continue; + } + + registers.push_back(cell); + } + + log(" Found %zu registers without CE\n", registers.size()); + + // Track accepted gating conditions for reuse + // Maps condition signature to (condition signals, registers, is_enable) + dict, std::vector, bool>> accepted_gates; + + int processed = 0; + for (auto reg : registers) { + if (processed % 100 == 0 && processed > 0) 
+ log(" Processed %d/%zu registers\n", processed, registers.size()); + processed++; + + log_debug("Processing register %s\n", log_id(reg)); + + auto [gating_conds, is_enable] = findGatingCondition(reg); + + if (gating_conds.empty()) { + log_debug(" No valid gating condition found\n"); + continue; + } + + // Create signature for this gating condition + std::string sig; + for (auto bit : gating_conds) + sig += log_signal(bit) + ","; + sig += is_enable ? "E" : "D"; + + // Check if we already have this condition + if (accepted_gates.count(sig)) { + auto &[conds, regs, en] = accepted_gates[sig]; + regs.push_back(reg); + log_debug(" Reusing existing gating condition for %s\n", log_id(reg)); + } else { + accepted_gates[sig] = {gating_conds, {reg}, is_enable}; + log(" Found new gating condition for %s: %s (%s)\n", + log_id(reg), sig.c_str(), is_enable ? "enable" : "disable"); + } + } + + // Insert clock gates for groups that meet minimum register threshold + int gates_inserted = 0; + for (auto &[sig, data] : accepted_gates) { + auto &[conds, regs, is_enable] = data; + + if ((int)regs.size() >= min_regs) { + insertClockGate(regs, conds, is_enable); + gates_inserted++; + } else { + log_debug(" Skipping gating condition (only %zu registers, need %d)\n", + regs.size(), min_regs); + } + } + + log(" Inserted %d clock gates\n", gates_inserted); + log(" Statistics: accepted=%d, rejected_sat=%d\n", + accepted_count, rejected_sat_count); } }; -void dump_flipflops_to_file(RTLIL::Design *design, const std::string &filename) -{ - std::ofstream outfile(filename); - if (!outfile.is_open()) { - log_error("Cannot open file %s for writing\n", filename.c_str()); - return; - } - - for (auto module : design->selected_modules()) { - outfile << "Module: " << log_id(module) << "\n"; - log("Module: %s\n", log_id(module)); - - for (auto cell : module->cells()) { - if (cell->is_builtin_ff()) { - outfile << " FF: " << log_id(cell) << " (type: " << log_id(cell->type) << ")\n"; - log(" FF: %s (type: 
%s)\n", log_id(cell), log_id(cell->type)); - } - } - outfile << "\n"; - } - - outfile.close(); - log("Wrote flip-flop list to %s\n", filename.c_str()); -} - struct SatClockgatePass : public Pass { - SatClockgatePass() : Pass("sat_clockgate", "SAT-based inferred clock gating") { } - + SatClockgatePass() : Pass("sat_clockgate", "SAT-based automatic clock gating") { } + void help() override { log("\n"); log(" sat_clockgate [options] [selection]\n"); log("\n"); - log("This command performs SAT-based inferred clock gating insertion.\n"); - log("It analyzes flip-flops without explicit clock enables and uses SAT\n"); - log("to find input conditions under which D == Q (register holds value).\n"); - log("These conditions become the inferred clock enable.\n"); + log("This command performs SAT-based automatic clock gating insertion.\n"); + log("It analyzes registers and uses SAT solving to find signals that can\n"); + log("serve as clock enable conditions (when the signal is low, D==Q).\n"); log("\n"); - log(" -threshold \n"); - log(" minimum number of FFs that must share an enable for clock gating\n"); - log(" to be inserted (default: 1)\n"); + log("Algorithm based on:\n"); + log(" - \"Automatic Synthesis of Clock Gating Logic\" by Aaron P. 
Hurst\n"); + log(" - OpenROAD's cgt module implementation\n"); + log("\n"); + log(" -max_cover \n"); + log(" maximum number of candidate signals to consider per register\n"); + log(" (default: %d)\n", DEFAULT_MAX_COVER); + log("\n"); + log(" -min_regs \n"); + log(" minimum number of registers that must share a gating condition\n"); + log(" for a clock gate to be inserted (default: %d)\n", DEFAULT_MIN_REGS); log("\n"); } - + void execute(std::vector args, RTLIL::Design *design) override { - log_header(design, "Executing SAT_CLOCKGATE pass (SAT-based inferred clock gating).\n"); - - int threshold = 1; - + log_header(design, "Executing SAT_CLOCKGATE pass.\n"); + + int max_cover = DEFAULT_MAX_COVER; + int min_regs = DEFAULT_MIN_REGS; + int sim_iterations = DEFAULT_SIM_ITERATIONS; + size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { - if (args[argidx] == "-threshold" && argidx+1 < args.size()) { - threshold = std::stoi(args[++argidx]); + if (args[argidx] == "-max_cover" && argidx+1 < args.size()) { + max_cover = std::stoi(args[++argidx]); + continue; + } + if (args[argidx] == "-min_regs" && argidx+1 < args.size()) { + min_regs = std::stoi(args[++argidx]); continue; } break; } extra_args(args, argidx, design); - - log("Using threshold: %d\n", threshold); - - // Dump all flip-flops to file (debug) - dump_flipflops_to_file(design, "flip_flops.txt"); - - int total_converted = 0; - + + log("Configuration: max_cover=%d, min_regs=%d\n", max_cover, min_regs); + + int total_gates = 0; + for (auto module : design->selected_modules()) { - log("Processing module %s...\n", log_id(module)); - - SatClockgateWorker worker(module); - - // Collect FFs to process (can't modify while iterating) - std::vector ffs_to_process; - for (auto cell : module->cells()) { - if (cell->is_builtin_ff()) - ffs_to_process.push_back(cell); - } - - int converted = 0; - for (auto cell : ffs_to_process) { - if (worker.process_ff(cell)) - converted++; - } - - if (converted > 0) { - 
log("Converted %d FFs in module %s\n", converted, log_id(module)); - total_converted += converted; - } + SatClockgateWorker worker(module, max_cover, min_regs, sim_iterations); + worker.run(); + total_gates += worker.accepted_count; } - - log("Total FFs with inferred CE: %d\n", total_converted); - - // TODO: Call clockgate pass to convert CEs to ICG cells - // if (total_converted >= threshold) { - // Pass::call(design, "clockgate"); - // } + + log("Total clock gates inserted: %d\n", total_gates); } } SatClockgatePass; From fca02c94dfb6937f785f51acbe0569e095e7fe7a Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Thu, 12 Feb 2026 17:04:50 -0800 Subject: [PATCH 23/55] Notes for TODOS --- notes.txt | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/notes.txt b/notes.txt index a244285fe..8f3747957 100644 --- a/notes.txt +++ b/notes.txt @@ -343,3 +343,31 @@ Recommendation: • Exact gating rarely finds matches unless design has explicit MUX-with-Q pattern • Power difference is minor — safe gating still saves most power • Safe gating has better QoR: more FFs get clock-gated + + There's also clock as_enable and as_disable + as_enable = true (clock enable): + Signal high → clock runs. Signal low → clock blocked. Check: (!enable ∧ D≠Q) must be UNSAT. + as_enable = false (clock disable): + Signal high → clock blocked. Signal low → clock runs. Check: (disable ∧ D≠Q) must be UNSAT. 
+ + + + + +TODOs: +1) Convert from the string hash to an integer hash +2) Use is_builtin_ff +3) See why this path is needed: + if (ff.has_ce) { + // Already has CE, AND with new condition + Wire *combined_ce = module->addWire(NEW_ID); + module->addAnd(NEW_ID, ff.sig_ce, gating_signal, combined_ce); + ff.sig_ce = combined_ce; + } else { +4) Print the netlist before and after (checkout ways to determine # of flipflips) +5) Power analysis +6) Remove redundant vectors (visited and result) from getDownstreamSignals + and getUpstreamSignals +7) Check recursion +8) Check isValidGatingSet and findGatingCondition + From 56502440b3e7f448de014617e64f7c37e47a59cd Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 13 Feb 2026 15:32:54 -0800 Subject: [PATCH 24/55] Added hashing for already seen paths. TODO: add profiling to see if this is effective --- kernel/satgen.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/satgen.h b/kernel/satgen.h index 7815847b3..bf8ef4fe1 100644 --- a/kernel/satgen.h +++ b/kernel/satgen.h @@ -101,11 +101,16 @@ struct SatGen else vec.push_back(bit == (undef_mode ? RTLIL::State::Sx : RTLIL::State::S1) ? ez->CONST_TRUE : ez->CONST_FALSE); } else { - std::string wire_name = RTLIL::unescape_id(bit.wire->name); - std::string name = pf + - (bit.wire->width == 1 ? 
wire_name : stringf("%s [%d]", wire_name, bit.offset)); + vec.push_back(ez->frozen_literal(name)); + imported_signals[pf][bit] = vec.back(); + } } return vec; } From 80fbdf7e6a31365e3d6bcd0869ab9c76fe72c6e7 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 13 Feb 2026 15:33:45 -0800 Subject: [PATCH 25/55] Removed duplication of vectors and called clockgate pass post creating enable signals --- passes/silimate/sat_clockgate.cc | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 089fb1305..3b31a9b61 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -89,7 +89,6 @@ struct SatClockgateWorker // Get downstream signals from a register (BFS forward through combinational logic) pool getDownstreamSignals(Cell *reg, int limit) { - pool result; pool visited; std::queue worklist; @@ -102,12 +101,10 @@ struct SatClockgateWorker } } - while (!worklist.empty() && (int)result.size() < limit) { + while (!worklist.empty() && (int)visited.size() < limit) { SigBit bit = worklist.front(); worklist.pop(); - result.insert(bit); - // Find cells driven by this signal for (auto sink_cell : sig_to_sinks[bit]) { // Skip registers - don't traverse through them @@ -130,13 +127,12 @@ struct SatClockgateWorker } } - return result; + return visited; } // Get upstream signals feeding into given signals (BFS backward) pool getUpstreamSignals(const pool &start_signals, int limit) { - pool result; pool visited; std::queue worklist; @@ -145,12 +141,10 @@ struct SatClockgateWorker visited.insert(bit); } - while (!worklist.empty() && (int)result.size() < limit) { + while (!worklist.empty() && (int)visited.size() < limit) { SigBit bit = worklist.front(); worklist.pop(); - result.insert(bit); - // Find driver cell if (!sig_to_driver.count(bit)) continue; @@ -176,7 +170,7 @@ struct SatClockgateWorker } } - return result; + return visited; } // Check if a 
candidate signal is a valid gating condition using SAT @@ -543,6 +537,9 @@ struct SatClockgatePass : public Pass { } log("Total clock gates inserted: %d\n", total_gates); + + // Convert CEs to actual clock gate cells + Pass::call(design, "clockgate"); } } SatClockgatePass; From 3442bc3a856089880f9bce1e1f97f135bfed2b35 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 13 Feb 2026 16:15:31 -0800 Subject: [PATCH 26/55] Changed indexing to be based on the literal ID in EZSat and sorted to allow better hashing --- passes/silimate/sat_clockgate.cc | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 3b31a9b61..d19d25975 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -200,12 +200,12 @@ struct SatClockgateWorker } // Simple random simulation test to quickly prune candidates - bool simulationTest(SigBit candidate, SigSpec sig_d, SigSpec sig_q, bool as_enable) - { - // For now, skip simulation and go straight to SAT - // TODO: Implement random simulation for faster pruning - return true; - } + // bool simulationTest(SigBit candidate, SigSpec sig_d, SigSpec sig_q, bool as_enable) + // { + // // For now, skip simulation and go straight to SAT + // // TODO: Implement random simulation for faster pruning + // return true; + // } // Binary search to minimize the gating condition set // Tries to remove half of the signals at a time @@ -440,10 +440,15 @@ struct SatClockgateWorker continue; } - // Create signature for this gating condition - std::string sig; + // Create signature for this gating condition (sorted by SAT literal ID for permutation invariance) + std::vector> sorted; for (auto bit : gating_conds) - sig += log_signal(bit) + ","; + sorted.push_back({satgen.importSigSpec(SigSpec(bit))[0], bit}); + std::sort(sorted.begin(), sorted.end()); + + std::string sig; + for (auto &[id, bit] : sorted) + sig += std::to_string(id) 
+ ","; sig += is_enable ? "E" : "D"; // Check if we already have this condition From 5ce8aada271b136a013d41877dd46d12946a56b1 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 13 Feb 2026 16:34:15 -0800 Subject: [PATCH 27/55] Added profiling for literal count --- passes/silimate/sat_clockgate.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index d19d25975..8b206f5ce 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -28,7 +28,7 @@ PRIVATE_NAMESPACE_BEGIN // Configuration static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider -static const int DEFAULT_MIN_REGS = 1; // Min registers per clock gate +static const int DEFAULT_MIN_REGS = 3; // Min registers per clock gate static const int DEFAULT_SIM_ITERATIONS = 10; // Random simulation iterations for pruning struct SatClockgateWorker @@ -315,7 +315,6 @@ struct SatClockgateWorker minimizeGatingCondition(candidates, candidates.begin(), candidates.end(), ff.sig_d, ff.sig_q, true); if (!candidates.empty()) { - accepted_count++; return {candidates, true}; // true = clock enable } } @@ -325,7 +324,6 @@ struct SatClockgateWorker minimizeGatingCondition(candidates, candidates.begin(), candidates.end(), ff.sig_d, ff.sig_q, false); if (!candidates.empty()) { - accepted_count++; return {candidates, false}; // false = clock disable } } @@ -385,7 +383,6 @@ struct SatClockgateWorker } ff.emit(); - log(" Added CE to %s\n", log_id(reg)); } } @@ -471,6 +468,7 @@ struct SatClockgateWorker if ((int)regs.size() >= min_regs) { insertClockGate(regs, conds, is_enable); gates_inserted++; + accepted_count += regs.size(); } else { log_debug(" Skipping gating condition (only %zu registers, need %d)\n", regs.size(), min_regs); @@ -480,6 +478,8 @@ struct SatClockgateWorker log(" Inserted %d clock gates\n", gates_inserted); log(" Statistics: accepted=%d, rejected_sat=%d\n", accepted_count, 
rejected_sat_count); + log(" SAT stats: literals=%d, expressions=%d\n", + ez->numLiterals(), ez->numExpressions()); } }; From 91d8241a9aad913ebf6e8e51c619db95bf572261 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 13 Feb 2026 16:34:38 -0800 Subject: [PATCH 28/55] Revert "Added hashing for already seen paths. ODO: add profiling to see if this is effective" This reverts commit 56502440b3e7f448de014617e64f7c37e47a59cd. --- kernel/satgen.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/kernel/satgen.h b/kernel/satgen.h index bf8ef4fe1..7815847b3 100644 --- a/kernel/satgen.h +++ b/kernel/satgen.h @@ -101,16 +101,11 @@ struct SatGen else vec.push_back(bit == (undef_mode ? RTLIL::State::Sx : RTLIL::State::S1) ? ez->CONST_TRUE : ez->CONST_FALSE); } else { - // Check cache first to avoid creating duplicate literals - if (imported_signals[pf].count(bit)) { - vec.push_back(imported_signals[pf].at(bit)); - } else { - std::string wire_name = RTLIL::unescape_id(bit.wire->name); - std::string name = pf + - (bit.wire->width == 1 ? wire_name : stringf("%s [%d]", wire_name, bit.offset)); - vec.push_back(ez->frozen_literal(name)); - imported_signals[pf][bit] = vec.back(); - } + std::string wire_name = RTLIL::unescape_id(bit.wire->name); + std::string name = pf + + (bit.wire->width == 1 ? 
wire_name : stringf("%s [%d]", wire_name, bit.offset)); + vec.push_back(ez->frozen_literal(name)); + imported_signals[pf][bit] = vec.back(); } return vec; } From 356796067199852ad42c54857e482d5018dd8474 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 13 Feb 2026 17:01:58 -0800 Subject: [PATCH 29/55] Changed hashing from string to pair with vector and bool --- notes.txt | 1 - passes/silimate/sat_clockgate.cc | 35 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/notes.txt b/notes.txt index 8f3747957..cd7d871f0 100644 --- a/notes.txt +++ b/notes.txt @@ -356,7 +356,6 @@ Recommendation: TODOs: 1) Convert from the string hash to an integer hash -2) Use is_builtin_ff 3) See why this path is needed: if (ff.has_ce) { // Already has CE, AND with new condition diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 8b206f5ce..a0808907b 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -419,8 +419,8 @@ struct SatClockgateWorker log(" Found %zu registers without CE\n", registers.size()); // Track accepted gating conditions for reuse - // Maps condition signature to (condition signals, registers, is_enable) - dict, std::vector, bool>> accepted_gates; + // Key: (sorted literal IDs, is_enable) -> (condition signals, registers) + std::map, bool>, std::pair, std::vector>> accepted_gates; int processed = 0; for (auto reg : registers) { @@ -437,33 +437,32 @@ struct SatClockgateWorker continue; } - // Create signature for this gating condition (sorted by SAT literal ID for permutation invariance) - std::vector> sorted; + // Create signature for this gating condition (sorted literal IDs for permutation invariance) + std::vector sorted_ids; + sorted_ids.reserve(gating_conds.size()); for (auto bit : gating_conds) - sorted.push_back({satgen.importSigSpec(SigSpec(bit))[0], bit}); - std::sort(sorted.begin(), sorted.end()); + 
sorted_ids.push_back(satgen.importSigSpec(SigSpec(bit))[0]); + std::sort(sorted_ids.begin(), sorted_ids.end()); - std::string sig; - for (auto &[id, bit] : sorted) - sig += std::to_string(id) + ","; - sig += is_enable ? "E" : "D"; + auto key = std::make_pair(std::move(sorted_ids), is_enable); // Check if we already have this condition - if (accepted_gates.count(sig)) { - auto &[conds, regs, en] = accepted_gates[sig]; - regs.push_back(reg); + auto it = accepted_gates.find(key); + if (it != accepted_gates.end()) { + it->second.second.push_back(reg); log_debug(" Reusing existing gating condition for %s\n", log_id(reg)); } else { - accepted_gates[sig] = {gating_conds, {reg}, is_enable}; - log(" Found new gating condition for %s: %s (%s)\n", - log_id(reg), sig.c_str(), is_enable ? "enable" : "disable"); + accepted_gates[key] = {gating_conds, {reg}}; + log(" Found new gating condition for %s (%s)\n", + log_id(reg), is_enable ? "enable" : "disable"); } } // Insert clock gates for groups that meet minimum register threshold int gates_inserted = 0; - for (auto &[sig, data] : accepted_gates) { - auto &[conds, regs, is_enable] = data; + for (auto &[key, data] : accepted_gates) { + bool is_enable = key.second; + auto &[conds, regs] = data; if ((int)regs.size() >= min_regs) { insertClockGate(regs, conds, is_enable); From 2ab34262ec20e6d650e625d5447d6188618ba923 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 09:23:32 -0800 Subject: [PATCH 30/55] Added profiling info before and after sat_clockgate pass --- passes/silimate/sat_clockgate.cc | 79 ++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index a0808907b..7fdb20b5e 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -22,10 +22,77 @@ #include "kernel/satgen.h" #include #include +#include USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN +// Profile all flip-flops and write to file 
+void profileFlipFlops(Module *module, const std::string &filename, const std::string &label) +{ + std::ofstream out(filename, std::ios::app); + out << "\n=== " << label << " ===\n"; + out << "Module: " << log_id(module) << "\n\n"; + + int total_ffs = 0; + int ffs_with_ce = 0; + int ffs_with_arst = 0; + int ffs_with_srst = 0; + int total_bits = 0; + int bits_with_ce = 0; + + for (auto cell : module->cells()) { + if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + continue; + + FfData ff(nullptr, cell); + total_ffs++; + total_bits += ff.width; + + out << "FF: " << log_id(cell) << "\n"; + out << " type: " << log_id(cell->type) << "\n"; + out << " width: " << ff.width << "\n"; + out << " has_clk: " << (ff.has_clk ? "yes" : "no") << "\n"; + out << " has_ce: " << (ff.has_ce ? "yes" : "no"); + if (ff.has_ce) { + out << " (sig_ce: " << log_signal(ff.sig_ce) << ", pol: " << (ff.pol_ce ? "active-high" : "active-low") << ")"; + ffs_with_ce++; + bits_with_ce += ff.width; + } + out << "\n"; + out << " has_arst: " << (ff.has_arst ? "yes" : "no"); + if (ff.has_arst) { + out << " (sig_arst: " << log_signal(ff.sig_arst) << ")"; + ffs_with_arst++; + } + out << "\n"; + out << " has_srst: " << (ff.has_srst ? "yes" : "no"); + if (ff.has_srst) { + out << " (sig_srst: " << log_signal(ff.sig_srst) << ")"; + ffs_with_srst++; + } + out << "\n"; + out << " sig_clk: " << log_signal(ff.sig_clk) << "\n"; + out << " sig_d: " << log_signal(ff.sig_d) << "\n"; + out << " sig_q: " << log_signal(ff.sig_q) << "\n"; + out << "\n"; + } + + out << "--- Summary ---\n"; + out << "Total FFs: " << total_ffs << "\n"; + out << "Total bits: " << total_bits << "\n"; + out << "FFs with CE: " << ffs_with_ce << " (" << (total_ffs ? 
100*ffs_with_ce/total_ffs : 0) << "%)\n"; + out << "Bits with CE: " << bits_with_ce << " (" << (total_bits ? 100*bits_with_ce/total_bits : 0) << "%)\n"; + out << "FFs with ARST: " << ffs_with_arst << "\n"; + out << "FFs with SRST: " << ffs_with_srst << "\n"; + out << "\n"; + + out.close(); +} + // Configuration static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider static const int DEFAULT_MIN_REGS = 3; // Min registers per clock gate @@ -532,12 +599,24 @@ struct SatClockgatePass : public Pass { log("Configuration: max_cover=%d, min_regs=%d\n", max_cover, min_regs); + // Clear profile file and write header + std::ofstream clear_file("ff_profile.txt", std::ios::trunc); + clear_file << "Flip-Flop Profile Report\n"; + clear_file << "========================\n"; + clear_file.close(); + int total_gates = 0; for (auto module : design->selected_modules()) { + // Profile BEFORE clock gating + profileFlipFlops(module, "ff_profile.txt", "BEFORE sat_clockgate"); + SatClockgateWorker worker(module, max_cover, min_regs, sim_iterations); worker.run(); total_gates += worker.accepted_count; + + // Profile AFTER clock gating + profileFlipFlops(module, "ff_profile.txt", "AFTER sat_clockgate"); } log("Total clock gates inserted: %d\n", total_gates); From cc6605f8e271a7c7b0b905aef38eb5b653df5267 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 10:49:18 -0800 Subject: [PATCH 31/55] Added passing on the args into the clockgate pass so there's an icg cell for the mapping --- passes/silimate/sat_clockgate.cc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 7fdb20b5e..3e100fa81 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -582,6 +582,7 @@ struct SatClockgatePass : public Pass { int max_cover = DEFAULT_MAX_COVER; int min_regs = DEFAULT_MIN_REGS; int sim_iterations = DEFAULT_SIM_ITERATIONS; + 
std::vector clockgate_args; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { @@ -593,9 +594,14 @@ struct SatClockgatePass : public Pass { min_regs = std::stoi(args[++argidx]); continue; } - break; + // Pass remaining args to clockgate + if (args[argidx][0] == '-') { + clockgate_args.push_back(args[argidx]); + continue; + } + // Non-flag argument (value for previous flag) + clockgate_args.push_back(args[argidx]); } - extra_args(args, argidx, design); log("Configuration: max_cover=%d, min_regs=%d\n", max_cover, min_regs); @@ -622,7 +628,10 @@ struct SatClockgatePass : public Pass { log("Total clock gates inserted: %d\n", total_gates); // Convert CEs to actual clock gate cells - Pass::call(design, "clockgate"); + std::string clockgate_cmd = "clockgate"; + for (auto &arg : clockgate_args) + clockgate_cmd += " " + arg; + Pass::call(design, clockgate_cmd); } } SatClockgatePass; From f0de3ae8defa20d4a938d57cd48ba2680495a93f Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 11:19:18 -0800 Subject: [PATCH 32/55] Initial sat_clockgate pass pre speed optimization --- passes/silimate/sat_clockgate.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 3e100fa81..351226ff0 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -631,6 +631,7 @@ struct SatClockgatePass : public Pass { std::string clockgate_cmd = "clockgate"; for (auto &arg : clockgate_args) clockgate_cmd += " " + arg; + log("Calling clockgate with args: %s\n", clockgate_cmd); Pass::call(design, clockgate_cmd); } } SatClockgatePass; From 144db54c4e16d1dd47fc304587a2e271ce9ca314 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 11:53:06 -0800 Subject: [PATCH 33/55] Changed to inverse hashing for more flexibility --- passes/silimate/sat_clockgate.cc | 88 +++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 24 deletions(-) diff --git 
a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 351226ff0..7fd11b397 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -453,6 +453,14 @@ struct SatClockgateWorker } } + // Check if register can be added to an existing gate (subset/superset matching) + // Returns true if the existing gate's condition is a valid gating condition for this register + bool canReuseGate(const std::vector &existing_conds, Cell *reg, bool is_enable) + { + FfData ff(nullptr, reg); + return isValidGatingSet(existing_conds, ff.sig_d, ff.sig_q, is_enable); + } + // Main processing function void run() { @@ -485,9 +493,16 @@ struct SatClockgateWorker log(" Found %zu registers without CE\n", registers.size()); - // Track accepted gating conditions for reuse - // Key: (sorted literal IDs, is_enable) -> (condition signals, registers) - std::map, bool>, std::pair, std::vector>> accepted_gates; + // Inverted index approach: net -> list of gate indices containing that net + // This allows finding subsets/supersets, not just exact matches + struct AcceptedGate { + std::vector conds; + pool cond_set; // For fast subset/superset checks + std::vector regs; + bool is_enable; + }; + std::vector accepted_gates; + dict> net_to_accepted; // Inverted index int processed = 0; for (auto reg : registers) { @@ -504,22 +519,50 @@ struct SatClockgateWorker continue; } - // Create signature for this gating condition (sorted literal IDs for permutation invariance) - std::vector sorted_ids; - sorted_ids.reserve(gating_conds.size()); + // Build set of condition signals for this register + pool cond_set; for (auto bit : gating_conds) - sorted_ids.push_back(satgen.importSigSpec(SigSpec(bit))[0]); - std::sort(sorted_ids.begin(), sorted_ids.end()); + cond_set.insert(bit); - auto key = std::make_pair(std::move(sorted_ids), is_enable); + // Find all accepted gates sharing any net with this register's condition + pool candidate_gates; + for (auto bit : 
gating_conds) { + if (net_to_accepted.count(bit)) { + for (auto idx : net_to_accepted[bit]) + candidate_gates.insert(idx); + } + } - // Check if we already have this condition - auto it = accepted_gates.find(key); - if (it != accepted_gates.end()) { - it->second.second.push_back(reg); - log_debug(" Reusing existing gating condition for %s\n", log_id(reg)); - } else { - accepted_gates[key] = {gating_conds, {reg}}; + // Try to find a compatible existing gate (SAT-verify each candidate) + bool found_match = false; + for (auto idx : candidate_gates) { + auto &gate = accepted_gates[idx]; + + // Must match enable/disable polarity + if (gate.is_enable != is_enable) + continue; + + // Check if existing gate's condition works for this register + // This allows: gate condition {x,y} can work for register with {x,y,z} + // (existing is subset) or register with {x} (existing is superset) + if (canReuseGate(gate.conds, reg, is_enable)) { + gate.regs.push_back(reg); + log_debug(" Reusing existing gate %zu for %s (flexible match)\n", + idx, log_id(reg)); + found_match = true; + break; + } + } + + if (!found_match) { + // Create new accepted gate + size_t new_idx = accepted_gates.size(); + accepted_gates.push_back({gating_conds, cond_set, {reg}, is_enable}); + + // Update inverted index + for (auto bit : gating_conds) + net_to_accepted[bit].push_back(new_idx); + log(" Found new gating condition for %s (%s)\n", log_id(reg), is_enable ? 
"enable" : "disable"); } @@ -527,17 +570,14 @@ struct SatClockgateWorker // Insert clock gates for groups that meet minimum register threshold int gates_inserted = 0; - for (auto &[key, data] : accepted_gates) { - bool is_enable = key.second; - auto &[conds, regs] = data; - - if ((int)regs.size() >= min_regs) { - insertClockGate(regs, conds, is_enable); + for (auto &gate : accepted_gates) { + if ((int)gate.regs.size() >= min_regs) { + insertClockGate(gate.regs, gate.conds, gate.is_enable); gates_inserted++; - accepted_count += regs.size(); + accepted_count += gate.regs.size(); } else { log_debug(" Skipping gating condition (only %zu registers, need %d)\n", - regs.size(), min_regs); + gate.regs.size(), min_regs); } } From 2212d85626db098cb936d6c36392c15cce6e0d2f Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 11:57:56 -0800 Subject: [PATCH 34/55] Changed configurations to match the OpenROAD project --- passes/silimate/sat_clockgate.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 7fd11b397..b7150a6f6 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -95,7 +95,7 @@ void profileFlipFlops(Module *module, const std::string &filename, const std::st // Configuration static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider -static const int DEFAULT_MIN_REGS = 3; // Min registers per clock gate +static const int DEFAULT_MIN_REGS = 10; // Min registers per clock gate static const int DEFAULT_SIM_ITERATIONS = 10; // Random simulation iterations for pruning struct SatClockgateWorker From e755f6c42e7976d3656d5648289588d9486e27c1 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 12:14:53 -0800 Subject: [PATCH 35/55] Added initial simulation. 
Incorrect simulation -- changed the number of accepted results as well as increasing runtime --- passes/silimate/sat_clockgate.cc | 260 +++++++++++++++++++++++++++++-- 1 file changed, 247 insertions(+), 13 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index b7150a6f6..1e7ae5acc 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -23,6 +23,7 @@ #include #include #include +#include USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN @@ -118,6 +119,10 @@ struct SatClockgateWorker ezSatPtr ez; SatGen satgen; + // Simulation infrastructure + std::vector topo_order; // Cells in topological order for simulation + std::mt19937 rng; // Random number generator + // Statistics int accepted_count = 0; int rejected_sim_count = 0; @@ -126,7 +131,7 @@ struct SatClockgateWorker SatClockgateWorker(Module *module, int max_cover, int min_regs, int sim_iterations) : module(module), sigmap(module), max_cover(max_cover), min_regs(min_regs), sim_iterations(sim_iterations), - ez(), satgen(ez.get(), &sigmap) + ez(), satgen(ez.get(), &sigmap), rng(42) { // Build driver and sink maps for (auto cell : module->cells()) { @@ -151,6 +156,233 @@ struct SatClockgateWorker ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) satgen.importCell(cell); + + // Build topological order for simulation + buildTopoOrder(); + } + + // Build topological order of combinational cells for simulation + void buildTopoOrder() + { + dict cell_deps; // Number of unresolved input dependencies + dict> bit_to_cells; // Which cells need this bit + + for (auto cell : module->cells()) { + // Skip FFs + if (cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) + continue; + + int deps = 0; + for (auto &conn : 
cell->connections()) { + if (cell->input(conn.first)) { + for (auto bit : sigmap(conn.second)) { + if (bit.wire && sig_to_driver.count(bit)) { + Cell *driver = sig_to_driver[bit]; + if (!driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_))) { + deps++; + bit_to_cells[bit].insert(cell); + } + } + } + } + } + cell_deps[cell] = deps; + } + + // Kahn's algorithm + std::queue ready; + for (auto &[cell, deps] : cell_deps) { + if (deps == 0) + ready.push(cell); + } + + while (!ready.empty()) { + Cell *cell = ready.front(); + ready.pop(); + topo_order.push_back(cell); + + // Decrement deps for cells that depend on this cell's outputs + for (auto &conn : cell->connections()) { + if (cell->output(conn.first)) { + for (auto bit : sigmap(conn.second)) { + for (auto sink : bit_to_cells[bit]) { + if (--cell_deps[sink] == 0) + ready.push(sink); + } + } + } + } + } + } + + // Evaluate a single cell given current simulation values + void evaluateCell(Cell *cell, dict &sim_values) + { + auto getSigVal = [&](SigSpec sig) -> std::vector { + std::vector vals; + for (auto bit : sigmap(sig)) { + if (bit.wire) { + vals.push_back(sim_values.count(bit) ? 
sim_values[bit] : false); + } else { + vals.push_back(bit.data == State::S1); + } + } + return vals; + }; + + auto setSigVal = [&](SigSpec sig, const std::vector &vals) { + int i = 0; + for (auto bit : sigmap(sig)) { + if (bit.wire && i < (int)vals.size()) + sim_values[bit] = vals[i]; + i++; + } + }; + + if (cell->type == ID($not) || cell->type == ID($_NOT_)) { + auto a = getSigVal(cell->getPort(ID::A)); + std::vector y(a.size()); + for (size_t i = 0; i < a.size(); i++) + y[i] = !a[i]; + setSigVal(cell->getPort(ID::Y), y); + } + else if (cell->type == ID($and) || cell->type == ID($_AND_)) { + auto a = getSigVal(cell->getPort(ID::A)); + auto b = getSigVal(cell->getPort(ID::B)); + std::vector y(std::max(a.size(), b.size())); + for (size_t i = 0; i < y.size(); i++) + y[i] = (i < a.size() ? a[i] : false) && (i < b.size() ? b[i] : false); + setSigVal(cell->getPort(ID::Y), y); + } + else if (cell->type == ID($or) || cell->type == ID($_OR_)) { + auto a = getSigVal(cell->getPort(ID::A)); + auto b = getSigVal(cell->getPort(ID::B)); + std::vector y(std::max(a.size(), b.size())); + for (size_t i = 0; i < y.size(); i++) + y[i] = (i < a.size() ? a[i] : false) || (i < b.size() ? b[i] : false); + setSigVal(cell->getPort(ID::Y), y); + } + else if (cell->type == ID($xor) || cell->type == ID($_XOR_)) { + auto a = getSigVal(cell->getPort(ID::A)); + auto b = getSigVal(cell->getPort(ID::B)); + std::vector y(std::max(a.size(), b.size())); + for (size_t i = 0; i < y.size(); i++) + y[i] = (i < a.size() ? a[i] : false) != (i < b.size() ? b[i] : false); + setSigVal(cell->getPort(ID::Y), y); + } + else if (cell->type == ID($mux) || cell->type == ID($_MUX_)) { + auto a = getSigVal(cell->getPort(ID::A)); + auto b = getSigVal(cell->getPort(ID::B)); + auto s = getSigVal(cell->getPort(ID::S)); + bool sel = s.empty() ? false : s[0]; + setSigVal(cell->getPort(ID::Y), sel ? 
b : a); + } + else if (cell->type == ID($reduce_and)) { + auto a = getSigVal(cell->getPort(ID::A)); + bool result = true; + for (auto v : a) result = result && v; + setSigVal(cell->getPort(ID::Y), {result}); + } + else if (cell->type == ID($reduce_or)) { + auto a = getSigVal(cell->getPort(ID::A)); + bool result = false; + for (auto v : a) result = result || v; + setSigVal(cell->getPort(ID::Y), {result}); + } + else if (cell->type == ID($eq)) { + auto a = getSigVal(cell->getPort(ID::A)); + auto b = getSigVal(cell->getPort(ID::B)); + bool result = (a == b); + setSigVal(cell->getPort(ID::Y), {result}); + } + else if (cell->type == ID($ne)) { + auto a = getSigVal(cell->getPort(ID::A)); + auto b = getSigVal(cell->getPort(ID::B)); + bool result = (a != b); + setSigVal(cell->getPort(ID::Y), {result}); + } + // Add more cell types as needed - for now, unknown cells just pass through + } + + // Run simulation with random inputs, check if gating_active & (D != Q) is ever true + // Returns false if counterexample found (candidate is definitely invalid) + bool simulationTest(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + for (int iter = 0; iter < sim_iterations; iter++) { + dict sim_values; + + // Initialize all input ports and FF outputs with random values + for (auto wire : module->wires()) { + if (wire->port_input) { + for (int i = 0; i < wire->width; i++) + sim_values[SigBit(wire, i)] = (rng() & 1); + } + } + + // Also randomize FF Q outputs (they're inputs to combinational logic) + for (auto cell : module->cells()) { + if (cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) { + FfData ff(nullptr, cell); + for (auto bit : sigmap(ff.sig_q)) + if (bit.wire) + sim_values[bit] = (rng() & 1); + } + } + + // Propagate through combinational logic in topological order + for 
(auto cell : topo_order) + evaluateCell(cell, sim_values); + + // Evaluate gating condition + bool combined_cond; + if (as_enable) { + // OR of conditions + combined_cond = false; + for (auto bit : conds) { + SigBit mapped = sigmap(bit); + if (sim_values.count(mapped) && sim_values[mapped]) + combined_cond = true; + } + } else { + // AND of conditions + combined_cond = true; + for (auto bit : conds) { + SigBit mapped = sigmap(bit); + if (!sim_values.count(mapped) || !sim_values[mapped]) + combined_cond = false; + } + } + + bool gating_active = as_enable ? !combined_cond : combined_cond; + + // Check D != Q + bool d_ne_q = false; + for (int i = 0; i < sig_d.size(); i++) { + SigBit d_bit = sigmap(sig_d[i]); + SigBit q_bit = sigmap(sig_q[i]); + bool d_val = sim_values.count(d_bit) ? sim_values[d_bit] : false; + bool q_val = sim_values.count(q_bit) ? sim_values[q_bit] : false; + if (d_val != q_val) { + d_ne_q = true; + break; + } + } + + // If gating is active and D != Q, this is a counterexample + if (gating_active && d_ne_q) { + rejected_sim_count++; + return false; + } + } + return true; // No counterexample found } // Get downstream signals from a register (BFS forward through combinational logic) @@ -266,14 +498,6 @@ struct SatClockgateWorker return !ez->solve(dummy_exprs, dummy_vals, assumptions); } - // Simple random simulation test to quickly prune candidates - // bool simulationTest(SigBit candidate, SigSpec sig_d, SigSpec sig_q, bool as_enable) - // { - // // For now, skip simulation and go straight to SAT - // // TODO: Implement random simulation for faster pruning - // return true; - // } - // Binary search to minimize the gating condition set // Tries to remove half of the signals at a time void minimizeGatingCondition( @@ -308,11 +532,19 @@ struct SatClockgateWorker } // Check if OR/AND of signals forms a valid gating condition + // Uses simulation first to quickly prune invalid candidates, then SAT to prove bool isValidGatingSet(const std::vector &conds, 
SigSpec sig_d, SigSpec sig_q, bool as_enable) { if (conds.empty()) return false; + // Quick simulation filter first - catches most invalid candidates fast + if (!simulationTest(conds, sig_d, sig_q, as_enable)) { + log_debug(" Rejected by simulation\n"); + return false; + } + + // SAT only if simulation passes std::vector d_vec = satgen.importSigSpec(sig_d); std::vector q_vec = satgen.importSigSpec(sig_q); @@ -340,7 +572,10 @@ struct SatClockgateWorker std::vector dummy_exprs; std::vector dummy_vals; - return !ez->solve(dummy_exprs, dummy_vals, assumptions); + bool is_valid = !ez->solve(dummy_exprs, dummy_vals, assumptions); + if (!is_valid) + rejected_sat_count++; + return is_valid; } // Find gating condition for a register @@ -395,7 +630,6 @@ struct SatClockgateWorker } } - rejected_sat_count++; return {{}, false}; } @@ -582,8 +816,8 @@ struct SatClockgateWorker } log(" Inserted %d clock gates\n", gates_inserted); - log(" Statistics: accepted=%d, rejected_sat=%d\n", - accepted_count, rejected_sat_count); + log(" Statistics: accepted=%d, rejected_sim=%d, rejected_sat=%d\n", + accepted_count, rejected_sim_count, rejected_sat_count); log(" SAT stats: literals=%d, expressions=%d\n", ez->numLiterals(), ez->numExpressions()); } From 499e83a5492d572b4ee944a415e784e60f49139e Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 12:41:59 -0800 Subject: [PATCH 36/55] Switched to using CE module. Mostly retaining SAT gates. 
Still needs speedup --- passes/silimate/sat_clockgate.cc | 221 ++++++------------------------- 1 file changed, 38 insertions(+), 183 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 1e7ae5acc..e8c3d1d64 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -20,6 +20,7 @@ #include "kernel/sigtools.h" #include "kernel/ff.h" #include "kernel/satgen.h" +#include "kernel/consteval.h" #include #include #include @@ -119,9 +120,8 @@ struct SatClockgateWorker ezSatPtr ez; SatGen satgen; - // Simulation infrastructure - std::vector topo_order; // Cells in topological order for simulation - std::mt19937 rng; // Random number generator + // Random number generator for simulation + std::mt19937 rng; // Statistics int accepted_count = 0; @@ -156,223 +156,78 @@ struct SatClockgateWorker ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) satgen.importCell(cell); - - // Build topological order for simulation - buildTopoOrder(); } - // Build topological order of combinational cells for simulation - void buildTopoOrder() - { - dict cell_deps; // Number of unresolved input dependencies - dict> bit_to_cells; // Which cells need this bit - - for (auto cell : module->cells()) { - // Skip FFs - if (cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), - ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), - ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), - ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) - continue; - - int deps = 0; - for (auto &conn : cell->connections()) { - if (cell->input(conn.first)) { - for (auto bit : sigmap(conn.second)) { - if (bit.wire && sig_to_driver.count(bit)) { - Cell *driver = sig_to_driver[bit]; - if (!driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), - ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), - ID($dffsre), ID($_DFF_P_), ID($_DFF_N_))) { - deps++; - 
bit_to_cells[bit].insert(cell); - } - } - } - } - } - cell_deps[cell] = deps; - } - - // Kahn's algorithm - std::queue ready; - for (auto &[cell, deps] : cell_deps) { - if (deps == 0) - ready.push(cell); - } - - while (!ready.empty()) { - Cell *cell = ready.front(); - ready.pop(); - topo_order.push_back(cell); - - // Decrement deps for cells that depend on this cell's outputs - for (auto &conn : cell->connections()) { - if (cell->output(conn.first)) { - for (auto bit : sigmap(conn.second)) { - for (auto sink : bit_to_cells[bit]) { - if (--cell_deps[sink] == 0) - ready.push(sink); - } - } - } - } - } - } - - // Evaluate a single cell given current simulation values - void evaluateCell(Cell *cell, dict &sim_values) - { - auto getSigVal = [&](SigSpec sig) -> std::vector { - std::vector vals; - for (auto bit : sigmap(sig)) { - if (bit.wire) { - vals.push_back(sim_values.count(bit) ? sim_values[bit] : false); - } else { - vals.push_back(bit.data == State::S1); - } - } - return vals; - }; - - auto setSigVal = [&](SigSpec sig, const std::vector &vals) { - int i = 0; - for (auto bit : sigmap(sig)) { - if (bit.wire && i < (int)vals.size()) - sim_values[bit] = vals[i]; - i++; - } - }; - - if (cell->type == ID($not) || cell->type == ID($_NOT_)) { - auto a = getSigVal(cell->getPort(ID::A)); - std::vector y(a.size()); - for (size_t i = 0; i < a.size(); i++) - y[i] = !a[i]; - setSigVal(cell->getPort(ID::Y), y); - } - else if (cell->type == ID($and) || cell->type == ID($_AND_)) { - auto a = getSigVal(cell->getPort(ID::A)); - auto b = getSigVal(cell->getPort(ID::B)); - std::vector y(std::max(a.size(), b.size())); - for (size_t i = 0; i < y.size(); i++) - y[i] = (i < a.size() ? a[i] : false) && (i < b.size() ? 
b[i] : false); - setSigVal(cell->getPort(ID::Y), y); - } - else if (cell->type == ID($or) || cell->type == ID($_OR_)) { - auto a = getSigVal(cell->getPort(ID::A)); - auto b = getSigVal(cell->getPort(ID::B)); - std::vector y(std::max(a.size(), b.size())); - for (size_t i = 0; i < y.size(); i++) - y[i] = (i < a.size() ? a[i] : false) || (i < b.size() ? b[i] : false); - setSigVal(cell->getPort(ID::Y), y); - } - else if (cell->type == ID($xor) || cell->type == ID($_XOR_)) { - auto a = getSigVal(cell->getPort(ID::A)); - auto b = getSigVal(cell->getPort(ID::B)); - std::vector y(std::max(a.size(), b.size())); - for (size_t i = 0; i < y.size(); i++) - y[i] = (i < a.size() ? a[i] : false) != (i < b.size() ? b[i] : false); - setSigVal(cell->getPort(ID::Y), y); - } - else if (cell->type == ID($mux) || cell->type == ID($_MUX_)) { - auto a = getSigVal(cell->getPort(ID::A)); - auto b = getSigVal(cell->getPort(ID::B)); - auto s = getSigVal(cell->getPort(ID::S)); - bool sel = s.empty() ? false : s[0]; - setSigVal(cell->getPort(ID::Y), sel ? 
b : a); - } - else if (cell->type == ID($reduce_and)) { - auto a = getSigVal(cell->getPort(ID::A)); - bool result = true; - for (auto v : a) result = result && v; - setSigVal(cell->getPort(ID::Y), {result}); - } - else if (cell->type == ID($reduce_or)) { - auto a = getSigVal(cell->getPort(ID::A)); - bool result = false; - for (auto v : a) result = result || v; - setSigVal(cell->getPort(ID::Y), {result}); - } - else if (cell->type == ID($eq)) { - auto a = getSigVal(cell->getPort(ID::A)); - auto b = getSigVal(cell->getPort(ID::B)); - bool result = (a == b); - setSigVal(cell->getPort(ID::Y), {result}); - } - else if (cell->type == ID($ne)) { - auto a = getSigVal(cell->getPort(ID::A)); - auto b = getSigVal(cell->getPort(ID::B)); - bool result = (a != b); - setSigVal(cell->getPort(ID::Y), {result}); - } - // Add more cell types as needed - for now, unknown cells just pass through - } - - // Run simulation with random inputs, check if gating_active & (D != Q) is ever true + // Run simulation with random inputs using ConstEval // Returns false if counterexample found (candidate is definitely invalid) bool simulationTest(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) { for (int iter = 0; iter < sim_iterations; iter++) { - dict sim_values; + ConstEval ce(module); - // Initialize all input ports and FF outputs with random values + // Generate random values for all input ports for (auto wire : module->wires()) { if (wire->port_input) { + Const rand_val(State::S0, wire->width); for (int i = 0; i < wire->width; i++) - sim_values[SigBit(wire, i)] = (rng() & 1); + rand_val.bits()[i] = (rng() & 1) ? 
State::S1 : State::S0; + ce.set(SigSpec(wire), rand_val); } } - // Also randomize FF Q outputs (they're inputs to combinational logic) + // Randomize FF Q outputs (they're inputs to combinational logic) for (auto cell : module->cells()) { if (cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) { FfData ff(nullptr, cell); - for (auto bit : sigmap(ff.sig_q)) - if (bit.wire) - sim_values[bit] = (rng() & 1); + Const rand_val(State::S0, ff.width); + for (int i = 0; i < ff.width; i++) + rand_val.bits()[i] = (rng() & 1) ? State::S1 : State::S0; + ce.set(ff.sig_q, rand_val); } } - // Propagate through combinational logic in topological order - for (auto cell : topo_order) - evaluateCell(cell, sim_values); - - // Evaluate gating condition + // Evaluate gating condition signals bool combined_cond; if (as_enable) { - // OR of conditions combined_cond = false; for (auto bit : conds) { - SigBit mapped = sigmap(bit); - if (sim_values.count(mapped) && sim_values[mapped]) - combined_cond = true; + SigSpec sig(bit); + if (ce.eval(sig)) { + if (sig[0] == State::S1) + combined_cond = true; + } } } else { - // AND of conditions combined_cond = true; for (auto bit : conds) { - SigBit mapped = sigmap(bit); - if (!sim_values.count(mapped) || !sim_values[mapped]) + SigSpec sig(bit); + if (ce.eval(sig)) { + if (sig[0] != State::S1) + combined_cond = false; + } else { combined_cond = false; + } } } bool gating_active = as_enable ? !combined_cond : combined_cond; - // Check D != Q + // Evaluate D and Q, check if D != Q + SigSpec d_eval = sig_d; + SigSpec q_eval = sig_q; + bool d_ok = ce.eval(d_eval); + bool q_ok = ce.eval(q_eval); + bool d_ne_q = false; - for (int i = 0; i < sig_d.size(); i++) { - SigBit d_bit = sigmap(sig_d[i]); - SigBit q_bit = sigmap(sig_q[i]); - bool d_val = sim_values.count(d_bit) ? 
sim_values[d_bit] : false; - bool q_val = sim_values.count(q_bit) ? sim_values[q_bit] : false; - if (d_val != q_val) { - d_ne_q = true; - break; + if (d_ok && q_ok) { + for (int i = 0; i < sig_d.size(); i++) { + if (d_eval[i] != q_eval[i]) { + d_ne_q = true; + break; + } } } From dc4ca2c62188cf84c0e2dfbb0ca64a20684bb399 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 12:42:20 -0800 Subject: [PATCH 37/55] Added TODO for eliminating false paths --- notes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/notes.txt b/notes.txt index cd7d871f0..1c2755f2b 100644 --- a/notes.txt +++ b/notes.txt @@ -370,3 +370,4 @@ TODOs: 7) Check recursion 8) Check isValidGatingSet and findGatingCondition +Add a new feature to not do simulation or SAT based on the false paths \ No newline at end of file From efcabb270fb56a5bd680fdd3d8b355db797bd633 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 13:38:32 -0800 Subject: [PATCH 38/55] Added caching of simulation runs for speed --- passes/silimate/sat_clockgate.cc | 81 +++++++++++++++++--------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index e8c3d1d64..4d4e08899 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -98,7 +98,7 @@ void profileFlipFlops(Module *module, const std::string &filename, const std::st // Configuration static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider static const int DEFAULT_MIN_REGS = 10; // Min registers per clock gate -static const int DEFAULT_SIM_ITERATIONS = 10; // Random simulation iterations for pruning +static const int DEFAULT_SIM_ITERATIONS = 1000; // Random simulation iterations for pruning struct SatClockgateWorker { @@ -120,8 +120,8 @@ struct SatClockgateWorker ezSatPtr ez; SatGen satgen; - // Random number generator for simulation - std::mt19937 rng; + // Cached simulation results: [iteration][SigBit] = 
evaluated State + std::vector> cached_sim_results; // Statistics int accepted_count = 0; @@ -131,7 +131,7 @@ struct SatClockgateWorker SatClockgateWorker(Module *module, int max_cover, int min_regs, int sim_iterations) : module(module), sigmap(module), max_cover(max_cover), min_regs(min_regs), sim_iterations(sim_iterations), - ez(), satgen(ez.get(), &sigmap), rng(42) + ez(), satgen(ez.get(), &sigmap) { // Build driver and sink maps for (auto cell : module->cells()) { @@ -156,16 +156,14 @@ struct SatClockgateWorker ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) satgen.importCell(cell); - } - - // Run simulation with random inputs using ConstEval - // Returns false if counterexample found (candidate is definitely invalid) - bool simulationTest(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) - { + + // Pre-run simulations and cache all signal values + std::mt19937 rng(42); + cached_sim_results.resize(sim_iterations); for (int iter = 0; iter < sim_iterations; iter++) { ConstEval ce(module); - // Generate random values for all input ports + // Set random values for input ports for (auto wire : module->wires()) { if (wire->port_input) { Const rand_val(State::S0, wire->width); @@ -174,8 +172,7 @@ struct SatClockgateWorker ce.set(SigSpec(wire), rand_val); } } - - // Randomize FF Q outputs (they're inputs to combinational logic) + // Set random values for FF Q outputs for (auto cell : module->cells()) { if (cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), @@ -189,45 +186,55 @@ struct SatClockgateWorker } } - // Evaluate gating condition signals + // Evaluate and cache ALL wire signals + for (auto wire : module->wires()) { + for (int i = 0; i < wire->width; i++) { + SigBit bit(wire, i); + SigSpec sig(bit); + if (ce.eval(sig)) + cached_sim_results[iter][sigmap(bit)] = sig[0].data; + } + } + } + } + + // Check cached simulation 
results for counterexamples + // Returns false if counterexample found (candidate is definitely invalid) + bool simulationTest(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + for (int iter = 0; iter < sim_iterations; iter++) { + auto &cache = cached_sim_results[iter]; + + // Lookup gating condition from cache bool combined_cond; if (as_enable) { combined_cond = false; for (auto bit : conds) { - SigSpec sig(bit); - if (ce.eval(sig)) { - if (sig[0] == State::S1) - combined_cond = true; - } + SigBit mapped = sigmap(bit); + if (cache.count(mapped) && cache[mapped] == State::S1) + combined_cond = true; } } else { combined_cond = true; for (auto bit : conds) { - SigSpec sig(bit); - if (ce.eval(sig)) { - if (sig[0] != State::S1) - combined_cond = false; - } else { + SigBit mapped = sigmap(bit); + if (!cache.count(mapped) || cache[mapped] != State::S1) combined_cond = false; - } } } bool gating_active = as_enable ? !combined_cond : combined_cond; - // Evaluate D and Q, check if D != Q - SigSpec d_eval = sig_d; - SigSpec q_eval = sig_q; - bool d_ok = ce.eval(d_eval); - bool q_ok = ce.eval(q_eval); - + // Lookup D and Q from cache, check if D != Q bool d_ne_q = false; - if (d_ok && q_ok) { - for (int i = 0; i < sig_d.size(); i++) { - if (d_eval[i] != q_eval[i]) { - d_ne_q = true; - break; - } + for (int i = 0; i < sig_d.size(); i++) { + SigBit d_bit = sigmap(sig_d[i]); + SigBit q_bit = sigmap(sig_q[i]); + State d_val = cache.count(d_bit) ? cache[d_bit] : State::S0; + State q_val = cache.count(q_bit) ? 
cache[q_bit] : State::S0; + if (d_val != q_val) { + d_ne_q = true; + break; } } From fa9e7a77d7df2d52fad9fcd24b6c9001e8495d76 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 13:43:22 -0800 Subject: [PATCH 39/55] Removed normal clockgate pass options from sat_clockgate pass --- passes/silimate/sat_clockgate.cc | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/sat_clockgate.cc index 4d4e08899..0a704b8f7 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/sat_clockgate.cc @@ -401,10 +401,10 @@ struct SatClockgateWorker return false; // Quick simulation filter first - catches most invalid candidates fast - if (!simulationTest(conds, sig_d, sig_q, as_enable)) { - log_debug(" Rejected by simulation\n"); - return false; - } + // if (!simulationTest(conds, sig_d, sig_q, as_enable)) { + // log_debug(" Rejected by simulation\n"); + // return false; + // } // SAT only if simulation passes std::vector d_vec = satgen.importSigSpec(sig_d); @@ -718,7 +718,6 @@ struct SatClockgatePass : public Pass { int max_cover = DEFAULT_MAX_COVER; int min_regs = DEFAULT_MIN_REGS; int sim_iterations = DEFAULT_SIM_ITERATIONS; - std::vector clockgate_args; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { @@ -730,14 +729,9 @@ struct SatClockgatePass : public Pass { min_regs = std::stoi(args[++argidx]); continue; } - // Pass remaining args to clockgate - if (args[argidx][0] == '-') { - clockgate_args.push_back(args[argidx]); - continue; - } - // Non-flag argument (value for previous flag) - clockgate_args.push_back(args[argidx]); + break; } + extra_args(args, argidx, design); log("Configuration: max_cover=%d, min_regs=%d\n", max_cover, min_regs); @@ -762,13 +756,6 @@ struct SatClockgatePass : public Pass { } log("Total clock gates inserted: %d\n", total_gates); - - // Convert CEs to actual clock gate cells - std::string clockgate_cmd = "clockgate"; - for 
(auto &arg : clockgate_args) - clockgate_cmd += " " + arg; - log("Calling clockgate with args: %s\n", clockgate_cmd); - Pass::call(design, clockgate_cmd); } } SatClockgatePass; From a8e4fccc56a06b3516beff449f3924248ffd35e8 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 14:07:22 -0800 Subject: [PATCH 40/55] Removed simulation and isValidGatingSignal function --- passes/silimate/Makefile.inc | 2 +- .../{sat_clockgate.cc => infer_ce.cc} | 185 +++--------------- passes/techmap/clockgate.cc | 12 ++ 3 files changed, 42 insertions(+), 157 deletions(-) rename passes/silimate/{sat_clockgate.cc => infer_ce.cc} (76%) diff --git a/passes/silimate/Makefile.inc b/passes/silimate/Makefile.inc index dfb1a4ffe..bfdc21ce8 100644 --- a/passes/silimate/Makefile.inc +++ b/passes/silimate/Makefile.inc @@ -10,7 +10,7 @@ OBJS += passes/silimate/mux_push.o OBJS += passes/silimate/obs_clean.o OBJS += passes/silimate/segv.o OBJS += passes/silimate/reg_rename.o -OBJS += passes/silimate/sat_clockgate.o +OBJS += passes/silimate/infer_ce.o OBJS += passes/silimate/splitfanout.o OBJS += passes/silimate/splitlarge.o OBJS += passes/silimate/splitnetlist.o diff --git a/passes/silimate/sat_clockgate.cc b/passes/silimate/infer_ce.cc similarity index 76% rename from passes/silimate/sat_clockgate.cc rename to passes/silimate/infer_ce.cc index 0a704b8f7..b2d393ce6 100644 --- a/passes/silimate/sat_clockgate.cc +++ b/passes/silimate/infer_ce.cc @@ -20,11 +20,9 @@ #include "kernel/sigtools.h" #include "kernel/ff.h" #include "kernel/satgen.h" -#include "kernel/consteval.h" #include #include #include -#include USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN @@ -98,9 +96,8 @@ void profileFlipFlops(Module *module, const std::string &filename, const std::st // Configuration static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider static const int DEFAULT_MIN_REGS = 10; // Min registers per clock gate -static const int DEFAULT_SIM_ITERATIONS = 1000; // Random simulation 
iterations for pruning -struct SatClockgateWorker +struct InferCeWorker { Module *module; SigMap sigmap; @@ -108,7 +105,6 @@ struct SatClockgateWorker // Configuration int max_cover; int min_regs; - int sim_iterations; // Maps output signal bits to their driver cells dict sig_to_driver; @@ -120,17 +116,13 @@ struct SatClockgateWorker ezSatPtr ez; SatGen satgen; - // Cached simulation results: [iteration][SigBit] = evaluated State - std::vector> cached_sim_results; - // Statistics int accepted_count = 0; - int rejected_sim_count = 0; int rejected_sat_count = 0; - SatClockgateWorker(Module *module, int max_cover, int min_regs, int sim_iterations) + InferCeWorker(Module *module, int max_cover, int min_regs) : module(module), sigmap(module), - max_cover(max_cover), min_regs(min_regs), sim_iterations(sim_iterations), + max_cover(max_cover), min_regs(min_regs), ez(), satgen(ez.get(), &sigmap) { // Build driver and sink maps @@ -156,95 +148,6 @@ struct SatClockgateWorker ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) satgen.importCell(cell); - - // Pre-run simulations and cache all signal values - std::mt19937 rng(42); - cached_sim_results.resize(sim_iterations); - for (int iter = 0; iter < sim_iterations; iter++) { - ConstEval ce(module); - - // Set random values for input ports - for (auto wire : module->wires()) { - if (wire->port_input) { - Const rand_val(State::S0, wire->width); - for (int i = 0; i < wire->width; i++) - rand_val.bits()[i] = (rng() & 1) ? 
State::S1 : State::S0; - ce.set(SigSpec(wire), rand_val); - } - } - // Set random values for FF Q outputs - for (auto cell : module->cells()) { - if (cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), - ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), - ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), - ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) { - FfData ff(nullptr, cell); - Const rand_val(State::S0, ff.width); - for (int i = 0; i < ff.width; i++) - rand_val.bits()[i] = (rng() & 1) ? State::S1 : State::S0; - ce.set(ff.sig_q, rand_val); - } - } - - // Evaluate and cache ALL wire signals - for (auto wire : module->wires()) { - for (int i = 0; i < wire->width; i++) { - SigBit bit(wire, i); - SigSpec sig(bit); - if (ce.eval(sig)) - cached_sim_results[iter][sigmap(bit)] = sig[0].data; - } - } - } - } - - // Check cached simulation results for counterexamples - // Returns false if counterexample found (candidate is definitely invalid) - bool simulationTest(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) - { - for (int iter = 0; iter < sim_iterations; iter++) { - auto &cache = cached_sim_results[iter]; - - // Lookup gating condition from cache - bool combined_cond; - if (as_enable) { - combined_cond = false; - for (auto bit : conds) { - SigBit mapped = sigmap(bit); - if (cache.count(mapped) && cache[mapped] == State::S1) - combined_cond = true; - } - } else { - combined_cond = true; - for (auto bit : conds) { - SigBit mapped = sigmap(bit); - if (!cache.count(mapped) || cache[mapped] != State::S1) - combined_cond = false; - } - } - - bool gating_active = as_enable ? !combined_cond : combined_cond; - - // Lookup D and Q from cache, check if D != Q - bool d_ne_q = false; - for (int i = 0; i < sig_d.size(); i++) { - SigBit d_bit = sigmap(sig_d[i]); - SigBit q_bit = sigmap(sig_q[i]); - State d_val = cache.count(d_bit) ? cache[d_bit] : State::S0; - State q_val = cache.count(q_bit) ? 
cache[q_bit] : State::S0; - if (d_val != q_val) { - d_ne_q = true; - break; - } - } - - // If gating is active and D != Q, this is a counterexample - if (gating_active && d_ne_q) { - rejected_sim_count++; - return false; - } - } - return true; // No counterexample found } // Get downstream signals from a register (BFS forward through combinational logic) @@ -334,32 +237,6 @@ struct SatClockgateWorker return visited; } - // Check if a candidate signal is a valid gating condition using SAT - // Safe gating check: sig=1 → D==Q (i.e., (sig ∧ (D≠Q)) is UNSAT) - bool isValidGatingSignal(SigBit candidate, SigSpec sig_d, SigSpec sig_q, bool as_enable) - { - std::vector d_vec = satgen.importSigSpec(sig_d); - std::vector q_vec = satgen.importSigSpec(sig_q); - int cand_var = satgen.importSigSpec(SigSpec(candidate))[0]; - - // D != Q - int d_ne_q = ez->vec_ne(d_vec, q_vec); - - // For clock enable (active high): when enable=0, D must equal Q - // Check: (!enable ∧ (D≠Q)) is UNSAT - // For clock disable (active low): when disable=1, D must equal Q - // Check: (disable ∧ (D≠Q)) is UNSAT - - int gating_active = as_enable ? 
ez->NOT(cand_var) : cand_var; - int query = ez->AND(gating_active, d_ne_q); - - std::vector assumptions = {query}; - std::vector dummy_exprs; - std::vector dummy_vals; - - return !ez->solve(dummy_exprs, dummy_vals, assumptions); - } - // Binary search to minimize the gating condition set // Tries to remove half of the signals at a time void minimizeGatingCondition( @@ -393,20 +270,12 @@ struct SatClockgateWorker } } - // Check if OR/AND of signals forms a valid gating condition - // Uses simulation first to quickly prune invalid candidates, then SAT to prove + // Check if OR/AND of signals forms a valid gating condition using SAT bool isValidGatingSet(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) { if (conds.empty()) return false; - // Quick simulation filter first - catches most invalid candidates fast - // if (!simulationTest(conds, sig_d, sig_q, as_enable)) { - // log_debug(" Rejected by simulation\n"); - // return false; - // } - - // SAT only if simulation passes std::vector d_vec = satgen.importSigSpec(sig_d); std::vector q_vec = satgen.importSigSpec(sig_q); @@ -447,12 +316,12 @@ struct SatClockgateWorker FfData ff(nullptr, reg); // Get candidate signals downstream of this register - pool downstream = getDownstreamSignals(reg, max_cover); + // pool downstream = getDownstreamSignals(reg, max_cover); - if (downstream.empty()) { - log_debug(" No downstream candidates for %s\n", log_id(reg)); - return {{}, false}; - } + // if (downstream.empty()) { + // log_debug(" No downstream candidates for %s\n", log_id(reg)); + // return {{}, false}; + // } // Also include upstream signals that could affect D pool d_inputs; @@ -463,11 +332,16 @@ struct SatClockgateWorker // Combine and limit candidates std::vector candidates; - for (auto bit : downstream) - candidates.push_back(bit); + // for (auto bit : downstream) + // candidates.push_back(bit); + // for (auto bit : upstream) + // if (!downstream.count(bit)) + // candidates.push_back(bit); + for 
(auto bit : upstream) - if (!downstream.count(bit)) - candidates.push_back(bit); + candidates.push_back(bit); + + if ((int)candidates.size() > max_cover) candidates.resize(max_cover); @@ -678,22 +552,22 @@ struct SatClockgateWorker } log(" Inserted %d clock gates\n", gates_inserted); - log(" Statistics: accepted=%d, rejected_sim=%d, rejected_sat=%d\n", - accepted_count, rejected_sim_count, rejected_sat_count); + log(" Statistics: accepted=%d, rejected_sat=%d\n", + accepted_count, rejected_sat_count); log(" SAT stats: literals=%d, expressions=%d\n", ez->numLiterals(), ez->numExpressions()); } }; -struct SatClockgatePass : public Pass { - SatClockgatePass() : Pass("sat_clockgate", "SAT-based automatic clock gating") { } +struct InferCePass : public Pass { + InferCePass() : Pass("infer_ce", "Infer clock enable signals from conditional logic") { } void help() override { log("\n"); - log(" sat_clockgate [options] [selection]\n"); + log(" infer_ce [options] [selection]\n"); log("\n"); - log("This command performs SAT-based automatic clock gating insertion.\n"); + log("This command infers clock enable (CE) signals from conditional logic.\n"); log("It analyzes registers and uses SAT solving to find signals that can\n"); log("serve as clock enable conditions (when the signal is low, D==Q).\n"); log("\n"); @@ -713,11 +587,10 @@ struct SatClockgatePass : public Pass { void execute(std::vector args, RTLIL::Design *design) override { - log_header(design, "Executing SAT_CLOCKGATE pass.\n"); + log_header(design, "Executing INFER_CE pass.\n"); int max_cover = DEFAULT_MAX_COVER; int min_regs = DEFAULT_MIN_REGS; - int sim_iterations = DEFAULT_SIM_ITERATIONS; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { @@ -745,18 +618,18 @@ struct SatClockgatePass : public Pass { for (auto module : design->selected_modules()) { // Profile BEFORE clock gating - profileFlipFlops(module, "ff_profile.txt", "BEFORE sat_clockgate"); + profileFlipFlops(module, "ff_profile.txt", "BEFORE 
infer_ce"); - SatClockgateWorker worker(module, max_cover, min_regs, sim_iterations); + InferCeWorker worker(module, max_cover, min_regs); worker.run(); total_gates += worker.accepted_count; // Profile AFTER clock gating - profileFlipFlops(module, "ff_profile.txt", "AFTER sat_clockgate"); + profileFlipFlops(module, "ff_profile.txt", "AFTER infer_ce"); } log("Total clock gates inserted: %d\n", total_gates); } -} SatClockgatePass; +} InferCePass; PRIVATE_NAMESPACE_END diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc index 58b7520ac..b112d7811 100644 --- a/passes/techmap/clockgate.cc +++ b/passes/techmap/clockgate.cc @@ -328,6 +328,8 @@ struct ClockgatePass : public Pass { pool ce_ffs; dict clk_nets; + + log("Found %zu CE FFs\n", ce_ffs.size()); int gated_flop_count = 0; for (auto module : design->selected_unboxed_whole_modules()) { for (auto cell : module->cells()) { @@ -337,11 +339,14 @@ struct ClockgatePass : public Pass { FfData ff(nullptr, cell); // It would be odd to get constants, but we better handle it if (ff.has_ce) { + log("FF %s has CE\n", cell->name); if (!ff.sig_clk.is_bit() || !ff.sig_ce.is_bit()) continue; if (!ff.sig_clk[0].is_wire() || !ff.sig_ce[0].is_wire()) continue; + log("FF %s has valid CE and CLK\n", cell->name); + ce_ffs.insert(cell); ClkNetInfo info = clk_info_from_ff(ff); @@ -352,6 +357,8 @@ struct ClockgatePass : public Pass { } } + log("Found %zu clk_nets\n", clk_nets.size()); + for (auto& clk_net : clk_nets) { auto& clk = clk_net.first; auto& gclk = clk_net.second; @@ -385,15 +392,20 @@ struct ClockgatePass : public Pass { } } + log("Found %zu clk_nets\n", clk_nets.size()); + for (auto cell : ce_ffs) { FfData ff(nullptr, cell); ClkNetInfo info = clk_info_from_ff(ff); auto it = clk_nets.find(info); log_assert(it != clk_nets.end() && "Bug: desync ce_ffs and clk_nets"); + log("Found new_net for %s\n", cell->name); if (!it->second.new_net) continue; + log("Tryuing to fix up FF %s\n", cell->name); + log_debug("Fix up FF 
%s\n", cell->name); // Now we start messing with the design ff.has_ce = false; From c8b6869e6592033b6625e3f6549ce1f5963c4a6e Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 15:20:57 -0800 Subject: [PATCH 41/55] Removed optimizations from infer_ce.cc for profiling --- passes/silimate/infer_ce.cc | 38 ------------------------------------- 1 file changed, 38 deletions(-) diff --git a/passes/silimate/infer_ce.cc b/passes/silimate/infer_ce.cc index b2d393ce6..b890c235f 100644 --- a/passes/silimate/infer_ce.cc +++ b/passes/silimate/infer_ce.cc @@ -270,44 +270,6 @@ struct InferCeWorker } } - // Check if OR/AND of signals forms a valid gating condition using SAT - bool isValidGatingSet(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) - { - if (conds.empty()) - return false; - - std::vector d_vec = satgen.importSigSpec(sig_d); - std::vector q_vec = satgen.importSigSpec(sig_q); - - // Build OR (for enable) or AND (for disable) of condition signals - std::vector cond_vars; - for (auto bit : conds) - cond_vars.push_back(satgen.importSigSpec(SigSpec(bit))[0]); - - int combined_cond; - if (as_enable) { - // Clock enable: OR of signals (any signal high = enable) - combined_cond = ez->expression(ezSAT::OpOr, cond_vars); - } else { - // Clock disable: AND of signals (all signals high = disable) - combined_cond = ez->expression(ezSAT::OpAnd, cond_vars); - } - - int d_ne_q = ez->vec_ne(d_vec, q_vec); - - // Safe gating: when gating is active (enable=0 or disable=1), D must equal Q - int gating_active = as_enable ? 
ez->NOT(combined_cond) : combined_cond; - int query = ez->AND(gating_active, d_ne_q); - - std::vector assumptions = {query}; - std::vector dummy_exprs; - std::vector dummy_vals; - - bool is_valid = !ez->solve(dummy_exprs, dummy_vals, assumptions); - if (!is_valid) - rejected_sat_count++; - return is_valid; - } // Find gating condition for a register // Returns empty vector if no valid condition found From 2ab89e11460e060d8087b6d42d3aa5147b3c0b80 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 16:13:51 -0800 Subject: [PATCH 42/55] Passing equiv_opt pass and speed boosts --- passes/silimate/infer_ce.cc | 430 ++++++++++++++++++------------------ 1 file changed, 211 insertions(+), 219 deletions(-) diff --git a/passes/silimate/infer_ce.cc b/passes/silimate/infer_ce.cc index b890c235f..a4985b026 100644 --- a/passes/silimate/infer_ce.cc +++ b/passes/silimate/infer_ce.cc @@ -22,77 +22,10 @@ #include "kernel/satgen.h" #include #include -#include USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN -// Profile all flip-flops and write to file -void profileFlipFlops(Module *module, const std::string &filename, const std::string &label) -{ - std::ofstream out(filename, std::ios::app); - out << "\n=== " << label << " ===\n"; - out << "Module: " << log_id(module) << "\n\n"; - - int total_ffs = 0; - int ffs_with_ce = 0; - int ffs_with_arst = 0; - int ffs_with_srst = 0; - int total_bits = 0; - int bits_with_ce = 0; - - for (auto cell : module->cells()) { - if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), - ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), - ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), - ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) - continue; - - FfData ff(nullptr, cell); - total_ffs++; - total_bits += ff.width; - - out << "FF: " << log_id(cell) << "\n"; - out << " type: " << log_id(cell->type) << "\n"; - out << " width: " << ff.width << "\n"; - out << " has_clk: " << (ff.has_clk ? 
"yes" : "no") << "\n"; - out << " has_ce: " << (ff.has_ce ? "yes" : "no"); - if (ff.has_ce) { - out << " (sig_ce: " << log_signal(ff.sig_ce) << ", pol: " << (ff.pol_ce ? "active-high" : "active-low") << ")"; - ffs_with_ce++; - bits_with_ce += ff.width; - } - out << "\n"; - out << " has_arst: " << (ff.has_arst ? "yes" : "no"); - if (ff.has_arst) { - out << " (sig_arst: " << log_signal(ff.sig_arst) << ")"; - ffs_with_arst++; - } - out << "\n"; - out << " has_srst: " << (ff.has_srst ? "yes" : "no"); - if (ff.has_srst) { - out << " (sig_srst: " << log_signal(ff.sig_srst) << ")"; - ffs_with_srst++; - } - out << "\n"; - out << " sig_clk: " << log_signal(ff.sig_clk) << "\n"; - out << " sig_d: " << log_signal(ff.sig_d) << "\n"; - out << " sig_q: " << log_signal(ff.sig_q) << "\n"; - out << "\n"; - } - - out << "--- Summary ---\n"; - out << "Total FFs: " << total_ffs << "\n"; - out << "Total bits: " << total_bits << "\n"; - out << "FFs with CE: " << ffs_with_ce << " (" << (total_ffs ? 100*ffs_with_ce/total_ffs : 0) << "%)\n"; - out << "Bits with CE: " << bits_with_ce << " (" << (total_bits ? 
100*bits_with_ce/total_bits : 0) << "%)\n"; - out << "FFs with ARST: " << ffs_with_arst << "\n"; - out << "FFs with SRST: " << ffs_with_srst << "\n"; - out << "\n"; - - out.close(); -} - // Configuration static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider static const int DEFAULT_MIN_REGS = 10; // Min registers per clock gate @@ -109,21 +42,20 @@ struct InferCeWorker // Maps output signal bits to their driver cells dict sig_to_driver; - // Maps cell input pins to their source signals + // Maps cell input pins to their source signals dict> sig_to_sinks; - // SAT solver and generator - created once per module - ezSatPtr ez; - SatGen satgen; + // Pre-computed list of combinational cells (for SAT import) + std::vector comb_cells; // Statistics int accepted_count = 0; int rejected_sat_count = 0; + int sat_solves = 0; InferCeWorker(Module *module, int max_cover, int min_regs) : module(module), sigmap(module), - max_cover(max_cover), min_regs(min_regs), - ez(), satgen(ez.get(), &sigmap) + max_cover(max_cover), min_regs(min_regs) { // Build driver and sink maps for (auto cell : module->cells()) { @@ -139,15 +71,15 @@ struct InferCeWorker sig_to_sinks[bit].insert(cell); } } - } - - // Import all cells once - circuit constraints are permanent - for (auto cell : module->cells()) + + // Collect combinational cells for SAT if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), - ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) - satgen.importCell(cell); + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) { + comb_cells.push_back(cell); + } + } } // Get downstream signals from a register (BFS forward through combinational logic) @@ -156,41 +88,32 @@ struct InferCeWorker pool visited; std::queue worklist; - // Start from register output Q FfData ff(nullptr, reg); - for (auto bit : sigmap(ff.sig_q)) { + for (auto bit : 
sigmap(ff.sig_q)) if (bit.wire) { worklist.push(bit); visited.insert(bit); } - } while (!worklist.empty() && (int)visited.size() < limit) { SigBit bit = worklist.front(); worklist.pop(); - // Find cells driven by this signal for (auto sink_cell : sig_to_sinks[bit]) { - // Skip registers - don't traverse through them if (sink_cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), ID($dffsre), ID($_DFF_P_), ID($_DFF_N_))) continue; - // Add outputs of this cell to worklist - for (auto &conn : sink_cell->connections()) { - if (sink_cell->output(conn.first)) { - for (auto out_bit : sigmap(conn.second)) { + for (auto &conn : sink_cell->connections()) + if (sink_cell->output(conn.first)) + for (auto out_bit : sigmap(conn.second)) if (out_bit.wire && !visited.count(out_bit)) { visited.insert(out_bit); worklist.push(out_bit); } - } - } - } } } - return visited; } @@ -209,7 +132,48 @@ struct InferCeWorker SigBit bit = worklist.front(); worklist.pop(); - // Find driver cell + if (!sig_to_driver.count(bit)) + continue; + + Cell *driver = sig_to_driver[bit]; + + if (driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_))) + continue; + + for (auto &conn : driver->connections()) + if (driver->input(conn.first)) + for (auto in_bit : sigmap(conn.second)) + if (in_bit.wire && !visited.count(in_bit)) { + visited.insert(in_bit); + worklist.push(in_bit); + } + } + return visited; + } + + // Get cells in the transitive fanin cone of given signals (for SAT import) + // This is much faster than importing ALL cells + pool getConeOfLogic(SigSpec sig) + { + pool cone_cells; + pool visited; + std::queue worklist; + + // Start from all bits in sig + for (auto bit : sigmap(sig)) { + if (bit.wire && !visited.count(bit)) { + visited.insert(bit); + worklist.push(bit); + } + } + + // BFS backward through drivers + while 
(!worklist.empty()) { + SigBit bit = worklist.front(); + worklist.pop(); + if (!sig_to_driver.count(bit)) continue; @@ -218,9 +182,15 @@ struct InferCeWorker // Skip registers if (driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), - ID($dffsre), ID($_DFF_P_), ID($_DFF_N_))) + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) continue; + // Add this cell to cone + if (cone_cells.count(driver)) + continue; // Already processed + cone_cells.insert(driver); + // Add inputs of driver to worklist for (auto &conn : driver->connections()) { if (driver->input(conn.first)) { @@ -234,18 +204,80 @@ struct InferCeWorker } } - return visited; + return cone_cells; + } + + // Check if OR/AND of signals forms a valid gating condition using SAT + // Uses a PRE-CREATED SAT solver (passed in) to avoid recreating for each check + bool isValidGatingSetWithSolver(ezSatPtr &ez, SatGen &satgen, + const std::vector &conds, + SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + if (conds.empty()) + return false; + + sat_solves++; + + std::vector d_vec = satgen.importSigSpec(sig_d); + std::vector q_vec = satgen.importSigSpec(sig_q); + + // Build OR (for enable) or AND (for disable) of condition signals + std::vector cond_vars; + for (auto bit : conds) + cond_vars.push_back(satgen.importSigSpec(SigSpec(bit))[0]); + + int combined_cond; + if (as_enable) { + // Clock enable: OR of signals (any signal high = enable) + combined_cond = ez->expression(ezSAT::OpOr, cond_vars); + } else { + // Clock disable: AND of signals (all signals high = disable) + combined_cond = ez->expression(ezSAT::OpAnd, cond_vars); + } + + int d_ne_q = ez->vec_ne(d_vec, q_vec); + + // Safe gating: when gating is active (enable=0 or disable=1), D must equal Q + int gating_active = as_enable ? 
ez->NOT(combined_cond) : combined_cond; + int query = ez->AND(gating_active, d_ne_q); + + std::vector assumptions = {query}; + std::vector dummy_exprs; + std::vector dummy_vals; + + bool is_valid = !ez->solve(dummy_exprs, dummy_vals, assumptions); + if (!is_valid) + rejected_sat_count++; + return is_valid; + } + + // Wrapper that creates a fresh SAT solver (used for standalone checks) + bool isValidGatingSet(const std::vector &conds, SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + if (conds.empty()) + return false; + + pool cone = getConeOfLogic(sig_d); + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + for (auto cell : cone) + satgen.importCell(cell); + + return isValidGatingSetWithSolver(ez, satgen, conds, sig_d, sig_q, as_enable); } // Binary search to minimize the gating condition set // Tries to remove half of the signals at a time - void minimizeGatingCondition( + // Uses pre-created SAT solver to avoid recreating for each check + void minimizeGatingConditionWithSolver( + ezSatPtr &ez, SatGen &satgen, std::vector &good_conds, std::vector::iterator begin, std::vector::iterator end, SigSpec sig_d, SigSpec sig_q, bool as_enable) { int half_len = (end - begin) / 2; + if (half_len == 0) return; @@ -257,78 +289,90 @@ struct InferCeWorker test_conds.insert(test_conds.end(), begin, mid); test_conds.insert(test_conds.end(), end, good_conds.end()); - if (!test_conds.empty() && isValidGatingSet(test_conds, sig_d, sig_q, as_enable)) { + if (!test_conds.empty() && isValidGatingSetWithSolver(ez, satgen, test_conds, sig_d, sig_q, as_enable)) { // Can remove [mid, end) good_conds.erase(mid, end); // Recurse on remaining half - minimizeGatingCondition(good_conds, begin, begin + half_len, sig_d, sig_q, as_enable); + minimizeGatingConditionWithSolver(ez, satgen, good_conds, begin, begin + half_len, sig_d, sig_q, as_enable); } else { // Cannot remove all of [mid, end), try to minimize each half if (end - mid > 1) - minimizeGatingCondition(good_conds, mid, end, sig_d, 
sig_q, as_enable); - minimizeGatingCondition(good_conds, begin, mid, sig_d, sig_q, as_enable); + minimizeGatingConditionWithSolver(ez, satgen, good_conds, mid, end, sig_d, sig_q, as_enable); + minimizeGatingConditionWithSolver(ez, satgen, good_conds, begin, mid, sig_d, sig_q, as_enable); } } + // Wrapper for standalone use (creates fresh solver) + void minimizeGatingCondition( + std::vector &good_conds, + std::vector::iterator begin, + std::vector::iterator end, + SigSpec sig_d, SigSpec sig_q, bool as_enable) + { + pool cone = getConeOfLogic(sig_d); + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + for (auto cell : cone) + satgen.importCell(cell); + + minimizeGatingConditionWithSolver(ez, satgen, good_conds, begin, end, sig_d, sig_q, as_enable); + } // Find gating condition for a register - // Returns empty vector if no valid condition found - std::pair, bool> findGatingCondition(Cell *reg) + // Returns: {gating_conds, is_enable, cone_size} + std::tuple, bool, int> findGatingCondition(Cell *reg) { FfData ff(nullptr, reg); - // Get candidate signals downstream of this register - // pool downstream = getDownstreamSignals(reg, max_cover); - - // if (downstream.empty()) { - // log_debug(" No downstream candidates for %s\n", log_id(reg)); - // return {{}, false}; - // } - - // Also include upstream signals that could affect D pool d_inputs; for (auto bit : sigmap(ff.sig_d)) if (bit.wire) d_inputs.insert(bit); + pool upstream = getUpstreamSignals(d_inputs, max_cover); - // Combine and limit candidates std::vector candidates; - // for (auto bit : downstream) - // candidates.push_back(bit); - // for (auto bit : upstream) - // if (!downstream.count(bit)) - // candidates.push_back(bit); - for (auto bit : upstream) candidates.push_back(bit); - - if ((int)candidates.size() > max_cover) candidates.resize(max_cover); - log_debug(" Found %zu candidate signals\n", candidates.size()); + if (candidates.empty()) + return {{}, false, 0}; - // Try as clock enable first (more common) 
- if (isValidGatingSet(candidates, ff.sig_d, ff.sig_q, true)) { - minimizeGatingCondition(candidates, candidates.begin(), candidates.end(), - ff.sig_d, ff.sig_q, true); - if (!candidates.empty()) { - return {candidates, true}; // true = clock enable - } + // Create SAT solver ONCE for this register + pool cone = getConeOfLogic(ff.sig_d); + int cone_size = (int)cone.size(); + + // Skip registers with trivial cones (not worth gating) or huge cones (too expensive) + const int MIN_CONE_SIZE = 5; + const int MAX_CONE_SIZE = 500; + if (cone_size < MIN_CONE_SIZE || cone_size > MAX_CONE_SIZE) + return {{}, false, cone_size}; + + ezSatPtr ez; + SatGen satgen(ez.get(), &sigmap); + for (auto cell : cone) + satgen.importCell(cell); + + // Try as clock enable first + if (isValidGatingSetWithSolver(ez, satgen, candidates, ff.sig_d, ff.sig_q, true)) { + minimizeGatingConditionWithSolver(ez, satgen, candidates, candidates.begin(), candidates.end(), + ff.sig_d, ff.sig_q, true); + if (!candidates.empty()) + return {candidates, true, cone_size}; } // Try as clock disable - if (isValidGatingSet(candidates, ff.sig_d, ff.sig_q, false)) { - minimizeGatingCondition(candidates, candidates.begin(), candidates.end(), - ff.sig_d, ff.sig_q, false); - if (!candidates.empty()) { - return {candidates, false}; // false = clock disable - } + if (isValidGatingSetWithSolver(ez, satgen, candidates, ff.sig_d, ff.sig_q, false)) { + minimizeGatingConditionWithSolver(ez, satgen, candidates, candidates.begin(), candidates.end(), + ff.sig_d, ff.sig_q, false); + if (!candidates.empty()) + return {candidates, false, cone_size}; } - return {{}, false}; + return {{}, false, cone_size}; } // Insert clock gating logic for a group of registers @@ -339,9 +383,6 @@ struct InferCeWorker if (regs.empty() || gating_conds.empty()) return; - log(" Inserting clock gate for %zu registers with %zu condition signals\n", - regs.size(), gating_conds.size()); - // Build gating condition: OR for enable, AND for disable SigBit 
gating_signal; if (gating_conds.size() == 1) { @@ -371,7 +412,6 @@ struct InferCeWorker FfData ff(nullptr, reg); if (ff.has_ce) { - // Already has CE, AND with new condition Wire *combined_ce = module->addWire(NEW_ID); module->addAnd(NEW_ID, ff.sig_ce, gating_signal, combined_ce); ff.sig_ce = combined_ce; @@ -385,8 +425,7 @@ struct InferCeWorker } } - // Check if register can be added to an existing gate (subset/superset matching) - // Returns true if the existing gate's condition is a valid gating condition for this register + // Check if register can be added to an existing gate bool canReuseGate(const std::vector &existing_conds, Cell *reg, bool is_enable) { FfData ff(nullptr, reg); @@ -396,9 +435,6 @@ struct InferCeWorker // Main processing function void run() { - log("Processing module %s\n", log_id(module)); - - // Collect all registers std::vector registers; for (auto cell : module->cells()) { if (!cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), @@ -408,116 +444,87 @@ struct InferCeWorker continue; FfData ff(nullptr, cell); - - // Skip registers that already have CE - if (ff.has_ce) { - log_debug(" Skipping %s: already has CE\n", log_id(cell)); + if (ff.has_ce || !ff.has_clk) continue; - } - - if (!ff.has_clk) { - log_debug(" Skipping %s: no clock\n", log_id(cell)); - continue; - } registers.push_back(cell); } - log(" Found %zu registers without CE\n", registers.size()); + log("Processing module %s: %zu cells, %zu flip-flops, %zu wires\n", + log_id(module), module->cells().size(), registers.size(), module->wires().size()); + + if (registers.empty()) + return; - // Inverted index approach: net -> list of gate indices containing that net - // This allows finding subsets/supersets, not just exact matches struct AcceptedGate { std::vector conds; - pool cond_set; // For fast subset/superset checks + pool cond_set; std::vector regs; bool is_enable; }; std::vector accepted_gates; - dict> net_to_accepted; // Inverted index + dict> net_to_accepted; 
- int processed = 0; + int reg_idx = 0; for (auto reg : registers) { - if (processed % 100 == 0 && processed > 0) - log(" Processed %d/%zu registers\n", processed, registers.size()); - processed++; + auto [gating_conds, is_enable, cone_size] = findGatingCondition(reg); + log("Processing register %d/%zu: %s (cone=%d)\n", ++reg_idx, registers.size(), log_id(reg), cone_size); - log_debug("Processing register %s\n", log_id(reg)); - - auto [gating_conds, is_enable] = findGatingCondition(reg); - - if (gating_conds.empty()) { - log_debug(" No valid gating condition found\n"); + if (gating_conds.empty()) continue; - } - // Build set of condition signals for this register pool cond_set; for (auto bit : gating_conds) cond_set.insert(bit); - // Find all accepted gates sharing any net with this register's condition + // Find candidate gates sharing any net pool candidate_gates; - for (auto bit : gating_conds) { - if (net_to_accepted.count(bit)) { + for (auto bit : gating_conds) + if (net_to_accepted.count(bit)) for (auto idx : net_to_accepted[bit]) candidate_gates.insert(idx); - } - } - // Try to find a compatible existing gate (SAT-verify each candidate) + // HEURISTIC: Only check top 3 gates (by size) for reuse + const int MAX_REUSE_CHECKS = 3; + + std::vector sorted_candidates(candidate_gates.begin(), candidate_gates.end()); + std::sort(sorted_candidates.begin(), sorted_candidates.end(), [&](size_t a, size_t b) { + return accepted_gates[a].regs.size() > accepted_gates[b].regs.size(); + }); + bool found_match = false; - for (auto idx : candidate_gates) { - auto &gate = accepted_gates[idx]; + int checked = 0; + for (auto idx : sorted_candidates) { + if (checked >= MAX_REUSE_CHECKS) + break; - // Must match enable/disable polarity + auto &gate = accepted_gates[idx]; if (gate.is_enable != is_enable) continue; - // Check if existing gate's condition works for this register - // This allows: gate condition {x,y} can work for register with {x,y,z} - // (existing is subset) or 
register with {x} (existing is superset) + checked++; if (canReuseGate(gate.conds, reg, is_enable)) { gate.regs.push_back(reg); - log_debug(" Reusing existing gate %zu for %s (flexible match)\n", - idx, log_id(reg)); found_match = true; break; } } if (!found_match) { - // Create new accepted gate size_t new_idx = accepted_gates.size(); accepted_gates.push_back({gating_conds, cond_set, {reg}, is_enable}); - - // Update inverted index for (auto bit : gating_conds) net_to_accepted[bit].push_back(new_idx); - - log(" Found new gating condition for %s (%s)\n", - log_id(reg), is_enable ? "enable" : "disable"); } } - // Insert clock gates for groups that meet minimum register threshold - int gates_inserted = 0; + // Insert clock gates for groups meeting threshold for (auto &gate : accepted_gates) { if ((int)gate.regs.size() >= min_regs) { insertClockGate(gate.regs, gate.conds, gate.is_enable); - gates_inserted++; accepted_count += gate.regs.size(); - } else { - log_debug(" Skipping gating condition (only %zu registers, need %d)\n", - gate.regs.size(), min_regs); } } - - log(" Inserted %d clock gates\n", gates_inserted); - log(" Statistics: accepted=%d, rejected_sat=%d\n", - accepted_count, rejected_sat_count); - log(" SAT stats: literals=%d, expressions=%d\n", - ez->numLiterals(), ez->numExpressions()); } }; @@ -568,29 +575,14 @@ struct InferCePass : public Pass { } extra_args(args, argidx, design); - log("Configuration: max_cover=%d, min_regs=%d\n", max_cover, min_regs); - - // Clear profile file and write header - std::ofstream clear_file("ff_profile.txt", std::ios::trunc); - clear_file << "Flip-Flop Profile Report\n"; - clear_file << "========================\n"; - clear_file.close(); - int total_gates = 0; - for (auto module : design->selected_modules()) { - // Profile BEFORE clock gating - profileFlipFlops(module, "ff_profile.txt", "BEFORE infer_ce"); - InferCeWorker worker(module, max_cover, min_regs); worker.run(); total_gates += worker.accepted_count; - - // 
Profile AFTER clock gating - profileFlipFlops(module, "ff_profile.txt", "AFTER infer_ce"); } - log("Total clock gates inserted: %d\n", total_gates); + log("Inserted clock enables for %d registers.\n", total_gates); } } InferCePass; From 90dbb91cae605b690ff6f360499d7dfec0dd51c9 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 16:22:05 -0800 Subject: [PATCH 43/55] Changed min cone size --- passes/silimate/infer_ce.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/passes/silimate/infer_ce.cc b/passes/silimate/infer_ce.cc index a4985b026..5b589ca09 100644 --- a/passes/silimate/infer_ce.cc +++ b/passes/silimate/infer_ce.cc @@ -346,7 +346,7 @@ struct InferCeWorker int cone_size = (int)cone.size(); // Skip registers with trivial cones (not worth gating) or huge cones (too expensive) - const int MIN_CONE_SIZE = 5; + const int MIN_CONE_SIZE = 2; const int MAX_CONE_SIZE = 500; if (cone_size < MIN_CONE_SIZE || cone_size > MAX_CONE_SIZE) return {{}, false, cone_size}; From 6cb9faddeddb1ef7c269f1fe805ccf9fdab488ff Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Tue, 17 Feb 2026 16:22:59 -0800 Subject: [PATCH 44/55] Removed downstream signals causing equiv_opt failures due to feedback loop --- passes/silimate/infer_ce.cc | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/passes/silimate/infer_ce.cc b/passes/silimate/infer_ce.cc index 5b589ca09..72b4eddc9 100644 --- a/passes/silimate/infer_ce.cc +++ b/passes/silimate/infer_ce.cc @@ -82,40 +82,6 @@ struct InferCeWorker } } - // Get downstream signals from a register (BFS forward through combinational logic) - pool getDownstreamSignals(Cell *reg, int limit) - { - pool visited; - std::queue worklist; - - FfData ff(nullptr, reg); - for (auto bit : sigmap(ff.sig_q)) - if (bit.wire) { - worklist.push(bit); - visited.insert(bit); - } - - while (!worklist.empty() && (int)visited.size() < limit) { - SigBit bit = worklist.front(); - worklist.pop(); - - for (auto 
sink_cell : sig_to_sinks[bit]) { - if (sink_cell->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), - ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), - ID($dffsre), ID($_DFF_P_), ID($_DFF_N_))) - continue; - - for (auto &conn : sink_cell->connections()) - if (sink_cell->output(conn.first)) - for (auto out_bit : sigmap(conn.second)) - if (out_bit.wire && !visited.count(out_bit)) { - visited.insert(out_bit); - worklist.push(out_bit); - } - } - } - return visited; - } // Get upstream signals feeding into given signals (BFS backward) pool getUpstreamSignals(const pool &start_signals, int limit) From ee896b9eee1af0770ca85aecd2c7232151373015 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 18 Feb 2026 09:08:25 -0800 Subject: [PATCH 45/55] Removed sorting of similar candidate_gates for unnessessary optimization --- passes/silimate/infer_ce.cc | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/passes/silimate/infer_ce.cc b/passes/silimate/infer_ce.cc index 72b4eddc9..a411b49dd 100644 --- a/passes/silimate/infer_ce.cc +++ b/passes/silimate/infer_ce.cc @@ -450,17 +450,12 @@ struct InferCeWorker for (auto idx : net_to_accepted[bit]) candidate_gates.insert(idx); - // HEURISTIC: Only check top 3 gates (by size) for reuse - const int MAX_REUSE_CHECKS = 3; - - std::vector sorted_candidates(candidate_gates.begin(), candidate_gates.end()); - std::sort(sorted_candidates.begin(), sorted_candidates.end(), [&](size_t a, size_t b) { - return accepted_gates[a].regs.size() > accepted_gates[b].regs.size(); - }); + // HEURISTIC: Only check limited gates for reuse + const int MAX_REUSE_CHECKS = 20; bool found_match = false; int checked = 0; - for (auto idx : sorted_candidates) { + for (auto idx : candidate_gates) { if (checked >= MAX_REUSE_CHECKS) break; From d84e56ecac7becde5c4d4e6a15204d94c60f8149 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 18 Feb 2026 16:03:34 -0800 Subject: [PATCH 46/55] Added naming for the new icg cells --- 
passes/techmap/clockgate.cc | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc index b112d7811..f9dea91bd 100644 --- a/passes/techmap/clockgate.cc +++ b/passes/techmap/clockgate.cc @@ -222,6 +222,8 @@ struct ClockgatePass : public Pass { log(" Intended for DFT scan-enable pins.\n"); log(" -min_net_size \n"); log(" Only transform sets of at least eligible FFs.\n"); + log(" -max_src \n"); + log(" Maximum number of src attributes to copy to ICG cells (default: unlimited).\n"); log(" \n"); } @@ -252,6 +254,12 @@ struct ClockgatePass : public Pass { int net_size; // After ICG generation, we have new gated CLK signals Wire* new_net; + // The ICG cell created for this clock net + Cell* icg_cell = nullptr; + // The CE inverter cell (if pol_ce is negative) + Cell* ce_not_cell = nullptr; + // Count of src attributes added + int src_count = 0; }; ClkNetInfo clk_info_from_ff(FfData& ff) { @@ -270,6 +278,7 @@ struct ClockgatePass : public Pass { std::vector liberty_files; std::vector dont_use_cells; int min_net_size = 0; + int max_src = -1; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { @@ -301,6 +310,10 @@ struct ClockgatePass : public Pass { min_net_size = atoi(args[++argidx].c_str()); continue; } + if (args[argidx] == "-max_src" && argidx+1 < args.size()) { + max_src = atoi(args[++argidx].c_str()); + continue; + } break; } @@ -381,14 +394,19 @@ struct ClockgatePass : public Pass { icg->setPort(matching_icg_desc->ce_pin, clk.ce_bit); icg->setPort(matching_icg_desc->clk_in_pin, clk.clk_bit); gclk.new_net = module->addWire(NEW_ID2_SUFFIX("gclk")); + gclk.icg_cell = icg; icg->setPort(matching_icg_desc->clk_out_pin, gclk.new_net); // Tie low DFT ports like scan chain enable for (auto port : matching_icg_desc->tie_lo_pins) icg->setPort(port, Const(0, 1)); // Fix CE polarity if needed if (!clk.pol_ce) { - SigBit ce_fixed_pol = 
module->NotGate(NEW_ID2_SUFFIX("ce_not"), clk.ce_bit); - icg->setPort(matching_icg_desc->ce_pin, ce_fixed_pol); + Wire *ce_not_wire = module->addWire(NEW_ID2_SUFFIX("ce_not_w")); + Cell *ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($_NOT_)); + ce_not->setPort(ID::A, clk.ce_bit); + ce_not->setPort(ID::Y, ce_not_wire); + gclk.ce_not_cell = ce_not; + icg->setPort(matching_icg_desc->ce_pin, ce_not_wire); } } @@ -404,6 +422,14 @@ struct ClockgatePass : public Pass { if (!it->second.new_net) continue; + // Accumulate src attributes from all FFs sharing this ICG + if (max_src < 0 || it->second.src_count < max_src) { + it->second.icg_cell->add_strpool_attribute(ID::src, cell->get_strpool_attribute(ID::src)); + if (it->second.ce_not_cell) + it->second.ce_not_cell->add_strpool_attribute(ID::src, cell->get_strpool_attribute(ID::src)); + it->second.src_count++; + } + log("Tryuing to fix up FF %s\n", cell->name); log_debug("Fix up FF %s\n", cell->name); @@ -428,3 +454,4 @@ struct ClockgatePass : public Pass { PRIVATE_NAMESPACE_END + From 5e58bf22e095b0fccdbb231217a0e8e7552732c5 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Thu, 19 Feb 2026 09:42:59 -0800 Subject: [PATCH 47/55] Changed param naming for consistancy --- passes/silimate/infer_ce.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/passes/silimate/infer_ce.cc b/passes/silimate/infer_ce.cc index a411b49dd..1f764d1d3 100644 --- a/passes/silimate/infer_ce.cc +++ b/passes/silimate/infer_ce.cc @@ -28,7 +28,7 @@ PRIVATE_NAMESPACE_BEGIN // Configuration static const int DEFAULT_MAX_COVER = 100; // Max candidate signals to consider -static const int DEFAULT_MIN_REGS = 10; // Min registers per clock gate +static const int DEFAULT_MIN_NET_SIZE = 10; // Min registers per clock gate struct InferCeWorker { @@ -37,7 +37,7 @@ struct InferCeWorker // Configuration int max_cover; - int min_regs; + int min_net_size; // Maps output signal bits to their driver cells dict 
sig_to_driver; @@ -53,9 +53,9 @@ struct InferCeWorker int rejected_sat_count = 0; int sat_solves = 0; - InferCeWorker(Module *module, int max_cover, int min_regs) + InferCeWorker(Module *module, int max_cover, int min_net_size) : module(module), sigmap(module), - max_cover(max_cover), min_regs(min_regs) + max_cover(max_cover), min_net_size(min_net_size) { // Build driver and sink maps for (auto cell : module->cells()) { @@ -481,7 +481,7 @@ struct InferCeWorker // Insert clock gates for groups meeting threshold for (auto &gate : accepted_gates) { - if ((int)gate.regs.size() >= min_regs) { + if ((int)gate.regs.size() >= min_net_size) { insertClockGate(gate.regs, gate.conds, gate.is_enable); accepted_count += gate.regs.size(); } @@ -509,9 +509,9 @@ struct InferCePass : public Pass { log(" maximum number of candidate signals to consider per register\n"); log(" (default: %d)\n", DEFAULT_MAX_COVER); log("\n"); - log(" -min_regs \n"); + log(" -min_net_size \n"); log(" minimum number of registers that must share a gating condition\n"); - log(" for a clock gate to be inserted (default: %d)\n", DEFAULT_MIN_REGS); + log(" for a clock gate to be inserted (default: %d)\n", DEFAULT_MIN_NET_SIZE); log("\n"); } @@ -520,7 +520,7 @@ struct InferCePass : public Pass { log_header(design, "Executing INFER_CE pass.\n"); int max_cover = DEFAULT_MAX_COVER; - int min_regs = DEFAULT_MIN_REGS; + int min_net_size = DEFAULT_MIN_NET_SIZE; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { @@ -528,8 +528,8 @@ struct InferCePass : public Pass { max_cover = std::stoi(args[++argidx]); continue; } - if (args[argidx] == "-min_regs" && argidx+1 < args.size()) { - min_regs = std::stoi(args[++argidx]); + if (args[argidx] == "-min_net_size" && argidx+1 < args.size()) { + min_net_size = std::stoi(args[++argidx]); continue; } break; @@ -538,7 +538,7 @@ struct InferCePass : public Pass { int total_gates = 0; for (auto module : design->selected_modules()) { - InferCeWorker worker(module, 
max_cover, min_regs); + InferCeWorker worker(module, max_cover, min_net_size); worker.run(); total_gates += worker.accepted_count; } From ec537b189f8ac022869dbd0c5a46d0e500021c49 Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 20 Feb 2026 11:34:08 -0800 Subject: [PATCH 48/55] Added is_clock_gated attr to flops created via clockgate.cc pass --- kernel/constids.inc | 1 + passes/techmap/clockgate.cc | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/constids.inc b/kernel/constids.inc index 59f7246e9..02dc64a12 100644 --- a/kernel/constids.inc +++ b/kernel/constids.inc @@ -909,6 +909,7 @@ X(interface_type) X(interfaces_replaced_in_module) X(invertible_pin) X(iopad_external_pin) +X(is_clock_gated) X(is_inferred) X(is_interface) X(it) diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc index 7c90ba630..04d17cf09 100644 --- a/passes/techmap/clockgate.cc +++ b/passes/techmap/clockgate.cc @@ -448,7 +448,8 @@ struct ClockgatePass : public Pass { ff.sig_clk = (*it).second.new_net; // Rebuild the flop - (void)ff.emit(); + Cell *new_ff = ff.emit(); + new_ff->set_bool_attribute(ID::is_clock_gated); gated_flop_count++; } From ce95d7cbcf50dec1860963b75ff4da058a69ae7b Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 27 Feb 2026 12:22:29 -0800 Subject: [PATCH 49/55] Removed notes.txt --- notes.txt | 373 ------------------------------------------------------ 1 file changed, 373 deletions(-) delete mode 100644 notes.txt diff --git a/notes.txt b/notes.txt deleted file mode 100644 index 1c2755f2b..000000000 --- a/notes.txt +++ /dev/null @@ -1,373 +0,0 @@ -Clock gating - - - -need to determine when the D == Q - -Make sure that the flip flop has a clock but not ce: !ff.has_ce && ff.has_clk -check if - -USE sat to determine when the Q is the same as D - -Q is the feedback, and then there's also the D which does in and select for the mux -becomes the enable - - -D = f(Q, other_inputs) - -!(D ^ f(Q, other_inputs)) - - -Look somewhat like 
this: - Q is always going to be D - But D itself has an enable built into it - - - So what D really is is: - Mux (D_r, Q, en) where en is the enable signal - - Our goal is to find en - - Equation: - Q_next = (en ∧ D_r) ∨ (¬en ∧ Q) - - Equality question: - !((Q_next) ^ ((en ∧ D_r) ∨ (¬en ∧ Q))) - - Universal Quantization - FA(Q_next, D_r, Q) !((Q_next) ^ ((en ∧ D_r) ∨ (¬en ∧ Q))) - - SAT Equation - FA(Q_next, D_r, Q) !((Q_next) ^ ((en ∧ D_r) ∨ (¬en ∧ Q))) - -Now the issue is to determine what the en is and what the D_r in. In order -to continue using this approach, a way of differentiating that would be needed. - -Need a simpler approach which just considers all of the inputs and then performs -SAT. Here's the approach: - Consider the flip flops which go into D. Consider all of those inputs and seen -if D != Q is UNSAT. Meaning for that set of inputs into D, D is going to be Q. -Then try to minimize this set (optimization phase). - - -1) Find the input's into D and determine if there's any level at which you can -determine if (Exists(x1, x2, x3, ..., xn) | D != Q) == UNSAT (menaing for that -combination of inputs of x1, x2, x3, ..., xn, D is always = Q. - -Algorithm version one: -This version doesn't take into accound the threshold (doesn't try and insert) -the same CE into multiple different clocks, it also doesn't do any pre SAT simulation -optimization. Fruthermore, it also doesn't try and find the minimal set, just a set. -Lastly, this also does a form of safe clock gating which means that: - input_set == 0 -> D == Q -Which means there's cases when input_set may be 1 but D != Q - -// determines if the input set serves as an enable -input_set_is_en(input_set, D, Q): - return (Exists(x1, x2, x3, ..., xn) | D != Q) == UNSAT - -// determines the input set -determine_en_rec (input_set&, D, Q): - if (count(input_set) > N): - return false: - if input_set_is_en(input_set, D, Q): - // for now this returns. 
Later, when optimizing, this will try and find a smaller subset within the set - return true - else: - // Detemine set of inputs - input_set_new = Do a BFS on the Data pin in the clock and add those pins to set - determine_en_rec(input_set_new, D, Q) - -// create the CE based on the input set -// adds the CE into the clock -create_ce_logic(input_set, D, Q, ffData): - // CE = OR(all signals in input_set) - // When any input is 1 → CE=1 (update register) - // When all inputs are 0 → CE=0 (hold, since SAT proved D==Q) - - if input_set.size() == 1: - ce_signal = input_set[0] - else: - ce_wire = module.addWire(NEW_ID) - module.addReduceOr(NEW_ID, input_set, ce_wire) - ce_signal = ce_wire - - ffData.has_ce = true - ffData.sig_ce = ce_signal - ffData.pol_ce = true // active high - ffData.emit() // rebuild the FF with CE - -set_ff_ces(design): - for module in design: - for cell in module: - if cell is_builtin_ff: - ffFata ff = cell; - if (!ff.has_ce && ff.has_clk && ff.has_d && ff.has_Q): - input_set = {} - if (determine_en_rec(input_set, D, Q)): - create_ce_logic(input_set, D, Q, ffData) - - // insert the ICG gates based on the new CEs inserted - pass::call("clockgate", design); - - - -(input_set=0) AND (D≠Q) == UNSAT - -> if one of them is 1 and D = Q -Scenario What happens OK? 
-CE=0, D==Q Gate clock, hold value Correct (power saved) -CE=0, D≠Q Gate clock, lose data BUG -CE=1, D==Q Clock passes, write same value Correct (wasted power) -CE=1, D≠Q Clock passes, update register Correct - - - - - -(input_set=0) AND (D≠Q) == UNSAT -> -Existential Quantization of input_set such that -To ensure that if (CE=0) then D==Q: - ((combination of inputs) AND (D≠Q)) = UNSAT - - This is functionally accurate but there might be cases when CE=1 but D==Q which - is a waste of power -To ensure that if (CE=1) then D!=Q: - ((combination of inputs) AND (D==Q)) = UNSAT - - This alone is risky since there might be combinations such that CE=0 but D!=Q - which is incorrect behaviour - -Need to ensure CE=1 <-> D!=Q: - BOTH conditions must hold: - 1) ((CE=0) AND (D≠Q)) = UNSAT // CE=0 → D==Q (safe to gate) - 2) ((CE=1) AND (D==Q)) = UNSAT // CE=1 → D≠Q (no wasted power) - - Combined: CE must be the exact boolean function where CE = (D ≠ Q) - For MUX pattern D = sel ? new_val : Q, CE = sel satisfies both. - - -In order words, we need -Exist a combination of inputs such that UNSAT((combination of inputs) ^ (D==Q)) -Which means -((!COI) && (D!=Q)) && ((COI) && (D==Q)) - -Exit a set of inputs such that COI <-> D != Q -(COI && (D != Q)) && (!COI && (D == Q)) - - - -The final equation for the UNSAT is: - ((D != Q) != (COI)) -> UNSAT => COI = (D != Q) - Exists COI such that ((D ^ Q) ^ (COI)) -> UNSAT - - -So in the new pseudo code algorithm, once the pool is populated (the input_set), -we create this miter with the exestential quantization with the input_set. - -The issue is that since the input set isn't a boolean function (is a BFS traversal), -we need to manually create a boolean function out of the input set. - -Another issue is that we must ensure that these somehow impact the D and/or the Q. 
This - - - - - - - - - - - - - - - - - -Future TODOs: -1) Recursively minimize the set which is actually needed -2) Add threshold (how many flops depend on the same enable signal) -3) Add different setting for type of mitter (maybe add just the !COI -> D == Q) - for a weaker clock gate (still consumes power if COI and D == Q but might be faster) -4) Deal with posede vs negedge of the clocks -5) Experiment with different logical combinations of the COI set (rather than just - or-ing them all together) -6) Consider pruning ezSAT expressions list — accumulates across queries, may cause memory growth - - -=== FIXED input_set_is_enable IMPLEMENTATIONS === - -// VERSION 1: Safe clock gating (current approach, cleaned up) -// Checks: (input_set=0) AND (D≠Q) == UNSAT -// Meaning: when all inputs are 0, D is guaranteed to equal Q -bool input_set_is_enable_safe(const pool &input_set, SigSpec sig_d, SigSpec sig_q) -{ - if (input_set.empty()) - return false; - - ezSatPtr ez; - SatGen satgen(ez.get(), &sigmap); - - // Import circuit behavior - for (auto cell : module->cells()) - satgen.importCell(cell); - - // Import D and Q - std::vector d_vec = satgen.importSigSpec(sig_d); - std::vector q_vec = satgen.importSigSpec(sig_q); - - // Constraint 1: All input_set bits = 0 - for (auto bit : input_set) { - int bit_var = satgen.importSigSpec(SigSpec(bit))[0]; - ez->assume(ez->NOT(bit_var)); - } - - // Constraint 2: D != Q - ez->assume(ez->vec_ne(d_vec, q_vec)); - - // If UNSAT: no way for D≠Q when inputs=0 → valid enable - return !ez->solve(); -} - -// VERSION 2: Exact clock gating (stronger, no wasted power) -// Checks: (D≠Q) XOR (OR(input_set)) == UNSAT -// Meaning: COI is exactly equivalent to D≠Q -bool input_set_is_enable_exact(const pool &input_set, SigSpec sig_d, SigSpec sig_q) -{ - if (input_set.empty()) - return false; - - ezSatPtr ez; - SatGen satgen(ez.get(), &sigmap); - - // Import circuit behavior - for (auto cell : module->cells()) - satgen.importCell(cell); - - // Import 
D and Q - std::vector d_vec = satgen.importSigSpec(sig_d); - std::vector q_vec = satgen.importSigSpec(sig_q); - - // Build COI = OR(input_set) - std::vector input_vars; - for (auto bit : input_set) - input_vars.push_back(satgen.importSigSpec(SigSpec(bit))[0]); - int coi = ez->expression(ezSAT::OpOr, input_vars); - - // Build D != Q (single bit: is any bit different?) - int d_ne_q = ez->vec_ne(d_vec, q_vec); - - // Constraint: COI XOR (D≠Q) — want this UNSAT (meaning COI ↔ D≠Q) - ez->assume(ez->XOR(coi, d_ne_q)); - - // If UNSAT: COI is exactly when D≠Q → perfect enable - return !ez->solve(); -} - - - - -Setting up the SAT condition: - - Need to have the equation for Q, need to have the equation for D - need to XOR those equations, need to XOR that equation with the new - one. Need to make sure that that's never SAT. - - - - - - - - - -Ok so I have this idea: - - - - -I'm doing to take the input variables, universally quantize them and also take D and Q and universilly quantise them - -Then I have this formula SAT((D^Q) !^ MUX)) - -Where the MUX has (for the select inputs the input values, and then has random variables d_0 to D_2^n when there's n inputs. - -SAT returns the combination of the values for d which make this work. - -But in this case, how can I go from determining the values of the Ds to determine the gates and converting that to combinational logic? And also, how can I Universially Quantize the other values? - - - -This is difficult due to QBF (Quantified Boolean Format) engines being very expensive and slow. -Rather than this, potentially trying CEGAR (not sure if this is practical). Idea is this: - 1. Start with a CANDIDATE solution (guess/abstraction) - 2. CHECK: Does candidate work for ALL inputs? (via SAT) (UNSAT for XOR means they are the same) - - If YES → Done, return candidate ✓ - - If NO → SAT gives a COUNTEREXAMPLE (inputs where it fails) - 3. REFINE: Use counterexample to improve candidate - 4. 
GOTO 2 - - - - The approach by this paper: https://dl.acm.org/doi/epdf/10.1145/1391469.1391637 - -Try each signal individually → collect ones that work → OR the winners -Your approach: "OR(all inputs) == enable?" -Paper's approach: "Which individual signals could BE the enable?" -Paper's Algorithm in Pseudocode (Yosys-feasible): -SigBit find_clock_enable(SigSpec sig_d, SigSpec sig_q) { pool candidates = get_cone_signals(sig_d); // All signals in fanin pool valid_enables; // STEP 1: Prune with simulation (fast) for (auto sig : candidates) { if (!simulation_suggests_valid(sig, sig_d, sig_q)) candidates.erase(sig); // Quick reject } // STEP 2: Prove with SAT (slow but conclusive) for (auto sig : candidates) { // Check: sig=1 → D==Q (safe to gate) // SAT query: (sig ∧ (D ⊕ Q)) — want UNSAT if (sat_proves_valid(sig, sig_d, sig_q)) valid_enables.insert(sig); } // STEP 3: Pick best (by estimated power savings) SigBit best = select_best_coverage(valid_enables); return best; // Or OR multiple if needed}bool sat_proves_valid(SigBit candidate, SigSpec sig_d, SigSpec sig_q) { // "Can candidate=1 while D≠Q?" — if UNSAT, candidate is valid int cand = satgen.importSigSpec(SigSpec(candidate))[0]; int d_ne_q = ez->vec_ne(d_vec, q_vec); ez->assume(cand); ez->assume(d_ne_q); return !ez->solve(); // UNSAT = valid enable} - - -=== SAFE GATING vs EXACT GATING === - -Safe Gating: - SAT check: sig ∧ (D≠Q) = UNSAT - Meaning: When sig=1, D is guaranteed to equal Q (safe to gate clock) - - sig=1 → gate clock, hold register (D==Q guaranteed) - - sig=0 → clock runs freely (D may or may not equal Q) - Allows wasted power (clock runs when D==Q but sig=0), but NEVER loses data. - -Exact Gating: - SAT check: sig ⊕ (D≠Q) = UNSAT - Meaning: sig is EXACTLY equivalent to (D≠Q) - - sig=1 ↔ D≠Q (perfect bidirectional match) - No wasted power, but much harder to find matching signals. - -Comparison: - | Type | SAT Check | Finds more? | Power optimal? 
| - |--------|-----------------------|-------------|----------------| - | Safe | sig ∧ (D≠Q) = UNSAT | Yes | No (some waste)| - | Exact | sig ⊕ (D≠Q) = UNSAT | No | Yes (perfect) | - -Recommendation: - • Use SAFE GATING — faster (weaker SAT query), finds more candidates - • Safe gating is industry standard (used in paper, commercial tools) - • Exact gating rarely finds matches unless design has explicit MUX-with-Q pattern - • Power difference is minor — safe gating still saves most power - • Safe gating has better QoR: more FFs get clock-gated - - There's also clock as_enable and as_disable - as_enable = true (clock enable): - Signal high → clock runs. Signal low → clock blocked. Check: (!enable ∧ D≠Q) must be UNSAT. - as_enable = false (clock disable): - Signal high → clock blocked. Signal low → clock runs. Check: (disable ∧ D≠Q) must be UNSAT. - - - - - -TODOs: -1) Convert from the string hash to an integer hash -3) See why this path is needed: - if (ff.has_ce) { - // Already has CE, AND with new condition - Wire *combined_ce = module->addWire(NEW_ID); - module->addAnd(NEW_ID, ff.sig_ce, gating_signal, combined_ce); - ff.sig_ce = combined_ce; - } else { -4) Print the netlist before and after (checkout ways to determine # of flipflips) -5) Power analysis -6) Remove redundant vectors (visited and result) from getDownstreamSignals - and getUpstreamSignals -7) Check recursion -8) Check isValidGatingSet and findGatingCondition - -Add a new feature to not do simulation or SAT based on the false paths \ No newline at end of file From 90aa1cc01618a4fed68b95122a1f6771d63abb9b Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 27 Feb 2026 12:24:31 -0800 Subject: [PATCH 50/55] Checked out main passes/techmap/clockgate.cc for source attributes and removed logging --- passes/techmap/clockgate.cc | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc index 04d17cf09..6a5f95d5b 100644 --- 
a/passes/techmap/clockgate.cc +++ b/passes/techmap/clockgate.cc @@ -341,8 +341,6 @@ struct ClockgatePass : public Pass { pool ce_ffs; dict clk_nets; - - log("Found %zu CE FFs\n", ce_ffs.size()); int gated_flop_count = 0; for (auto module : design->selected_unboxed_whole_modules()) { for (auto cell : module->cells()) { @@ -352,14 +350,11 @@ struct ClockgatePass : public Pass { FfData ff(nullptr, cell); // It would be odd to get constants, but we better handle it if (ff.has_ce) { - log("FF %s has CE\n", cell->name); if (!ff.sig_clk.is_bit() || !ff.sig_ce.is_bit()) continue; if (!ff.sig_clk[0].is_wire() || !ff.sig_ce[0].is_wire()) continue; - log("FF %s has valid CE and CLK\n", cell->name); - ce_ffs.insert(cell); ClkNetInfo info = clk_info_from_ff(ff); @@ -370,8 +365,6 @@ struct ClockgatePass : public Pass { } } - log("Found %zu clk_nets\n", clk_nets.size()); - for (auto& clk_net : clk_nets) { auto& clk = clk_net.first; auto& gclk = clk_net.second; @@ -410,15 +403,12 @@ struct ClockgatePass : public Pass { } } - log("Found %zu clk_nets\n", clk_nets.size()); - for (auto cell : ce_ffs) { FfData ff(nullptr, cell); ClkNetInfo info = clk_info_from_ff(ff); auto it = clk_nets.find(info); log_assert(it != clk_nets.end() && "Bug: desync ce_ffs and clk_nets"); - log("Found new_net for %s\n", cell->name); if (!it->second.new_net) continue; @@ -430,16 +420,6 @@ struct ClockgatePass : public Pass { it->second.src_count++; } - // Accumulate src attributes from all FFs sharing this ICG - if (max_src < 0 || it->second.src_count < max_src) { - it->second.icg_cell->add_strpool_attribute(ID::src, cell->get_strpool_attribute(ID::src)); - if (it->second.ce_not_cell) - it->second.ce_not_cell->add_strpool_attribute(ID::src, cell->get_strpool_attribute(ID::src)); - it->second.src_count++; - } - - log("Tryuing to fix up FF %s\n", cell->name); - log_debug("Fix up FF %s\n", cell->name); // Now we start messing with the design ff.has_ce = false; From 8974f3473fb15e583305ea3c1fb93546da5b7a78 Mon 
Sep 17 00:00:00 2001 From: Advay Singh <144560982+AdvaySingh1@users.noreply.github.com> Date: Fri, 27 Feb 2026 12:37:49 -0800 Subject: [PATCH 51/55] Update passes/silimate/infer_ce.cc Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- passes/silimate/infer_ce.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/passes/silimate/infer_ce.cc b/passes/silimate/infer_ce.cc index 1f764d1d3..04034cf99 100644 --- a/passes/silimate/infer_ce.cc +++ b/passes/silimate/infer_ce.cc @@ -103,9 +103,10 @@ struct InferCeWorker Cell *driver = sig_to_driver[bit]; - if (driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), - ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), - ID($dffsre), ID($_DFF_P_), ID($_DFF_N_))) + if (driver->type.in(ID($ff), ID($dff), ID($dffe), ID($adff), ID($adffe), + ID($sdff), ID($sdffe), ID($sdffce), ID($dffsr), + ID($dffsre), ID($_DFF_P_), ID($_DFF_N_), ID($_DFFE_PP_), + ID($_DFFE_PN_), ID($_DFFE_NP_), ID($_DFFE_NN_))) continue; for (auto &conn : driver->connections()) From 877e97de061a66d0d4c5a4b1f220f5b6999e214a Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Fri, 27 Feb 2026 15:23:50 -0800 Subject: [PATCH 52/55] Changed to for chacterization --- passes/techmap/clockgate.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc index 6a5f95d5b..ca7d01ec3 100644 --- a/passes/techmap/clockgate.cc +++ b/passes/techmap/clockgate.cc @@ -395,9 +395,12 @@ struct ClockgatePass : public Pass { // Fix CE polarity if needed if (!clk.pol_ce) { Wire *ce_not_wire = module->addWire(NEW_ID2_SUFFIX("ce_not_w")); - Cell *ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($_NOT_)); - ce_not->setPort(ID::A, clk.ce_bit); - ce_not->setPort(ID::Y, ce_not_wire); + Cell *ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($not)); + ce_not->setParam(ID::A_SIGNED, 0); + ce_not->setParam(ID::A_WIDTH, 1); + 
ce_not->setParam(ID::Y_WIDTH, 1); + ce_not->setPort(ID::A, clk.ce_bit); + ce_not->setPort(ID::Y, ce_not_wire); gclk.ce_not_cell = ce_not; icg->setPort(matching_icg_desc->ce_pin, ce_not_wire); } From 26adc17fd7e7ff94c6c8558e152b506644618f3c Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 4 Mar 2026 10:43:53 -0800 Subject: [PATCH 53/55] Revert "Changed to for chacterization" Removing changing _DFF_ to dff for chacterization --- passes/techmap/clockgate.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc index ca7d01ec3..6a5f95d5b 100644 --- a/passes/techmap/clockgate.cc +++ b/passes/techmap/clockgate.cc @@ -395,12 +395,9 @@ struct ClockgatePass : public Pass { // Fix CE polarity if needed if (!clk.pol_ce) { Wire *ce_not_wire = module->addWire(NEW_ID2_SUFFIX("ce_not_w")); - Cell *ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($not)); - ce_not->setParam(ID::A_SIGNED, 0); - ce_not->setParam(ID::A_WIDTH, 1); - ce_not->setParam(ID::Y_WIDTH, 1); - ce_not->setPort(ID::A, clk.ce_bit); - ce_not->setPort(ID::Y, ce_not_wire); + Cell *ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($_NOT_)); + ce_not->setPort(ID::A, clk.ce_bit); + ce_not->setPort(ID::Y, ce_not_wire); gclk.ce_not_cell = ce_not; icg->setPort(matching_icg_desc->ce_pin, ce_not_wire); } From 2836cc8f2540166d744d9124630f77114085139a Mon Sep 17 00:00:00 2001 From: AdvaySingh1 Date: Wed, 4 Mar 2026 12:03:38 -0800 Subject: [PATCH 54/55] Added test cases for the infer_ce pass --- tests/silimate/infer_ce.ys | 217 +++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 tests/silimate/infer_ce.ys diff --git a/tests/silimate/infer_ce.ys b/tests/silimate/infer_ce.ys new file mode 100644 index 000000000..68b6bf628 --- /dev/null +++ b/tests/silimate/infer_ce.ys @@ -0,0 +1,217 @@ +# ============================================================================= +# Test 1: Basic enable inference 
with non-trivial cone +# infer_ce needs cone_size >= 2, so we add combinational logic before the mux. +# We use proc; opt_expr; opt_clean (NOT full opt) to avoid opt_dff stealing +# the mux-feedback pattern before infer_ce gets a chance. +# ============================================================================= +log -header "Basic enable inference" +log -push +design -reset +read_verilog < Date: Wed, 4 Mar 2026 14:51:47 -0800 Subject: [PATCH 55/55] Added -word arg --- passes/techmap/clockgate.cc | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc index 6a5f95d5b..50d557c08 100644 --- a/passes/techmap/clockgate.cc +++ b/passes/techmap/clockgate.cc @@ -224,6 +224,8 @@ struct ClockgatePass : public Pass { log(" Only transform sets of at least eligible FFs.\n"); log(" -max_src \n"); log(" Maximum number of src attributes to copy to ICG cells (default: unlimited).\n"); + log(" -word\n"); + log(" Use word-level $not cell for CE inversion instead of gate-level $_NOT_.\n"); log(" \n"); } @@ -279,6 +281,7 @@ struct ClockgatePass : public Pass { std::vector dont_use_cells; int min_net_size = 0; int max_src = -1; + bool word_level = false; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { @@ -314,6 +317,10 @@ struct ClockgatePass : public Pass { max_src = atoi(args[++argidx].c_str()); continue; } + if (args[argidx] == "-word") { + word_level = true; + continue; + } break; } @@ -395,9 +402,19 @@ struct ClockgatePass : public Pass { // Fix CE polarity if needed if (!clk.pol_ce) { Wire *ce_not_wire = module->addWire(NEW_ID2_SUFFIX("ce_not_w")); - Cell *ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($_NOT_)); - ce_not->setPort(ID::A, clk.ce_bit); - ce_not->setPort(ID::Y, ce_not_wire); + Cell *ce_not; + if (word_level) { + ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($not)); + ce_not->setParam(ID::A_SIGNED, 0); + ce_not->setParam(ID::A_WIDTH, 1); + 
ce_not->setParam(ID::Y_WIDTH, 1); + ce_not->setPort(ID::A, clk.ce_bit); + ce_not->setPort(ID::Y, ce_not_wire); + } else { + ce_not = module->addCell(NEW_ID2_SUFFIX("ce_not"), ID($_NOT_)); + ce_not->setPort(ID::A, clk.ce_bit); + ce_not->setPort(ID::Y, ce_not_wire); + } gclk.ce_not_cell = ce_not; icg->setPort(matching_icg_desc->ce_pin, ce_not_wire); }