diff --git a/.github/actions/setup-build-env/action.yml b/.github/actions/setup-build-env/action.yml index c9dc5fc22..c1d3e2b01 100644 --- a/.github/actions/setup-build-env/action.yml +++ b/.github/actions/setup-build-env/action.yml @@ -42,7 +42,7 @@ runs: if: runner.os == 'Linux' && inputs.get-build-deps == 'true' uses: awalsh128/cache-apt-pkgs-action@v1.6.0 with: - packages: bison clang flex libffi-dev libfl-dev libreadline-dev pkg-config tcl-dev zlib1g-dev libgtest-dev + packages: bison clang flex libffi-dev libfl-dev libreadline-dev pkg-config tcl-dev zlib1g-dev libgtest-dev libgmock-dev version: ${{ inputs.runs-on }}-buildys - name: Linux docs dependencies diff --git a/frontends/rtlil/rtlil_frontend.cc b/frontends/rtlil/rtlil_frontend.cc index a1412d983..7e2ec5460 100644 --- a/frontends/rtlil/rtlil_frontend.cc +++ b/frontends/rtlil/rtlil_frontend.cc @@ -286,6 +286,7 @@ struct RTLILFrontendWorker { if (width > MAX_CONST_WIDTH) error("Constant width %lld out of range before `%s`.", width, error_token()); bits.reserve(width); + int start_idx = idx; while (true) { RTLIL::State bit; switch (line[idx]) { @@ -300,8 +301,9 @@ struct RTLILFrontendWorker { bits.push_back(bit); ++idx; } - done: - std::reverse(bits.begin(), bits.end()); + done: + if (start_idx < idx) + std::reverse(bits.begin(), bits.end()); if (GetSize(bits) > width) bits.resize(width); diff --git a/kernel/ffinit.h b/kernel/ffinit.h index 920fba307..8b4758f60 100644 --- a/kernel/ffinit.h +++ b/kernel/ffinit.h @@ -22,6 +22,7 @@ #include "kernel/yosys.h" #include "kernel/sigtools.h" +#include "kernel/threading.h" YOSYS_NAMESPACE_BEGIN @@ -35,34 +36,55 @@ struct FfInitVals sigmap = sigmap_; initbits.clear(); for (auto wire : module->wires()) + if (wire->attributes.count(ID::init)) + process_wire(wire); + } + + void process_wire(RTLIL::Wire *wire) + { + SigSpec wirebits = (*sigmap)(wire); + Const initval = wire->attributes.at(ID::init); + + for (int i = 0; i < GetSize(wirebits) && i < GetSize(initval); i++) { - if (wire->attributes.count(ID::init) == 0) + SigBit bit = wirebits[i]; + State val = initval[i]; + + if (val != State::S0 && val != State::S1 && bit.wire != nullptr) continue; - SigSpec wirebits = (*sigmap)(wire); - Const initval = wire->attributes.at(ID::init); - - for (int i = 0; i < GetSize(wirebits) && i < GetSize(initval); i++) - { - SigBit bit = wirebits[i]; - State val = initval[i]; - - if (val != State::S0 && val != State::S1 && bit.wire != nullptr) - continue; - - if (initbits.count(bit)) { - if (initbits.at(bit).first != val) - log_error("Conflicting init values for signal %s (%s = %s != %s).\n", - log_signal(bit), log_signal(SigBit(wire, i)), - log_signal(val), log_signal(initbits.at(bit).first)); - continue; - } - - initbits[bit] = std::make_pair(val,SigBit(wire,i)); + if (initbits.count(bit)) { + if (initbits.at(bit).first != val) + log_error("Conflicting init values for signal %s (%s = %s != %s).\n", + log_signal(bit), log_signal(SigBit(wire, i)), + log_signal(val), log_signal(initbits.at(bit).first)); + continue; } + + initbits[bit] = std::make_pair(val,SigBit(wire,i)); } } + void set_parallel(const SigMapView *sigmap_, ParallelDispatchThreadPool &thread_pool, RTLIL::Module *module) + { + sigmap = sigmap_; + initbits.clear(); + + const RTLIL::Module *const_module = module; + ParallelDispatchThreadPool::Subpool subpool(thread_pool, ThreadPool::work_pool_size(0, module->wires_size(), 1000)); + ShardedVector init_wires(subpool); + subpool.run([const_module, &init_wires](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(const_module->wires_size())) { + RTLIL::Wire *wire = const_module->wire_at(i); + if (wire->attributes.count(ID::init)) + init_wires.insert(ctx, wire); + } + }); + + for (RTLIL::Wire *wire : init_wires) + process_wire(wire); + } + RTLIL::State operator()(RTLIL::SigBit bit) const { auto it = initbits.find((*sigmap)(bit)); diff --git a/kernel/log.cc b/kernel/log.cc index 018a19081..b114f1eaf 100644 --- a/kernel/log.cc +++ b/kernel/log.cc @@ -324,6 +324,14 @@ void log_formatted_file_info(std::string_view filename, int lineno, std::string log("%s:%d: Info: %s", filename, lineno, str); } +void log_suppressed() { + if (log_debug_suppressed && !log_make_debug) { + constexpr const char* format = "\n"; + logv_string(format, stringf(format, log_debug_suppressed)); + log_debug_suppressed = 0; + } +} + [[noreturn]] static void log_error_with_prefix(std::string_view prefix, std::string str) { @@ -345,7 +353,9 @@ static void log_error_with_prefix(std::string_view prefix, std::string str) } log_last_error = std::move(str); - log("%s%s", prefix, log_last_error); + std::string message(prefix); + message += log_last_error; + logv_string("%s%s", message); log_flush(); log_make_debug = bak_log_make_debug; @@ -355,7 +365,7 @@ static void log_error_with_prefix(std::string_view prefix, std::string str) item.current_count++; for (auto &[_, item] : log_expect_prefix_error) - if (std::regex_search(string(prefix) + string(log_last_error), item.pattern)) + if (std::regex_search(message, item.pattern)) item.current_count++; log_check_expected(); diff --git a/kernel/log.h b/kernel/log.h index 63faf7091..d132ba1a0 100644 --- a/kernel/log.h +++ b/kernel/log.h @@ -206,12 +206,7 @@ template log_formatted_cmd_error(fmt.format(args...)); } -static inline void log_suppressed() { - if (log_debug_suppressed && !log_make_debug) { - log("\n", log_debug_suppressed); - log_debug_suppressed = 0; - } -} +void log_suppressed(); struct LogMakeDebugHdl { bool status = false; diff --git a/kernel/rtlil.cc b/kernel/rtlil.cc index 66bf3b9f7..279c9b0e6 100644 --- a/kernel/rtlil.cc +++ b/kernel/rtlil.cc @@ -22,6 +22,7 @@ #include "kernel/newcelltypes.h" #include "kernel/binding.h" #include "kernel/sigtools.h" +#include "kernel/threading.h" #include "frontends/verilog/verilog_frontend.h" #include "frontends/verilog/preproc.h" #include "backends/rtlil/rtlil_backend.h" @@ -142,9 +143,17 @@ static constexpr bool check_well_known_id_order() // and in sorted ascii order, as required by the ID macro. static_assert(check_well_known_id_order()); +constexpr int STATIC_ID_END = static_cast(RTLIL::StaticId::STATIC_ID_END); + struct IdStringCollector { + IdStringCollector(std::vector &live_ids) + : live_ids(live_ids) {} + void trace(IdString id) { - live.insert(id.index_); + if (id.index_ >= STATIC_ID_END) + live_ids[id.index_ - STATIC_ID_END].set(); + else if (id.index_ < 0) + live_autoidx_ids.push_back(id.index_); } template void trace(const T* v) { trace(*v); @@ -178,10 +187,6 @@ struct IdStringCollector { trace(element); } - void trace(const RTLIL::Design &design) { - trace_values(design.modules_); - trace(design.selection_vars); - } void trace(const RTLIL::Selection &selection_var) { trace(selection_var.selected_modules); trace(selection_var.selected_members); @@ -190,15 +195,6 @@ struct IdStringCollector { trace_keys(named.attributes); trace(named.name); } - void trace(const RTLIL::Module &module) { - trace_named(module); - trace_values(module.wires_); - trace_values(module.cells_); - trace(module.avail_parameters); - trace_keys(module.parameter_default_values); - trace_values(module.memories); - trace_values(module.processes); - } void trace(const RTLIL::Wire &wire) { trace_named(wire); if (wire.known_driver()) @@ -234,7 +230,8 @@ struct IdStringCollector { trace(action.memid); } - std::unordered_set live; + std::vector &live_ids; + std::vector live_autoidx_ids; }; int64_t RTLIL::OwningIdString::gc_ns; @@ -243,20 +240,55 @@ int RTLIL::OwningIdString::gc_count; void RTLIL::OwningIdString::collect_garbage() { int64_t start = PerformanceTimer::query(); - IdStringCollector collector; - for (auto &[idx, design] : *RTLIL::Design::get_all_designs()) { - collector.trace(*design); - } - int size = GetSize(global_id_storage_); - for (int i = static_cast(StaticId::STATIC_ID_END); i < size; ++i) { - RTLIL::IdString::Storage &storage = global_id_storage_.at(i); - if (storage.buf == nullptr) - continue; - if (collector.live.find(i) != collector.live.end()) - continue; - if (global_refcount_storage_.find(i) != global_refcount_storage_.end()) - continue; + int pool_size = 0; + for (auto &[idx, design] : *RTLIL::Design::get_all_designs()) + for (RTLIL::Module *module : design->modules()) + pool_size = std::max(pool_size, ThreadPool::work_pool_size(0, module->cells_size(), 1000)); + ParallelDispatchThreadPool thread_pool(pool_size); + + int size = GetSize(global_id_storage_); + std::vector live_ids(size - STATIC_ID_END); + std::vector collectors; + int num_threads = thread_pool.num_threads(); + collectors.reserve(num_threads); + for (int i = 0; i < num_threads; ++i) + collectors.emplace_back(live_ids); + + for (auto &[idx, design] : *RTLIL::Design::get_all_designs()) { + for (RTLIL::Module *module : design->modules()) { + collectors[0].trace_named(*module); + ParallelDispatchThreadPool::Subpool subpool(thread_pool, ThreadPool::work_pool_size(0, module->cells_size(), 1000)); + subpool.run([&collectors, module](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(module->cells_size())) + collectors[ctx.thread_num].trace(module->cell_at(i)); + for (int i : ctx.item_range(module->wires_size())) + collectors[ctx.thread_num].trace(module->wire_at(i)); + }); + collectors[0].trace(module->avail_parameters); + collectors[0].trace_keys(module->parameter_default_values); + collectors[0].trace_values(module->memories); + collectors[0].trace_values(module->processes); + } + collectors[0].trace(design->selection_vars); + } + + ShardedVector free_ids(thread_pool); + thread_pool.run([&live_ids, size, &free_ids](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(size - STATIC_ID_END)) { + int index = i + STATIC_ID_END; + RTLIL::IdString::Storage &storage = global_id_storage_.at(index); + if (storage.buf == nullptr) + continue; + if (live_ids[i].load()) + continue; + if (global_refcount_storage_.find(index) != global_refcount_storage_.end()) + continue; + free_ids.insert(ctx, index); + } + }); + for (int i : free_ids) { + RTLIL::IdString::Storage &storage = global_id_storage_.at(i); if (yosys_xtrace) { log("#X# Removed IdString '%s' with index %d.\n", storage.buf, i); log_backtrace("-X- ", yosys_xtrace-1); @@ -268,8 +300,13 @@ void RTLIL::OwningIdString::collect_garbage() global_free_idx_list_.push_back(i); } + std::unordered_set live_autoidx_ids; + for (IdStringCollector &collector : collectors) + for (int id : collector.live_autoidx_ids) + live_autoidx_ids.insert(id); + for (auto it = global_autoidx_id_storage_.begin(); it != global_autoidx_id_storage_.end();) { - if (collector.live.find(it->first) != collector.live.end()) { + if (live_autoidx_ids.find(it->first) != live_autoidx_ids.end()) { ++it; continue; } @@ -1324,15 +1361,21 @@ void RTLIL::Design::sort_modules() modules_.sort(sort_by_id_str()); } +void check_module(RTLIL::Module *module, ParallelDispatchThreadPool &thread_pool); + void RTLIL::Design::check() { #ifndef NDEBUG log_assert(!selection_stack.empty()); + int pool_size = 0; + for (auto &it : modules_) + pool_size = std::max(pool_size, ThreadPool::work_pool_size(0, it.second->cells_size(), 1000)); + ParallelDispatchThreadPool thread_pool(pool_size); for (auto &it : modules_) { log_assert(this == it.second->design); log_assert(it.first == it.second->name); log_assert(!it.first.empty()); - it.second->check(); + check_module(it.second, thread_pool); } #endif } @@ -1568,11 +1611,11 @@ size_t RTLIL::Module::count_id(RTLIL::IdString id) namespace { struct InternalCellChecker { - RTLIL::Module *module; + const RTLIL::Module *module; RTLIL::Cell *cell; pool expected_params, expected_ports; - InternalCellChecker(RTLIL::Module *module, RTLIL::Cell *cell) : module(module), cell(cell) { } + InternalCellChecker(const RTLIL::Module *module, RTLIL::Cell *cell) : module(module), cell(cell) { } void error(int linenr) { @@ -2548,88 +2591,96 @@ void RTLIL::Module::sort() it.second->attributes.sort(sort_by_id_str()); } -void RTLIL::Module::check() +void check_module(RTLIL::Module *module, ParallelDispatchThreadPool &thread_pool) { #ifndef NDEBUG - std::vector ports_declared; - for (auto &it : wires_) { - log_assert(this == it.second->module); - log_assert(it.first == it.second->name); - log_assert(!it.first.empty()); - log_assert(it.second->width >= 0); - log_assert(it.second->port_id >= 0); - for (auto &it2 : it.second->attributes) - log_assert(!it2.first.empty()); - if (it.second->port_id) { - log_assert(GetSize(ports) >= it.second->port_id); - log_assert(ports.at(it.second->port_id-1) == it.first); - log_assert(it.second->port_input || it.second->port_output); - if (GetSize(ports_declared) < it.second->port_id) - ports_declared.resize(it.second->port_id); - log_assert(ports_declared[it.second->port_id-1] == false); - ports_declared[it.second->port_id-1] = true; - } else - log_assert(!it.second->port_input && !it.second->port_output); - } - for (auto port_declared : ports_declared) - log_assert(port_declared == true); - log_assert(GetSize(ports) == GetSize(ports_declared)); + ParallelDispatchThreadPool::Subpool subpool(thread_pool, ThreadPool::work_pool_size(0, module->cells_size(), 1000)); + const RTLIL::Module *const_module = module; - for (auto &it : memories) { + pool memory_strings; + for (auto &it : module->memories) { log_assert(it.first == it.second->name); log_assert(!it.first.empty()); log_assert(it.second->width >= 0); log_assert(it.second->size >= 0); for (auto &it2 : it.second->attributes) log_assert(!it2.first.empty()); + memory_strings.insert(it.second->name.str()); } - pool packed_memids; + std::vector ports_declared(GetSize(module->ports)); + ShardedVector memids(subpool); + subpool.run([const_module, &ports_declared, &memory_strings, &memids](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(const_module->cells_size())) { + auto it = *const_module->cells_.element(i); + log_assert(const_module == it.second->module); + log_assert(it.first == it.second->name); + log_assert(!it.first.empty()); + log_assert(!it.second->type.empty()); + for (auto &it2 : it.second->connections()) { + log_assert(!it2.first.empty()); + it2.second.check(const_module); + } + for (auto &it2 : it.second->attributes) + log_assert(!it2.first.empty()); + for (auto &it2 : it.second->parameters) + log_assert(!it2.first.empty()); + InternalCellChecker checker(const_module, it.second); + checker.check(); + if (it.second->has_memid()) { + log_assert(memory_strings.count(it.second->parameters.at(ID::MEMID).decode_string())); + } else if (it.second->is_mem_cell()) { + std::string memid = it.second->parameters.at(ID::MEMID).decode_string(); + log_assert(!memory_strings.count(memid)); + memids.insert(ctx, std::move(memid)); + } + auto cell_mod = const_module->design->module(it.first); + if (cell_mod != nullptr) { + // assertion check below to make sure that there are no + // cases where a cell has a blackbox attribute since + // that is deprecated + #ifdef __GNUC__ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #endif + log_assert(!it.second->get_blackbox_attribute()); + #ifdef __GNUC__ + #pragma GCC diagnostic pop + #endif + } + } - for (auto &it : cells_) { - log_assert(this == it.second->module); - log_assert(it.first == it.second->name); - log_assert(!it.first.empty()); - log_assert(!it.second->type.empty()); - for (auto &it2 : it.second->connections()) { - log_assert(!it2.first.empty()); - it2.second.check(this); + for (int i : ctx.item_range(const_module->wires_size())) { + auto it = *const_module->wires_.element(i); + log_assert(const_module == it.second->module); + log_assert(it.first == it.second->name); + log_assert(!it.first.empty()); + log_assert(it.second->width >= 0); + log_assert(it.second->port_id >= 0); + for (auto &it2 : it.second->attributes) + log_assert(!it2.first.empty()); + if (it.second->port_id) { + log_assert(GetSize(const_module->ports) >= it.second->port_id); + log_assert(const_module->ports.at(it.second->port_id-1) == it.first); + log_assert(it.second->port_input || it.second->port_output); + log_assert(it.second->port_id <= GetSize(ports_declared)); + bool previously_declared = ports_declared[it.second->port_id-1].set_and_return_old(); + log_assert(previously_declared == false); + } else + log_assert(!it.second->port_input && !it.second->port_output); } - for (auto &it2 : it.second->attributes) - log_assert(!it2.first.empty()); - for (auto &it2 : it.second->parameters) - log_assert(!it2.first.empty()); - InternalCellChecker checker(this, it.second); - checker.check(); - if (it.second->has_memid()) { - log_assert(memories.count(it.second->parameters.at(ID::MEMID).decode_string())); - } else if (it.second->is_mem_cell()) { - IdString memid = it.second->parameters.at(ID::MEMID).decode_string(); - log_assert(!memories.count(memid)); - log_assert(!packed_memids.count(memid)); - packed_memids.insert(memid); - } - auto cell_mod = design->module(it.first); - if (cell_mod != nullptr) { - // assertion check below to make sure that there are no - // cases where a cell has a blackbox attribute since - // that is deprecated - #ifdef __GNUC__ - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - #endif - log_assert(!it.second->get_blackbox_attribute()); - #ifdef __GNUC__ - #pragma GCC diagnostic pop - #endif - } - } + }); + for (const MonotonicFlag &port_declared : ports_declared) + log_assert(port_declared.load() == true); + pool memids_pool; + for (std::string &memid : memids) + log_assert(memids_pool.insert(memid).second); - for (auto &it : processes) { + for (auto &it : module->processes) { log_assert(it.first == it.second->name); log_assert(!it.first.empty()); log_assert(it.second->root_case.compare.empty()); - std::vector all_cases = {&it.second->root_case}; + std::vector all_cases = {&it.second->root_case}; for (size_t i = 0; i < all_cases.size(); i++) { for (auto &switch_it : all_cases[i]->switches) { for (auto &case_it : switch_it->cases) { @@ -2642,34 +2693,41 @@ void RTLIL::Module::check() } for (auto &sync_it : it.second->syncs) { switch (sync_it->type) { - case SyncType::ST0: - case SyncType::ST1: - case SyncType::STp: - case SyncType::STn: - case SyncType::STe: + case RTLIL::SyncType::ST0: + case RTLIL::SyncType::ST1: + case RTLIL::SyncType::STp: + case RTLIL::SyncType::STn: + case RTLIL::SyncType::STe: log_assert(!sync_it->signal.empty()); break; - case SyncType::STa: - case SyncType::STg: - case SyncType::STi: + case RTLIL::SyncType::STa: + case RTLIL::SyncType::STg: + case RTLIL::SyncType::STi: log_assert(sync_it->signal.empty()); break; } } } - for (auto &it : connections_) { + for (auto &it : module->connections_) { log_assert(it.first.size() == it.second.size()); log_assert(!it.first.has_const()); - it.first.check(this); - it.second.check(this); + it.first.check(module); + it.second.check(module); } - for (auto &it : attributes) + for (auto &it : module->attributes) log_assert(!it.first.empty()); #endif } +void RTLIL::Module::check() +{ + int pool_size = ThreadPool::work_pool_size(0, cells_size(), 1000); + ParallelDispatchThreadPool thread_pool(pool_size); + check_module(this, thread_pool); +} + void RTLIL::Module::optimize() { } @@ -5328,7 +5386,7 @@ RTLIL::SigSpec RTLIL::SigSpec::repeat(int num) const } #ifndef NDEBUG -void RTLIL::SigSpec::check(Module *mod) const +void RTLIL::SigSpec::check(const Module *mod) const { if (rep_ == CHUNK) { diff --git a/kernel/rtlil.h b/kernel/rtlil.h index 6f26d0d39..b9d86b91c 100644 --- a/kernel/rtlil.h +++ b/kernel/rtlil.h @@ -275,6 +275,17 @@ struct RTLIL::IdString *out += std::to_string(-index_); } + std::string unescape() const { + if (index_ < 0) { + // Must start with "$auto$" so no unescaping required. + return str(); + } + std::string_view str = global_id_storage_.at(index_).str_view(); + if (str.size() < 2 || str[0] != '\\' || str[1] == '$' || str[1] == '\\' || (str[1] >= '0' && str[1] <= '9')) + return std::string(str); + return std::string(str.substr(1)); + } + class Substrings { std::string_view first_; int suffix_number; @@ -759,7 +770,7 @@ namespace RTLIL { } static inline std::string unescape_id(RTLIL::IdString str) { - return unescape_id(str.str()); + return str.unescape(); } static inline const char *id2cstr(RTLIL::IdString str) { @@ -1749,9 +1760,9 @@ public: } #ifndef NDEBUG - void check(Module *mod = nullptr) const; + void check(const Module *mod = nullptr) const; #else - void check(Module *mod = nullptr) const { (void)mod; } + void check(const Module *mod = nullptr) const { (void)mod; } #endif }; diff --git a/kernel/threading.cc b/kernel/threading.cc index dcc044c89..3766c4ddf 100644 --- a/kernel/threading.cc +++ b/kernel/threading.cc @@ -17,6 +17,20 @@ static int get_max_threads() return max_threads; } +static int init_work_units_per_thread_override() +{ + const char *v = getenv("YOSYS_WORK_UNITS_PER_THREAD"); + if (v == nullptr) + return 0; + return atoi(v); +} + +static int get_work_units_per_thread_override() +{ + static int work_units_per_thread = init_work_units_per_thread_override(); + return work_units_per_thread; +} + void DeferredLogs::flush() { for (auto &m : logs) @@ -37,6 +51,14 @@ int ThreadPool::pool_size(int reserved_cores, int max_worker_threads) #endif } +int ThreadPool::work_pool_size(int reserved_cores, int work_units, int work_units_per_thread) +{ + int work_units_per_thread_override = get_work_units_per_thread_override(); + if (work_units_per_thread_override > 0) + work_units_per_thread = work_units_per_thread_override; + return pool_size(reserved_cores, work_units / work_units_per_thread); +} + ThreadPool::ThreadPool(int pool_size, std::function b) : body(std::move(b)) { @@ -57,4 +79,72 @@ ThreadPool::~ThreadPool() #endif } +IntRange item_range_for_worker(int num_items, int thread_num, int num_threads) +{ + if (num_threads <= 1) { + return {0, num_items}; + } + int items_per_thread = num_items / num_threads; + int extra_items = num_items % num_threads; + // The first `extra_items` threads get one extra item. + int start = thread_num * items_per_thread + std::min(thread_num, extra_items); + int end = (thread_num + 1) * items_per_thread + std::min(thread_num + 1, extra_items); + return {start, end}; +} + +ParallelDispatchThreadPool::ParallelDispatchThreadPool(int pool_size) + : num_worker_threads_(std::max(1, pool_size) - 1) +{ +#ifdef YOSYS_ENABLE_THREADS + main_to_workers_signal.resize(num_worker_threads_, 0); +#endif + // Don't start the threads until we've constructed all our data members. + thread_pool = std::make_unique(num_worker_threads_, [this](int thread_num){ + run_worker(thread_num); + }); +} + +ParallelDispatchThreadPool::~ParallelDispatchThreadPool() +{ +#ifdef YOSYS_ENABLE_THREADS + if (num_worker_threads_ == 0) + return; + current_work = nullptr; + num_active_worker_threads_ = num_worker_threads_; + signal_workers_start(); + wait_for_workers_done(); +#endif +} + +void ParallelDispatchThreadPool::run(std::function work, int max_threads) +{ + Multithreading multithreading; + num_active_worker_threads_ = num_threads(max_threads) - 1; + if (num_active_worker_threads_ == 0) { + work({{0}, 1}); + return; + } +#ifdef YOSYS_ENABLE_THREADS + current_work = &work; + signal_workers_start(); + work({{0}, num_active_worker_threads_ + 1}); + wait_for_workers_done(); +#endif +} + +void ParallelDispatchThreadPool::run_worker(int thread_num) +{ +#ifdef YOSYS_ENABLE_THREADS + while (true) + { + worker_wait_for_start(thread_num); + if (current_work == nullptr) + break; + (*current_work)({{thread_num + 1}, num_active_worker_threads_ + 1}); + signal_worker_done(); + } + signal_worker_done(); +#endif +} + YOSYS_NAMESPACE_END diff --git a/kernel/threading.h b/kernel/threading.h index b8cd62f87..fb1602cf5 100644 --- a/kernel/threading.h +++ b/kernel/threading.h @@ -8,6 +8,7 @@ #include "kernel/yosys_common.h" #include "kernel/log.h" +#include "kernel/utils.h" #ifndef YOSYS_THREADING_H #define YOSYS_THREADING_H @@ -131,6 +132,11 @@ public: // The result may be 0. static int pool_size(int reserved_cores, int max_worker_threads); + // Computes the number of worker threads to use, by dividing work_units among threads. + // For testing purposes you can set YOSYS_WORK_UNITS_PER_THREAD to override `work_units_per_thread`. + // The result may be 0. + static int work_pool_size(int reserved_cores, int work_units, int work_units_per_thread); + // Create a pool of threads running the given closure (parameterized by thread number). // `pool_size` must be the result of a `pool_size()` call. ThreadPool(int pool_size, std::function b); @@ -154,6 +160,123 @@ private: #endif }; +// Divides some number of items into `num_threads` subranges and returns the +// `thread_num`'th subrange. If `num_threads` is zero, returns the whole range. +IntRange item_range_for_worker(int num_items, int thread_num, int num_threads); + +// A type that encapsulates the index of a thread in some list of threads. Useful for +// stronger typechecking and code readability. +struct ThreadIndex { + int thread_num; +}; + +// A set of threads with a `run()` API that runs a closure on all of the threads +// and wait for all those closures to complete. This is a convenient way to implement +// parallel algorithms that use barrier synchronization. +class ParallelDispatchThreadPool +{ +public: + // Create a pool of threads running the given closure (parameterized by thread number). + // `pool_size` must be the result of a `pool_size()` call. + // `pool_size` can be zero, which we treat as 1. + ParallelDispatchThreadPool(int pool_size); + ~ParallelDispatchThreadPool(); + + // For each thread running a closure, a `RunCtx` is passed to the closure. Currently + // it contains the thread index and the total number of threads. It can be passed + // directly to any APIs requiring a `ThreadIndex`. + struct RunCtx : public ThreadIndex { + int num_threads; + IntRange item_range(int num_items) const { + return item_range_for_worker(num_items, thread_num, num_threads); + } + }; + // Sometimes we only want to activate a subset of the threads in the pool. This + // class provides a way to do that. It provides the same `num_threads()` + // and `run()` APIs as a `ParallelDispatchThreadPool`. + class Subpool { + public: + Subpool(ParallelDispatchThreadPool &parent, int max_threads) + : parent(parent), max_threads(max_threads) {} + // Returns the number of threads that will be used when calling `run()`. + int num_threads() const { + return parent.num_threads(max_threads); + } + void run(std::function work) { + parent.run(std::move(work), max_threads); + } + ParallelDispatchThreadPool &thread_pool() { return parent; } + private: + ParallelDispatchThreadPool &parent; + int max_threads; + }; + + // Run the `work` function in parallel on each thread in the pool (parameterized by + // thread number). Waits for all work functions to complete. Only one `run()` can be + // active at a time. + // Uses no more than `max_threads` threads (but at least one). + void run(std::function work) { + run(std::move(work), INT_MAX); + } + + // Returns the number of threads that will be used when calling `run()`. + int num_threads() const { + return num_threads(INT_MAX); + } +private: + friend class Subpool; + + void run(std::function work, int max_threads); + int num_threads(int max_threads) const { + return std::min(num_worker_threads_ + 1, std::max(1, max_threads)); + } + void run_worker(int thread_num); + + std::unique_ptr thread_pool; + std::function *current_work = nullptr; + // Keeps a correct count even when threads are exiting. + int num_worker_threads_; + // The count of active workerthreads for the current `run()`. + int num_active_worker_threads_ = 0; + +#ifdef YOSYS_ENABLE_THREADS + // Not especially efficient for large numbers of threads. Worker wakeup could scale + // better by conceptually organising workers into a tree and having workers wake + // up their children. + std::mutex main_to_workers_signal_mutex; + std::condition_variable main_to_workers_signal_cv; + std::vector main_to_workers_signal; + void signal_workers_start() { + std::unique_lock lock(main_to_workers_signal_mutex); + std::fill(main_to_workers_signal.begin(), main_to_workers_signal.begin() + num_active_worker_threads_, 1); + // When `num_active_worker_threads_` is small compared to `num_worker_threads_`, we have a "thundering herd" + // problem here. Fixing that would add complexity so don't worry about it for now. + main_to_workers_signal_cv.notify_all(); + } + void worker_wait_for_start(int thread_num) { + std::unique_lock lock(main_to_workers_signal_mutex); + main_to_workers_signal_cv.wait(lock, [this, thread_num] { return main_to_workers_signal[thread_num] > 0; }); + main_to_workers_signal[thread_num] = 0; + } + + std::atomic done_workers = 0; + std::mutex workers_to_main_signal_mutex; + std::condition_variable workers_to_main_signal_cv; + void signal_worker_done() { + int d = done_workers.fetch_add(1, std::memory_order_release); + if (d + 1 == num_active_worker_threads_) { + std::unique_lock lock(workers_to_main_signal_mutex); + workers_to_main_signal_cv.notify_all(); + } + } + void wait_for_workers_done() { + std::unique_lock lock(workers_to_main_signal_mutex); + workers_to_main_signal_cv.wait(lock, [this] { return done_workers.load(std::memory_order_acquire) == num_active_worker_threads_; }); + done_workers.store(0, std::memory_order_relaxed); + } +#endif +}; + template class ConcurrentStack { @@ -181,6 +304,382 @@ private: std::vector contents; }; +// A vector that is sharded into buckets, one per thread. This lets multiple threads write +// efficiently to the vector without synchronization overhead. After all writers have +// finished writing, the vector can be iterated over. The iteration order is deterministic: +// all the elements written by thread 0 in the order it inserted them, followed by all elements +// written by thread 1, etc. +template +class ShardedVector { +public: + ShardedVector(const ParallelDispatchThreadPool &thread_pool) { + init(thread_pool.num_threads()); + } + ShardedVector(const ParallelDispatchThreadPool::Subpool &thread_pool) { + init(thread_pool.num_threads()); + } + + // Insert a value, passing the `ThreadIndex` of the writer thread. + // Parallel inserts with different `ThreadIndex` values are fine. + // Inserts must not run concurrently with any other methods (e.g. + // iteration or `empty()`.) + void insert(const ThreadIndex &thread, T value) { + buckets[thread.thread_num].emplace_back(std::move(value)); + } + + bool empty() const { + for (const std::vector &bucket : buckets) + if (!bucket.empty()) + return false; + return true; + } + + using Buckets = std::vector>; + class iterator { + public: + iterator(typename Buckets::iterator bucket_it, typename Buckets::iterator bucket_end) + : bucket_it(std::move(bucket_it)), bucket_end(std::move(bucket_end)) { + if (bucket_it != bucket_end) + inner_it = bucket_it->begin(); + normalize(); + } + T& operator*() const { return *inner_it.value(); } + iterator &operator++() { + ++*inner_it; + normalize(); + return *this; + } + bool operator!=(const iterator &other) const { + return bucket_it != other.bucket_it || inner_it != other.inner_it; + } + private: + void normalize() { + if (bucket_it == bucket_end) + return; + while (inner_it == bucket_it->end()) { + ++bucket_it; + if (bucket_it == bucket_end) { + inner_it.reset(); + return; + } + inner_it = bucket_it->begin(); + } + } + std::optional::iterator> inner_it; + typename Buckets::iterator bucket_it; + typename Buckets::iterator bucket_end; + }; + iterator begin() { return iterator(buckets.begin(), buckets.end()); } + iterator end() { return iterator(buckets.end(), buckets.end()); } +private: + void init(int num_threads) { + buckets.resize(num_threads); + } + Buckets buckets; +}; + +// This collision handler for `ShardedHashtable` resolves collisions by keeping +// the current value and discarding the other. This is correct when all values with the +// same key are interchangeable, i.e. when the hashtable is being used as a set instead +// of a map. +template +struct SetCollisionHandler { + void operator()(typename V::Accumulated &, typename V::Accumulated &) const {} +}; + +// A hashtable that can be efficiently built in parallel and then looked up concurrently. +// `V` is the type of elements that will be added to the hashtable. It must have a +// member type `Accumulated` representing the combination of multiple `V` elements. This +// can be the same as `V`, but for example `V` could contain a Wire* and `V::Accumulated` +// could contain a `pool`. `KeyEquality` is a class containing an `operator()` that +// returns true of two `V` elements have equal keys. +// `CollisionHandler` is used to reduce two `V::Accumulated` values into a single value. +// +// To use this, first construct a `Builder` and fill it in (in parallel), then construct +// a `ShardedHashtable` from the `Builder`. +template +class ShardedHashtable { +public: + // A combination of a `V` and its hash value. + struct Value { + Value(V value, unsigned int hash) : value(std::move(value)), hash(hash) {} + Value(Value &&) = default; + Value(const Value &) = delete; + Value &operator=(const Value &) = delete; + V value; + unsigned int hash; + }; + // A combination of a `V::Accumulated` and its hash value. + struct AccumulatedValue { + AccumulatedValue(typename V::Accumulated value, unsigned int hash) : value(std::move(value)), hash(hash) {} + AccumulatedValue(AccumulatedValue &&) = default; +#if defined(_MSC_VER) + AccumulatedValue(const AccumulatedValue &) { + log_error("Copy constructor called on AccumulatedValue"); + } + AccumulatedValue &operator=(const AccumulatedValue &) { + log_error("Copy assignment called on AccumulatedValue"); + return *this; + } +#else + AccumulatedValue(const AccumulatedValue &) = delete; + AccumulatedValue &operator=(const AccumulatedValue &) = delete; +#endif + typename V::Accumulated value; + unsigned int hash; + }; + // A class containing an `operator()` that returns true of two `AccumulatedValue` + // elements have equal keys. + // Required to insert `AccumulatedValue`s into an `std::unordered_set`. + struct AccumulatedValueEquality { + KeyEquality inner; + AccumulatedValueEquality(const KeyEquality &inner) : inner(inner) {} + bool operator()(const AccumulatedValue &v1, const AccumulatedValue &v2) const { + return inner(v1.value, v2.value); + } + }; + // A class containing an `operator()` that returns the hash value of an `AccumulatedValue`. + // Required to insert `AccumulatedValue`s into an `std::unordered_set`. + struct AccumulatedValueHashOp { + size_t operator()(const AccumulatedValue &v) const { + return static_cast(v.hash); + } + }; + using Shard = std::unordered_set; + + // First construct one of these. Then populate it in parallel by calling `insert()` from many threads. + // Then do another parallel phase calling `process()` from many threads. + class Builder { + public: + Builder(const ParallelDispatchThreadPool &thread_pool, KeyEquality equality = KeyEquality(), CollisionHandler collision_handler = CollisionHandler()) + : collision_handler(std::move(collision_handler)) { + init(thread_pool.num_threads(), std::move(equality)); + } + Builder(const ParallelDispatchThreadPool::Subpool &thread_pool, KeyEquality equality = KeyEquality(), CollisionHandler collision_handler = CollisionHandler()) + : collision_handler(std::move(collision_handler)) { + init(thread_pool.num_threads(), std::move(equality)); + } + // First call `insert` to insert all elements. All inserts must finish + // before calling any `process()`. + void insert(const ThreadIndex &thread, Value v) { + // You might think that for the single-threaded case, we can optimize by + // inserting directly into the `std::unordered_set` here. But that slows things down + // a lot and I never got around to figuring out why. + std::vector> &buckets = all_buckets[thread.thread_num]; + size_t bucket = static_cast(v.hash) % buckets.size(); + buckets[bucket].emplace_back(std::move(v)); + } + // Then call `process` for each thread. All `process()`s must finish before using + // the `Builder` to construct a `ShardedHashtable`. + void process(const ThreadIndex &thread) { + int size = 0; + for (std::vector> &buckets : all_buckets) + size += GetSize(buckets[thread.thread_num]); + Shard &shard = shards[thread.thread_num]; + shard.reserve(size); + for (std::vector> &buckets : all_buckets) { + for (Value &value : buckets[thread.thread_num]) + accumulate(value, shard); + // Free as much memory as we can during the parallel phase. + std::vector().swap(buckets[thread.thread_num]); + } + } + private: + friend class ShardedHashtable; + void accumulate(Value &value, Shard &shard) { + // With C++20 we could make this more efficient using heterogenous lookup + AccumulatedValue accumulated_value{std::move(value.value), value.hash}; + auto [it, inserted] = shard.insert(std::move(accumulated_value)); + if (!inserted) + collision_handler(const_cast(it->value), accumulated_value.value); + } + void init(int num_threads, KeyEquality equality) { + all_buckets.resize(num_threads); + for (std::vector> &buckets : all_buckets) + buckets.resize(num_threads); + for (int i = 0; i < num_threads; ++i) + shards.emplace_back(0, AccumulatedValueHashOp(), AccumulatedValueEquality(equality)); + } + const CollisionHandler collision_handler; + // A num_threads x num_threads matrix of buckets. + // In the first phase, each thread i gemerates elements and writes them to + // bucket [i][j] where j = hash(element) % num_threads. + // In the second phase, thread i reads from bucket [j][i] for all j, collecting + // all elements where i = hash(element) % num_threads. + std::vector>> all_buckets; + std::vector shards; + }; + + // Then finally construct the hashtable: + ShardedHashtable(Builder &builder) : shards(std::move(builder.shards)) { + // Check that all necessary 'process()' calls were made. + for (std::vector> &buckets : builder.all_buckets) + for (std::vector &bucket : buckets) + log_assert(bucket.empty()); + // Free memory. + std::vector>>().swap(builder.all_buckets); + } + ShardedHashtable(ShardedHashtable &&other) = default; + ShardedHashtable() {} + + ShardedHashtable &operator=(ShardedHashtable &&other) = default; + + // Look up by `AccumulatedValue`. If we switch to C++20 then we could use + // heterogenous lookup to support looking up by `Value` here. Returns nullptr + // if the key is not found. + const typename V::Accumulated *find(const AccumulatedValue &v) const { + size_t num_shards = shards.size(); + if (num_shards == 0) + return nullptr; + size_t shard = static_cast(v.hash) % num_shards; + auto it = shards[shard].find(v); + if (it == shards[shard].end()) + return nullptr; + return &it->value; + } + + // Insert an element into the table. The caller is responsible for ensuring this does not + // happen concurrently with any other method calls. + void insert(AccumulatedValue v) { + size_t num_shards = shards.size(); + if (num_shards == 0) + return; + size_t shard = static_cast(v.hash) % num_shards; + shards[shard].insert(v); + } + + // Call this for each shard to implement parallel destruction. For very large `ShardedHashtable`s, + // deleting all elements of all shards on a single thread can be a performance bottleneck. + void clear(const ThreadIndex &shard) { + AccumulatedValueEquality equality = shards[0].key_eq(); + shards[shard.thread_num] = Shard(0, AccumulatedValueHashOp(), equality); + } +private: + std::vector shards; +}; + +// A concurrent work-queue that can share batches of work across threads. +// Uses a naive implementation of work-stealing. +template +class ConcurrentWorkQueue { +public: + // Create a queue that supports the given number of threads and + // groups work into `batch_size` units. + ConcurrentWorkQueue(int num_threads, int batch_size = 100) + : batch_size(batch_size), thread_states(num_threads) {} + int num_threads() const { return GetSize(thread_states); } + // Push some work to do. Pushes and pops with the same `thread` must + // not happen concurrently. + void push(const ThreadIndex &thread, T work) { + ThreadState &thread_state = thread_states[thread.thread_num]; + thread_state.next_batch.emplace_back(std::move(work)); + if (GetSize(thread_state.next_batch) < batch_size) + return; + bool was_empty; + { + std::unique_lock lock(thread_state.batches_lock); + was_empty = thread_state.batches.empty(); + thread_state.batches.push_back(std::move(thread_state.next_batch)); + } + if (was_empty) { + std::unique_lock lock(waiters_lock); + if (num_waiters > 0) { + waiters_cv.notify_one(); + } + } + } + // Grab some work to do. + // If all threads enter `pop_batch()`, then instead of deadlocking the + // queue will return no work. That is the only case in which it will + // return no work. + std::vector pop_batch(const ThreadIndex &thread) { + ThreadState &thread_state = thread_states[thread.thread_num]; + if (!thread_state.next_batch.empty()) + return std::move(thread_state.next_batch); + // Empty our own work queue first. + { + std::unique_lock lock(thread_state.batches_lock); + if (!thread_state.batches.empty()) { + std::vector batch = std::move(thread_state.batches.back()); + thread_state.batches.pop_back(); + return batch; + } + } + // From here on in this function, our work queue is empty. + while (true) { + std::vector batch = try_steal(thread); + if (!batch.empty()) { + return std::move(batch); + } + // Termination: if all threads run out of work, then all of + // them will eventually enter this loop and there will be no further + // notifications on waiters_cv, so all will eventually increment + // num_waiters and wait, so num_waiters == num_threads() + // will become true. + std::unique_lock lock(waiters_lock); + ++num_waiters; + if (num_waiters == num_threads()) { + waiters_cv.notify_all(); + return {}; + } + // As above, it's possible that we'll wait here even when there + // are work batches posted by other threads. That's OK. + waiters_cv.wait(lock); + if (num_waiters == num_threads()) + return {}; + --num_waiters; + } + } +private: + std::vector try_steal(const ThreadIndex &thread) { + for (int i = 1; i < num_threads(); i++) { + int other_thread_num = (thread.thread_num + i) % num_threads(); + ThreadState &other_thread_state = thread_states[other_thread_num]; + std::unique_lock lock(other_thread_state.batches_lock); + if (!other_thread_state.batches.empty()) { + std::vector batch = std::move(other_thread_state.batches.front()); + other_thread_state.batches.pop_front(); + return batch; + } + } + return {}; + } + + int batch_size; + + struct ThreadState { + // Entirely thread-local. + std::vector next_batch; + + std::mutex batches_lock; + // Only the associated thread ever adds to this, and only at the back. + // Other threads can remove elements from the front. + std::deque> batches; + }; + std::vector thread_states; + + std::mutex waiters_lock; + std::condition_variable waiters_cv; + // Number of threads waiting for work. Their queues are empty. + int num_waiters = 0; +}; + +// A monotonic flag. Starts false, and can be set to true in a thread-safe way. +// Once `load()` returns true, it will always return true. +// Uses relaxed atomics so there are no memory ordering guarantees. Do not use this +// to guard access to shared memory. +class MonotonicFlag { +public: + MonotonicFlag() : value(false) {} + bool load() const { return value.load(std::memory_order_relaxed); } + void set() { value.store(true, std::memory_order_relaxed); } + bool set_and_return_old() { + return value.exchange(true, std::memory_order_relaxed); + } +private: + std::atomic value; +}; + YOSYS_NAMESPACE_END #endif // YOSYS_THREADING_H diff --git a/kernel/utils.h b/kernel/utils.h index e90ba09d8..46a196789 100644 --- a/kernel/utils.h +++ b/kernel/utils.h @@ -299,6 +299,24 @@ auto reversed(const T& container) { return reverse_view{container}; } +// A range of integers [start_, end_) that can be iterated over with a +// C++ range-based for loop. +struct IntRange { + int start_; + int end_; + struct Int { + int v; + int operator*() const { return v; } + Int &operator++() { ++v; return *this; } + bool operator!=(const Int &other) const { return v != other.v; } + }; + Int begin() const { return {start_}; } + Int end() const { return {end_}; } + + bool operator==(const IntRange &other) const { return start_ == other.start_ && end_ == other.end_; } + bool operator!=(const IntRange &other) const { return !(*this == other); } +}; + YOSYS_NAMESPACE_END #endif diff --git a/passes/opt/Makefile.inc b/passes/opt/Makefile.inc index 5dee824ff..e7b62fc6a 100644 --- a/passes/opt/Makefile.inc +++ b/passes/opt/Makefile.inc @@ -9,7 +9,6 @@ OBJS += passes/opt/opt_muxtree.o OBJS += passes/opt/opt_reduce.o OBJS += passes/opt/opt_dff.o OBJS += passes/opt/opt_share.o -OBJS += passes/opt/opt_clean.o OBJS += passes/opt/opt_expr.o OBJS += passes/opt/opt_hier.o @@ -40,3 +39,5 @@ PEEPOPT_PATTERN += passes/opt/peepopt_formal_clockgateff.pmg passes/opt/peepopt_pm.h: passes/pmgen/pmgen.py $(PEEPOPT_PATTERN) $(P) mkdir -p $(dir $@) && $(PYTHON_EXECUTABLE) $< -o $@ -p peepopt $(filter-out $<,$^) endif + +include $(YOSYS_SRC)/passes/opt/opt_clean/Makefile.inc \ No newline at end of file diff --git a/passes/opt/opt_clean.cc b/passes/opt/opt_clean.cc deleted file mode 100644 index 72d22ddd3..000000000 --- a/passes/opt/opt_clean.cc +++ /dev/null @@ -1,785 +0,0 @@ -/* - * yosys -- Yosys Open SYnthesis Suite - * - * Copyright (C) 2012 Claire Xenia Wolf - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - */ - -#include "kernel/register.h" -#include "kernel/sigtools.h" -#include "kernel/log.h" -#include "kernel/celltypes.h" -#include "kernel/newcelltypes.h" -#include "kernel/ffinit.h" -#include -#include -#include - -USING_YOSYS_NAMESPACE -PRIVATE_NAMESPACE_BEGIN - -using RTLIL::id2cstr; - -struct keep_cache_t -{ - Design *design; - dict cache; - bool purge_mode = false; - - void reset(Design *design = nullptr, bool purge_mode = false) - { - this->design = design; - this->purge_mode = purge_mode; - cache.clear(); - } - - bool query(Module *module) - { - log_assert(design != nullptr); - - if (module == nullptr) - return false; - - if (cache.count(module)) - return cache.at(module); - - cache[module] = true; - if (!module->get_bool_attribute(ID::keep)) { - bool found_keep = false; - for (auto cell : module->cells()) - if (query(cell, true /* ignore_specify */)) { - found_keep = true; - break; - } - for (auto wire : module->wires()) - if (wire->get_bool_attribute(ID::keep)) { - found_keep = true; - break; - } - cache[module] = found_keep; - } - - return cache[module]; - } - - bool query(Cell *cell, bool ignore_specify = false) - { - if (cell->type.in(ID($assert), ID($assume), ID($live), ID($fair), ID($cover))) - return true; - - if (cell->type.in(ID($overwrite_tag))) - return true; - - if (!ignore_specify && cell->type.in(ID($specify2), ID($specify3), ID($specrule))) - return true; - - if (cell->type == ID($print) || cell->type == ID($check)) - return true; - - if (cell->has_keep_attr()) - return true; - - if (!purge_mode && cell->type == ID($scopeinfo)) - return true; - - if (cell->module && cell->module->design) - return query(cell->module->design->module(cell->type)); - - return false; - } -}; - -keep_cache_t keep_cache; -static constexpr auto ct_reg = StaticCellTypes::Categories::join( - StaticCellTypes::Compat::mem_ff, - StaticCellTypes::categories.is_anyinit); -NewCellTypes ct_all; -int count_rm_cells, count_rm_wires; - -void rmunused_module_cells(Module *module, bool verbose) -{ - SigMap sigmap(module); - dict> mem2cells; - pool mem_unused; - pool queue, unused; - pool used_raw_bits; - dict> wire2driver; - dict> driver_driver_logs; - FfInitVals ffinit(&sigmap, module); - - SigMap raw_sigmap; - for (auto &it : module->connections_) { - for (int i = 0; i < GetSize(it.second); i++) { - if (it.second[i].wire != nullptr) - raw_sigmap.add(it.first[i], it.second[i]); - } - } - - for (auto &it : module->memories) { - mem_unused.insert(it.first); - } - - for (Cell *cell : module->cells()) { - if (cell->type.in(ID($memwr), ID($memwr_v2), ID($meminit), ID($meminit_v2))) { - IdString mem_id = cell->getParam(ID::MEMID).decode_string(); - mem2cells[mem_id].insert(cell); - } - } - - for (auto &it : module->cells_) { - Cell *cell = it.second; - for (auto &it2 : cell->connections()) { - if (ct_all.cell_known(cell->type) && !ct_all.cell_output(cell->type, it2.first)) - continue; - for (auto raw_bit : it2.second) { - if (raw_bit.wire == nullptr) - continue; - auto bit = sigmap(raw_bit); - if (bit.wire == nullptr && ct_all.cell_known(cell->type)) - driver_driver_logs[raw_sigmap(raw_bit)].push_back(stringf("Driver-driver conflict " - "for %s between cell %s.%s and constant %s in %s: Resolved using constant.", - log_signal(raw_bit), log_id(cell), log_id(it2.first), log_signal(bit), log_id(module))); - if (bit.wire != nullptr) - wire2driver[bit].insert(cell); - } - } - if (keep_cache.query(cell)) - queue.insert(cell); - else - unused.insert(cell); - } - - for (auto &it : module->wires_) { - Wire *wire = it.second; - if (wire->port_output || wire->get_bool_attribute(ID::keep)) { - for (auto bit : sigmap(wire)) - for (auto c : wire2driver[bit]) - queue.insert(c), unused.erase(c); - for (auto raw_bit : SigSpec(wire)) - used_raw_bits.insert(raw_sigmap(raw_bit)); - } - } - - while (!queue.empty()) - { - pool bits; - pool mems; - for (auto cell : queue) { - for (auto &it : cell->connections()) - if (!ct_all.cell_known(cell->type) || ct_all.cell_input(cell->type, it.first)) - for (auto bit : sigmap(it.second)) - bits.insert(bit); - - if (cell->type.in(ID($memrd), ID($memrd_v2))) { - IdString mem_id = cell->getParam(ID::MEMID).decode_string(); - if (mem_unused.count(mem_id)) { - mem_unused.erase(mem_id); - mems.insert(mem_id); - } - } - } - - queue.clear(); - - for (auto bit : bits) - for (auto c : wire2driver[bit]) - if (unused.count(c)) - queue.insert(c), unused.erase(c); - - for (auto mem : mems) - for (auto c : mem2cells[mem]) - if (unused.count(c)) - queue.insert(c), unused.erase(c); - } - - unused.sort(RTLIL::sort_by_name_id()); - - for (auto cell : unused) { - if (verbose) - log_debug(" removing unused `%s' cell `%s'.\n", cell->type, cell->name); - module->design->scratchpad_set_bool("opt.did_something", true); - if (cell->is_builtin_ff()) - ffinit.remove_init(cell->getPort(ID::Q)); - module->remove(cell); - count_rm_cells++; - } - - for (auto it : mem_unused) - { - if (verbose) - log_debug(" removing unused memory `%s'.\n", it); - delete module->memories.at(it); - module->memories.erase(it); - } - - for (auto &it : module->cells_) { - Cell *cell = it.second; - for (auto &it2 : cell->connections()) { - if (ct_all.cell_known(cell->type) && !ct_all.cell_input(cell->type, it2.first)) - continue; - for (auto raw_bit : raw_sigmap(it2.second)) - used_raw_bits.insert(raw_bit); - } - } - - for (auto it : driver_driver_logs) { - if (used_raw_bits.count(it.first)) - for (auto msg : it.second) - log_warning("%s\n", msg); - } -} - -int count_nontrivial_wire_attrs(RTLIL::Wire *w) -{ - int count = w->attributes.size(); - count -= w->attributes.count(ID::src); - count -= w->attributes.count(ID::hdlname); - count -= w->attributes.count(ID(scopename)); - count -= w->attributes.count(ID::unused_bits); - return count; -} - -// Should we pick `s2` over `s1` to represent a signal? -bool compare_signals(RTLIL::SigBit &s1, RTLIL::SigBit &s2, SigPool ®s, SigPool &conns, pool &direct_wires) -{ - RTLIL::Wire *w1 = s1.wire; - RTLIL::Wire *w2 = s2.wire; - - if (w1 == NULL || w2 == NULL) - return w2 == NULL; - - if (w1->port_input != w2->port_input) - return w2->port_input; - - if ((w1->port_input && w1->port_output) != (w2->port_input && w2->port_output)) - return !(w2->port_input && w2->port_output); - - if (w1->name.isPublic() && w2->name.isPublic()) { - if (regs.check(s1) != regs.check(s2)) - return regs.check(s2); - if (direct_wires.count(w1) != direct_wires.count(w2)) - return direct_wires.count(w2) != 0; - if (conns.check_any(s1) != conns.check_any(s2)) - return conns.check_any(s2); - } - - if (w1 == w2) - return s2.offset < s1.offset; - - if (w1->port_output != w2->port_output) - return w2->port_output; - - if (w1->name[0] != w2->name[0]) - return w2->name.isPublic(); - - int attrs1 = count_nontrivial_wire_attrs(w1); - int attrs2 = count_nontrivial_wire_attrs(w2); - - if (attrs1 != attrs2) - return attrs2 > attrs1; - - return w2->name.lt_by_name(w1->name); -} - -bool check_public_name(RTLIL::IdString id) -{ - if (id.begins_with("$")) - return false; - const std::string &id_str = id.str(); - if (id.begins_with("\\_") && (id.ends_with("_") || id_str.find("_[") != std::string::npos)) - return false; - if (id_str.find(".$") != std::string::npos) - return false; - return true; -} - -bool rmunused_module_signals(RTLIL::Module *module, bool purge_mode, bool verbose) -{ - // `register_signals` and `connected_signals` will help us decide later on - // on picking representatives out of groups of connected signals - SigPool register_signals; - SigPool connected_signals; - if (!purge_mode) - for (auto &it : module->cells_) { - RTLIL::Cell *cell = it.second; - if (ct_reg(cell->type)) { - bool clk2fflogic = cell->get_bool_attribute(ID(clk2fflogic)); - for (auto &it2 : cell->connections()) - if (clk2fflogic ? it2.first == ID::D : ct_all.cell_output(cell->type, it2.first)) - register_signals.add(it2.second); - } - for (auto &it2 : cell->connections()) - connected_signals.add(it2.second); - } - - SigMap assign_map(module); - - // construct a pool of wires which are directly driven by a known celltype, - // this will influence our choice of representatives - pool direct_wires; - { - pool direct_sigs; - for (auto &it : module->cells_) { - RTLIL::Cell *cell = it.second; - if (ct_all.cell_known(cell->type)) - for (auto &it2 : cell->connections()) - if (ct_all.cell_output(cell->type, it2.first)) - direct_sigs.insert(assign_map(it2.second)); - } - for (auto &it : module->wires_) { - if (direct_sigs.count(assign_map(it.second)) || it.second->port_input) - direct_wires.insert(it.second); - } - } - - // weight all options for representatives with `compare_signals`, - // the one that wins will be what `assign_map` maps to - for (auto &it : module->wires_) { - RTLIL::Wire *wire = it.second; - for (int i = 0; i < wire->width; i++) { - RTLIL::SigBit s1 = RTLIL::SigBit(wire, i), s2 = assign_map(s1); - if (compare_signals(s2, s1, register_signals, connected_signals, direct_wires)) - assign_map.add(s1); - } - } - - // we are removing all connections - module->connections_.clear(); - - // used signals sigmapped - SigPool used_signals; - // used signals pre-sigmapped - SigPool raw_used_signals; - // used signals sigmapped, ignoring drivers (we keep track of this to set `unused_bits`) - SigPool used_signals_nodrivers; - - // gather the usage information for cells - for (auto &it : module->cells_) { - RTLIL::Cell *cell = it.second; - for (auto &it2 : cell->connections_) { - assign_map.apply(it2.second); // modify the cell connection in place - raw_used_signals.add(it2.second); - used_signals.add(it2.second); - if (!ct_all.cell_output(cell->type, it2.first)) - used_signals_nodrivers.add(it2.second); - } - } - - // gather the usage information for ports, wires with `keep`, - // also gather init bits - dict init_bits; - for (auto &it : module->wires_) { - RTLIL::Wire *wire = it.second; - if (wire->port_id > 0) { - RTLIL::SigSpec sig = RTLIL::SigSpec(wire); - raw_used_signals.add(sig); - assign_map.apply(sig); - used_signals.add(sig); - if (!wire->port_input) - used_signals_nodrivers.add(sig); - } - if (wire->get_bool_attribute(ID::keep)) { - RTLIL::SigSpec sig = RTLIL::SigSpec(wire); - assign_map.apply(sig); - used_signals.add(sig); - } - auto it2 = wire->attributes.find(ID::init); - if (it2 != wire->attributes.end()) { - RTLIL::Const &val = it2->second; - SigSpec sig = assign_map(wire); - for (int i = 0; i < GetSize(val) && i < GetSize(sig); i++) - if (val[i] != State::Sx) - init_bits[sig[i]] = val[i]; - wire->attributes.erase(it2); - } - } - - // set init attributes on all wires of a connected group - for (auto wire : module->wires()) { - bool found = false; - Const val(State::Sx, wire->width); - for (int i = 0; i < wire->width; i++) { - auto it = init_bits.find(RTLIL::SigBit(wire, i)); - if (it != init_bits.end()) { - val.set(i, it->second); - found = true; - } - } - if (found) - wire->attributes[ID::init] = val; - } - - // now decide for each wire if we should be deleting it - pool del_wires_queue; - for (auto wire : module->wires()) - { - SigSpec s1 = SigSpec(wire), s2 = assign_map(s1); - log_assert(GetSize(s1) == GetSize(s2)); - - Const initval; - if (wire->attributes.count(ID::init)) - initval = wire->attributes.at(ID::init); - if (GetSize(initval) != GetSize(wire)) - initval.resize(GetSize(wire), State::Sx); - if (initval.is_fully_undef()) - wire->attributes.erase(ID::init); - - if (GetSize(wire) == 0) { - // delete zero-width wires, unless they are module ports - if (wire->port_id == 0) - goto delete_this_wire; - } else - if (wire->port_id != 0 || wire->get_bool_attribute(ID::keep) || !initval.is_fully_undef()) { - // do not delete anything with "keep" or module ports or initialized wires - } else - if (!purge_mode && check_public_name(wire->name) && (raw_used_signals.check_any(s1) || used_signals.check_any(s2) || s1 != s2)) { - // do not get rid of public names unless in purge mode or if the wire is entirely unused, not even aliased - } else - if (!raw_used_signals.check_any(s1)) { - // delete wires that aren't used by anything directly - goto delete_this_wire; - } - - if (0) - { - delete_this_wire: - del_wires_queue.insert(wire); - } - else - { - RTLIL::SigSig new_conn; - for (int i = 0; i < GetSize(s1); i++) - if (s1[i] != s2[i]) { - if (s2[i] == State::Sx && (initval[i] == State::S0 || initval[i] == State::S1)) { - s2[i] = initval[i]; - initval.set(i, State::Sx); - } - new_conn.first.append(s1[i]); - new_conn.second.append(s2[i]); - } - if (new_conn.first.size() > 0) { - if (initval.is_fully_undef()) - wire->attributes.erase(ID::init); - else - wire->attributes.at(ID::init) = initval; - module->connect(new_conn); - } - - if (!used_signals_nodrivers.check_all(s2)) { - std::string unused_bits; - for (int i = 0; i < GetSize(s2); i++) { - if (s2[i].wire == NULL) - continue; - if (!used_signals_nodrivers.check(s2[i])) { - if (!unused_bits.empty()) - unused_bits += " "; - unused_bits += stringf("%d", i); - } - } - if (unused_bits.empty() || wire->port_id != 0) - wire->attributes.erase(ID::unused_bits); - else - wire->attributes[ID::unused_bits] = RTLIL::Const(unused_bits); - } else { - wire->attributes.erase(ID::unused_bits); - } - } - } - - int del_temp_wires_count = 0; - for (auto wire : del_wires_queue) { - if (ys_debug() || (check_public_name(wire->name) && verbose)) - log_debug(" removing unused non-port wire %s.\n", wire->name); - else - del_temp_wires_count++; - } - - module->remove(del_wires_queue); - count_rm_wires += GetSize(del_wires_queue); - - if (verbose && del_temp_wires_count) - log_debug(" removed %d unused temporary wires.\n", del_temp_wires_count); - - if (!del_wires_queue.empty()) - module->design->scratchpad_set_bool("opt.did_something", true); - - return !del_wires_queue.empty(); -} - -bool rmunused_module_init(RTLIL::Module *module, bool verbose) -{ - bool did_something = false; - - SigMap sigmap(module); - dict qbits; - - for (auto cell : module->cells()) - if (StaticCellTypes::Compat::internals_mem_ff(cell->type) && cell->hasPort(ID::Q)) - { - SigSpec sig = cell->getPort(ID::Q); - - for (int i = 0; i < GetSize(sig); i++) - { - SigBit bit = sig[i]; - - if (bit.wire == nullptr || bit.wire->attributes.count(ID::init) == 0) - continue; - - Const init = bit.wire->attributes.at(ID::init); - - if (i >= GetSize(init) || init[i] == State::Sx || init[i] == State::Sz) - continue; - - sigmap.add(bit); - qbits[bit] = init[i]; - } - } - - for (auto wire : module->wires()) - { - if (wire->attributes.count(ID::init) == 0) - continue; - - Const init = wire->attributes.at(ID::init); - - for (int i = 0; i < GetSize(wire) && i < GetSize(init); i++) - { - if (init[i] == State::Sx || init[i] == State::Sz) - continue; - - SigBit wire_bit = SigBit(wire, i); - SigBit mapped_wire_bit = sigmap(wire_bit); - - if (wire_bit == mapped_wire_bit) - goto next_wire; - - if (mapped_wire_bit.wire) { - if (qbits.count(mapped_wire_bit) == 0) - goto next_wire; - - if (qbits.at(mapped_wire_bit) != init[i]) - goto next_wire; - } - else { - if (mapped_wire_bit == State::Sx || mapped_wire_bit == State::Sz) - goto next_wire; - - if (mapped_wire_bit != init[i]) { - log_warning("Initial value conflict for %s resolving to %s but with init %s.\n", log_signal(wire_bit), log_signal(mapped_wire_bit), log_signal(init[i])); - goto next_wire; - } - } - } - - if (verbose) - log_debug(" removing redundant init attribute on %s.\n", log_id(wire)); - - wire->attributes.erase(ID::init); - did_something = true; - next_wire:; - } - - if (did_something) - module->design->scratchpad_set_bool("opt.did_something", true); - - return did_something; -} - -void rmunused_module(RTLIL::Module *module, bool purge_mode, bool verbose, bool rminit) -{ - if (verbose) - log("Finding unused cells or wires in module %s..\n", module->name); - - std::vector delcells; - for (auto cell : module->cells()) { - if (cell->type.in(ID($pos), ID($_BUF_), ID($buf)) && !cell->has_keep_attr()) { - bool is_signed = cell->type == ID($pos) && cell->getParam(ID::A_SIGNED).as_bool(); - RTLIL::SigSpec a = cell->getPort(ID::A); - RTLIL::SigSpec y = cell->getPort(ID::Y); - a.extend_u0(GetSize(y), is_signed); - - if (a.has_const(State::Sz)) { - SigSpec new_a; - SigSpec new_y; - for (int i = 0; i < GetSize(a); ++i) { - SigBit b = a[i]; - if (b == State::Sz) - continue; - new_a.append(b); - new_y.append(y[i]); - } - a = std::move(new_a); - y = std::move(new_y); - } - if (!y.empty()) - module->connect(y, a); - delcells.push_back(cell); - } else if (cell->type.in(ID($connect)) && !cell->has_keep_attr()) { - RTLIL::SigSpec a = cell->getPort(ID::A); - RTLIL::SigSpec b = cell->getPort(ID::B); - if (a.has_const() && !b.has_const()) - std::swap(a, b); - module->connect(a, b); - delcells.push_back(cell); - } else if (cell->type.in(ID($input_port)) && !cell->has_keep_attr()) { - delcells.push_back(cell); - } - } - for (auto cell : delcells) { - if (verbose) { - if (cell->type == ID($connect)) - log_debug(" removing connect cell `%s': %s <-> %s\n", cell->name, - log_signal(cell->getPort(ID::A)), log_signal(cell->getPort(ID::B))); - else if (cell->type == ID($input_port)) - log_debug(" removing input port marker cell `%s': %s\n", cell->name, - log_signal(cell->getPort(ID::Y))); - else - log_debug(" removing buffer cell `%s': %s = %s\n", cell->name, - log_signal(cell->getPort(ID::Y)), log_signal(cell->getPort(ID::A))); - } - module->remove(cell); - } - if (!delcells.empty()) - module->design->scratchpad_set_bool("opt.did_something", true); - - rmunused_module_cells(module, verbose); - while (rmunused_module_signals(module, purge_mode, verbose)) { } - - if (rminit && rmunused_module_init(module, verbose)) - while (rmunused_module_signals(module, purge_mode, verbose)) { } -} - -struct OptCleanPass : public Pass { - OptCleanPass() : Pass("opt_clean", "remove unused cells and wires") { } - void help() override - { - // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| - log("\n"); - log(" opt_clean [options] [selection]\n"); - log("\n"); - log("This pass identifies wires and cells that are unused and removes them. Other\n"); - log("passes often remove cells but leave the wires in the design or reconnect the\n"); - log("wires but leave the old cells in the design. This pass can be used to clean up\n"); - log("after the passes that do the actual work.\n"); - log("\n"); - log("This pass only operates on completely selected modules without processes.\n"); - log("\n"); - log(" -purge\n"); - log(" also remove internal nets if they have a public name\n"); - log("\n"); - } - void execute(std::vector args, RTLIL::Design *design) override - { - bool purge_mode = false; - - log_header(design, "Executing OPT_CLEAN pass (remove unused cells and wires).\n"); - log_push(); - - size_t argidx; - for (argidx = 1; argidx < args.size(); argidx++) { - if (args[argidx] == "-purge") { - purge_mode = true; - continue; - } - break; - } - extra_args(args, argidx, design); - - keep_cache.reset(design, purge_mode); - - ct_all.setup(design); - - count_rm_cells = 0; - count_rm_wires = 0; - - for (auto module : design->selected_whole_modules_warn()) { - if (module->has_processes_warn()) - continue; - rmunused_module(module, purge_mode, true, true); - } - - if (count_rm_cells > 0 || count_rm_wires > 0) - log("Removed %d unused cells and %d unused wires.\n", count_rm_cells, count_rm_wires); - - design->optimize(); - design->check(); - - keep_cache.reset(); - ct_all.clear(); - log_pop(); - - request_garbage_collection(); - } -} OptCleanPass; - -struct CleanPass : public Pass { - CleanPass() : Pass("clean", "remove unused cells and wires") { } - void help() override - { - // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| - log("\n"); - log(" clean [options] [selection]\n"); - log("\n"); - log("This is identical to 'opt_clean', but less verbose.\n"); - log("\n"); - log("When commands are separated using the ';;' token, this command will be executed\n"); - log("between the commands.\n"); - log("\n"); - log("When commands are separated using the ';;;' token, this command will be executed\n"); - log("in -purge mode between the commands.\n"); - log("\n"); - } - void execute(std::vector args, RTLIL::Design *design) override - { - bool purge_mode = false; - - size_t argidx; - for (argidx = 1; argidx < args.size(); argidx++) { - if (args[argidx] == "-purge") { - purge_mode = true; - continue; - } - break; - } - extra_args(args, argidx, design); - - keep_cache.reset(design); - - ct_all.setup(design); - - count_rm_cells = 0; - count_rm_wires = 0; - - for (auto module : design->selected_unboxed_whole_modules()) { - if (module->has_processes()) - continue; - rmunused_module(module, purge_mode, ys_debug(), true); - } - - log_suppressed(); - if (count_rm_cells > 0 || count_rm_wires > 0) - log("Removed %d unused cells and %d unused wires.\n", count_rm_cells, count_rm_wires); - - design->optimize(); - design->check(); - - keep_cache.reset(); - ct_all.clear(); - - request_garbage_collection(); - } -} CleanPass; - -PRIVATE_NAMESPACE_END diff --git a/passes/opt/opt_clean/Makefile.inc b/passes/opt/opt_clean/Makefile.inc new file mode 100644 index 000000000..b4bb5b070 --- /dev/null +++ b/passes/opt/opt_clean/Makefile.inc @@ -0,0 +1,10 @@ +OPT_CLEAN_OBJS = +OPT_CLEAN_OBJS += passes/opt/opt_clean/cells_all.o +OPT_CLEAN_OBJS += passes/opt/opt_clean/cells_temp.o +OPT_CLEAN_OBJS += passes/opt/opt_clean/wires.o +OPT_CLEAN_OBJS += passes/opt/opt_clean/inits.o +OPT_CLEAN_OBJS += passes/opt/opt_clean/opt_clean.o + +$(OPT_CLEAN_OBJS): passes/opt/opt_clean/opt_clean.h + +OBJS += $(OPT_CLEAN_OBJS) diff --git a/passes/opt/opt_clean/cells_all.cc b/passes/opt/opt_clean/cells_all.cc new file mode 100644 index 000000000..aa97851e3 --- /dev/null +++ b/passes/opt/opt_clean/cells_all.cc @@ -0,0 +1,373 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Claire Xenia Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/ffinit.h" +#include "kernel/yosys_common.h" +#include "passes/opt/opt_clean/opt_clean.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +unsigned int hash_bit(const SigBit &bit) { + return static_cast(hash_ops::hash(bit).yield()); +} + +SigMap wire_sigmap(const RTLIL::Module* mod) { + SigMap map; + for (auto &it : mod->connections_) { + for (int i = 0; i < GetSize(it.second); i++) { + if (it.second[i].wire != nullptr) + map.add(it.first[i], it.second[i]); + } + } + return map; +} + +struct WireDrivers; +// Maps from a SigBit to a unique driver cell. +struct WireDriver { + using Accumulated = WireDrivers; + SigBit bit; + int driver_cell; +}; +// Maps from a SigBit to one or more driver cells. +struct WireDrivers { + WireDrivers() : driver_cell(0) {} + WireDrivers(WireDriver driver) : bit(driver.bit), driver_cell(driver.driver_cell) {} + WireDrivers(SigBit bit) : bit(bit), driver_cell(0) {} + WireDrivers(WireDrivers &&other) = default; + + class const_iterator { + public: + const_iterator(const WireDrivers &drivers, bool end) + : driver_cell(drivers.driver_cell), in_extra_cells(end) { + if (drivers.extra_driver_cells) { + if (end) { + extra_it = drivers.extra_driver_cells->end(); + } else { + extra_it = drivers.extra_driver_cells->begin(); + } + } + } + int operator*() const { + if (in_extra_cells) + return **extra_it; + return driver_cell; + } + const_iterator& operator++() { + if (in_extra_cells) + ++*extra_it; + else + in_extra_cells = true; + return *this; + } + bool operator!=(const const_iterator &other) const { + return !(*this == other); + } + bool operator==(const const_iterator &other) const { + return in_extra_cells == other.in_extra_cells && + extra_it == other.extra_it; + } + private: + std::optional::iterator> extra_it; + int driver_cell; + bool in_extra_cells; + }; + + const_iterator begin() const { return const_iterator(*this, false); } + const_iterator end() const { return const_iterator(*this, true); } + + SigBit bit; + int driver_cell; + std::unique_ptr> extra_driver_cells; +}; +struct WireDriversKeyEquality { + bool operator()(const WireDrivers &a, const WireDrivers &b) const { + return a.bit == b.bit; + } +}; +struct WireDriversCollisionHandler { + void operator()(WireDrivers &incumbent, WireDrivers &new_value) const { + log_assert(new_value.extra_driver_cells == nullptr); + if (!incumbent.extra_driver_cells) + incumbent.extra_driver_cells.reset(new pool()); + incumbent.extra_driver_cells->insert(new_value.driver_cell); + } +}; +using Wire2Drivers = ShardedHashtable; + +struct ConflictLogs { + ShardedVector> logs; + ConflictLogs(ParallelDispatchThreadPool::Subpool &subpool) : logs(subpool) {} + void print_warnings(pool& used_raw_bits, const SigMap& wire_map, const RTLIL::Module* mod, CleanRunContext &clean_ctx) { + if (!logs.empty()) { + // We could do this in parallel but hopefully this is rare. + for (auto [_, cell] : mod->cells_) { + for (auto &[port, sig] : cell->connections()) { + if (clean_ctx.ct_all.cell_known(cell->type) && !clean_ctx.ct_all.cell_input(cell->type, port)) + continue; + for (auto raw_bit : wire_map(sig)) + used_raw_bits.insert(raw_bit); + } + } + for (std::pair &it : logs) { + if (used_raw_bits.count(it.first)) + log_warning("%s\n", it.second); + } + } + } +}; + +struct CellTraversal { + ConcurrentWorkQueue queue; + Wire2Drivers wire2driver; + dict> mem2cells; + CellTraversal(int num_threads) : queue(num_threads), wire2driver(), mem2cells() {} +}; + +struct CellAnalysis { + ShardedVector keep_wires; + std::vector> unused; + + CellAnalysis(AnalysisContext& actx) + : keep_wires(actx.subpool), unused(actx.mod->cells_size()) {} + + pool analyze_kept_wires(CellTraversal& traversal, const SigMap& sigmap, const SigMap& wire_map, int num_threads) { + // Also enqueue cells that drive kept wires into cell_queue + // and mark those cells as used + // and mark all bits of those wires as used + pool used_raw_bits; + int i = 0; + for (Wire *wire : keep_wires) { + for (auto bit : sigmap(wire)) { + const WireDrivers *drivers = traversal.wire2driver.find({{bit}, hash_bit(bit)}); + if (drivers != nullptr) + for (int cell_index : *drivers) + if (unused[cell_index].exchange(false, std::memory_order_relaxed)) { + ThreadIndex fake_thread_index = {i++ % num_threads}; + traversal.queue.push(fake_thread_index, cell_index); + } + } + for (auto raw_bit : SigSpec(wire)) + used_raw_bits.insert(wire_map(raw_bit)); + } + return used_raw_bits; + } + + void mark_used_and_enqueue(int cell_idx, ConcurrentWorkQueue& queue, const ParallelDispatchThreadPool::RunCtx &ctx) { + if (unused[cell_idx].exchange(false, std::memory_order_relaxed)) + queue.push(ctx, cell_idx); + } +}; + +ConflictLogs explore(CellAnalysis& analysis, CellTraversal& traversal, const SigMap& wire_map, AnalysisContext& actx, CleanRunContext &clean_ctx) { + ConflictLogs logs(actx.subpool); + Wire2Drivers::Builder wire2driver_builder(actx.subpool); + ShardedVector> mem2cells_vector(actx.subpool); + + // Enqueue kept cells into traversal.queue + // Prepare input cone traversal into traversal.wire2driver + // Prepare "input cone" traversal from memory to write port or meminit as analysis.mem2cells + // Also check driver conflicts + // Also mark cells unused to true unless keep (we override this later) + actx.subpool.run([&analysis, &traversal, &logs, &wire_map, &mem2cells_vector, &wire2driver_builder, &actx, &clean_ctx](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(actx.mod->cells_size())) { + Cell *cell = actx.mod->cell_at(i); + if (cell->type.in(ID($memwr), ID($memwr_v2), ID($meminit), ID($meminit_v2))) + mem2cells_vector.insert(ctx, {cell->getParam(ID::MEMID).decode_string(), i}); + + for (auto &it2 : cell->connections()) { + if (clean_ctx.ct_all.cell_known(cell->type) && !clean_ctx.ct_all.cell_output(cell->type, it2.first)) + continue; + for (auto raw_bit : it2.second) { + if (raw_bit.wire == nullptr) + continue; + auto bit = actx.assign_map(raw_bit); + if (bit.wire == nullptr && clean_ctx.ct_all.cell_known(cell->type)) { + std::string msg = stringf("Driver-driver conflict " + "for %s between cell %s.%s and constant %s in %s: Resolved using constant.", + log_signal(raw_bit), cell->name.unescape(), it2.first.unescape(), log_signal(bit), actx.mod->name.unescape()); + logs.logs.insert(ctx, {wire_map(raw_bit), msg}); + } + if (bit.wire != nullptr) + wire2driver_builder.insert(ctx, {{bit, i}, hash_bit(bit)}); + } + } + bool keep = clean_ctx.keep_cache.query(cell); + analysis.unused[i].store(!keep, std::memory_order_relaxed); + if (keep) + traversal.queue.push(ctx, i); + } + for (int i : ctx.item_range(actx.mod->wires_size())) { + Wire *wire = actx.mod->wire_at(i); + if (wire->port_output || wire->get_bool_attribute(ID::keep)) + analysis.keep_wires.insert(ctx, wire); + } + }); + // Finish by merging per-thread collected data + actx.subpool.run([&wire2driver_builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + wire2driver_builder.process(ctx); + }); + traversal.wire2driver = wire2driver_builder; + + for (std::pair &mem2cell : mem2cells_vector) + traversal.mem2cells[mem2cell.first].insert(mem2cell.second); + + return logs; +} + +struct MemAnalysis { + std::vector> unused; + dict indices; + MemAnalysis(const RTLIL::Module* mod) : unused(mod->memories.size()), indices() { + for (int i = 0; i < GetSize(mod->memories); ++i) { + indices[mod->memories.element(i)->first.str()] = i; + unused[i].store(true, std::memory_order_relaxed); + } + } +}; + +void fixup_unused_cells_and_mems(CellAnalysis& analysis, MemAnalysis& mem_analysis, CellTraversal& traversal, AnalysisContext& actx, CleanRunContext &clean_ctx) { + // Processes the cell queue in batches, traversing input cones by enqueuing more cells + // Discover and mark used memories and cells + actx.subpool.run([&analysis, &mem_analysis, &traversal, &actx, &clean_ctx](const ParallelDispatchThreadPool::RunCtx &ctx) { + pool bits; + pool mems; + while (true) { + std::vector cell_indices = traversal.queue.pop_batch(ctx); + if (cell_indices.empty()) + return; + for (auto cell_index : cell_indices) { + Cell *cell = actx.mod->cell_at(cell_index); + for (auto &it : cell->connections()) + if (!clean_ctx.ct_all.cell_known(cell->type) || clean_ctx.ct_all.cell_input(cell->type, it.first)) + for (auto bit : actx.assign_map(it.second)) + bits.insert(bit); + + if (cell->type.in(ID($memrd), ID($memrd_v2))) { + std::string mem_id = cell->getParam(ID::MEMID).decode_string(); + if (mem_analysis.indices.count(mem_id)) { + int mem_index = mem_analysis.indices[mem_id]; + // Memory fixup + if (mem_analysis.unused[mem_index].exchange(false, std::memory_order_relaxed)) + mems.insert(mem_id); + } + } + } + + for (auto bit : bits) { + // Cells fixup + const WireDrivers *drivers = traversal.wire2driver.find({{bit}, hash_bit(bit)}); + if (drivers != nullptr) + for (int cell_idx : *drivers) + analysis.mark_used_and_enqueue(cell_idx, traversal.queue, ctx); + } + bits.clear(); + + for (auto mem : mems) { + if (traversal.mem2cells.count(mem) == 0) + continue; + // Cells fixup + for (int cell_idx : traversal.mem2cells.at(mem)) + analysis.mark_used_and_enqueue(cell_idx, traversal.queue, ctx); + } + mems.clear(); + } + }); +} + +pool all_unused_cells(const Module *mod, const CellAnalysis& analysis, Wire2Drivers& wire2driver, ParallelDispatchThreadPool::Subpool &subpool) { + pool unused_cells; + ShardedVector sharded_unused_cells(subpool); + subpool.run([mod, &analysis, &wire2driver, &sharded_unused_cells](const ParallelDispatchThreadPool::RunCtx &ctx) { + // Parallel destruction of `wire2driver` + wire2driver.clear(ctx); + for (int i : ctx.item_range(mod->cells_size())) + if (analysis.unused[i].load(std::memory_order_relaxed)) + sharded_unused_cells.insert(ctx, i); + }); + for (int cell_index : sharded_unused_cells) + unused_cells.insert(mod->cell_at(cell_index)); + unused_cells.sort(RTLIL::sort_by_name_id()); + return unused_cells; +} + +void remove_cells(RTLIL::Module* mod, FfInitVals& ffinit, const pool& cells, bool verbose, RmStats& stats) { + for (auto cell : cells) { + if (verbose) + log_debug(" removing unused `%s' cell `%s'.\n", cell->type, cell->name); + mod->design->scratchpad_set_bool("opt.did_something", true); + if (cell->is_builtin_ff()) + ffinit.remove_init(cell->getPort(ID::Q)); + mod->remove(cell); + stats.count_rm_cells++; + } +} + +void remove_mems(RTLIL::Module* mod, const MemAnalysis& mem_analysis, bool verbose) { + for (const auto &it : mem_analysis.indices) { + if (!mem_analysis.unused[it.second].load(std::memory_order_relaxed)) + continue; + RTLIL::IdString id(it.first); + if (verbose) + log_debug(" removing unused memory `%s'.\n", id.unescape()); + delete mod->memories.at(id); + mod->memories.erase(id); + } +} + +PRIVATE_NAMESPACE_END + +YOSYS_NAMESPACE_BEGIN + +void rmunused_module_cells(Module *module, ParallelDispatchThreadPool::Subpool &subpool, CleanRunContext &clean_ctx) +{ + AnalysisContext actx(module, subpool); + + // Used for logging warnings only + SigMap wire_map = wire_sigmap(module); + + CellAnalysis analysis(actx); + CellTraversal traversal(subpool.num_threads()); + // Mark all unkept cells as unused initially + // and queue up cell traversal from those cells + auto logs = explore(analysis, traversal, wire_map, actx, clean_ctx); + // Mark cells that drive kept wires into cell_queue and those bits as used + // and queue up cell traversal from those cells + pool used_raw_bits = analysis.analyze_kept_wires(traversal, actx.assign_map, wire_map, subpool.num_threads()); + + // Mark all memories as unused initially + MemAnalysis mem_analysis(module); + // Marked all used cells and mems as used by traversing with cell queue + fixup_unused_cells_and_mems(analysis, mem_analysis, traversal, actx, clean_ctx); + // Analyses are now fully correct + + // unused_cells.contains(foo) iff analysis.used[foo] == true + // wire2driver is passed in only to destroy it + pool unused_cells = all_unused_cells(module, analysis, traversal.wire2driver, subpool); + + FfInitVals ffinit; + ffinit.set_parallel(&actx.assign_map, subpool.thread_pool(), module); + // Now we know what to kill + remove_cells(module, ffinit, unused_cells, clean_ctx.flags.verbose, clean_ctx.stats); + remove_mems(module, mem_analysis, clean_ctx.flags.verbose); + logs.print_warnings(used_raw_bits, wire_map, module, clean_ctx); +} + +YOSYS_NAMESPACE_END diff --git a/passes/opt/opt_clean/cells_temp.cc b/passes/opt/opt_clean/cells_temp.cc new file mode 100644 index 000000000..b325b68d9 --- /dev/null +++ b/passes/opt/opt_clean/cells_temp.cc @@ -0,0 +1,104 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Claire Xenia Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "passes/opt/opt_clean/opt_clean.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +bool is_signed(RTLIL::Cell* cell) { + return cell->type == ID($pos) && cell->getParam(ID::A_SIGNED).as_bool(); +} + +bool trim_buf(RTLIL::Cell* cell, ShardedVector& new_connections, const ParallelDispatchThreadPool::RunCtx &ctx) { + RTLIL::SigSpec a = cell->getPort(ID::A); + RTLIL::SigSpec y = cell->getPort(ID::Y); + a.extend_u0(GetSize(y), is_signed(cell)); + + if (a.has_const(State::Sz)) { + RTLIL::SigSpec new_a; + RTLIL::SigSpec new_y; + for (int i = 0; i < GetSize(a); ++i) { + RTLIL::SigBit b = a[i]; + if (b == State::Sz) + return false; + new_a.append(b); + new_y.append(y[i]); + } + a = std::move(new_a); + y = std::move(new_y); + } + if (!y.empty()) + new_connections.insert(ctx, {y, a}); + return true; +} + +bool remove(ShardedVector& cells, RTLIL::Module* mod, bool verbose) { + bool did_something = false; + for (RTLIL::Cell *cell : cells) { + if (verbose) { + if (cell->type == ID($connect)) + log_debug(" removing connect cell `%s': %s <-> %s\n", cell->name, + log_signal(cell->getPort(ID::A)), log_signal(cell->getPort(ID::B))); + else if (cell->type == ID($input_port)) + log_debug(" removing input port marker cell `%s': %s\n", cell->name, + log_signal(cell->getPort(ID::Y))); + else + log_debug(" removing buffer cell `%s': %s = %s\n", cell->name, + log_signal(cell->getPort(ID::Y)), log_signal(cell->getPort(ID::A))); + } + mod->remove(cell); + did_something = true; + } + return did_something; +} +PRIVATE_NAMESPACE_END +YOSYS_NAMESPACE_BEGIN + +void remove_temporary_cells(RTLIL::Module *module, ParallelDispatchThreadPool::Subpool &subpool, bool verbose) +{ + ShardedVector delcells(subpool); + ShardedVector new_connections(subpool); + const RTLIL::Module *const_module = module; + subpool.run([const_module, &delcells, &new_connections](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(const_module->cells_size())) { + RTLIL::Cell *cell = const_module->cell_at(i); + if (cell->type.in(ID($pos), ID($_BUF_), ID($buf)) && !cell->has_keep_attr()) { + if (trim_buf(cell, new_connections, ctx)) + delcells.insert(ctx, cell); + } else if (cell->type.in(ID($connect)) && !cell->has_keep_attr()) { + RTLIL::SigSpec a = cell->getPort(ID::A); + RTLIL::SigSpec b = cell->getPort(ID::B); + if (a.has_const() && !b.has_const()) + std::swap(a, b); + new_connections.insert(ctx, {a, b}); + delcells.insert(ctx, cell); + } else if (cell->type.in(ID($input_port)) && !cell->has_keep_attr()) { + delcells.insert(ctx, cell); + } + } + }); + for (RTLIL::SigSig &connection : new_connections) { + module->connect(connection); + } + if (remove(delcells, module, verbose)) + module->design->scratchpad_set_bool("opt.did_something", true); +} + +YOSYS_NAMESPACE_END diff --git a/passes/opt/opt_clean/inits.cc b/passes/opt/opt_clean/inits.cc new file mode 100644 index 000000000..70c2ef9e2 --- /dev/null +++ b/passes/opt/opt_clean/inits.cc @@ -0,0 +1,137 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Claire Xenia Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "passes/opt/opt_clean/opt_clean.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +ShardedVector> build_inits(AnalysisContext& actx) { + ShardedVector> results(actx.subpool); + actx.subpool.run([&results, &actx](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(actx.mod->cells_size())) { + RTLIL::Cell *cell = actx.mod->cell_at(i); + if (StaticCellTypes::Compat::internals_mem_ff(cell->type) && cell->hasPort(ID::Q)) + { + SigSpec sig = cell->getPort(ID::Q); + + for (int i = 0; i < GetSize(sig); i++) + { + SigBit bit = sig[i]; + + if (bit.wire == nullptr || bit.wire->attributes.count(ID::init) == 0) + continue; + + Const init = bit.wire->attributes.at(ID::init); + + if (i >= GetSize(init) || init[i] == State::Sx || init[i] == State::Sz) + continue; + + results.insert(ctx, {bit, init[i]}); + } + } + } + }); + return results; +} + +dict qbits_from_inits(ShardedVector>& inits, SigMap& assign_map) { + dict qbits; + for (std::pair &p : inits) { + assign_map.add(p.first); + qbits[p.first] = p.second; + } + return qbits; +} + +ShardedVector deferred_init_transfer(const dict& qbits, AnalysisContext& actx) { + ShardedVector wire_results(actx.subpool); + actx.subpool.run([&actx, &qbits, &wire_results](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int j : ctx.item_range(actx.mod->wires_size())) { + RTLIL::Wire *wire = actx.mod->wire_at(j); + if (wire->attributes.count(ID::init) == 0) + continue; + Const init = wire->attributes.at(ID::init); + + for (int i = 0; i < GetSize(wire) && i < GetSize(init); i++) + { + if (init[i] == State::Sx || init[i] == State::Sz) + continue; + + SigBit wire_bit = SigBit(wire, i); + SigBit mapped_wire_bit = actx.assign_map(wire_bit); + + if (wire_bit == mapped_wire_bit) + goto next_wire; + + if (mapped_wire_bit.wire) { + if (qbits.count(mapped_wire_bit) == 0) + goto next_wire; + + if (qbits.at(mapped_wire_bit) != init[i]) + goto next_wire; + } + else { + if (mapped_wire_bit == State::Sx || mapped_wire_bit == State::Sz) + goto next_wire; + + if (mapped_wire_bit != init[i]) { + log_warning("Initial value conflict for %s resolving to %s but with init %s.\n", log_signal(wire_bit), log_signal(mapped_wire_bit), log_signal(init[i])); + goto next_wire; + } + } + } + wire_results.insert(ctx, wire); + + next_wire:; + } + }); + return wire_results; +} + +bool remove_redundant_inits(ShardedVector wires, bool verbose) { + bool did_something = false; + for (RTLIL::Wire *wire : wires) { + if (verbose) + log_debug(" removing redundant init attribute on %s.\n", log_id(wire)); + wire->attributes.erase(ID::init); + did_something = true; + } + return did_something; +} + +PRIVATE_NAMESPACE_END +YOSYS_NAMESPACE_BEGIN + +bool rmunused_module_init(RTLIL::Module *module, ParallelDispatchThreadPool::Subpool &subpool, bool verbose) +{ + AnalysisContext actx(module, subpool); + + ShardedVector> inits = build_inits(actx); + dict qbits = qbits_from_inits(inits, actx.assign_map); + ShardedVector inits_to_transfer = deferred_init_transfer(qbits, actx); + + bool did_something = remove_redundant_inits(inits_to_transfer, verbose); + if (did_something) + module->design->scratchpad_set_bool("opt.did_something", true); + + return did_something; +} + +YOSYS_NAMESPACE_END diff --git a/passes/opt/opt_clean/keep_cache.h b/passes/opt/opt_clean/keep_cache.h new file mode 100644 index 000000000..7405f24fc --- /dev/null +++ b/passes/opt/opt_clean/keep_cache.h @@ -0,0 +1,167 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Claire Xenia Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/rtlil.h" +#include "kernel/sigtools.h" +#include "kernel/threading.h" +#include "kernel/celltypes.h" +#include "kernel/yosys_common.h" + +#ifndef OPT_CLEAN_KEEP_CACHE_H +#define OPT_CLEAN_KEEP_CACHE_H + +YOSYS_NAMESPACE_BEGIN + +struct KeepCache +{ + dict keep_modules; + bool purge_mode; + + KeepCache(bool purge_mode, ParallelDispatchThreadPool &thread_pool, const std::vector &selected_modules) + : purge_mode(purge_mode) { + + std::vector scan_modules_worklist; + dict> dependents; + std::vector propagate_kept_modules_worklist; + for (RTLIL::Module *module : selected_modules) { + if (keep_modules.count(module)) + continue; + bool keep = scan_module(module, thread_pool, dependents, ALL_CELLS, scan_modules_worklist); + keep_modules[module] = keep; + if (keep) + propagate_kept_modules_worklist.push_back(module); + } + + while (!scan_modules_worklist.empty()) { + RTLIL::Module *module = scan_modules_worklist.back(); + scan_modules_worklist.pop_back(); + if (keep_modules.count(module)) + continue; + bool keep = scan_module(module, thread_pool, dependents, MINIMUM_CELLS, scan_modules_worklist); + keep_modules[module] = keep; + if (keep) + propagate_kept_modules_worklist.push_back(module); + } + + while (!propagate_kept_modules_worklist.empty()) { + RTLIL::Module *module = propagate_kept_modules_worklist.back(); + propagate_kept_modules_worklist.pop_back(); + for (RTLIL::Module *dependent : dependents[module]) { + if (keep_modules[dependent]) + continue; + keep_modules[dependent] = true; + propagate_kept_modules_worklist.push_back(dependent); + } + } + } + + bool query(Cell *cell) const + { + if (keep_cell(cell, purge_mode)) + return true; + if (cell->type.in(ID($specify2), ID($specify3), ID($specrule))) + return true; + if (cell->module && cell->module->design) { + RTLIL::Module *cell_module = cell->module->design->module(cell->type); + return cell_module != nullptr && keep_modules.at(cell_module); + } + return false; + } + +private: + enum ScanCells { + // Scan every cell to see if it uses a module that is kept. + ALL_CELLS, + // Stop scanning cells if we determine early that this module is kept. + MINIMUM_CELLS, + }; + bool scan_module(Module *module, ParallelDispatchThreadPool &thread_pool, dict> &dependents, + ScanCells scan_cells, std::vector &worklist) const + { + MonotonicFlag keep_module; + if (module->get_bool_attribute(ID::keep)) { + if (scan_cells == MINIMUM_CELLS) + return true; + keep_module.set(); + } + + ParallelDispatchThreadPool::Subpool subpool(thread_pool, ThreadPool::work_pool_size(0, module->cells_size(), 1000)); + ShardedVector deps(subpool); + const RTLIL::Module *const_module = module; + bool purge_mode = this->purge_mode; + subpool.run([purge_mode, const_module, scan_cells, &deps, &keep_module](const ParallelDispatchThreadPool::RunCtx &ctx) { + bool keep = false; + for (int i : ctx.item_range(const_module->cells_size())) { + Cell *cell = const_module->cell_at(i); + if (keep_cell(cell, purge_mode)) { + if (scan_cells == MINIMUM_CELLS) { + keep_module.set(); + return; + } + keep = true; + } + if (const_module->design) { + RTLIL::Module *cell_module = const_module->design->module(cell->type); + if (cell_module != nullptr) + deps.insert(ctx, cell_module); + } + } + if (keep) { + keep_module.set(); + return; + } + for (int i : ctx.item_range(const_module->wires_size())) { + Wire *wire = const_module->wire_at(i); + if (wire->get_bool_attribute(ID::keep)) { + keep_module.set(); + return; + } + } + }); + if (scan_cells == MINIMUM_CELLS && keep_module.load()) + return true; + for (Module *dep : deps) { + dependents[dep].push_back(module); + worklist.push_back(dep); + } + return keep_module.load(); + } + + static bool keep_cell(Cell *cell, bool purge_mode) + { + if (cell->type.in(ID($assert), ID($assume), ID($live), ID($fair), ID($cover))) + return true; + + if (cell->type.in(ID($overwrite_tag))) + return true; + + if (cell->type == ID($print) || cell->type == ID($check)) + return true; + + if (cell->has_keep_attr()) + return true; + + if (!purge_mode && cell->type == ID($scopeinfo)) + return true; + return false; + } +}; + +YOSYS_NAMESPACE_END +#endif /* OPT_CLEAN_KEEP_CACHE_H */ diff --git a/passes/opt/opt_clean/opt_clean.cc b/passes/opt/opt_clean/opt_clean.cc new file mode 100644 index 000000000..87597d721 --- /dev/null +++ b/passes/opt/opt_clean/opt_clean.cc @@ -0,0 +1,151 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Claire Xenia Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/register.h" +#include "kernel/log.h" +#include "passes/opt/opt_clean/opt_clean.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +void rmunused_module(RTLIL::Module *module, bool rminit, CleanRunContext &clean_ctx) +{ + if (clean_ctx.flags.verbose) + log("Finding unused cells or wires in module %s..\n", module->name); + + // Use no more than one worker per thousand cells, rounded down, so + // we only start multithreading with at least 2000 cells. + int num_worker_threads = ThreadPool::work_pool_size(0, module->cells_size(), 10000); + ParallelDispatchThreadPool::Subpool subpool(clean_ctx.thread_pool, num_worker_threads); + remove_temporary_cells(module, subpool, clean_ctx.flags.verbose); + rmunused_module_cells(module, subpool, clean_ctx); + while (rmunused_module_signals(module, subpool, clean_ctx)) { } + + if (rminit && rmunused_module_init(module, subpool, clean_ctx.flags.verbose)) + while (rmunused_module_signals(module, subpool, clean_ctx)) { } +} + +struct OptCleanPass : public Pass { + OptCleanPass() : Pass("opt_clean", "remove unused cells and wires") { } + void help() override + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" opt_clean [options] [selection]\n"); + log("\n"); + log("This pass identifies wires and cells that are unused and removes them. Other\n"); + log("passes often remove cells but leave the wires in the design or reconnect the\n"); + log("wires but leave the old cells in the design. This pass can be used to clean up\n"); + log("after the passes that do the actual work.\n"); + log("\n"); + log("This pass only operates on completely selected modules without processes.\n"); + log("\n"); + log(" -purge\n"); + log(" also remove internal nets if they have a public name\n"); + log("\n"); + } + void execute(std::vector args, RTLIL::Design *design) override + { + bool purge_mode = false; + + log_header(design, "Executing OPT_CLEAN pass (remove unused cells and wires).\n"); + log_push(); + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) { + if (args[argidx] == "-purge") { + purge_mode = true; + continue; + } + break; + } + extra_args(args, argidx, design); + + { + std::vector selected_modules; + for (auto module : design->selected_whole_modules_warn()) + if (!module->has_processes_warn()) + selected_modules.push_back(module); + CleanRunContext clean_ctx(design, selected_modules, {purge_mode, true}); + for (auto module : selected_modules) + rmunused_module(module, true, clean_ctx); + clean_ctx.stats.log(); + + design->optimize(); + design->check(); + } + + log_pop(); + + request_garbage_collection(); + } +} OptCleanPass; + +struct CleanPass : public Pass { + CleanPass() : Pass("clean", "remove unused cells and wires") { } + void help() override + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" clean [options] [selection]\n"); + log("\n"); + log("This is identical to 'opt_clean', but less verbose.\n"); + log("\n"); + log("When commands are separated using the ';;' token, this command will be executed\n"); + log("between the commands.\n"); + log("\n"); + log("When commands are separated using the ';;;' token, this command will be executed\n"); + log("in -purge mode between the commands.\n"); + log("\n"); + } + void execute(std::vector args, RTLIL::Design *design) override + { + bool purge_mode = false; + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) { + if (args[argidx] == "-purge") { + purge_mode = true; + continue; + } + break; + } + extra_args(args, argidx, design); + + { + std::vector selected_modules; + for (auto module : design->selected_unboxed_whole_modules()) + if (!module->has_processes()) + selected_modules.push_back(module); + CleanRunContext clean_ctx(design, selected_modules, {purge_mode, ys_debug()}); + for (auto module : selected_modules) + rmunused_module(module, true, clean_ctx); + + log_suppressed(); + clean_ctx.stats.log(); + + design->optimize(); + design->check(); + } + + request_garbage_collection(); + } +} CleanPass; + +PRIVATE_NAMESPACE_END diff --git a/passes/opt/opt_clean/opt_clean.h b/passes/opt/opt_clean/opt_clean.h new file mode 100644 index 000000000..c48a8188a --- /dev/null +++ b/passes/opt/opt_clean/opt_clean.h @@ -0,0 +1,92 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Claire Xenia Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "kernel/rtlil.h" +#include "kernel/threading.h" +#include "passes/opt/opt_clean/keep_cache.h" + +#ifndef OPT_CLEAN_SHARED_H +#define OPT_CLEAN_SHARED_H + +YOSYS_NAMESPACE_BEGIN + +struct AnalysisContext { + SigMap assign_map; + const RTLIL::Module *mod; + ParallelDispatchThreadPool::Subpool &subpool; + AnalysisContext(RTLIL::Module* m, ParallelDispatchThreadPool::Subpool &p) : assign_map(m), mod(m), subpool(p) {} +}; + +struct RmStats { + int count_rm_cells = 0; + int count_rm_wires = 0; + + void log() + { + if (count_rm_cells > 0 || count_rm_wires > 0) + YOSYS_NAMESPACE_PREFIX log("Removed %d unused cells and %d unused wires.\n", count_rm_cells, count_rm_wires); + } +}; + +struct Flags { + bool purge = false; + bool verbose = false; +}; +struct CleanRunContext { + static constexpr auto ct_reg = StaticCellTypes::Categories::join( + StaticCellTypes::Compat::mem_ff, + StaticCellTypes::categories.is_anyinit); + NewCellTypes ct_all; + RmStats stats; + ParallelDispatchThreadPool thread_pool; + KeepCache keep_cache; + Flags flags; + +private: + // Helper to compute thread pool size + static int compute_thread_pool_size(const std::vector& selected_modules) { + int thread_pool_size = 0; + for (auto module : selected_modules) + thread_pool_size = std::max(thread_pool_size, + ThreadPool::work_pool_size(0, module->cells_size(), 10000)); + return thread_pool_size; + } + +public: + CleanRunContext(RTLIL::Design* design, const std::vector& selected_modules, Flags f) + : thread_pool(compute_thread_pool_size(selected_modules)), + keep_cache(f.purge, thread_pool, selected_modules), + flags(f) + { + ct_all.setup(design); + } + + ~CleanRunContext() { + ct_all.clear(); + } +}; + +void remove_temporary_cells(RTLIL::Module *module, ParallelDispatchThreadPool::Subpool &subpool, bool verbose); +void rmunused_module_cells(Module *module, ParallelDispatchThreadPool::Subpool &subpool, CleanRunContext &clean_ctx); +bool rmunused_module_signals(RTLIL::Module *module, ParallelDispatchThreadPool::Subpool &subpool, CleanRunContext &clean_ctx); +bool rmunused_module_init(RTLIL::Module *module, ParallelDispatchThreadPool::Subpool &subpool, bool verbose); + +YOSYS_NAMESPACE_END + +#endif /* OPT_CLEAN_SHARED_H */ diff --git a/passes/opt/opt_clean/wires.cc b/passes/opt/opt_clean/wires.cc new file mode 100644 index 000000000..28c792936 --- /dev/null +++ b/passes/opt/opt_clean/wires.cc @@ -0,0 +1,585 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Claire Xenia Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "passes/opt/opt_clean/opt_clean.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +// No collision handler for these, since we will use them such that collisions don't happen +struct ShardedSigBit { + using Accumulated = ShardedSigBit; + RTLIL::SigBit bit; + ShardedSigBit() = default; + ShardedSigBit(const RTLIL::SigBit &bit) : bit(bit) {} +}; +struct ShardedSigBitEquality { + bool operator()(const ShardedSigBit &b1, const ShardedSigBit &b2) const { + return b1.bit == b2.bit; + } +}; +using ShardedSigPool = ShardedHashtable>; + +struct ShardedSigSpec { + using Accumulated = ShardedSigSpec; + RTLIL::SigSpec spec; + ShardedSigSpec() = default; + ShardedSigSpec(RTLIL::SigSpec spec) : spec(std::move(spec)) {} + ShardedSigSpec(ShardedSigSpec &&) = default; +}; +struct ShardedSigSpecEquality { + bool operator()(const ShardedSigSpec &s1, const ShardedSigSpec &s2) const { + return s1.spec == s2.spec; + } +}; +using ShardedSigSpecPool = ShardedHashtable>; + +struct ExactCellWires { + const ShardedSigSpecPool &exact_cells; + const SigMap &assign_map; + dict cache; + + ExactCellWires(const ShardedSigSpecPool &exact_cells, const SigMap &assign_map) : exact_cells(exact_cells), assign_map(assign_map) {} + void cache_result_for_bit(const SigBit &bit) { + if (bit.wire != nullptr) + (void)is_exactly_cell_driven(bit.wire); + } + bool is_exactly_cell_driven(RTLIL::Wire *wire) { + if (wire->port_input) + return true; + auto it = cache.find(wire); + if (it != cache.end()) + return it->second; + SigSpec sig = assign_map(wire); + bool direct = exact_cells.find({sig, sig.hash_into(Hasher()).yield()}) != nullptr; + cache.insert({wire, direct}); + return direct; + } + void cache_all(ShardedVector &bits) { + for (RTLIL::SigBit candidate : bits) { + cache_result_for_bit(candidate); + cache_result_for_bit(assign_map(candidate)); + } + + } +}; + +int count_nontrivial_wire_attrs(RTLIL::Wire *w) +{ + int count = w->attributes.size(); + count -= w->attributes.count(ID::src); + count -= w->attributes.count(ID::hdlname); + count -= w->attributes.count(ID::scopename); + count -= w->attributes.count(ID::unused_bits); + return count; +} + +// Should we pick `s2` over `s1` to represent a signal? +bool compare_signals(const RTLIL::SigBit &s1, const RTLIL::SigBit &s2, const ShardedSigPool ®s, const ShardedSigPool &conns, ExactCellWires &cell_wires) +{ + if (s1 == s2) + return false; + + RTLIL::Wire *w1 = s1.wire; + RTLIL::Wire *w2 = s2.wire; + + if (w1 == NULL || w2 == NULL) + return w2 == NULL; + + if (w1->port_input != w2->port_input) + return w2->port_input; + + if ((w1->port_input && w1->port_output) != (w2->port_input && w2->port_output)) + return !(w2->port_input && w2->port_output); + + if (w1->name.isPublic() && w2->name.isPublic()) { + ShardedSigPool::AccumulatedValue s1_val = {s1, s1.hash_top().yield()}; + ShardedSigPool::AccumulatedValue s2_val = {s2, s2.hash_top().yield()}; + bool regs1 = regs.find(s1_val) != nullptr; + bool regs2 = regs.find(s2_val) != nullptr; + if (regs1 != regs2) + return regs2; + bool w1_exact = cell_wires.is_exactly_cell_driven(w1); + bool w2_exact = cell_wires.is_exactly_cell_driven(w2); + if (w1_exact != w2_exact) + return w2_exact; + bool conns1 = conns.find(s1_val) != nullptr; + bool conns2 = conns.find(s2_val) != nullptr; + if (conns1 != conns2) + return conns2; + } + + if (w1 == w2) + return s2.offset < s1.offset; + + if (w1->port_output != w2->port_output) + return w2->port_output; + + if (w1->name[0] != w2->name[0]) + return w2->name.isPublic(); + + int attrs1 = count_nontrivial_wire_attrs(w1); + int attrs2 = count_nontrivial_wire_attrs(w2); + + if (attrs1 != attrs2) + return attrs2 > attrs1; + + return w2->name.lt_by_name(w1->name); +} + +bool check_public_name(RTLIL::IdString id) +{ + if (id.begins_with("$")) + return false; + const std::string &id_str = id.str(); + if (id.begins_with("\\_") && (id.ends_with("_") || id_str.find("_[") != std::string::npos)) + return false; + if (id_str.find(".$") != std::string::npos) + return false; + return true; +} + +void add_spec(ShardedSigPool::Builder &builder, const ThreadIndex &thread, const RTLIL::SigSpec &spec) { + for (SigBit bit : spec) + if (bit.wire != nullptr) + builder.insert(thread, {bit, bit.hash_top().yield()}); +} + +bool check_any(const ShardedSigPool &sigs, const RTLIL::SigSpec &spec) { + for (SigBit b : spec) + if (sigs.find({b, b.hash_top().yield()}) != nullptr) + return true; + return false; +} + +bool check_all(const ShardedSigPool &sigs, const RTLIL::SigSpec &spec) { + for (SigBit b : spec) + if (sigs.find({b, b.hash_top().yield()}) == nullptr) + return false; + return true; +} + +struct UpdateConnection { + RTLIL::Cell *cell; + RTLIL::IdString port; + RTLIL::SigSpec spec; +}; +void fixup_cell_ports(ShardedVector &update_connections) +{ + for (UpdateConnection &update : update_connections) + update.cell->connections_.at(update.port) = std::move(update.spec); +} + +struct InitBits { + dict values; + // Wires that appear in the keys of the `values` dict + pool wires; + + // Set init attributes on all wires of a connected group + void apply_normalised_inits() { + for (RTLIL::Wire *wire : wires) { + bool found = false; + Const val(State::Sx, wire->width); + for (int i = 0; i < wire->width; i++) { + auto it = values.find(RTLIL::SigBit(wire, i)); + if (it != values.end()) { + val.set(i, it->second); + found = true; + } + } + if (found) + wire->attributes[ID::init] = val; + } + } +}; +static InitBits consume_inits(ShardedVector &initialized_wires, const SigMap &assign_map) +{ + InitBits init_bits; + for (RTLIL::Wire *initialized_wire : initialized_wires) { + auto it = initialized_wire->attributes.find(ID::init); + RTLIL::Const &val = it->second; + SigSpec sig = assign_map(initialized_wire); + for (int i = 0; i < GetSize(val) && i < GetSize(sig); i++) + if (val[i] != State::Sx && sig[i].wire != nullptr) { + init_bits.values[sig[i]] = val[i]; + init_bits.wires.insert(sig[i].wire); + } + initialized_wire->attributes.erase(it); + } + return init_bits; +} + +/** + * What kinds of things are signals connected to? + * Helps pick representatives out of groups of connected signals */ +struct SigConnKinds { + // Wire bits directly driven by registers (with clk2fflogic exception) + ShardedSigPool raw_registers; + // Wire bits directly connected to any cell port + ShardedSigPool raw_cell_connected; + + // Signals exactly driven by a known cell output, + // this will influence only our choice of representatives. + // A signal is exactly driven by a cell output iff all its bits are driven by this output + // and all bits of this output drive a bit of this signal. + // Additionally, all signals that sigmap to this signal are exactly driven by the port, too + ShardedSigSpecPool exact_cells; + + SigConnKinds(bool purge_mode, const AnalysisContext& actx, CleanRunContext& clean_ctx) { + ShardedSigPool::Builder raw_register_builder(actx.subpool); + ShardedSigPool::Builder raw_cell_connected_builder(actx.subpool); + ShardedSigSpecPool::Builder exact_cell_output_builder(actx.subpool); + actx.subpool.run([&exact_cell_output_builder, &raw_register_builder, &raw_cell_connected_builder, purge_mode, &actx, &clean_ctx](const ParallelDispatchThreadPool::RunCtx &ctx) { + + for (int i : ctx.item_range(actx.mod->cells_size())) { + RTLIL::Cell *cell = actx.mod->cell_at(i); + if (!purge_mode) { + if (clean_ctx.ct_reg(cell->type)) { + // Improve witness signal naming when clk2fflogic used + // see commit message e36c71b5 + bool clk2fflogic = cell->get_bool_attribute(ID::clk2fflogic); + for (auto &[port, sig] : cell->connections()) + if (clk2fflogic ? port == ID::D : clean_ctx.ct_all.cell_output(cell->type, port)) + add_spec(raw_register_builder, ctx, sig); + } + for (auto &[_, sig] : cell->connections()) + add_spec(raw_cell_connected_builder, ctx, sig); + } + if (clean_ctx.ct_all.cell_known(cell->type)) + for (auto &[port, sig] : cell->connections()) + if (clean_ctx.ct_all.cell_output(cell->type, port)) { + RTLIL::SigSpec spec = actx.assign_map(sig); + unsigned int hash = spec.hash_into(Hasher()).yield(); + exact_cell_output_builder.insert(ctx, {std::move(spec), hash}); + } + } + }); + actx.subpool.run([&raw_register_builder, &raw_cell_connected_builder, &exact_cell_output_builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + raw_register_builder.process(ctx); + raw_cell_connected_builder.process(ctx); + exact_cell_output_builder.process(ctx); + }); + raw_registers = raw_register_builder; + raw_cell_connected = raw_cell_connected_builder; + exact_cells = exact_cell_output_builder; + } + void clear(const ParallelDispatchThreadPool::RunCtx &ctx) { + raw_registers.clear(ctx); + raw_cell_connected.clear(ctx); + exact_cells.clear(ctx); + } +}; + +ShardedVector build_candidates(ExactCellWires& cell_wires, const SigConnKinds& sig_analysis, const AnalysisContext& actx) { + ShardedVector candidates(actx.subpool); + actx.subpool.run([&actx, &sig_analysis, &candidates, &cell_wires](const ParallelDispatchThreadPool::RunCtx &ctx) { + std::optional local_cell_wires; + ExactCellWires *this_thread_cell_wires = &cell_wires; + if (ctx.thread_num > 0) { + local_cell_wires.emplace(sig_analysis.exact_cells, actx.assign_map); + this_thread_cell_wires = &local_cell_wires.value(); + } + for (int i : ctx.item_range(actx.mod->wires_size())) { + RTLIL::Wire *wire = actx.mod->wire_at(i); + for (int j = 0; j < wire->width; ++j) { + RTLIL::SigBit s1(wire, j); + RTLIL::SigBit s2 = actx.assign_map(s1); + if (compare_signals(s2, s1, sig_analysis.raw_registers, sig_analysis.raw_cell_connected, *this_thread_cell_wires)) + candidates.insert(ctx, s1); + } + } + }); + return candidates; +} + +void update_assign_map(SigMap& assign_map, ShardedVector& sigmap_canonical_candidates, ExactCellWires& cell_wires, const SigConnKinds& sig_analysis) { + for (RTLIL::SigBit candidate : sigmap_canonical_candidates) { + RTLIL::SigBit current_canonical = assign_map(candidate); + // Resolves if two threads in build_candidates found different candidates + // for the same set + // TODO adds effort for single-threaded? + if (compare_signals(current_canonical, candidate, sig_analysis.raw_registers, sig_analysis.raw_cell_connected, cell_wires)) + assign_map.add(candidate); + } +} + +struct DeferredUpdates { + // Deferred updates to the assign_map + ShardedVector update_connections; + // Wires we should remove init from + ShardedVector initialized_wires; + DeferredUpdates(ParallelDispatchThreadPool::Subpool &subpool) : update_connections(subpool), initialized_wires(subpool) {} +}; +struct UsedSignals { + // here, "connected" means "driven or driving something" + // meanwhile, "used" means "driving something" + // sigmapped + ShardedSigPool connected; + // pre-sigmapped + ShardedSigPool raw_connected; + // sigmapped + ShardedSigPool used; + + void clear(ParallelDispatchThreadPool::Subpool &subpool) { + subpool.run([this](const ParallelDispatchThreadPool::RunCtx &ctx) { + connected.clear(ctx); + raw_connected.clear(ctx); + used.clear(ctx); + }); + } +}; + +DeferredUpdates analyse_connectivity(UsedSignals& used, SigConnKinds& sig_analysis, const AnalysisContext& actx, CleanRunContext &clean_ctx) { + DeferredUpdates deferred(actx.subpool); + ShardedSigPool::Builder conn_builder(actx.subpool); + ShardedSigPool::Builder raw_conn_builder(actx.subpool); + ShardedSigPool::Builder used_builder(actx.subpool); + + // gather the usage information for cells and update cell connections with the altered sigmap + // also gather the usage information for ports, wires with `keep` + // also gather init bits + actx.subpool.run([&deferred, &conn_builder, &raw_conn_builder, &used_builder, &sig_analysis, &actx, &clean_ctx](const ParallelDispatchThreadPool::RunCtx &ctx) { + // Parallel destruction of these sharded structures + sig_analysis.clear(ctx); + + for (int i : ctx.item_range(actx.mod->cells_size())) { + RTLIL::Cell *cell = actx.mod->cell_at(i); + for (const auto &[port, sig] : cell->connections_) { + SigSpec spec = actx.assign_map(sig); + if (spec != sig) + deferred.update_connections.insert(ctx, {cell, port, spec}); + add_spec(raw_conn_builder, ctx, spec); + add_spec(conn_builder, ctx, spec); + if (!clean_ctx.ct_all.cell_output(cell->type, port)) + add_spec(used_builder, ctx, spec); + } + } + for (int i : ctx.item_range(actx.mod->wires_size())) { + RTLIL::Wire *wire = actx.mod->wire_at(i); + if (wire->port_id > 0) { + RTLIL::SigSpec sig = RTLIL::SigSpec(wire); + add_spec(raw_conn_builder, ctx, sig); + actx.assign_map.apply(sig); + add_spec(conn_builder, ctx, sig); + if (!wire->port_input) + add_spec(used_builder, ctx, sig); + } + if (wire->get_bool_attribute(ID::keep)) { + RTLIL::SigSpec sig = RTLIL::SigSpec(wire); + actx.assign_map.apply(sig); + add_spec(conn_builder, ctx, sig); + } + auto it = wire->attributes.find(ID::init); + if (it != wire->attributes.end()) + deferred.initialized_wires.insert(ctx, wire); + } + }); + actx.subpool.run([&conn_builder, &raw_conn_builder, &used_builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + conn_builder.process(ctx); + raw_conn_builder.process(ctx); + used_builder.process(ctx); + }); + used = {conn_builder, raw_conn_builder, used_builder}; + return deferred; +} + +struct WireDeleter { + pool del_wires_queue; + ShardedVector remove_init; + ShardedVector> set_init; + ShardedVector new_connections; + ShardedVector remove_unused_bits; + ShardedVector> set_unused_bits; + WireDeleter(UsedSignals& used_sig_analysis, bool purge_mode, const AnalysisContext& actx) : + remove_init(actx.subpool), + set_init(actx.subpool), + new_connections(actx.subpool), + remove_unused_bits(actx.subpool), + set_unused_bits(actx.subpool) { + ShardedVector del_wires(actx.subpool); + actx.subpool.run([&actx, purge_mode, &del_wires, &used_sig_analysis, this](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(actx.mod->wires_size())) { + RTLIL::Wire *wire = actx.mod->wire_at(i); + SigSpec s1 = SigSpec(wire), s2 = actx.assign_map(s1); + log_assert(GetSize(s1) == GetSize(s2)); + + Const initval; + bool has_init_attribute = wire->attributes.count(ID::init); + bool init_changed = false; + if (has_init_attribute) + initval = wire->attributes.at(ID::init); + if (GetSize(initval) != GetSize(wire)) { + initval.resize(GetSize(wire), State::Sx); + init_changed = true; + } + + if (GetSize(wire) == 0) { + // delete zero-width wires, unless they are module ports + if (wire->port_id == 0) + goto delete_this_wire; + } else + if (wire->port_id != 0 || wire->get_bool_attribute(ID::keep) || !initval.is_fully_undef()) { + // do not delete anything with "keep" or module ports or initialized wires + } else + if (!purge_mode && check_public_name(wire->name) && (check_any(used_sig_analysis.raw_connected, s1) || check_any(used_sig_analysis.connected, s2) || s1 != s2)) { + // do not get rid of public names unless in purge mode or if the wire is entirely unused, not even aliased + } else + if (!check_any(used_sig_analysis.raw_connected, s1)) { + // delete wires that aren't used by anything directly + goto delete_this_wire; + } + + if (0) + { + delete_this_wire: + del_wires.insert(ctx, wire); + } + else + { + RTLIL::SigSig new_conn; + for (int i = 0; i < GetSize(s1); i++) + if (s1[i] != s2[i]) { + if (s2[i] == State::Sx && (initval[i] == State::S0 || initval[i] == State::S1)) { + s2[i] = initval[i]; + initval.set(i, State::Sx); + init_changed = true; + } + new_conn.first.append(s1[i]); + new_conn.second.append(s2[i]); + } + if (new_conn.first.size() > 0) + new_connections.insert(ctx, std::move(new_conn)); + if (initval.is_fully_undef()) { + if (has_init_attribute) + remove_init.insert(ctx, wire); + } else + if (init_changed) + set_init.insert(ctx, {wire, std::move(initval)}); + + std::string unused_bits; + if (!check_all(used_sig_analysis.used, s2)) { + for (int i = 0; i < GetSize(s2); i++) { + if (s2[i].wire == NULL) + continue; + SigBit b = s2[i]; + if (used_sig_analysis.used.find({b, b.hash_top().yield()}) == nullptr) { + if (!unused_bits.empty()) + unused_bits += " "; + unused_bits += stringf("%d", i); + } + } + } + if (unused_bits.empty() || wire->port_id != 0) { + if (wire->attributes.count(ID::unused_bits)) + remove_unused_bits.insert(ctx, wire); + } else { + RTLIL::Const unused_bits_const(std::move(unused_bits)); + if (wire->attributes.count(ID::unused_bits)) { + RTLIL::Const &unused_bits_attr = wire->attributes.at(ID::unused_bits); + if (unused_bits_attr != unused_bits_const) + set_unused_bits.insert(ctx, {wire, std::move(unused_bits_const)}); + } else + set_unused_bits.insert(ctx, {wire, std::move(unused_bits_const)}); + } + } + } + }); + del_wires_queue.insert(del_wires.begin(), del_wires.end()); + } + // Decide for each wire if we should be deleting it + // and fix up attributes + void commit_changes(RTLIL::Module* mod) { + for (RTLIL::Wire *wire : remove_init) + wire->attributes.erase(ID::init); + for (auto &p : set_init) + p.first->attributes[ID::init] = std::move(p.second); + for (auto &conn : new_connections) + mod->connect(std::move(conn)); + for (RTLIL::Wire *wire : remove_unused_bits) + wire->attributes.erase(ID::unused_bits); + for (auto &p : set_unused_bits) + p.first->attributes[ID::unused_bits] = std::move(p.second); + } + int delete_wires(RTLIL::Module* mod, bool verbose) { + int deleted_and_unreported = 0; + for (auto wire : del_wires_queue) { + if (ys_debug() || (check_public_name(wire->name) && verbose)) + log_debug(" removing unused non-port wire %s.\n", wire->name); + else + deleted_and_unreported++; + } + mod->remove(del_wires_queue); + return deleted_and_unreported; + } +}; + +PRIVATE_NAMESPACE_END + +YOSYS_NAMESPACE_BEGIN + +bool rmunused_module_signals(RTLIL::Module *module, ParallelDispatchThreadPool::Subpool &subpool, CleanRunContext &clean_ctx) +{ + // Passing actx to function == function does parallel work + // Not passing module as function argument == function does not modify module + // The above sentence signals intent; it's not enforced due to constness laundering in wire_at / cell_at + AnalysisContext actx(module, subpool); + SigConnKinds conn_kinds(clean_ctx.flags.purge, actx, clean_ctx); + + ExactCellWires cell_wires(conn_kinds.exact_cells, actx.assign_map); + // Collect sigmap representative candidates as built in parallel + // With parallel runs, this creates redundant candidates that have to resolve in update_assign_map + ShardedVector new_sigmap_rep_candidates = build_candidates(cell_wires, conn_kinds, actx); + + // Cache all the cell_wires results that we might possible need. This avoids the results + // changing when we update `assign_map` below. + cell_wires.cache_all(new_sigmap_rep_candidates); + // Modify assign_map to reflect the connectivity we want, not the one we have + // this changes representative selection in assign_map + update_assign_map(actx.assign_map, new_sigmap_rep_candidates, cell_wires, conn_kinds); + + // Remove all wire-wire connections + module->connections_.clear(); + + UsedSignals used; + DeferredUpdates deferred = analyse_connectivity(used, conn_kinds, actx, clean_ctx); + fixup_cell_ports(deferred.update_connections); + // Rip up and re-apply init attributes onto representative wires with x-bits + // in place of unset init bits + consume_inits(deferred.initialized_wires, actx.assign_map).apply_normalised_inits(); + + WireDeleter deleter(used, clean_ctx.flags.purge, actx); + + used.clear(subpool); + + deleter.commit_changes(module); + int deleted_and_unreported = deleter.delete_wires(module, clean_ctx.flags.verbose); + int deleted_total = GetSize(deleter.del_wires_queue); + + clean_ctx.stats.count_rm_wires += deleted_total; + + if (clean_ctx.flags.verbose && deleted_and_unreported) + log_debug(" removed %d unused temporary wires.\n", deleted_and_unreported); + + if (deleted_total) + module->design->scratchpad_set_bool("opt.did_something", true); + + return deleted_total != 0; +} + +YOSYS_NAMESPACE_END diff --git a/tests/opt/opt_clean_init_const.ys b/tests/opt/opt_clean_init_const.ys new file mode 100644 index 000000000..1b3d5db63 --- /dev/null +++ b/tests/opt/opt_clean_init_const.ys @@ -0,0 +1,9 @@ +read_rtlil << EOT +module \top + attribute \init 1'0 + wire \w + + connect \w 1'0 +end +EOT +opt_clean diff --git a/tests/tools/rtlil-fuzz-grammar.json b/tests/tools/rtlil-fuzz-grammar.json index c27b160f4..96af9bde3 100644 --- a/tests/tools/rtlil-fuzz-grammar.json +++ b/tests/tools/rtlil-fuzz-grammar.json @@ -8,7 +8,7 @@ "end\n" ] ], - "": [ [ " wire width ", "", " ", "", " ", "", "\n" ] ], + "": [ [ "", " wire width ", "", " ", "", " ", "", "\n" ] ], "": [ [ "1" ], [ "2" ], [ "3" ], [ "4" ], [ "32" ], [ "128" ] ], "": [ [ "input ", "" ], [ "output ", "" ], [ "inout ", "" ], [] ], "": [ @@ -71,6 +71,7 @@ " end\n" ] ], + "": [ [ " attribute \\init ", "", "\n" ] ], "": [ [ "\\wire_a" ], [ "\\wire_b" ], [ "\\wire_c" ], [ "\\wire_d" ], [ "\\wire_e" ], [ "\\wire_f" ], [ "\\wire_g" ], [ "\\wire_h" ], [ "\\wire_i" ], [ "\\wire_j" ] ], "": [ [ "\\cell_a" ], [ "\\cell_b" ], [ "\\cell_c" ], [ "\\cell_d" ], [ "\\cell_e" ], [ "\\cell_f" ], [ "\\cell_g" ], [ "\\cell_h" ], [ "\\cell_i" ], [ "\\cell_j" ] ], "": [ [ "\\bb1" ], [ "\\bb2" ] ], @@ -97,6 +98,7 @@ "": [ [ " connect ", "", " ", "", "\n" ] ], "": [ [ ], [ "", "" ] ], + "": [ [ ], [ "", "" ] ], "": [ [ ], [ "", "" ] ], "": [ [ ], [ "", "" ] ], "": [ [ ], [ "", "" ] ], diff --git a/tests/unit/Makefile b/tests/unit/Makefile index e8f76cba9..88f449bf8 100644 --- a/tests/unit/Makefile +++ b/tests/unit/Makefile @@ -4,10 +4,10 @@ UNAME_S := $(shell uname -s) GTEST_PREFIX := $(shell brew --prefix googletest 2>/dev/null) ifeq ($(GTEST_PREFIX),) GTEST_CXXFLAGS := - GTEST_LDFLAGS := -lgtest -lgtest_main + GTEST_LDFLAGS := -lgtest -lgmock -lgtest_main else GTEST_CXXFLAGS := -I$(GTEST_PREFIX)/include - GTEST_LDFLAGS := -L$(GTEST_PREFIX)/lib -lgtest -lgtest_main + GTEST_LDFLAGS := -L$(GTEST_PREFIX)/lib -lgtest -lgmock -lgtest_main endif ifeq ($(UNAME_S),Darwin) diff --git a/tests/unit/kernel/threadingTest.cc b/tests/unit/kernel/threadingTest.cc new file mode 100644 index 000000000..cbab4d118 --- /dev/null +++ b/tests/unit/kernel/threadingTest.cc @@ -0,0 +1,442 @@ +#include +#include +#include "kernel/threading.h" + +YOSYS_NAMESPACE_BEGIN + +class ThreadingTest : public testing::Test { +protected: + ThreadingTest() { + if (log_files.empty()) + log_files.emplace_back(stdout); + } +}; + +TEST_F(ThreadingTest, ParallelDispatchThreadPoolCreate) { + // Test creating a pool with 0 threads (treated as 1) + ParallelDispatchThreadPool pool0(0); + EXPECT_EQ(pool0.num_threads(), 1); + + // Test creating a pool with 1 thread + ParallelDispatchThreadPool pool1(1); + EXPECT_EQ(pool1.num_threads(), 1); + + // Test creating a pool with 2 threads + ParallelDispatchThreadPool pool2(2); + // YOSYS_MAX_THREADS or system configuration could mean we + // decide to only use one thread. + EXPECT_GE(pool2.num_threads(), 1); + EXPECT_LE(pool2.num_threads(), 2); +} + +TEST_F(ThreadingTest, ParallelDispatchThreadPoolRunSimple) { + ParallelDispatchThreadPool pool(2); + + std::atomic counter{0}; + pool.run([&counter](const ParallelDispatchThreadPool::RunCtx &) { + counter.fetch_add(1, std::memory_order_relaxed); + }); + + EXPECT_EQ(counter.load(), pool.num_threads()); +} + +TEST_F(ThreadingTest, ParallelDispatchThreadPoolRunMultiple) { + ParallelDispatchThreadPool pool(2); + + std::atomic counter{0}; + // Run multiple times to verify the pool can be reused + for (int i = 0; i < 5; ++i) + pool.run([&counter](const ParallelDispatchThreadPool::RunCtx &) { + counter.fetch_add(1, std::memory_order_relaxed); + }); + + EXPECT_EQ(counter.load(), pool.num_threads() * 5); +} + +TEST_F(ThreadingTest, ParallelDispatchThreadPoolRunCtxThreadNums) { + ParallelDispatchThreadPool pool(4); + + std::vector thread_nums(pool.num_threads(), -1); + pool.run([&thread_nums](const ParallelDispatchThreadPool::RunCtx &ctx) { + thread_nums[ctx.thread_num] = ctx.thread_num; + }); + + // Every thread should have recorded its own thread number + for (int i = 0; i < pool.num_threads(); ++i) + EXPECT_EQ(thread_nums[i], i); +} + +TEST_F(ThreadingTest, ParallelDispatchThreadPoolItemRange) { + ParallelDispatchThreadPool pool(3); + + const int num_items = 100; + std::vector> item_counts(num_items); + for (std::atomic &c : item_counts) + c.store(0); + + pool.run([&item_counts](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(num_items)) + item_counts[i].fetch_add(1); + }); + + // Each item should have been processed exactly once + for (int i = 0; i < num_items; ++i) + EXPECT_EQ(item_counts[i].load(), 1); +} + +TEST_F(ThreadingTest, ParallelDispatchThreadPoolSubpool) { + ParallelDispatchThreadPool pool(4); + + // Subpool limited to 2 threads + ParallelDispatchThreadPool::Subpool subpool(pool, 2); + EXPECT_LE(subpool.num_threads(), 2); + + std::atomic counter{0}; + subpool.run([&counter](const ParallelDispatchThreadPool::RunCtx &) { + counter.fetch_add(1, std::memory_order_relaxed); + }); + + EXPECT_EQ(counter.load(), subpool.num_threads()); +} + +TEST_F(ThreadingTest, IntRangeIteration) { + IntRange range{3, 7}; + std::vector values; + for (int i : range) + values.push_back(i); + EXPECT_THAT(values, testing::ElementsAre(3, 4, 5, 6)); +} + +TEST_F(ThreadingTest, IntRangeEmpty) { + IntRange range{5, 5}; + for (int _ : range) + FAIL(); +} + +TEST_F(ThreadingTest, ItemRangeForWorker) { + EXPECT_EQ(item_range_for_worker(10, 0, 3), (IntRange{0, 4})); + EXPECT_EQ(item_range_for_worker(10, 1, 3), (IntRange{4, 7})); + EXPECT_EQ(item_range_for_worker(10, 2, 3), (IntRange{7, 10})); +} + +TEST_F(ThreadingTest, ItemRangeForWorkerZeroThreads) { + EXPECT_EQ(item_range_for_worker(10, 0, 0), (IntRange{0, 10})); +} + +TEST_F(ThreadingTest, ShardedVectorBasic) { + ParallelDispatchThreadPool pool(2); + ShardedVector vec(pool); + pool.run([&vec](const ParallelDispatchThreadPool::RunCtx &ctx) { + vec.insert(ctx, ctx.thread_num * 10); + vec.insert(ctx, ctx.thread_num * 10 + 1); + }); + + EXPECT_FALSE(vec.empty()); + + // Count elements + std::vector elements; + for (int v : vec) { + elements.push_back(v); + } + + if (pool.num_threads() == 2) + EXPECT_THAT(elements, testing::ElementsAre(0, 1, 10, 11)); + else + EXPECT_THAT(elements, testing::ElementsAre(0, 1)); +} + +TEST_F(ThreadingTest, MonotonicFlagBasic) { + MonotonicFlag flag; + EXPECT_FALSE(flag.load()); + flag.set(); + EXPECT_TRUE(flag.load()); + flag.set(); + EXPECT_TRUE(flag.load()); +} + +TEST_F(ThreadingTest, MonotonicFlagSetAndReturnOld) { + MonotonicFlag flag; + EXPECT_FALSE(flag.set_and_return_old()); + EXPECT_TRUE(flag.load()); + EXPECT_TRUE(flag.set_and_return_old()); +} + +TEST_F(ThreadingTest, ConcurrentQueueBasic) { + ConcurrentQueue queue; + queue.push_back(1); + queue.push_back(2); + queue.push_back(3); + + auto v1 = queue.pop_front(); + auto v2 = queue.pop_front(); + auto v3 = queue.pop_front(); + + ASSERT_TRUE(v1.has_value()); + ASSERT_TRUE(v2.has_value()); + ASSERT_TRUE(v3.has_value()); + EXPECT_EQ(*v1, 1); + EXPECT_EQ(*v2, 2); + EXPECT_EQ(*v3, 3); +} + +TEST_F(ThreadingTest, ConcurrentQueueTryPopEmpty) { + ConcurrentQueue queue; + auto v = queue.try_pop_front(); + EXPECT_FALSE(v.has_value()); +} + +TEST_F(ThreadingTest, ConcurrentQueueClose) { + ConcurrentQueue queue; + queue.push_back(42); + queue.close(); + + // Can still pop existing elements + auto v1 = queue.pop_front(); + ASSERT_TRUE(v1.has_value()); + EXPECT_EQ(*v1, 42); + + // After close and empty, pop_front returns nullopt + auto v2 = queue.pop_front(); + EXPECT_FALSE(v2.has_value()); +} + +TEST_F(ThreadingTest, ThreadPoolCreate) { + // pool_size of 0 means no worker threads + ThreadPool pool0(0, [](int) {}); + EXPECT_EQ(pool0.num_threads(), 0); + + // pool_size of 1 means 1 worker thread + std::atomic counter{0}; + { + ThreadPool pool1(1, [&counter](int thread_num) { + EXPECT_EQ(thread_num, 0); + counter.fetch_add(1); + }); + } +#ifdef YOSYS_ENABLE_THREADS + EXPECT_EQ(counter.load(), 1); +#else + EXPECT_EQ(counter.load(), 0); +#endif +} + +TEST_F(ThreadingTest, ThreadPoolMultipleThreads) { + std::atomic counter{0}; + { + ThreadPool pool(2, [&counter](int) { + counter.fetch_add(1); + }); + EXPECT_LE(pool.num_threads(), 2); + } +#ifdef YOSYS_ENABLE_THREADS + EXPECT_GE(counter.load(), 1); + EXPECT_LE(counter.load(), 2); +#else + EXPECT_EQ(counter.load(), 0); +#endif +} + +// Helper types for ShardedHashtable tests +struct IntValue { + using Accumulated = IntValue; + int value; + operator int() const { return value; } +}; + +struct IntValueEquality { + bool operator()(int a, int b) const { return a == b; } +}; + +TEST_F(ThreadingTest, ShardedHashtableBasic) { + ParallelDispatchThreadPool pool(1); + + using HashSet = ShardedHashtable>; + HashSet::Builder builder(pool); + + // Insert some values + pool.run([&builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + builder.insert(ctx, {{10}, 10}); + builder.insert(ctx, {{20}, 20}); + builder.insert(ctx, {{30}, 30}); + }); + + // Process + pool.run([&builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + builder.process(ctx); + }); + + // Build and lookup + HashSet set(builder); + const IntValue *found10 = set.find({{10}, 10}); + const IntValue *found20 = set.find({{20}, 20}); + const IntValue *found99 = set.find({{99}, 99}); + + ASSERT_NE(found10, nullptr); + ASSERT_NE(found20, nullptr); + EXPECT_EQ(found99, nullptr); + EXPECT_EQ(*found10, 10); + EXPECT_EQ(*found20, 20); +} + +TEST_F(ThreadingTest, ShardedHashtableParallelInsert) { + ParallelDispatchThreadPool pool(3); + + using HashSet = ShardedHashtable>; + HashSet::Builder builder(pool); + + // Insert values from multiple threads + pool.run([&builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i = 0; i < 10; ++i) { + int val = ctx.thread_num * 100 + i; + builder.insert(ctx, {{val}, static_cast(val)}); + } + }); + + pool.run([&builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + builder.process(ctx); + }); + + HashSet set(builder); + + // Verify all values can be found + for (int t = 0; t < pool.num_threads(); ++t) { + for (int i = 0; i < 10; ++i) { + int val = t * 100 + i; + const IntValue *found = set.find({{val}, static_cast(val)}); + ASSERT_NE(found, nullptr) << "Value " << val << " not found"; + EXPECT_EQ(*found, val); + } + } +} + +// Helper types for ShardedHashtable tests +struct IntDictValue { + using Accumulated = IntDictValue; + int key; + int value; + bool operator==(const IntDictValue &other) const { return key == other.key && value == other.value; } + bool operator!=(const IntDictValue &other) const { return !(*this == other); } +}; + +struct IntDictKeyEquality { + bool operator()(const IntDictValue &a, const IntDictValue &b) const { return a.key == b.key; } +}; + +// Collision handler that sums values +struct SumCollisionHandler { + void operator()(IntDictValue &existing, IntDictValue &incoming) const { + existing.value += incoming.value; + } +}; + +TEST_F(ThreadingTest, ShardedHashtableCollision) { + ParallelDispatchThreadPool pool(1); + + using HashSet = ShardedHashtable; + HashSet::Builder builder(pool); + + // Insert duplicate keys with same hash - duplicates should collapse + pool.run([&builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + builder.insert(ctx, {{5, 10}, 5}); + builder.insert(ctx, {{5, 12}, 5}); // Duplicate key/hash + builder.insert(ctx, {{5, 14}, 5}); // Another duplicate + }); + + pool.run([&builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + builder.process(ctx); + }); + + HashSet set(builder); + const IntDictValue *found = set.find({{5, 0}, 5}); + ASSERT_NE(found, nullptr); + // With default collision handler, first value is kept + EXPECT_EQ(*found, (IntDictValue{5, 36})); +} + +TEST_F(ThreadingTest, ShardedHashtableEmpty) { + ParallelDispatchThreadPool pool(1); + + using HashSet = ShardedHashtable>; + HashSet::Builder builder(pool); + + // Don't insert anything, just process + pool.run([&builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + builder.process(ctx); + }); + + HashSet set(builder); + const IntValue *found = set.find({{42}, 42}); + EXPECT_EQ(found, nullptr); +} + +TEST_F(ThreadingTest, ConcurrentWorkQueueSingleThread) { + ConcurrentWorkQueue queue(1, 10); // 1 thread, batch size 10 + EXPECT_EQ(queue.num_threads(), 1); + + ThreadIndex thread{0}; + + // Push some items (less than batch size) + for (int i = 0; i < 5; ++i) + queue.push(thread, i); + + // Pop should return those items + std::vector batch = queue.pop_batch(thread); + EXPECT_THAT(batch, testing::UnorderedElementsAre(0, 1, 2, 3, 4)); + + // Next pop should return empty (all threads "waiting") + std::vector empty_batch = queue.pop_batch(thread); + EXPECT_TRUE(empty_batch.empty()); +} + +TEST_F(ThreadingTest, ConcurrentWorkQueueBatching) { + ConcurrentWorkQueue queue(1, 3); // batch size 3 + ThreadIndex thread{0}; + + queue.push(thread, 10); + queue.push(thread, 20); + queue.push(thread, 30); + queue.push(thread, 40); + queue.push(thread, 50); + + std::vector popped; + while (true) { + std::vector batch = queue.pop_batch(thread); + if (batch.empty()) + break; + popped.insert(popped.end(), batch.begin(), batch.end()); + } + EXPECT_THAT(popped, testing::UnorderedElementsAre(10, 20, 30, 40, 50)); +} + +TEST_F(ThreadingTest, ConcurrentWorkQueueParallel) { + ParallelDispatchThreadPool pool(2); + if (pool.num_threads() < 2) { + // Skip test if we don't have multiple threads + return; + } + + ConcurrentWorkQueue queue(2, 3); + std::atomic sum{0}; + + pool.run([&queue, &sum](const ParallelDispatchThreadPool::RunCtx &ctx) { + // Each thread pushes some work + for (int i = 0; i < 10; ++i) + queue.push(ctx, ctx.thread_num * 100 + i); + + // Each thread processes work until done + while (true) { + std::vector batch = queue.pop_batch(ctx); + if (batch.empty()) + break; + for (int v : batch) + sum.fetch_add(v); + } + }); + + // Thread 0 pushes: 0+1+2+...+9 = 45 + // Thread 1 pushes: 100+101+...+109 = 1045 + // Total = 45 + 1045 = 1090 + EXPECT_EQ(sum.load(), 1090); +} + +YOSYS_NAMESPACE_END