From 6ac3bccf162db1764db9b852be4db19efdab87b8 Mon Sep 17 00:00:00 2001 From: "Emil J. Tywoniak" Date: Wed, 10 Dec 2025 01:04:55 +0100 Subject: [PATCH] opt_merge: refactor --- passes/opt/opt_merge.cc | 75 +++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/passes/opt/opt_merge.cc b/passes/opt/opt_merge.cc index e8c083f98..631baa341 100644 --- a/passes/opt/opt_merge.cc +++ b/passes/opt/opt_merge.cc @@ -61,14 +61,14 @@ struct CellHash // bucket k by iterating over all shards of the bucket. // The input to each thread in the "compute cell hashes" phase. -struct ComputeCellHashes +struct CellRange { - int cell_index_begin; - int cell_index_end; + int begin; + int end; }; // The output from each thread in the "compute cell hashes" phase. -struct ComputeCellHashesOut +struct CellHashes { // Entry i contains the hashes where hash_value % bucketed_cell_hashes.size() == i std::vector> bucketed_cell_hashes; @@ -84,13 +84,14 @@ struct DuplicateCell }; // The input to each thread in the "find duplicate cells" phase. -struct FindDuplicateCells +// Shards of buckets of cell hashes +struct Shards { std::vector>> &bucketed_cell_hashes; }; -// The oputut from each thread in the "find duplicate cells" phase. -struct FindDuplicateCellsOut +// The output from each thread in the "find duplicate cells" phase. 
+struct FoundDuplicates { std::vector duplicates; }; @@ -271,10 +272,10 @@ struct OptMergeThreadWorker { } - ComputeCellHashesOut compute_cell_hashes(const ComputeCellHashes &in) const + CellHashes compute_cell_hashes(const CellRange &cell_range) const { std::vector> bucketed_cell_hashes(workers); - for (int cell_index = in.cell_index_begin; cell_index < in.cell_index_end; ++cell_index) { + for (int cell_index = cell_range.begin; cell_index < cell_range.end; ++cell_index) { const RTLIL::Cell *cell = module->cell_at(cell_index); if (!module->selected(cell)) continue; @@ -299,7 +300,7 @@ struct OptMergeThreadWorker return {std::move(bucketed_cell_hashes)}; } - FindDuplicateCellsOut find_duplicate_cells(int index, const FindDuplicateCells &in) const + FoundDuplicates find_duplicate_cells(int index, const Shards &in) const { // We keep a set of known cells. They're hashed with our hash_cell_function // and compared with our compare_cell_parameters_and_connections. @@ -368,8 +369,10 @@ struct OptMergeWorker // Use no more than one worker per thousand cells, rounded down, so // we only start multithreading with at least 2000 cells. + // TODO configurable limit? int num_worker_threads = ThreadPool::pool_size(0, module->cells_size()/1000); int workers = std::max(1, num_worker_threads); + // The main thread doesn't do any work, so if there is only one worker thread, // just run everything on the main thread instead. 
// This avoids creating and waiting on a thread, which is pretty high overhead @@ -378,16 +381,16 @@ struct OptMergeWorker num_worker_threads = 0; OptMergeThreadWorker thread_worker(module, initvals, assign_map, ct, workers, mode_share_all, mode_keepdc); - std::vector> compute_cell_hashes(num_worker_threads); - std::vector> compute_cell_hashes_out(num_worker_threads); - std::vector> find_duplicate_cells(num_worker_threads); - std::vector> find_duplicate_cells_out(num_worker_threads); + std::vector> cell_ranges_queues(num_worker_threads); + std::vector> cell_hashes_queues(num_worker_threads); + std::vector> shards_queues(num_worker_threads); + std::vector> duplicates_queues(num_worker_threads); ThreadPool thread_pool(num_worker_threads, [&](int i) { - while (std::optional c = compute_cell_hashes[i].pop_front()) { - compute_cell_hashes_out[i].push_back(thread_worker.compute_cell_hashes(*c)); - std::optional f = find_duplicate_cells[i].pop_front(); - find_duplicate_cells_out[i].push_back(thread_worker.find_duplicate_cells(i, *f)); + while (std::optional c = cell_ranges_queues[i].pop_front()) { + cell_hashes_queues[i].push_back(thread_worker.compute_cell_hashes(*c)); + std::optional shards = shards_queues[i].pop_front(); + duplicates_queues[i].push_back(thread_worker.find_duplicate_cells(i, *shards)); } }); @@ -399,7 +402,7 @@ struct OptMergeWorker { int cells_size = module->cells_size(); log("Computing hashes of %d cells of `%s'.\n", cells_size, module->name); - std::vector>> bucketed_cell_hashes(workers); + std::vector>> sharded_bucketed_cell_hashes(workers); int cell_index = 0; int cells_size_mod_workers = cells_size % workers; @@ -407,48 +410,48 @@ struct OptMergeWorker Multithreading multithreading; for (int i = 0; i < workers; ++i) { int num_cells = cells_size/workers + ((i < cells_size_mod_workers) ? 
1 : 0); - ComputeCellHashes c = { cell_index, cell_index + num_cells }; + CellRange c = { cell_index, cell_index + num_cells }; cell_index += num_cells; if (num_worker_threads > 0) - compute_cell_hashes[i].push_back(c); + cell_ranges_queues[i].push_back(c); else - bucketed_cell_hashes[i] = std::move(thread_worker.compute_cell_hashes(c).bucketed_cell_hashes); + sharded_bucketed_cell_hashes[i] = std::move(thread_worker.compute_cell_hashes(c).bucketed_cell_hashes); } log_assert(cell_index == cells_size); if (num_worker_threads > 0) for (int i = 0; i < workers; ++i) - bucketed_cell_hashes[i] = std::move(compute_cell_hashes_out[i].pop_front()->bucketed_cell_hashes); + sharded_bucketed_cell_hashes[i] = std::move(cell_hashes_queues[i].pop_front()->bucketed_cell_hashes); } log("Finding duplicate cells in `%s'.\n", module->name); - std::vector duplicates; + std::vector merged_duplicates; { Multithreading multithreading; for (int i = 0; i < workers; ++i) { - FindDuplicateCells f = { bucketed_cell_hashes }; + Shards thread_shards = { sharded_bucketed_cell_hashes }; if (num_worker_threads > 0) - find_duplicate_cells[i].push_back(f); + shards_queues[i].push_back(thread_shards); else { - std::vector d = std::move(thread_worker.find_duplicate_cells(i, f).duplicates); - duplicates.insert(duplicates.end(), d.begin(), d.end()); + std::vector d = std::move(thread_worker.find_duplicate_cells(i, thread_shards).duplicates); + merged_duplicates.insert(merged_duplicates.end(), d.begin(), d.end()); } } if (num_worker_threads > 0) for (int i = 0; i < workers; ++i) { - std::vector d = std::move(find_duplicate_cells_out[i].pop_front()->duplicates); - duplicates.insert(duplicates.end(), d.begin(), d.end()); + std::vector d = std::move(duplicates_queues[i].pop_front()->duplicates); + merged_duplicates.insert(merged_duplicates.end(), d.begin(), d.end()); } } - std::sort(duplicates.begin(), duplicates.end(), [](const DuplicateCell &lhs, const DuplicateCell &rhs) { + 
std::sort(merged_duplicates.begin(), merged_duplicates.end(), [](const DuplicateCell &lhs, const DuplicateCell &rhs) { // Sort them by the order in which duplicates would have been detected in a single-threaded - // run. The cell at which the duplicate would have been detected is the later of the two + // run. The cell at which the duplicate would have been detected is the later of the two // cells involved. return std::max(lhs.remove_cell, lhs.keep_cell) < std::max(rhs.remove_cell, rhs.keep_cell); }); // Convert to cell pointers because removing cells will invalidate the indices. std::vector> cell_ptrs; - for (DuplicateCell dup : duplicates) + for (DuplicateCell dup : merged_duplicates) cell_ptrs.push_back({module->cell_at(dup.remove_cell), module->cell_at(dup.keep_cell)}); for (auto [remove_cell, keep_cell] : cell_ptrs) @@ -475,13 +478,13 @@ struct OptMergeWorker module->remove(remove_cell); total_count++; } - did_something = !duplicates.empty(); + did_something = !merged_duplicates.empty(); } - for (ConcurrentQueue &q : compute_cell_hashes) + for (ConcurrentQueue &q : cell_ranges_queues) q.close(); - for (ConcurrentQueue &q : find_duplicate_cells) + for (ConcurrentQueue &q : shards_queues) q.close(); log_suppressed();