From c6b876fc8522c55bca917db7ff5a750fb2beb118 Mon Sep 17 00:00:00 2001 From: myrtle Date: Thu, 2 Apr 2026 13:36:50 +0200 Subject: [PATCH] control set awareness in the HeAP legaliser (#1678) * xilinx: Index control sets Signed-off-by: gatecat * heap: data structure for control sets Signed-off-by: gatecat * heap: fail faster on control set mismatch Signed-off-by: gatecat * xilinx: Reduce control set search radius Signed-off-by: gatecat * Fix compiler warning Signed-off-by: gatecat * heap: Allow disabling control set awareness for comparison/debug Signed-off-by: gatecat * heap: Add some notes about control sets Signed-off-by: gatecat * heap: Fix typo and regression Signed-off-by: gatecat * heap: Add a schedule for ctrlset search radius Signed-off-by: gatecat * heap: Tidy up Signed-off-by: gatecat --------- Signed-off-by: gatecat --- common/kernel/array2d.h | 1 + common/kernel/command.cc | 5 + common/place/placer_heap.cc | 259 ++++++++++++++++++++++++++++-- common/place/placer_heap.h | 29 ++++ himbaechel/uarch/xilinx/xilinx.cc | 65 +++++++- himbaechel/uarch/xilinx/xilinx.h | 2 + 6 files changed, 349 insertions(+), 12 deletions(-) diff --git a/common/kernel/array2d.h b/common/kernel/array2d.h index 513dabae..0498f474 100644 --- a/common/kernel/array2d.h +++ b/common/kernel/array2d.h @@ -62,6 +62,7 @@ template class array2d if ((new_width * new_height) > m_size) { delete[] data; m_size = new_width * new_height; + NPNR_ASSERT(m_size >= 0); data = new T[m_size]; } m_width = new_width; diff --git a/common/kernel/command.cc b/common/kernel/command.cc index 04e3537c..5b0f4695 100644 --- a/common/kernel/command.cc +++ b/common/kernel/command.cc @@ -391,6 +391,8 @@ po::options_description CommandHandler::getGeneralOptions() "allow placer to attempt up to max(10000, total cells^2 / N) iterations to place a cell (int " "N, default: 8, 0 for no timeout)"); + general.add_options()("placer-heap-no-ctrl-set", "disable control set awareness in placer heap"); + general.add_options()("static-dump-density", "write density csv files during placer-static flow"); #if !defined(NPNR_DISABLE_THREADS) @@ -536,6 +538,9 @@ void CommandHandler::setupContext(Context *ctx) ctx->settings[ctx->id("placerHeap/cellPlacementTimeout")] = std::to_string(std::max(0, vm["placer-heap-cell-placement-timeout"].as())); + if (vm.count("placer-heap-no-ctrl-set")) + ctx->settings[ctx->id("placerHeap/noCtrlSet")] = true; + if (vm.count("parallel-refine")) ctx->settings[ctx->id("placerHeap/parallelRefine")] = true; diff --git a/common/place/placer_heap.cc b/common/place/placer_heap.cc index cdd924da..c274b30b 100644 --- a/common/place/placer_heap.cc +++ b/common/place/placer_heap.cc @@ -41,6 +41,7 @@ #include #include #include +#include "array2d.h" #include "fast_bels.h" #include "log.h" #include "nextpnr.h" @@ -132,6 +133,29 @@ template struct EquationSystem } }; +struct ControlSetState +{ + int32_t ctrl_set = -1; + int32_t count = 0; + void bind(int32_t ctrl_set) + { + if (count == 0) { + this->ctrl_set = ctrl_set; + } else { + NPNR_ASSERT(this->ctrl_set == ctrl_set); + } + ++count; + } + void unbind() + { + --count; + NPNR_ASSERT(count >= 0); + if (count == 0) + this->ctrl_set = -1; + } + bool check(int32_t ctrl_set) { return count == 0 || ctrl_set == this->ctrl_set; } +}; + } // namespace class HeAPPlacer @@ -156,6 +180,7 @@ class HeAPPlacer ScopeLock lock(ctx); place_constraints(); build_fast_bels(); + alloc_control_sets(); seed_placement(); update_all_chains(); wirelen_t hpwl = total_hpwl(); @@ -182,7 +207,8 @@ class HeAPPlacer } wirelen_t solved_hpwl = 0, spread_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); - int iter = 0, stalled = 0; + iter = 0; + int stalled = 0; std::vector> solution; @@ -425,9 +451,14 @@ class HeAPPlacer std::vector solve_cells; dict> cluster2cells; + dict cell_ctrl_set; dict chain_size; + // Tracking control sets + array2d> control_sets; + dict z_to_ctrl_set; // Performance counting double solve_time = 0, cl_time = 0, sl_time = 0; + int iter = 0; // Place cells with the BEL attribute set to constrain them void place_constraints() @@ -528,6 +559,144 @@ class HeAPPlacer } } + void alloc_control_sets() + { + if (cfg.ff_bel_bucket == BelBucketId() || cfg.disableCtrlSet) + return; + FastBels::FastBelsData *ff_bels; + fast_bels.getBelsForBelBucket(cfg.ff_bel_bucket, &ff_bels); + control_sets.reset(max_x + 1, max_y + 1); + for (int x = 0; x <= max_x; x++) { + if (x >= int(ff_bels->size())) + continue; + auto &col = ff_bels->at(x); + for (int y = 0; y <= max_y; y++) { + if (y < int(col.size()) && !col.at(y).empty()) + control_sets.at(x, y).resize(cfg.ff_control_set_groups.size()); + } + } + for (int g = 0; g < int(cfg.ff_control_set_groups.size()); g++) { + for (int z : cfg.ff_control_set_groups.at(g)) { + z_to_ctrl_set[z] = g; + } + } + // determine cell control sets + for (const auto &cell : ctx->cells) { + const CellInfo *ci = cell.second.get(); + if (ctx->getBelBucketForCellType(ci->type) != cfg.ff_bel_bucket) + continue; + auto ctrl_set = cfg.get_cell_control_set(ctx, ci); + if (ctrl_set != -1) { + cell_ctrl_set[ci->name] = ctrl_set; + if (ci->bel != BelId()) + bind_ctrl_set(ci->bel, ci->name); + } + } + } + + bool test_ctrl_set(BelId bel, IdString cell) + { + if (cfg.ff_bel_bucket == BelBucketId() || cfg.disableCtrlSet) + return true; + if (ctx->getBelBucketForBel(bel) != cfg.ff_bel_bucket) + return true; + auto loc = ctx->getBelLocation(bel); + return control_sets.at(loc.x, loc.y).at(z_to_ctrl_set.at(loc.z)).check(cell_ctrl_set.at(cell)); + } + + void bind_ctrl_set(BelId bel, IdString cell) + { + if (cfg.ff_bel_bucket == BelBucketId() || cfg.disableCtrlSet) + return; + if (ctx->getBelBucketForBel(bel) != cfg.ff_bel_bucket) + return; + auto loc = ctx->getBelLocation(bel); + control_sets.at(loc.x, loc.y).at(z_to_ctrl_set.at(loc.z)).bind(cell_ctrl_set.at(cell)); + } + + void unbind_ctrl_set(BelId bel) + { + if (cfg.ff_bel_bucket == BelBucketId() || cfg.disableCtrlSet) + return; + if (ctx->getBelBucketForBel(bel) != cfg.ff_bel_bucket) + return; + auto loc = ctx->getBelLocation(bel); + auto &tile = control_sets.at(loc.x, loc.y); + if (tile.empty()) + return; + auto fnd = z_to_ctrl_set.find(loc.z); + if (fnd == z_to_ctrl_set.end()) + return; + tile.at(fnd->second).unbind(); + } + + int32_t get_cluster_control_set(ClusterId cluster) + { + int32_t ctrl_set = -1; + if (cfg.ff_bel_bucket == BelBucketId() || cfg.disableCtrlSet) + return -1; + for (auto cell : cluster2cells.at(cluster)) { + auto ofs = ctx->getClusterOffset(cell); + if (ofs.x != 0 || ofs.y != 0) + return -1; // big cluster + if (ctx->getBelBucketForCellType(cell->type) != cfg.ff_bel_bucket) + continue; + auto cell_ctrl_set = cfg.get_cell_control_set(ctx, cell); + if (cell_ctrl_set == -1) + continue; + if (ctrl_set == -1 || ctrl_set == cell_ctrl_set) { + ctrl_set = cell_ctrl_set; + } else { + // mismatch, complex cluster + return -1; + } + } + return ctrl_set; + } + + std::vector find_control_set_candidates(int cx, int cy, int32_t ctrl_set, int max_radius, int &nonempty) + { + std::vector result; + + int radius = 1; + nonempty = 0; + auto process_location = [&](int x, int y) { + if (y < 0 || y > max_y) + return; + if (x < 0 || x > max_x) + return; + const auto &tile = control_sets.at(x, y); + if (tile.empty()) + return; + ++nonempty; + for (int g = 0; g < int(tile.size()); g++) { + if (tile.at(g).count > 0 && tile.at(g).ctrl_set == ctrl_set) { + result.emplace_back(x, y, g); + } + } + }; + process_location(cx, cy); + while (radius < max_radius && int(result.size()) < 10) { + for (int y = cy - radius; y <= cy + radius; y++) { + process_location(cx - radius, y); + process_location(cx + radius, y); + } + for (int x = cx - (radius - 1); x <= cx + (radius - 1); x++) { + process_location(x, cy - radius); + process_location(x, cy + radius); + } + ++radius; + } + + std::stable_sort(result.begin(), result.end(), [&](Loc a, Loc b) { + int d0 = std::abs(a.x - cx) + std::abs(a.y - cy); + int d1 = std::abs(b.y - cx) + std::abs(b.y - cy); + return d0 < d1; + }); + + return result; + } + // Build and solve in one direction void build_solve_direction(bool yaxis, int iter) { @@ -645,12 +814,14 @@ class HeAPPlacer placed = true; } else { ctx->bindBel(bel, ci, STRENGTH_STRONG); + bind_ctrl_set(bel, ci->name); if (ctx->isBelLocationValid(bel)) { cell_locs[cell.first].locked = true; placed = true; bels_used.insert(bel); } else { ctx->unbindBel(bel); + unbind_ctrl_set(bel); available_bels.at(ci->type).push_front(bel); } } @@ -908,8 +1079,10 @@ class HeAPPlacer CellInfo *ci = cell.second.get(); if (ci->bel != BelId() && (ci->udata != dont_solve || - (ci->cluster != ClusterId() && ctx->getClusterRootCell(ci->cluster)->udata != dont_solve))) + (ci->cluster != ClusterId() && ctx->getClusterRootCell(ci->cluster)->udata != dont_solve))) { + p->unbind_ctrl_set(ci->bel); ctx->unbindBel(ci->bel); + } } // At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using @@ -970,6 +1143,39 @@ class HeAPPlacer log_error("Unable to find legal placement for all cells, design is probably at utilisation limit.\n"); } + if (p->cfg.ff_bel_bucket != BelBucketId() && !p->cfg.disableCtrlSet) { + // Try placing based on same control set in window first + int32_t ctrl_set = -1; + if (ci->cluster != ClusterId()) { + ctrl_set = p->get_cluster_control_set(ci->cluster); + } else if (ctx->getBelBucketForCellType(ci->type) == p->cfg.ff_bel_bucket) { + ctrl_set = p->cfg.get_cell_control_set(ctx, ci); + } + if (ctrl_set != -1) { + int nonempty = 0; + int ctrl_set_radius = p->cfg.ctrl_set_max_radius.at( + std::min(p->iter, int(p->cfg.ctrl_set_max_radius.size()) - 1)); + auto candidates = + p->find_control_set_candidates(p->cell_locs.at(ci->name).x, p->cell_locs.at(ci->name).y, + ctrl_set, ctrl_set_radius, nonempty); + // log_info("%s %d/%d %d (%d, %d)\n", ci->name.c_str(ctx), int(candidates.size()), nonempty, + // ctrl_set, + // int(p->cell_locs.at(ci->name).x), int(p->cell_locs.at(ci->name).y)); + for (auto loc : candidates) { + + if (ci->cluster == ClusterId()) { + try_place_cell(ci, loc.x, loc.y, loc.z); + } else { + try_place_cluster(ci, loc.x, loc.y, loc.z); + } + + if (placed) { + return; + } + } + } + } + while (!placed) { if (p->cfg.cell_placement_timeout > 0 && total_iters_for_cell > p->cfg.cell_placement_timeout) log_error("Unable to find legal placement for cell '%s' of type '%s' after %d attempts, check " @@ -1028,10 +1234,12 @@ class HeAPPlacer if (iter_at_radius >= need_to_explore && bestBel != BelId()) { CellInfo *bound = ctx->getBoundBelCell(bestBel); if (bound != nullptr) { + p->unbind_ctrl_set(bound->bel); ctx->unbindBel(bound->bel); remaining.emplace(p->chain_size[bound->name] * p->cfg.get_cell_legalisation_weight(ctx, bound), bound->name); } + p->bind_ctrl_set(bestBel, ci->name); ctx->bindBel(bestBel, ci, STRENGTH_WEAK); placed = true; Loc loc = ctx->getBelLocation(bestBel); @@ -1095,15 +1303,20 @@ class HeAPPlacer return std::make_pair(nx, ny); } - void try_place_cell(CellInfo *ci, int nx, int ny) + void try_place_cell(CellInfo *ci, int nx, int ny, int ctrl_set_group = -1) { for (auto sz : fb->at(nx).at(ny)) { // Look through all bels in this tile; checking region constraint if applicable if (!ci->testRegion(sz)) continue; + if (ctrl_set_group != -1 && p->z_to_ctrl_set.at(ctx->getBelLocation(sz).z) != ctrl_set_group) + continue; + if (ctrl_set_group == -1 && !p->test_ctrl_set(sz, ci->name)) + continue; // Prefer available bels; unless we are dealing with a wide radius (e.g. difficult control sets) // or occasionally trigger a tiebreaker - if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) { + if (ctx->checkBelAvail(sz) || + (ctrl_set_group == -1 && (radius > ripup_radius || ctx->rng(20000) < 10))) { CellInfo *bound = ctx->getBoundBelCell(sz); if (bound != nullptr) { // Only rip up cells without constraints @@ -1117,13 +1330,15 @@ class HeAPPlacer // New location is not legal; unbind the cell (and rebind the cell we ripped up if // applicable) ctx->unbindBel(sz); - if (bound != nullptr) + if (bound != nullptr) { ctx->bindBel(sz, bound, STRENGTH_WEAK); - } else if (iter_at_radius < need_to_explore) { + } + } else if (ctrl_set_group == -1 && iter_at_radius < need_to_explore) { // It's legal, but we haven't tried enough locations yet ctx->unbindBel(sz); - if (bound != nullptr) + if (bound != nullptr) { ctx->bindBel(sz, bound, STRENGTH_WEAK); + } int input_len = 0; // Compute a fast input wirelength metric at this bel; and save if better than our last // try @@ -1146,11 +1361,14 @@ class HeAPPlacer break; } else { // It's legal, and we've tried enough. Finish. - if (bound != nullptr) + if (bound != nullptr) { + p->unbind_ctrl_set(sz); remaining.emplace(p->chain_size[bound->name] * p->cfg.get_cell_legalisation_weight(ctx, bound), bound->name); + } Loc loc = ctx->getBelLocation(sz); + p->bind_ctrl_set(sz, ci->name); p->cell_locs[ci->name].x = loc.x; p->cell_locs[ci->name].y = loc.y; placed = true; @@ -1160,7 +1378,7 @@ class HeAPPlacer } } - void try_place_cluster(CellInfo *ci, int nx, int ny) + void try_place_cluster(CellInfo *ci, int nx, int ny, int ctrl_set_group = -1) { // We do have relative constraints for (auto sz : fb->at(nx).at(ny)) { @@ -1172,19 +1390,30 @@ class HeAPPlacer if (!ctx->getClusterPlacement(ci->cluster, sz, targets)) continue; + bool ctrl_set_match = false; + for (auto &target : targets) { // Check it satisfies the region constraint if applicable if (!target.first->testRegion(target.second)) goto fail; + if (ctrl_set_group != -1 && ctx->getBelBucketForBel(target.second) == p->cfg.ff_bel_bucket && + p->z_to_ctrl_set.at(ctx->getBelLocation(target.second).z) == ctrl_set_group) + ctrl_set_match = true; CellInfo *bound = ctx->getBoundBelCell(target.second); // Chains cannot overlap; so if we have to ripup a cell make sure it isn't part of a chain if (bound != nullptr) { + if (ctrl_set_group != -1) + goto fail; if (bound->belStrength > (p->cfg.chainRipup ? STRENGTH_STRONG : STRENGTH_WEAK)) goto fail; if (bound->cluster != ClusterId() && (!p->cfg.chainRipup || radius < chain_ripup_radius)) goto fail; } } + + if (ctrl_set_group != -1 && !ctrl_set_match) + goto fail; + // Actually perform the move; keeping track of the moves we make so we can revert them if needed for (auto &target : targets) { CellInfo *bound = ctx->getBoundBelCell(target.second); @@ -1213,17 +1442,24 @@ class HeAPPlacer fail: // If the move turned out to be illegal; revert all the moves we made for (auto &move : moves_made) { - if (ctx->getBoundBelCell(move.first)) + if (ctx->getBoundBelCell(move.first)) { ctx->unbindBel(move.first); - if (move.second != nullptr) + } + if (move.second != nullptr) { ctx->bindBel(move.first, move.second, STRENGTH_WEAK); + } } continue; } + for (auto &move : moves_made) { + if (move.second) + p->unbind_ctrl_set(move.first); + } for (auto &target : targets) { Loc loc = ctx->getBelLocation(target.second); p->cell_locs[target.first->name].x = loc.x; p->cell_locs[target.first->name].y = loc.y; + p->bind_ctrl_set(target.second, target.first->name); // log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z); } for (auto &move : moves_made) { @@ -1931,6 +2167,7 @@ PlacerHeapCfg::PlacerHeapCfg(Context *ctx) timingWeight = ctx->setting("placerHeap/timingWeight"); parallelRefine = ctx->setting("placerHeap/parallelRefine", false); netShareWeight = ctx->setting("placerHeap/netShareWeight", 0); + disableCtrlSet = ctx->setting("placerHeap/noCtrlSet", false); timing_driven = ctx->setting("timing_driven"); solverTolerance = 1e-5; diff --git a/common/place/placer_heap.h b/common/place/placer_heap.h index d91120eb..bbd516e4 100644 --- a/common/place/placer_heap.h +++ b/common/place/placer_heap.h @@ -59,6 +59,35 @@ struct PlacerHeapCfg // this is an optional callback to prioritise certain cells/clusters for legalisation std::function get_cell_legalisation_weight = [](Context *, CellInfo *) { return 1; }; + + bool disableCtrlSet; + + /* + Control set API + HeAP legalisation can be sped up by directly searching for nearby tiles to place an FF with a compatible control + set. Only one shared control set is currently supported, however, as a full validity check is always performed too, + this doesn't need to encompass every possible incompatibility (this is only for performance/QoR not correctness) + + ff_bel_bucket is the bel bucket ID for the flipflop (or logic cell if combined with LUT) bel type + + ff_control_set_groups contains the Z-location of flipflops in a control set group. + Each entry in this represents a SLICE, i.e. the set of flipflops that share the control set. In XC7 this would be + the two SLICEs in a tile. + + get_cell_control_set should return a unique index for every control set possibility. i.e. if this function returns + the same value the flipflops could be placed in the same group. + */ + + BelBucketId ff_bel_bucket = BelBucketId(); + std::vector> ff_control_set_groups; + + // ctrl_set_max_radius is specified as a schedule per iteration, in general this should decrease over time + std::vector ctrl_set_max_radius; + + // TODO: control sets might have a hierarchy, like ultrascale+ CE vs CLK/SR + std::function get_cell_control_set = [](Context *, const CellInfo *) { + return -1; + }; }; extern bool placer_heap(Context *ctx, PlacerHeapCfg cfg); diff --git a/himbaechel/uarch/xilinx/xilinx.cc b/himbaechel/uarch/xilinx/xilinx.cc index ad41cdbd..0865a14b 100644 --- a/himbaechel/uarch/xilinx/xilinx.cc +++ b/himbaechel/uarch/xilinx/xilinx.cc @@ -39,6 +39,30 @@ NEXTPNR_NAMESPACE_BEGIN +struct FFControlSet +{ + unsigned flags = 0; + enum + { + IS_LATCH = 1, + IS_CLKINV = 2, + IS_SRINV = 4, + FFSYNC = 8, + }; + IdString clk, sr, ce; + bool operator==(const FFControlSet &other) const + { + return flags == other.flags && clk == other.clk && ce == other.ce && sr == other.sr; + }; + unsigned hash() const + { + unsigned hash = mkhash(clk.hash(), sr.hash()); + hash = mkhash(hash, ce.hash()); + hash = mkhash(hash, flags); + return hash; + } +}; + XilinxImpl::~XilinxImpl() {}; po::options_description XilinxImpl::getUArchOptions() @@ -274,7 +298,11 @@ bool XilinxImpl::is_pip_unavail(PipId pip) const return false; } -void XilinxImpl::prePlace() { assign_cell_tags(); } +void XilinxImpl::prePlace() +{ + assign_cell_tags(); + index_control_sets(); +} void XilinxImpl::postPlace() { @@ -295,6 +323,21 @@ void XilinxImpl::configurePlacerHeap(PlacerHeapCfg &cfg) // Place memory first, because they require entire SLICEMs return tags->lut.is_memory ? 100 : 1; }; + + cfg.ff_bel_bucket = id_SLICE_FFX; + cfg.ff_control_set_groups.resize(2); + for (int z = 0; z < 8; z++) { + cfg.ff_control_set_groups.at(z / 4).push_back((z << 4) | BEL_FF); + cfg.ff_control_set_groups.at(z / 4).push_back((z << 4) | BEL_FF2); + } + cfg.ctrl_set_max_radius = std::vector{18, 15, 12, 9, 6, 3}; + + cfg.get_cell_control_set = [this](Context *, const CellInfo *ci) { + if (ci->type != id_SLICE_FFX) + return -1; + auto tags = get_tags(ci); + return tags->ff.control_set; + }; } void XilinxImpl::configurePlacerStatic(PlacerStaticCfg &cfg) @@ -542,6 +585,26 @@ void XilinxImpl::assign_cell_tags() } } +void XilinxImpl::index_control_sets() +{ + idict control_sets; + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->type == id_SLICE_FFX) { + auto &ct = cell_tags.at(ci->flat_index); + FFControlSet ctrl_set; + ctrl_set.clk = ct.ff.clk ? ct.ff.clk->name : IdString(); + ctrl_set.ce = ct.ff.ce ? ct.ff.ce->name : IdString(); + ctrl_set.sr = ct.ff.sr ? ct.ff.sr->name : IdString(); + ctrl_set.flags = (ct.ff.is_clkinv ? FFControlSet::IS_CLKINV : 0) | + (ct.ff.is_srinv ? FFControlSet::IS_SRINV : 0) | + (ct.ff.is_latch ? FFControlSet::IS_LATCH : 0) | (ct.ff.ffsync ? FFControlSet::FFSYNC : 0); + ct.ff.control_set = control_sets(ctrl_set); + } + } + log_info("Indexed %d control sets.\n", int(control_sets.size())); +} + bool XilinxImpl::is_general_routing(WireId wire) const { IdString intent = ctx->getWireType(wire); diff --git a/himbaechel/uarch/xilinx/xilinx.h b/himbaechel/uarch/xilinx/xilinx.h index ceaa2c87..434d85c5 100644 --- a/himbaechel/uarch/xilinx/xilinx.h +++ b/himbaechel/uarch/xilinx/xilinx.h @@ -49,6 +49,7 @@ struct XilinxCellTags bool is_latch, is_clkinv, is_srinv, ffsync; bool is_paired; NetInfo *clk, *sr, *ce, *d; + int32_t control_set; } ff; struct { @@ -183,6 +184,7 @@ struct XilinxImpl : HimbaechelAPI private: HimbaechelHelpers h; void assign_cell_tags(); + void index_control_sets(); }; NEXTPNR_NAMESPACE_END