control set awareness in the HeAP legaliser (#1678)

* xilinx: Index control sets

Signed-off-by: gatecat <gatecat@ds0.me>

* heap: data structure for control sets

Signed-off-by: gatecat <gatecat@ds0.me>

* heap: fail faster on control set mismatch

Signed-off-by: gatecat <gatecat@ds0.me>

* xilinx: Reduce control set search radius

Signed-off-by: gatecat <gatecat@ds0.me>

* Fix compiler warning

Signed-off-by: gatecat <gatecat@ds0.me>

* heap: Allow disabling control set awareness for comparison/debug

Signed-off-by: gatecat <gatecat@ds0.me>

* heap: Add some notes about control sets

Signed-off-by: gatecat <gatecat@ds0.me>

* heap: Fix typo and regression

Signed-off-by: gatecat <gatecat@ds0.me>

* heap: Add a schedule for ctrlset search radius

Signed-off-by: gatecat <gatecat@ds0.me>

* heap: Tidy up

Signed-off-by: gatecat <gatecat@ds0.me>

---------

Signed-off-by: gatecat <gatecat@ds0.me>
This commit is contained in:
myrtle 2026-04-02 13:36:50 +02:00 committed by GitHub
parent 12bb6df237
commit c6b876fc85
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 349 additions and 12 deletions

View File

@ -62,6 +62,7 @@ template <typename T> class array2d
if ((new_width * new_height) > m_size) {
delete[] data;
m_size = new_width * new_height;
NPNR_ASSERT(m_size >= 0);
data = new T[m_size];
}
m_width = new_width;

View File

@ -391,6 +391,8 @@ po::options_description CommandHandler::getGeneralOptions()
"allow placer to attempt up to max(10000, total cells^2 / N) iterations to place a cell (int "
"N, default: 8, 0 for no timeout)");
general.add_options()("placer-heap-no-ctrl-set", "disable control set awareness in placer heap");
general.add_options()("static-dump-density", "write density csv files during placer-static flow");
#if !defined(NPNR_DISABLE_THREADS)
@ -536,6 +538,9 @@ void CommandHandler::setupContext(Context *ctx)
ctx->settings[ctx->id("placerHeap/cellPlacementTimeout")] =
std::to_string(std::max(0, vm["placer-heap-cell-placement-timeout"].as<int>()));
if (vm.count("placer-heap-no-ctrl-set"))
ctx->settings[ctx->id("placerHeap/noCtrlSet")] = true;
if (vm.count("parallel-refine"))
ctx->settings[ctx->id("placerHeap/parallelRefine")] = true;

View File

@ -41,6 +41,7 @@
#include <numeric>
#include <queue>
#include <tuple>
#include "array2d.h"
#include "fast_bels.h"
#include "log.h"
#include "nextpnr.h"
@ -132,6 +133,29 @@ template <typename T> struct EquationSystem
}
};
// Occupancy record for a single control set group (a set of flipflop bels that share one control set)
// within a tile. `count` is the number of cells currently bound into the group and `ctrl_set` is the
// control set index shared by all of them; `ctrl_set` is -1 whenever the group is empty.
struct ControlSetState
{
    int32_t ctrl_set = -1;
    int32_t count = 0;

    // Record one more cell using `ctrl_set` in this group. The first cell bound into an empty group
    // decides the group's control set; every later cell must match it (asserted).
    void bind(int32_t ctrl_set)
    {
        if (count == 0) {
            this->ctrl_set = ctrl_set;
        } else {
            NPNR_ASSERT(this->ctrl_set == ctrl_set);
        }
        ++count;
    }

    // Remove one cell from this group. Asserts that the count never drops below zero, and forgets the
    // group's control set once the last cell has gone.
    void unbind()
    {
        --count;
        NPNR_ASSERT(count >= 0);
        if (count == 0)
            this->ctrl_set = -1;
    }

    // True if a cell using `ctrl_set` could join this group: either the group is empty or it already
    // holds that same control set. Pure query, hence const so it is usable through const references.
    bool check(int32_t ctrl_set) const { return count == 0 || ctrl_set == this->ctrl_set; }
};
} // namespace
class HeAPPlacer
@ -156,6 +180,7 @@ class HeAPPlacer
ScopeLock<Context> lock(ctx);
place_constraints();
build_fast_bels();
alloc_control_sets();
seed_placement();
update_all_chains();
wirelen_t hpwl = total_hpwl();
@ -182,7 +207,8 @@ class HeAPPlacer
}
wirelen_t solved_hpwl = 0, spread_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits<wirelen_t>::max();
int iter = 0, stalled = 0;
iter = 0;
int stalled = 0;
std::vector<std::tuple<CellInfo *, BelId, PlaceStrength>> solution;
@ -425,9 +451,14 @@ class HeAPPlacer
std::vector<CellInfo *> solve_cells;
dict<ClusterId, std::vector<CellInfo *>> cluster2cells;
dict<IdString, int> cell_ctrl_set;
dict<ClusterId, int> chain_size;
// Tracking control sets
array2d<std::vector<ControlSetState>> control_sets;
dict<int, int> z_to_ctrl_set;
// Performance counting
double solve_time = 0, cl_time = 0, sl_time = 0;
int iter = 0;
// Place cells with the BEL attribute set to constrain them
void place_constraints()
@ -528,6 +559,144 @@ class HeAPPlacer
}
}
// Prepare control set tracking for this placement run: size the per-tile group state, build the
// bel Z -> group index lookup and record the control set of every cell in the flipflop bucket.
// Does nothing when no flipflop bucket is configured or control set awareness is disabled.
void alloc_control_sets()
{
    const bool tracking_enabled = cfg.ff_bel_bucket != BelBucketId() && !cfg.disableCtrlSet;
    if (!tracking_enabled)
        return;

    FastBels::FastBelsData *ff_bels;
    fast_bels.getBelsForBelBucket(cfg.ff_bel_bucket, &ff_bels);
    control_sets.reset(max_x + 1, max_y + 1);

    const auto &groups = cfg.ff_control_set_groups;

    // Only tiles that hold at least one bel of the flipflop bucket get per-group state; the vector for
    // every other tile stays empty, which the other control set helpers use as a "no flipflops" marker.
    for (int x = 0; x <= max_x && x < int(ff_bels->size()); x++) {
        auto &column = ff_bels->at(x);
        for (int y = 0; y <= max_y && y < int(column.size()); y++) {
            if (column.at(y).empty())
                continue;
            control_sets.at(x, y).resize(groups.size());
        }
    }

    // Map every listed bel Z coordinate to the index of the group that contains it
    for (int g = 0; g < int(groups.size()); g++) {
        for (int z : groups.at(g))
            z_to_ctrl_set[z] = g;
    }

    // Record the control set of each flipflop cell; cells reporting -1 are left out of cell_ctrl_set.
    // Cells that already have a bel are counted into that bel's group straight away.
    for (const auto &entry : ctx->cells) {
        const CellInfo *ci = entry.second.get();
        if (ctx->getBelBucketForCellType(ci->type) != cfg.ff_bel_bucket)
            continue;
        const auto ctrl_set = cfg.get_cell_control_set(ctx, ci);
        if (ctrl_set == -1)
            continue;
        cell_ctrl_set[ci->name] = ctrl_set;
        if (ci->bel != BelId())
            bind_ctrl_set(ci->bel, ci->name);
    }
}
// Quick control set compatibility test for putting `cell` on `bel`.
// Returns true when tracking is inactive or `bel` is not in the flipflop bucket. Otherwise returns
// whether the bel's control set group is empty or already holds the cell's control set.
// NOTE(review): all lookups use .at(), so a bel Z outside every configured group or a cell with no
// recorded control set is presumably a hard failure here rather than "compatible" - confirm intended.
bool test_ctrl_set(BelId bel, IdString cell)
{
    const bool tracking_enabled = cfg.ff_bel_bucket != BelBucketId() && !cfg.disableCtrlSet;
    if (!tracking_enabled || ctx->getBelBucketForBel(bel) != cfg.ff_bel_bucket)
        return true;

    const auto loc = ctx->getBelLocation(bel);
    auto &tile = control_sets.at(loc.x, loc.y);
    const int group = z_to_ctrl_set.at(loc.z);
    return tile.at(group).check(cell_ctrl_set.at(cell));
}
// Count `cell` into the control set group that `bel` belongs to.
// No-op when tracking is inactive or `bel` is not in the flipflop bucket.
// NOTE(review): unlike unbind_ctrl_set, the tile/group/cell lookups here are unguarded .at() calls, so
// a bel or cell unknown to the tracking tables is presumably a hard failure - confirm intended.
void bind_ctrl_set(BelId bel, IdString cell)
{
    const bool tracking_enabled = cfg.ff_bel_bucket != BelBucketId() && !cfg.disableCtrlSet;
    if (!tracking_enabled || ctx->getBelBucketForBel(bel) != cfg.ff_bel_bucket)
        return;

    const auto loc = ctx->getBelLocation(bel);
    auto &tile = control_sets.at(loc.x, loc.y);
    const int group = z_to_ctrl_set.at(loc.z);
    tile.at(group).bind(cell_ctrl_set.at(cell));
}
// Remove one cell from the control set group that `bel` belongs to.
// No-op when tracking is inactive, when `bel` is not in the flipflop bucket, when its tile has no
// control set state allocated, or when its Z coordinate is not part of any configured group.
void unbind_ctrl_set(BelId bel)
{
    const bool tracking_enabled = cfg.ff_bel_bucket != BelBucketId() && !cfg.disableCtrlSet;
    if (!tracking_enabled || ctx->getBelBucketForBel(bel) != cfg.ff_bel_bucket)
        return;

    const auto loc = ctx->getBelLocation(bel);
    auto &tile = control_sets.at(loc.x, loc.y);
    if (tile.empty())
        return;

    const auto group_it = z_to_ctrl_set.find(loc.z);
    if (group_it != z_to_ctrl_set.end())
        tile.at(group_it->second).unbind();
}
// Work out the single control set used by a cluster, or -1 if there is no such thing.
// -1 is returned when tracking is inactive, when any cluster cell has a non-zero x or y cluster offset,
// when the flipflop cells of the cluster disagree on their control set, or when no cell in the cluster
// reports a control set at all.
int32_t get_cluster_control_set(ClusterId cluster)
{
    if (cfg.ff_bel_bucket == BelBucketId() || cfg.disableCtrlSet)
        return -1;

    int32_t shared = -1;
    for (auto member : cluster2cells.at(cluster)) {
        const auto offset = ctx->getClusterOffset(member);
        if (offset.x != 0 || offset.y != 0)
            return -1; // big cluster

        // Only cells in the flipflop bucket that actually report a control set take part
        if (ctx->getBelBucketForCellType(member->type) != cfg.ff_bel_bucket)
            continue;
        const auto member_set = cfg.get_cell_control_set(ctx, member);
        if (member_set == -1)
            continue;

        // mismatch, complex cluster
        if (shared != -1 && shared != member_set)
            return -1;
        shared = member_set;
    }
    return shared;
}
std::vector<Loc> find_control_set_candidates(int cx, int cy, int32_t ctrl_set, int max_radius, int &nonempty)
{
std::vector<Loc> result;
int radius = 1;
nonempty = 0;
auto process_location = [&](int x, int y) {
if (y < 0 || y > max_y)
return;
if (x < 0 || x > max_x)
return;
const auto &tile = control_sets.at(x, y);
if (tile.empty())
return;
++nonempty;
for (int g = 0; g < int(tile.size()); g++) {
if (tile.at(g).count > 0 && tile.at(g).ctrl_set == ctrl_set) {
result.emplace_back(x, y, g);
}
}
};
process_location(cx, cy);
while (radius < max_radius && int(result.size()) < 10) {
for (int y = cy - radius; y <= cy + radius; y++) {
process_location(cx - radius, y);
process_location(cx + radius, y);
}
for (int x = cx - (radius - 1); x <= cx + (radius - 1); x++) {
process_location(x, cy - radius);
process_location(x, cy + radius);
}
++radius;
}
std::stable_sort(result.begin(), result.end(), [&](Loc a, Loc b) {
int d0 = std::abs(a.x - cx) + std::abs(a.y - cy);
int d1 = std::abs(b.y - cx) + std::abs(b.y - cy);
return d0 < d1;
});
return result;
}
// Build and solve in one direction
void build_solve_direction(bool yaxis, int iter)
{
@ -645,12 +814,14 @@ class HeAPPlacer
placed = true;
} else {
ctx->bindBel(bel, ci, STRENGTH_STRONG);
bind_ctrl_set(bel, ci->name);
if (ctx->isBelLocationValid(bel)) {
cell_locs[cell.first].locked = true;
placed = true;
bels_used.insert(bel);
} else {
ctx->unbindBel(bel);
unbind_ctrl_set(bel);
available_bels.at(ci->type).push_front(bel);
}
}
@ -908,8 +1079,10 @@ class HeAPPlacer
CellInfo *ci = cell.second.get();
if (ci->bel != BelId() &&
(ci->udata != dont_solve ||
(ci->cluster != ClusterId() && ctx->getClusterRootCell(ci->cluster)->udata != dont_solve)))
(ci->cluster != ClusterId() && ctx->getClusterRootCell(ci->cluster)->udata != dont_solve))) {
p->unbind_ctrl_set(ci->bel);
ctx->unbindBel(ci->bel);
}
}
// At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using
@ -970,6 +1143,39 @@ class HeAPPlacer
log_error("Unable to find legal placement for all cells, design is probably at utilisation limit.\n");
}
if (p->cfg.ff_bel_bucket != BelBucketId() && !p->cfg.disableCtrlSet) {
// Try placing based on same control set in window first
int32_t ctrl_set = -1;
if (ci->cluster != ClusterId()) {
ctrl_set = p->get_cluster_control_set(ci->cluster);
} else if (ctx->getBelBucketForCellType(ci->type) == p->cfg.ff_bel_bucket) {
ctrl_set = p->cfg.get_cell_control_set(ctx, ci);
}
if (ctrl_set != -1) {
int nonempty = 0;
int ctrl_set_radius = p->cfg.ctrl_set_max_radius.at(
std::min(p->iter, int(p->cfg.ctrl_set_max_radius.size()) - 1));
auto candidates =
p->find_control_set_candidates(p->cell_locs.at(ci->name).x, p->cell_locs.at(ci->name).y,
ctrl_set, ctrl_set_radius, nonempty);
// log_info("%s %d/%d %d (%d, %d)\n", ci->name.c_str(ctx), int(candidates.size()), nonempty,
// ctrl_set,
// int(p->cell_locs.at(ci->name).x), int(p->cell_locs.at(ci->name).y));
for (auto loc : candidates) {
if (ci->cluster == ClusterId()) {
try_place_cell(ci, loc.x, loc.y, loc.z);
} else {
try_place_cluster(ci, loc.x, loc.y, loc.z);
}
if (placed) {
return;
}
}
}
}
while (!placed) {
if (p->cfg.cell_placement_timeout > 0 && total_iters_for_cell > p->cfg.cell_placement_timeout)
log_error("Unable to find legal placement for cell '%s' of type '%s' after %d attempts, check "
@ -1028,10 +1234,12 @@ class HeAPPlacer
if (iter_at_radius >= need_to_explore && bestBel != BelId()) {
CellInfo *bound = ctx->getBoundBelCell(bestBel);
if (bound != nullptr) {
p->unbind_ctrl_set(bound->bel);
ctx->unbindBel(bound->bel);
remaining.emplace(p->chain_size[bound->name] * p->cfg.get_cell_legalisation_weight(ctx, bound),
bound->name);
}
p->bind_ctrl_set(bestBel, ci->name);
ctx->bindBel(bestBel, ci, STRENGTH_WEAK);
placed = true;
Loc loc = ctx->getBelLocation(bestBel);
@ -1095,15 +1303,20 @@ class HeAPPlacer
return std::make_pair(nx, ny);
}
void try_place_cell(CellInfo *ci, int nx, int ny)
void try_place_cell(CellInfo *ci, int nx, int ny, int ctrl_set_group = -1)
{
for (auto sz : fb->at(nx).at(ny)) {
// Look through all bels in this tile; checking region constraint if applicable
if (!ci->testRegion(sz))
continue;
if (ctrl_set_group != -1 && p->z_to_ctrl_set.at(ctx->getBelLocation(sz).z) != ctrl_set_group)
continue;
if (ctrl_set_group == -1 && !p->test_ctrl_set(sz, ci->name))
continue;
// Prefer available bels; unless we are dealing with a wide radius (e.g. difficult control sets)
// or occasionally trigger a tiebreaker
if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) {
if (ctx->checkBelAvail(sz) ||
(ctrl_set_group == -1 && (radius > ripup_radius || ctx->rng(20000) < 10))) {
CellInfo *bound = ctx->getBoundBelCell(sz);
if (bound != nullptr) {
// Only rip up cells without constraints
@ -1117,13 +1330,15 @@ class HeAPPlacer
// New location is not legal; unbind the cell (and rebind the cell we ripped up if
// applicable)
ctx->unbindBel(sz);
if (bound != nullptr)
if (bound != nullptr) {
ctx->bindBel(sz, bound, STRENGTH_WEAK);
} else if (iter_at_radius < need_to_explore) {
}
} else if (ctrl_set_group == -1 && iter_at_radius < need_to_explore) {
// It's legal, but we haven't tried enough locations yet
ctx->unbindBel(sz);
if (bound != nullptr)
if (bound != nullptr) {
ctx->bindBel(sz, bound, STRENGTH_WEAK);
}
int input_len = 0;
// Compute a fast input wirelength metric at this bel; and save if better than our last
// try
@ -1146,11 +1361,14 @@ class HeAPPlacer
break;
} else {
// It's legal, and we've tried enough. Finish.
if (bound != nullptr)
if (bound != nullptr) {
p->unbind_ctrl_set(sz);
remaining.emplace(p->chain_size[bound->name] *
p->cfg.get_cell_legalisation_weight(ctx, bound),
bound->name);
}
Loc loc = ctx->getBelLocation(sz);
p->bind_ctrl_set(sz, ci->name);
p->cell_locs[ci->name].x = loc.x;
p->cell_locs[ci->name].y = loc.y;
placed = true;
@ -1160,7 +1378,7 @@ class HeAPPlacer
}
}
void try_place_cluster(CellInfo *ci, int nx, int ny)
void try_place_cluster(CellInfo *ci, int nx, int ny, int ctrl_set_group = -1)
{
// We do have relative constraints
for (auto sz : fb->at(nx).at(ny)) {
@ -1172,19 +1390,30 @@ class HeAPPlacer
if (!ctx->getClusterPlacement(ci->cluster, sz, targets))
continue;
bool ctrl_set_match = false;
for (auto &target : targets) {
// Check it satisfies the region constraint if applicable
if (!target.first->testRegion(target.second))
goto fail;
if (ctrl_set_group != -1 && ctx->getBelBucketForBel(target.second) == p->cfg.ff_bel_bucket &&
p->z_to_ctrl_set.at(ctx->getBelLocation(target.second).z) == ctrl_set_group)
ctrl_set_match = true;
CellInfo *bound = ctx->getBoundBelCell(target.second);
// Chains cannot overlap; so if we have to ripup a cell make sure it isn't part of a chain
if (bound != nullptr) {
if (ctrl_set_group != -1)
goto fail;
if (bound->belStrength > (p->cfg.chainRipup ? STRENGTH_STRONG : STRENGTH_WEAK))
goto fail;
if (bound->cluster != ClusterId() && (!p->cfg.chainRipup || radius < chain_ripup_radius))
goto fail;
}
}
if (ctrl_set_group != -1 && !ctrl_set_match)
goto fail;
// Actually perform the move; keeping track of the moves we make so we can revert them if needed
for (auto &target : targets) {
CellInfo *bound = ctx->getBoundBelCell(target.second);
@ -1213,17 +1442,24 @@ class HeAPPlacer
fail:
// If the move turned out to be illegal; revert all the moves we made
for (auto &move : moves_made) {
if (ctx->getBoundBelCell(move.first))
if (ctx->getBoundBelCell(move.first)) {
ctx->unbindBel(move.first);
if (move.second != nullptr)
}
if (move.second != nullptr) {
ctx->bindBel(move.first, move.second, STRENGTH_WEAK);
}
}
continue;
}
for (auto &move : moves_made) {
if (move.second)
p->unbind_ctrl_set(move.first);
}
for (auto &target : targets) {
Loc loc = ctx->getBelLocation(target.second);
p->cell_locs[target.first->name].x = loc.x;
p->cell_locs[target.first->name].y = loc.y;
p->bind_ctrl_set(target.second, target.first->name);
// log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z);
}
for (auto &move : moves_made) {
@ -1931,6 +2167,7 @@ PlacerHeapCfg::PlacerHeapCfg(Context *ctx)
timingWeight = ctx->setting<int>("placerHeap/timingWeight");
parallelRefine = ctx->setting<bool>("placerHeap/parallelRefine", false);
netShareWeight = ctx->setting<float>("placerHeap/netShareWeight", 0);
disableCtrlSet = ctx->setting<bool>("placerHeap/noCtrlSet", false);
timing_driven = ctx->setting<bool>("timing_driven");
solverTolerance = 1e-5;

View File

@ -59,6 +59,35 @@ struct PlacerHeapCfg
// this is an optional callback to prioritise certain cells/clusters for legalisation
std::function<float(Context *, CellInfo *)> get_cell_legalisation_weight = [](Context *, CellInfo *) { return 1; };
bool disableCtrlSet;
/*
Control set API
HeAP legalisation can be sped up by directly searching for nearby tiles to place an FF with a compatible control
set. Only one shared control set is currently supported; however, as a full validity check is always performed too,
this doesn't need to encompass every possible incompatibility (this is only for performance/QoR, not correctness).
ff_bel_bucket is the bel bucket ID for the flipflop (or logic cell if combined with LUT) bel type
ff_control_set_groups contains the Z-location of flipflops in a control set group.
Each entry in this represents a SLICE, i.e. the set of flipflops that share the control set. In XC7 this would be
the two SLICEs in a tile.
get_cell_control_set should return a unique index for every control set possibility, i.e. if this function returns
the same value for two flipflops, they could be placed in the same group. A return value of -1 (the default) means
the cell has no tracked control set.
*/
BelBucketId ff_bel_bucket = BelBucketId();
std::vector<std::vector<int>> ff_control_set_groups;
// ctrl_set_max_radius is specified as a schedule per iteration, in general this should decrease over time
std::vector<int> ctrl_set_max_radius;
// TODO: control sets might have a hierarchy, like ultrascale+ CE vs CLK/SR
std::function<int32_t(Context *, const CellInfo *)> get_cell_control_set = [](Context *, const CellInfo *) {
return -1;
};
};
extern bool placer_heap(Context *ctx, PlacerHeapCfg cfg);

View File

@ -39,6 +39,30 @@
NEXTPNR_NAMESPACE_BEGIN
// Key describing one flipflop control set: the names of the clk, sr and ce nets plus a bitmask of mode
// flags. Provides equality and hash() so that distinct control sets can be numbered with an idict.
struct FFControlSet
{
    // Bitwise OR of the flag values below
    unsigned flags = 0;
    enum
    {
        IS_LATCH = 0x1,
        IS_CLKINV = 0x2,
        IS_SRINV = 0x4,
        FFSYNC = 0x8,
    };
    IdString clk, sr, ce;

    // Two control sets are the same only if all three nets and all flags agree
    bool operator==(const FFControlSet &other) const
    {
        return clk == other.clk && sr == other.sr && ce == other.ce && flags == other.flags;
    }

    // Hash over every field compared by operator==
    unsigned hash() const { return mkhash(mkhash(mkhash(clk.hash(), sr.hash()), ce.hash()), flags); }
};
XilinxImpl::~XilinxImpl() {};
po::options_description XilinxImpl::getUArchOptions()
@ -274,7 +298,11 @@ bool XilinxImpl::is_pip_unavail(PipId pip) const
return false;
}
void XilinxImpl::prePlace() { assign_cell_tags(); }
void XilinxImpl::prePlace()
{
assign_cell_tags();
index_control_sets();
}
void XilinxImpl::postPlace()
{
@ -295,6 +323,21 @@ void XilinxImpl::configurePlacerHeap(PlacerHeapCfg &cfg)
// Place memory first, because they require entire SLICEMs
return tags->lut.is_memory ? 100 : 1;
};
cfg.ff_bel_bucket = id_SLICE_FFX;
cfg.ff_control_set_groups.resize(2);
for (int z = 0; z < 8; z++) {
cfg.ff_control_set_groups.at(z / 4).push_back((z << 4) | BEL_FF);
cfg.ff_control_set_groups.at(z / 4).push_back((z << 4) | BEL_FF2);
}
cfg.ctrl_set_max_radius = std::vector<int>{18, 15, 12, 9, 6, 3};
cfg.get_cell_control_set = [this](Context *, const CellInfo *ci) {
if (ci->type != id_SLICE_FFX)
return -1;
auto tags = get_tags(ci);
return tags->ff.control_set;
};
}
void XilinxImpl::configurePlacerStatic(PlacerStaticCfg &cfg)
@ -542,6 +585,26 @@ void XilinxImpl::assign_cell_tags()
}
}
void XilinxImpl::index_control_sets()
{
idict<FFControlSet> control_sets;
for (auto &cell : ctx->cells) {
CellInfo *ci = cell.second.get();
if (ci->type == id_SLICE_FFX) {
auto &ct = cell_tags.at(ci->flat_index);
FFControlSet ctrl_set;
ctrl_set.clk = ct.ff.clk ? ct.ff.clk->name : IdString();
ctrl_set.ce = ct.ff.ce ? ct.ff.ce->name : IdString();
ctrl_set.sr = ct.ff.sr ? ct.ff.sr->name : IdString();
ctrl_set.flags = (ct.ff.is_clkinv ? FFControlSet::IS_CLKINV : 0) |
(ct.ff.is_srinv ? FFControlSet::IS_SRINV : 0) |
(ct.ff.is_latch ? FFControlSet::IS_LATCH : 0) | (ct.ff.ffsync ? FFControlSet::FFSYNC : 0);
ct.ff.control_set = control_sets(ctrl_set);
}
}
log_info("Indexed %d control sets.\n", int(control_sets.size()));
}
bool XilinxImpl::is_general_routing(WireId wire) const
{
IdString intent = ctx->getWireType(wire);

View File

@ -49,6 +49,7 @@ struct XilinxCellTags
bool is_latch, is_clkinv, is_srinv, ffsync;
bool is_paired;
NetInfo *clk, *sr, *ce, *d;
int32_t control_set;
} ff;
struct
{
@ -183,6 +184,7 @@ struct XilinxImpl : HimbaechelAPI
private:
HimbaechelHelpers h;
void assign_cell_tags();
void index_control_sets();
};
NEXTPNR_NAMESPACE_END