From 501b36e64683b9e747787febc33a83c3f8bed943 Mon Sep 17 00:00:00 2001 From: myrtle Date: Tue, 24 Feb 2026 20:56:54 +0100 Subject: [PATCH] gowin placement performance improvements, phase 3 (#1646) * gowin: Improve placer performance Signed-off-by: gatecat * Add blocker cells for LUTRAM Signed-off-by: gatecat * gowin: Faster validity checks Signed-off-by: gatecat * heap: Improve macro handling, in verbose report per cell type Signed-off-by: gatecat --------- Signed-off-by: gatecat --- common/place/placer_heap.cc | 31 +++++++- himbaechel/uarch/gowin/constids.inc | 2 + himbaechel/uarch/gowin/gowin.cc | 109 +++++++++++++++++++--------- himbaechel/uarch/gowin/pack_luts.cc | 41 +++++++++++ 4 files changed, 146 insertions(+), 37 deletions(-) diff --git a/common/place/placer_heap.cc b/common/place/placer_heap.cc index 2bb9fec3..8a53f43e 100644 --- a/common/place/placer_heap.cc +++ b/common/place/placer_heap.cc @@ -363,6 +363,12 @@ class HeAPPlacer log_info(" of which spreading cells: %.02fs\n", cl_time); log_info(" of which strict legalisation: %.02fs\n", sl_time); + if (ctx->verbose) { + for (auto pair : time_per_cell_type) { + log_info(" %s %.03fs\n", ctx->nameOf(pair.first), pair.second); + } + } + ctx->check(); lock.unlock_early(); @@ -398,6 +404,8 @@ class HeAPPlacer dict constraint_region_bounds; + dict time_per_cell_type; + // In some cases, we can't use bindBel because we allow overlap in the earlier stages. 
So we use this custom // structure instead struct CellLocation @@ -876,6 +884,10 @@ class HeAPPlacer // Was now placed, ignore if (ci->bel != BelId()) continue; + std::chrono::high_resolution_clock::time_point ci_startt; + if (ctx->verbose) + ci_startt = std::chrono::high_resolution_clock::now(); + if (ctx->debug) log_info(" Legalising %s (%s) priority=%d\n", top.second.c_str(ctx), ci->type.c_str(ctx), top.first); FastBels::FastBelsData *fb; @@ -1135,6 +1147,12 @@ class HeAPPlacer } total_iters_for_cell++; + + + } + if (ctx->verbose) { + auto ci_endt = std::chrono::high_resolution_clock::now(); + time_per_cell_type[ci->type] += std::chrono::duration(ci_endt - ci_startt).count(); } } auto endt = std::chrono::high_resolution_clock::now(); @@ -1274,6 +1292,8 @@ class HeAPPlacer pool buckets; dict type_index; std::vector>> occupancy; + std::vector>> fixed_occupancy; + std::vector> groups; std::vector> chaines; std::map cell_extents; @@ -1291,7 +1311,7 @@ class HeAPPlacer { if (x >= int(fb.at(type)->size()) || y >= int(fb.at(type)->at(x).size())) return 0; - return int(fb.at(type)->at(x).at(y).size()); + return std::max(0, int(fb.at(type)->at(x).at(y).size()) - fixed_occupancy.at(x).at(y).at(type)); } bool is_cell_fixed(const CellInfo &cell) const @@ -1305,6 +1325,8 @@ class HeAPPlacer { occupancy.resize(p->max_x + 1, std::vector>(p->max_y + 1, std::vector(buckets.size(), 0))); + fixed_occupancy.resize(p->max_x + 1, + std::vector>(p->max_y + 1, std::vector(buckets.size(), 0))); groups.resize(p->max_x + 1, std::vector(p->max_y + 1, -1)); chaines.resize(p->max_x + 1, std::vector(p->max_y + 1)); cells_at_location.resize(p->max_x + 1, std::vector>(p->max_y + 1)); @@ -1339,8 +1361,11 @@ class HeAPPlacer if (cell.belStrength > STRENGTH_STRONG) { continue; } - - occupancy.at(cell_loc.second.x).at(cell_loc.second.y).at(cell_index(cell))++; + if (cell.cluster != ClusterId() && is_cell_fixed(*ctx->getClusterRootCell(cell.cluster))) { + 
fixed_occupancy.at(cell_loc.second.x).at(cell_loc.second.y).at(cell_index(cell))++; + } else { + occupancy.at(cell_loc.second.x).at(cell_loc.second.y).at(cell_index(cell))++; + } // Compute ultimate extent of each chain root if (cell.cluster != ClusterId()) { diff --git a/himbaechel/uarch/gowin/constids.inc b/himbaechel/uarch/gowin/constids.inc index 5726668f..d681b881 100644 --- a/himbaechel/uarch/gowin/constids.inc +++ b/himbaechel/uarch/gowin/constids.inc @@ -658,6 +658,8 @@ X(LUT4) X(LUT5) X(LUT6) X(LUT7) +X(BLOCKER_LUT) +X(BLOCKER_FF) X(IOBA) X(IOBB) diff --git a/himbaechel/uarch/gowin/gowin.cc b/himbaechel/uarch/gowin/gowin.cc index f14dc34a..922b296b 100644 --- a/himbaechel/uarch/gowin/gowin.cc +++ b/himbaechel/uarch/gowin/gowin.cc @@ -15,6 +15,7 @@ #include "gowin.h" #include "gowin_utils.h" #include "pack.h" +#include "array2d.h" #include "placer_heap.h" @@ -112,6 +113,8 @@ struct GowinImpl : HimbaechelAPI bool slice_valid(int x, int y, int z) const; bool dsp_valid(Loc l, IdString bel_type, bool explain_invalid) const; bool hclk_valid(BelId bel, IdString bel_type) const; + + array2d> fast_logic_cell; }; struct GowinArch : HimbaechelArch @@ -616,6 +619,13 @@ void GowinImpl::prePlace() { place_constrained_hclk_cells(); assign_cell_info(); + fast_logic_cell.reset(ctx->getGridDimX(), ctx->getGridDimY()); + for (auto bel : ctx->getBels()) { + if (ctx->getBelType(bel) == id_LUT4) { + Loc loc = ctx->getBelLocation(bel); + fast_logic_cell.at(loc.x, loc.y).resize(37); + } + } } void GowinImpl::postPlace() @@ -711,6 +721,19 @@ void GowinImpl::postRoute() } } } + std::vector to_remove; + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->type.in(id_BLOCKER_LUT, id_BLOCKER_FF)) { + to_remove.push_back(ci); + } + } + for (auto ci : to_remove) { + auto root = ctx->cells.at(ci->cluster).get(); + root->constr_children.erase(std::remove_if(root->constr_children.begin(), + root->constr_children.end(), [&](CellInfo *c) { return c == ci; }), root->constr_children.end()); // range erase: no-op when ci is absent, instead of erase(end()) UB +
ctx->cells.erase(ci->name); + } } bool GowinImpl::isBelLocationValid(BelId bel, bool explain_invalid) const @@ -763,10 +786,10 @@ IdString GowinImpl::getBelBucketForCellType(IdString cell_type) const if (cell_type.in(id_MIPI_OBUF, id_MIPI_OBUF_A)) { return id_MIPI_OBUF; } - if (type_is_lut(cell_type)) { + if (type_is_lut(cell_type) || cell_type == id_BLOCKER_LUT) { return id_LUT4; } - if (type_is_dff(cell_type)) { + if (type_is_dff(cell_type) || cell_type == id_BLOCKER_FF) { return id_DFF; } if (type_is_ssram(cell_type)) { @@ -804,10 +827,10 @@ bool GowinImpl::isValidBelForCellType(IdString cell_type, BelId bel) const return cell_type.in(id_MIPI_OBUF, id_MIPI_OBUF_A); } if (bel_type == id_LUT4) { - return type_is_lut(cell_type); + return type_is_lut(cell_type) || cell_type == id_BLOCKER_LUT; } if (bel_type == id_DFF) { - return type_is_dff(cell_type); + return type_is_dff(cell_type) || cell_type == id_BLOCKER_FF; } if (bel_type == id_RAM16SDP4) { return type_is_ssram(cell_type); @@ -1049,39 +1072,44 @@ bool GowinImpl::dsp_valid(Loc l, IdString bel_type, bool explain_invalid) const bool GowinImpl::slice_valid(int x, int y, int z) const { - const CellInfo *lut = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z * 2))); - const CellInfo *ff = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z * 2 + 1))); + auto &bels = fast_logic_cell.at(x, y); + const CellInfo *lut = bels.at(z * 2); + const CellInfo *ff = bels.at(z * 2 + 1); // There are only 6 ALUs - const CellInfo *alu = (z < 6) ? ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z + BelZ::ALU0_Z))) : nullptr; - const CellInfo *ramw = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, BelZ::RAMW_Z))); + const CellInfo *alu = (z < 6) ? 
bels.at(z + BelZ::ALU0_Z) : nullptr; + const CellInfo *ramw = bels.at(BelZ::RAMW_Z); - if (alu && lut) { + auto is_not_blocker = [](const CellInfo *ci) { + return ci && !ci->type.in(id_BLOCKER_LUT, id_BLOCKER_FF); + }; + + if (alu && lut && lut->type != id_BLOCKER_LUT) { return false; } if (ramw) { // FFs in slices 4 and 5 are not allowed // also temporarily disallow FF to be placed near RAM - if (ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 0 * 2 + 1))) || - ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 1 * 2 + 1))) || - ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 2 * 2 + 1))) || - ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 3 * 2 + 1))) || - ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 4 * 2 + 1))) || - ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 5 * 2 + 1)))) { + if (is_not_blocker(bels.at(0 * 2 + 1)) || + is_not_blocker(bels.at(1 * 2 + 1)) || + is_not_blocker(bels.at(2 * 2 + 1)) || + is_not_blocker(bels.at(3 * 2 + 1)) || + is_not_blocker(bels.at(4 * 2 + 1)) || + is_not_blocker(bels.at(5 * 2 + 1))) { return false; } if (gwu.has_DFF67()) { - if (ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 6 * 2 + 1))) || - ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 7 * 2 + 1)))) { + if (is_not_blocker(bels.at(6 * 2 + 1)) || + is_not_blocker(bels.at(7 * 2 + 1))) { return false; } } // ALU/LUTs in slices 4, 5, 6, 7 are not allowed for (int i = 4; i < 8; ++i) { - if (ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, i * 2)))) { + if (is_not_blocker(bels.at(i * 2))) { return false; } - if (i < 6 && ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, i + BelZ::ALU0_Z)))) { + if (i < 6 && bels.at(i + BelZ::ALU0_Z)) { return false; } } @@ -1090,17 +1118,17 @@ bool GowinImpl::slice_valid(int x, int y, int z) const // check for ALU/LUT in the adjacent cell int adj_lut_z = (1 - (z & 1) * 2 + z) * 2; int adj_alu_z = adj_lut_z / 2 + BelZ::ALU0_Z; - const CellInfo *adj_lut = 
ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, adj_lut_z))); - const CellInfo *adj_ff = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, adj_lut_z + 1))); + const CellInfo *adj_lut = bels.at(adj_lut_z); + const CellInfo *adj_ff = bels.at(adj_lut_z + 1); const CellInfo *adj_alu = adj_alu_z < (6 + BelZ::ALU0_Z) - ? ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, adj_alu_z))) + ? bels.at(adj_alu_z) : nullptr; - if ((alu && (adj_lut || (adj_ff && !adj_alu))) || ((lut || (ff && !alu)) && adj_alu)) { + if ((alu && ((adj_lut && adj_lut->type != id_BLOCKER_LUT) || (adj_ff && !adj_alu))) || (((lut && lut->type != id_BLOCKER_LUT) || (ff && !alu)) && adj_alu)) { return false; } - if (ff) { + if (ff && ff->type != id_BLOCKER_FF) { static std::vector mux_z = {BelZ::MUX20_Z, BelZ::MUX21_Z, BelZ::MUX20_Z + 4, BelZ::MUX23_Z, BelZ::MUX20_Z + 8, BelZ::MUX21_Z + 8, BelZ::MUX20_Z + 12, BelZ::MUX27_Z}; const auto &ff_data = fast_cell_info.at(ff->flat_index); @@ -1108,7 +1136,7 @@ bool GowinImpl::slice_valid(int x, int y, int z) const // check implcit LUT(ALU) -> FF connection NPNR_ASSERT(!ramw); // XXX shouldn't happen for now if (lut || alu) { - if (lut) { + if (lut && lut->type != id_BLOCKER_LUT) { src = fast_cell_info.at(lut->flat_index).lut_f; } else { src = fast_cell_info.at(alu->flat_index).alu_sum; @@ -1139,7 +1167,7 @@ bool GowinImpl::slice_valid(int x, int y, int z) const // The 4th, 5th, 6th, and 7th DFFs have the same control wires. Let's check this. 
const int adj_top_ff_z = (5 - (z >> 1)) * 4 + 1; for (int i = 0; i < 4; i += 2) { - const CellInfo *adj_top_ff = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, adj_top_ff_z + i))); + const CellInfo *adj_top_ff = bels.at(adj_top_ff_z + i); if (adj_top_ff) { const auto &adj_top_ff_data = fast_cell_info.at(adj_top_ff->flat_index); if (adj_top_ff_data.ff_lsr != ff_data.ff_lsr) { @@ -1253,6 +1281,22 @@ bool GowinImpl::getClusterPlacement(ClusterId cluster, BelId root_bel, void GowinImpl::notifyBelChange(BelId bel, CellInfo *cell) { + + IdString bel_type = ctx->getBelType(bel); + switch (bel_type.hash()) { + case ID_LUT4: /* fall-through */ + case ID_DFF: + case ID_ALU: + case ID_RAM16SDP4: + case ID_MUX2_LUT5: + case ID_MUX2_LUT6: + case ID_MUX2_LUT7: + case ID_MUX2_LUT8: + auto loc = ctx->getBelLocation(bel); + fast_logic_cell.at(loc.x, loc.y).at(loc.z) = cell; + return; + } + if (cell != nullptr && !is_dsp(cell)) { return; } @@ -1315,16 +1359,13 @@ void GowinImpl::notifyBelChange(BelId bel, CellInfo *cell) void GowinImpl::configurePlacerHeap(PlacerHeapCfg &cfg) { - // SLICE types are closely associated with each other + // Use cell groups to enforce a legalisation order cfg.cellGroups.emplace_back(); - cfg.cellGroups.back().insert(id_LUT4); - cfg.cellGroups.back().insert(id_DFF); - cfg.cellGroups.back().insert(id_ALU); - cfg.cellGroups.back().insert(id_MUX2_LUT5); - cfg.cellGroups.back().insert(id_MUX2_LUT6); - cfg.cellGroups.back().insert(id_MUX2_LUT7); - cfg.cellGroups.back().insert(id_MUX2_LUT8); cfg.cellGroups.back().insert(id_RAM16SDP4); + cfg.cellGroups.emplace_back(); + cfg.cellGroups.back().insert(id_ALU); + + cfg.placeAllAtOnce = true; // Treat control and constants like IO buffers, because they have only one possible location cfg.ioBufTypes.insert(id_GOWIN_VCC); diff --git a/himbaechel/uarch/gowin/pack_luts.cc b/himbaechel/uarch/gowin/pack_luts.cc index 6d8b5b02..5173bbb2 100644 --- a/himbaechel/uarch/gowin/pack_luts.cc +++ 
b/himbaechel/uarch/gowin/pack_luts.cc @@ -479,6 +479,27 @@ void GowinPacker::pack_alus(void) for (auto &ncell : new_cells) { ctx->cells[ncell->name] = std::move(ncell); } + new_cells.clear(); + // The placer doesn't know "a priori" that LUTs and ALUs conflict. So create blocker LUTs to make this explicit and reduce wasted legalisation effort + for (auto &cell : ctx->cells) { + auto ci = cell.second.get(); + if (ci->cluster == ClusterId()) { + continue; + } + if (is_alu(ci)) { + auto cell = std::make_unique(ctx, ctx->idf("%s_BLOCKER_LUT", ctx->nameOf(ci)), id_BLOCKER_LUT); + cell->cluster = ci->cluster; + ctx->cells.at(cell->cluster)->constr_children.push_back(cell.get()); + cell->constr_abs_z = true; + cell->constr_x = ci->constr_x; + cell->constr_y = ci->constr_y; + cell->constr_z = 2 * (ci->constr_z - (ci->constr_abs_z ? BelZ::ALU0_Z : 0)); + new_cells.emplace_back(std::move(cell)); + } + } + for (auto &ncell : new_cells) { + ctx->cells[ncell->name] = std::move(ncell); + } } // =================================== @@ -587,6 +608,26 @@ void GowinPacker::pack_ssram(void) } } } + for (int i = 4; i < 8; ++i) { + auto cell = std::make_unique(ctx, ctx->idf("%s_BLOCKER_LUT_%d", ctx->nameOf(ci), i), id_BLOCKER_LUT); + cell->cluster = ci->cluster; + ci->constr_children.push_back(cell.get()); + cell->constr_abs_z = true; + cell->constr_x = 0; + cell->constr_y = 0; + cell->constr_z = 2 * i; + new_cells.emplace_back(std::move(cell)); + } + for (int i = 0; i < (gwu.has_DFF67() ? 8 : 6); ++i) { + auto cell = std::make_unique(ctx, ctx->idf("%s_BLOCKER_FF_%d", ctx->nameOf(ci), i), id_BLOCKER_FF); + cell->cluster = ci->cluster; + ci->constr_children.push_back(cell.get()); + cell->constr_abs_z = true; + cell->constr_x = 0; + cell->constr_y = 0; + cell->constr_z = 2 * i + 1; + new_cells.emplace_back(std::move(cell)); + } } } for (auto &ncell : new_cells) {