mirror of https://github.com/YosysHQ/nextpnr.git
gowin placement performance improvements, phase 3 (#1646)
* gowin: Improve placer performance
  Signed-off-by: gatecat <gatecat@ds0.me>
* Add blocker cells for LUTRAM
  Signed-off-by: gatecat <gatecat@ds0.me>
* gowin: Faster validity checks
  Signed-off-by: gatecat <gatecat@ds0.me>
* heap: Improve macro handling, in verbose report per cell type
  Signed-off-by: gatecat <gatecat@ds0.me>
---------
Signed-off-by: gatecat <gatecat@ds0.me>
This commit is contained in:
parent
2400a90e04
commit
501b36e646
|
|
@ -363,6 +363,12 @@ class HeAPPlacer
|
|||
log_info(" of which spreading cells: %.02fs\n", cl_time);
|
||||
log_info(" of which strict legalisation: %.02fs\n", sl_time);
|
||||
|
||||
if (ctx->verbose) {
|
||||
for (auto pair : time_per_cell_type) {
|
||||
log_info(" %s %.03fs\n", ctx->nameOf(pair.first), pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
ctx->check();
|
||||
lock.unlock_early();
|
||||
|
||||
|
|
@ -398,6 +404,8 @@ class HeAPPlacer
|
|||
|
||||
dict<IdString, BoundingBox> constraint_region_bounds;
|
||||
|
||||
dict<IdString, float> time_per_cell_type;
|
||||
|
||||
// In some cases, we can't use bindBel because we allow overlap in the earlier stages. So we use this custom
|
||||
// structure instead
|
||||
struct CellLocation
|
||||
|
|
@ -876,6 +884,10 @@ class HeAPPlacer
|
|||
// Was now placed, ignore
|
||||
if (ci->bel != BelId())
|
||||
continue;
|
||||
std::chrono::high_resolution_clock::time_point ci_startt;
|
||||
if (ctx->verbose)
|
||||
ci_startt = std::chrono::high_resolution_clock::now();
|
||||
|
||||
if (ctx->debug)
|
||||
log_info(" Legalising %s (%s) priority=%d\n", top.second.c_str(ctx), ci->type.c_str(ctx), top.first);
|
||||
FastBels::FastBelsData *fb;
|
||||
|
|
@ -1135,6 +1147,12 @@ class HeAPPlacer
|
|||
}
|
||||
|
||||
total_iters_for_cell++;
|
||||
|
||||
|
||||
}
|
||||
if (ctx->verbose) {
|
||||
auto ci_endt = std::chrono::high_resolution_clock::now();
|
||||
time_per_cell_type[ci->type] += std::chrono::duration<float>(ci_endt - ci_startt).count();
|
||||
}
|
||||
}
|
||||
auto endt = std::chrono::high_resolution_clock::now();
|
||||
|
|
@ -1274,6 +1292,8 @@ class HeAPPlacer
|
|||
pool<BelBucketId> buckets;
|
||||
dict<BelBucketId, size_t> type_index;
|
||||
std::vector<std::vector<std::vector<int>>> occupancy;
|
||||
std::vector<std::vector<std::vector<int>>> fixed_occupancy;
|
||||
|
||||
std::vector<std::vector<int>> groups;
|
||||
std::vector<std::vector<ChainExtent>> chaines;
|
||||
std::map<IdString, ChainExtent> cell_extents;
|
||||
|
|
@ -1291,7 +1311,7 @@ class HeAPPlacer
|
|||
{
|
||||
if (x >= int(fb.at(type)->size()) || y >= int(fb.at(type)->at(x).size()))
|
||||
return 0;
|
||||
return int(fb.at(type)->at(x).at(y).size());
|
||||
return std::max(0, int(fb.at(type)->at(x).at(y).size()) - fixed_occupancy.at(x).at(y).at(type));
|
||||
}
|
||||
|
||||
bool is_cell_fixed(const CellInfo &cell) const
|
||||
|
|
@ -1305,6 +1325,8 @@ class HeAPPlacer
|
|||
{
|
||||
occupancy.resize(p->max_x + 1,
|
||||
std::vector<std::vector<int>>(p->max_y + 1, std::vector<int>(buckets.size(), 0)));
|
||||
fixed_occupancy.resize(p->max_x + 1,
|
||||
std::vector<std::vector<int>>(p->max_y + 1, std::vector<int>(buckets.size(), 0)));
|
||||
groups.resize(p->max_x + 1, std::vector<int>(p->max_y + 1, -1));
|
||||
chaines.resize(p->max_x + 1, std::vector<ChainExtent>(p->max_y + 1));
|
||||
cells_at_location.resize(p->max_x + 1, std::vector<std::vector<CellInfo *>>(p->max_y + 1));
|
||||
|
|
@ -1339,8 +1361,11 @@ class HeAPPlacer
|
|||
if (cell.belStrength > STRENGTH_STRONG) {
|
||||
continue;
|
||||
}
|
||||
|
||||
occupancy.at(cell_loc.second.x).at(cell_loc.second.y).at(cell_index(cell))++;
|
||||
if (cell.cluster != ClusterId() && is_cell_fixed(*ctx->getClusterRootCell(cell.cluster))) {
|
||||
fixed_occupancy.at(cell_loc.second.x).at(cell_loc.second.y).at(cell_index(cell))++;
|
||||
} else {
|
||||
occupancy.at(cell_loc.second.x).at(cell_loc.second.y).at(cell_index(cell))++;
|
||||
}
|
||||
|
||||
// Compute ultimate extent of each chain root
|
||||
if (cell.cluster != ClusterId()) {
|
||||
|
|
|
|||
|
|
@ -658,6 +658,8 @@ X(LUT4)
|
|||
X(LUT5)
|
||||
X(LUT6)
|
||||
X(LUT7)
|
||||
X(BLOCKER_LUT)
|
||||
X(BLOCKER_FF)
|
||||
|
||||
X(IOBA)
|
||||
X(IOBB)
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#include "gowin.h"
|
||||
#include "gowin_utils.h"
|
||||
#include "pack.h"
|
||||
#include "array2d.h"
|
||||
|
||||
#include "placer_heap.h"
|
||||
|
||||
|
|
@ -112,6 +113,8 @@ struct GowinImpl : HimbaechelAPI
|
|||
bool slice_valid(int x, int y, int z) const;
|
||||
bool dsp_valid(Loc l, IdString bel_type, bool explain_invalid) const;
|
||||
bool hclk_valid(BelId bel, IdString bel_type) const;
|
||||
|
||||
array2d<std::vector<CellInfo*>> fast_logic_cell;
|
||||
};
|
||||
|
||||
struct GowinArch : HimbaechelArch
|
||||
|
|
@ -616,6 +619,13 @@ void GowinImpl::prePlace()
|
|||
{
|
||||
place_constrained_hclk_cells();
|
||||
assign_cell_info();
|
||||
fast_logic_cell.reset(ctx->getGridDimX(), ctx->getGridDimY());
|
||||
for (auto bel : ctx->getBels()) {
|
||||
if (ctx->getBelType(bel) == id_LUT4) {
|
||||
Loc loc = ctx->getBelLocation(bel);
|
||||
fast_logic_cell.at(loc.x, loc.y).resize(37);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GowinImpl::postPlace()
|
||||
|
|
@ -711,6 +721,19 @@ void GowinImpl::postRoute()
|
|||
}
|
||||
}
|
||||
}
|
||||
std::vector<CellInfo*> to_remove;
|
||||
for (auto &cell : ctx->cells) {
|
||||
CellInfo *ci = cell.second.get();
|
||||
if (ci->type.in(id_BLOCKER_LUT, id_BLOCKER_FF)) {
|
||||
to_remove.push_back(ci);
|
||||
}
|
||||
}
|
||||
for (auto ci : to_remove) {
|
||||
auto root = ctx->cells.at(ci->cluster).get();
|
||||
root->constr_children.erase(std::remove_if(root->constr_children.begin(),
|
||||
root->constr_children.end(), [&](CellInfo *c) { return c == ci; }));
|
||||
ctx->cells.erase(ci->name);
|
||||
}
|
||||
}
|
||||
|
||||
bool GowinImpl::isBelLocationValid(BelId bel, bool explain_invalid) const
|
||||
|
|
@ -763,10 +786,10 @@ IdString GowinImpl::getBelBucketForCellType(IdString cell_type) const
|
|||
if (cell_type.in(id_MIPI_OBUF, id_MIPI_OBUF_A)) {
|
||||
return id_MIPI_OBUF;
|
||||
}
|
||||
if (type_is_lut(cell_type)) {
|
||||
if (type_is_lut(cell_type) || cell_type == id_BLOCKER_LUT) {
|
||||
return id_LUT4;
|
||||
}
|
||||
if (type_is_dff(cell_type)) {
|
||||
if (type_is_dff(cell_type) || cell_type == id_BLOCKER_FF) {
|
||||
return id_DFF;
|
||||
}
|
||||
if (type_is_ssram(cell_type)) {
|
||||
|
|
@ -804,10 +827,10 @@ bool GowinImpl::isValidBelForCellType(IdString cell_type, BelId bel) const
|
|||
return cell_type.in(id_MIPI_OBUF, id_MIPI_OBUF_A);
|
||||
}
|
||||
if (bel_type == id_LUT4) {
|
||||
return type_is_lut(cell_type);
|
||||
return type_is_lut(cell_type) || cell_type == id_BLOCKER_LUT;
|
||||
}
|
||||
if (bel_type == id_DFF) {
|
||||
return type_is_dff(cell_type);
|
||||
return type_is_dff(cell_type) || cell_type == id_BLOCKER_FF;
|
||||
}
|
||||
if (bel_type == id_RAM16SDP4) {
|
||||
return type_is_ssram(cell_type);
|
||||
|
|
@ -1049,39 +1072,44 @@ bool GowinImpl::dsp_valid(Loc l, IdString bel_type, bool explain_invalid) const
|
|||
|
||||
bool GowinImpl::slice_valid(int x, int y, int z) const
|
||||
{
|
||||
const CellInfo *lut = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z * 2)));
|
||||
const CellInfo *ff = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z * 2 + 1)));
|
||||
auto &bels = fast_logic_cell.at(x, y);
|
||||
const CellInfo *lut = bels.at(z * 2);
|
||||
const CellInfo *ff = bels.at(z * 2 + 1);
|
||||
// There are only 6 ALUs
|
||||
const CellInfo *alu = (z < 6) ? ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z + BelZ::ALU0_Z))) : nullptr;
|
||||
const CellInfo *ramw = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, BelZ::RAMW_Z)));
|
||||
const CellInfo *alu = (z < 6) ? bels.at(z + BelZ::ALU0_Z) : nullptr;
|
||||
const CellInfo *ramw = bels.at(BelZ::RAMW_Z);
|
||||
|
||||
if (alu && lut) {
|
||||
auto is_not_blocker = [](const CellInfo *ci) {
|
||||
return ci && !ci->type.in(id_BLOCKER_LUT, id_BLOCKER_FF);
|
||||
};
|
||||
|
||||
if (alu && lut && lut->type != id_BLOCKER_LUT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ramw) {
|
||||
// FFs in slices 4 and 5 are not allowed
|
||||
// also temporarily disallow FF to be placed near RAM
|
||||
if (ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 0 * 2 + 1))) ||
|
||||
ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 1 * 2 + 1))) ||
|
||||
ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 2 * 2 + 1))) ||
|
||||
ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 3 * 2 + 1))) ||
|
||||
ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 4 * 2 + 1))) ||
|
||||
ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 5 * 2 + 1)))) {
|
||||
if (is_not_blocker(bels.at(0 * 2 + 1)) ||
|
||||
is_not_blocker(bels.at(1 * 2 + 1)) ||
|
||||
is_not_blocker(bels.at(2 * 2 + 1)) ||
|
||||
is_not_blocker(bels.at(3 * 2 + 1)) ||
|
||||
is_not_blocker(bels.at(4 * 2 + 1)) ||
|
||||
is_not_blocker(bels.at(5 * 2 + 1))) {
|
||||
return false;
|
||||
}
|
||||
if (gwu.has_DFF67()) {
|
||||
if (ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 6 * 2 + 1))) ||
|
||||
ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, 7 * 2 + 1)))) {
|
||||
if (is_not_blocker(bels.at(6 * 2 + 1)) ||
|
||||
is_not_blocker(bels.at(7 * 2 + 1))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// ALU/LUTs in slices 4, 5, 6, 7 are not allowed
|
||||
for (int i = 4; i < 8; ++i) {
|
||||
if (ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, i * 2)))) {
|
||||
if (is_not_blocker(bels.at(i * 2))) {
|
||||
return false;
|
||||
}
|
||||
if (i < 6 && ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, i + BelZ::ALU0_Z)))) {
|
||||
if (i < 6 && bels.at(i + BelZ::ALU0_Z)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -1090,17 +1118,17 @@ bool GowinImpl::slice_valid(int x, int y, int z) const
|
|||
// check for ALU/LUT in the adjacent cell
|
||||
int adj_lut_z = (1 - (z & 1) * 2 + z) * 2;
|
||||
int adj_alu_z = adj_lut_z / 2 + BelZ::ALU0_Z;
|
||||
const CellInfo *adj_lut = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, adj_lut_z)));
|
||||
const CellInfo *adj_ff = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, adj_lut_z + 1)));
|
||||
const CellInfo *adj_lut = bels.at(adj_lut_z);
|
||||
const CellInfo *adj_ff = bels.at(adj_lut_z + 1);
|
||||
const CellInfo *adj_alu = adj_alu_z < (6 + BelZ::ALU0_Z)
|
||||
? ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, adj_alu_z)))
|
||||
? bels.at(adj_alu_z)
|
||||
: nullptr;
|
||||
|
||||
if ((alu && (adj_lut || (adj_ff && !adj_alu))) || ((lut || (ff && !alu)) && adj_alu)) {
|
||||
if ((alu && ((adj_lut && adj_lut->type != id_BLOCKER_LUT) || (adj_ff && !adj_alu))) || (((lut && lut->type != id_BLOCKER_LUT) || (ff && !alu)) && adj_alu)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ff) {
|
||||
if (ff && ff->type != id_BLOCKER_FF) {
|
||||
static std::vector<int> mux_z = {BelZ::MUX20_Z, BelZ::MUX21_Z, BelZ::MUX20_Z + 4, BelZ::MUX23_Z,
|
||||
BelZ::MUX20_Z + 8, BelZ::MUX21_Z + 8, BelZ::MUX20_Z + 12, BelZ::MUX27_Z};
|
||||
const auto &ff_data = fast_cell_info.at(ff->flat_index);
|
||||
|
|
@ -1108,7 +1136,7 @@ bool GowinImpl::slice_valid(int x, int y, int z) const
|
|||
// check implcit LUT(ALU) -> FF connection
|
||||
NPNR_ASSERT(!ramw); // XXX shouldn't happen for now
|
||||
if (lut || alu) {
|
||||
if (lut) {
|
||||
if (lut && lut->type != id_BLOCKER_LUT) {
|
||||
src = fast_cell_info.at(lut->flat_index).lut_f;
|
||||
} else {
|
||||
src = fast_cell_info.at(alu->flat_index).alu_sum;
|
||||
|
|
@ -1139,7 +1167,7 @@ bool GowinImpl::slice_valid(int x, int y, int z) const
|
|||
// The 4th, 5th, 6th, and 7th DFFs have the same control wires. Let's check this.
|
||||
const int adj_top_ff_z = (5 - (z >> 1)) * 4 + 1;
|
||||
for (int i = 0; i < 4; i += 2) {
|
||||
const CellInfo *adj_top_ff = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, adj_top_ff_z + i)));
|
||||
const CellInfo *adj_top_ff = bels.at(adj_top_ff_z + i);
|
||||
if (adj_top_ff) {
|
||||
const auto &adj_top_ff_data = fast_cell_info.at(adj_top_ff->flat_index);
|
||||
if (adj_top_ff_data.ff_lsr != ff_data.ff_lsr) {
|
||||
|
|
@ -1253,6 +1281,22 @@ bool GowinImpl::getClusterPlacement(ClusterId cluster, BelId root_bel,
|
|||
|
||||
void GowinImpl::notifyBelChange(BelId bel, CellInfo *cell)
|
||||
{
|
||||
|
||||
IdString bel_type = ctx->getBelType(bel);
|
||||
switch (bel_type.hash()) {
|
||||
case ID_LUT4: /* fall-through */
|
||||
case ID_DFF:
|
||||
case ID_ALU:
|
||||
case ID_RAM16SDP4:
|
||||
case ID_MUX2_LUT5:
|
||||
case ID_MUX2_LUT6:
|
||||
case ID_MUX2_LUT7:
|
||||
case ID_MUX2_LUT8:
|
||||
auto loc = ctx->getBelLocation(bel);
|
||||
fast_logic_cell.at(loc.x, loc.y).at(loc.z) = cell;
|
||||
return;
|
||||
}
|
||||
|
||||
if (cell != nullptr && !is_dsp(cell)) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -1315,16 +1359,13 @@ void GowinImpl::notifyBelChange(BelId bel, CellInfo *cell)
|
|||
|
||||
void GowinImpl::configurePlacerHeap(PlacerHeapCfg &cfg)
|
||||
{
|
||||
// SLICE types are closely associated with each other
|
||||
// Use cell groups to enforce a legalisation order
|
||||
cfg.cellGroups.emplace_back();
|
||||
cfg.cellGroups.back().insert(id_LUT4);
|
||||
cfg.cellGroups.back().insert(id_DFF);
|
||||
cfg.cellGroups.back().insert(id_ALU);
|
||||
cfg.cellGroups.back().insert(id_MUX2_LUT5);
|
||||
cfg.cellGroups.back().insert(id_MUX2_LUT6);
|
||||
cfg.cellGroups.back().insert(id_MUX2_LUT7);
|
||||
cfg.cellGroups.back().insert(id_MUX2_LUT8);
|
||||
cfg.cellGroups.back().insert(id_RAM16SDP4);
|
||||
cfg.cellGroups.emplace_back();
|
||||
cfg.cellGroups.back().insert(id_ALU);
|
||||
|
||||
cfg.placeAllAtOnce = true;
|
||||
|
||||
// Treat control and constants like IO buffers, because they have only one possible location
|
||||
cfg.ioBufTypes.insert(id_GOWIN_VCC);
|
||||
|
|
|
|||
|
|
@ -479,6 +479,27 @@ void GowinPacker::pack_alus(void)
|
|||
for (auto &ncell : new_cells) {
|
||||
ctx->cells[ncell->name] = std::move(ncell);
|
||||
}
|
||||
new_cells.clear();
|
||||
// The placer doesn't know "a priori" that LUTs and ALUs conflict. So create blocker LUTs to make this explicit and reduce wasted legalisation effort
|
||||
for (auto &cell : ctx->cells) {
|
||||
auto ci = cell.second.get();
|
||||
if (ci->cluster == ClusterId()) {
|
||||
continue;
|
||||
}
|
||||
if (is_alu(ci)) {
|
||||
auto cell = std::make_unique<CellInfo>(ctx, ctx->idf("%s_BLOCKER_LUT", ctx->nameOf(ci)), id_BLOCKER_LUT);
|
||||
cell->cluster = ci->cluster;
|
||||
ctx->cells.at(cell->cluster)->constr_children.push_back(cell.get());
|
||||
cell->constr_abs_z = true;
|
||||
cell->constr_x = ci->constr_x;
|
||||
cell->constr_y = ci->constr_y;
|
||||
cell->constr_z = 2 * (ci->constr_z - (ci->constr_abs_z ? BelZ::ALU0_Z : 0));
|
||||
new_cells.emplace_back(std::move(cell));
|
||||
}
|
||||
}
|
||||
for (auto &ncell : new_cells) {
|
||||
ctx->cells[ncell->name] = std::move(ncell);
|
||||
}
|
||||
}
|
||||
|
||||
// ===================================
|
||||
|
|
@ -587,6 +608,26 @@ void GowinPacker::pack_ssram(void)
|
|||
}
|
||||
}
|
||||
}
|
||||
for (int i = 4; i < 8; ++i) {
|
||||
auto cell = std::make_unique<CellInfo>(ctx, ctx->idf("%s_BLOCKER_LUT_%d", ctx->nameOf(ci), i), id_BLOCKER_LUT);
|
||||
cell->cluster = ci->cluster;
|
||||
ci->constr_children.push_back(cell.get());
|
||||
cell->constr_abs_z = true;
|
||||
cell->constr_x = 0;
|
||||
cell->constr_y = 0;
|
||||
cell->constr_z = 2 * i;
|
||||
new_cells.emplace_back(std::move(cell));
|
||||
}
|
||||
for (int i = 0; i < (gwu.has_DFF67() ? 8 : 6); ++i) {
|
||||
auto cell = std::make_unique<CellInfo>(ctx, ctx->idf("%s_BLOCKER_FF_%d", ctx->nameOf(ci), i), id_BLOCKER_FF);
|
||||
cell->cluster = ci->cluster;
|
||||
ci->constr_children.push_back(cell.get());
|
||||
cell->constr_abs_z = true;
|
||||
cell->constr_x = 0;
|
||||
cell->constr_y = 0;
|
||||
cell->constr_z = 2 * i + 1;
|
||||
new_cells.emplace_back(std::move(cell));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto &ncell : new_cells) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue