Gowin. DSP. Implement MULT27X36.

The new multiplier is made from two 27x18 units by switching inputs and
creating a cluster connected via CASO->CASI.

A second pass was required to process the multipliers that are created on
the fly. To make that possible, the processing of DSP cells was moved into
a separate function, which results in a large diff, although in reality
very few lines actually changed.

An important point is that in the 5A series, there is a gap between
adjacent DSPs in one row. There are still SIA/CASI wires, so the DSPs on
either side of the gap are connected, but the distance between them is
greater than usual. We take this fact into account based on the gap
coordinates from the chip database.

Signed-off-by: YRabbit <rabbit@yrabbit.cyou>
This commit is contained in:
YRabbit 2026-03-03 19:17:34 +10:00 committed by myrtle
parent 8c2f8810d4
commit 008ccae25b
6 changed files with 920 additions and 768 deletions

View File

@ -974,6 +974,9 @@ X(ASEL1)
X(BSEL)
X(BSEL0)
X(BSEL1)
X(CSEL)
X(PSEL)
X(PADDSUB)
X(SOA_REG)
X(DSIGN)
X(ACCLOAD)
@ -1003,6 +1006,8 @@ X(RESET1)
X(ACCSEL)
X(ACCSEL0)
X(ACCSEL1)
X(MULT27X36_MAIN)
X(MULT27X36_AUX)
// IOB types
X(IBUF)

View File

@ -73,7 +73,8 @@ inline bool is_bsram(const CellInfo *cell) { return type_is_bsram(cell->type); }
// Reports whether cell_type is one of the DSP primitive type ids listed in the call below.
// NOTE(review): this span comes from a diff view without +/- markers; the line that ends with
// "id_MULTALU27X18);" looks like the pre-change version of the two lines after it (which
// additionally list id_MULT27X36) — confirm against the real file before relying on this text.
inline bool type_is_dsp(IdString cell_type)
{
return cell_type.in(id_PADD9, id_PADD18, id_MULT9X9, id_MULT18X18, id_MULT36X36, id_ALU54D, id_MULTALU18X18,
id_MULTALU36X18, id_MULTADDALU18X18, id_MULT12X12, id_MULTADDALU12X12, id_MULTALU27X18);
id_MULTALU36X18, id_MULTADDALU18X18, id_MULT12X12, id_MULTADDALU12X12, id_MULTALU27X18,
id_MULT27X36);
}
// Cell-level convenience wrapper: forwards the cell's type to type_is_dsp().
inline bool is_dsp(const CellInfo *cell)
{
    const bool dsp_type = type_is_dsp(cell->type);
    return dsp_type;
}
@ -197,6 +198,8 @@ NPNR_PACKED_STRUCT(struct Extra_package_data_POD { RelSlice<Constraint_POD> cst;
NPNR_PACKED_STRUCT(struct Extra_chip_data_POD {
int32_t chip_flags;
int32_t dcs_prefix;
int16_t center_row;
int16_t center_col;
Bottom_io_POD bottom_io;
RelSlice<IdString> diff_io_types;
RelSlice<Spine_bel_POD> dqce_bels;

View File

@ -295,6 +295,8 @@ class SpineSelectWire(BBAStruct):
class ChipExtraData(BBAStruct):
strs: StringPool
flags: int
center_row: int
center_col: int
dcs_prefix: IdString = field(default = None)
bottom_io: BottomIO = field(default = None)
diff_io_types: list[IdString] = field(default_factory = list)
@ -383,6 +385,8 @@ class ChipExtraData(BBAStruct):
def serialise(self, context: str, bba: BBAWriter):
bba.u32(self.flags)
bba.u32(self.dcs_prefix.index)
bba.u16(self.center_row)
bba.u16(self.center_col)
self.bottom_io.serialise(f"{context}_bottom_io", bba)
bba.slice(f"{context}_diff_io_types", len(self.diff_io_types))
bba.slice(f"{context}_dqce_bels", len(self.dqce_bels))
@ -1550,7 +1554,19 @@ def create_packages(chip: Chip, db: chipdb):
# Extra chip data
def create_extra_data(chip: Chip, db: chipdb, chip_flags: int):
chip.extra_data = ChipExtraData(chip.strs, chip_flags)
# The coordinates of the chip center are useful when building a DSP chain
# because there is an area around this particular point that does not
# contain any DSP blocks, but there are cascade and shift wires, so the gap
# between adjacent DSPs is larger than usual at this point. The coordinates
# of this particular cell may be useful when working with 138k clock MUXs
# in the future.
center_row = 0
center_col = 0
if hasattr(db, 'center_row'):
center_row = db.center_row
center_col = db.center_col
chip.extra_data = ChipExtraData(chip.strs, chip_flags, center_row, center_col)
if hasattr(db, "dcs_prefix"):
chip.extra_data.set_dcs_prefix(db.dcs_prefix)
else:

View File

@ -510,7 +510,11 @@ Loc GowinUtils::get_dsp_next_in_chain_5a(Loc from) const
Loc res;
res.y = from.y;
// next DSP
res.x = from.x + 3;
int off = 3;
if (from.y == get_center_row() && (from.x + 5) == get_center_col()) {
off = 9;
}
res.x = from.x + off;
res.z = from.z;
return res;
}
@ -741,4 +745,16 @@ void GowinUtils::find_connected_bels(const CellInfo *cell, IdString port, IdStri
}
}
// Get spec locations
int GowinUtils::get_center_row(void) const
{
const Extra_chip_data_POD *extra = reinterpret_cast<const Extra_chip_data_POD *>(ctx->chip_info->extra_data.get());
return extra->center_row;
}
int GowinUtils::get_center_col(void) const
{
const Extra_chip_data_POD *extra = reinterpret_cast<const Extra_chip_data_POD *>(ctx->chip_info->extra_data.get());
return extra->center_col;
}
NEXTPNR_NAMESPACE_END

View File

@ -240,6 +240,10 @@ struct GowinUtils
// Find a maximum matching in a bipartite graph, g
std::vector<int> kuhn_find_maximum_bipartite_matching(int n, int k, std::vector<std::vector<int>> &g);
// Get spec locations
int get_center_row(void) const;
int get_center_col(void) const;
};
NEXTPNR_NAMESPACE_END

View File

@ -38,11 +38,11 @@ void GowinPacker::pack_dsp(void)
std::vector<std::unique_ptr<CellInfo>> new_cells;
log_info("Pack DSP...\n");
std::vector<std::unique_ptr<CellInfo>> new_dsps;
std::vector<CellInfo *> dsp_heads;
std::vector<IdString> cells_to_remove;
for (auto &cell : ctx->cells) {
auto ci = cell.second.get();
if (is_dsp(ci)) {
auto process_dsp_cell = [&](CellInfo *ci) {
if (ctx->verbose) {
log_info(" pack %s %s\n", ci->type.c_str(ctx), ctx->nameOf(ci));
}
@ -452,6 +452,93 @@ void GowinPacker::pack_dsp(void)
}
}
} break;
case ID_MULT27X36: {
    // We assemble that primitive from two MultAlu27x18 connected via CASO->CASI.
    // The "low" cell receives B[17:0] and the "high" cell receives B[35:18];
    // A, D, PSEL, PADDSUB, CLK, CE and RESET are shared by both cells.
    // Both new cells are queued in new_dsps and the original cell is queued
    // for removal in cells_to_remove.
    IdString l_mult_name = gwu.create_aux_name(ci->name, 1);
    std::unique_ptr<CellInfo> l_mult_cell = gwu.create_cell(l_mult_name, id_MULTALU27X18);
    new_dsps.push_back(std::move(l_mult_cell));
    CellInfo *l_mult = new_dsps.back().get();

    IdString h_mult_name = gwu.create_aux_name(ci->name, 2);
    std::unique_ptr<CellInfo> h_mult_cell = gwu.create_cell(h_mult_name, id_MULTALU27X18);
    new_dsps.push_back(std::move(h_mult_cell));
    CellInfo *h_mult = new_dsps.back().get();

    // SIAs are disconnected
    for (int i = 0; i < 27; ++i) {
        l_mult->addInput(ctx->idf("SIA[%d]", i));
        h_mult->addInput(ctx->idf("SIA[%d]", i));
        l_mult->addOutput(ctx->idf("SOA[%d]", i));
        h_mult->addOutput(ctx->idf("SOA[%d]", i));
    }

    // unused
    l_mult->addInput(id_CSEL);
    h_mult->addInput(id_CSEL);
    l_mult->addInput(id_ASEL);
    h_mult->addInput(id_ASEL);
    l_mult->addInput(id_ACCSEL);
    h_mult->addInput(id_ACCSEL);

    for (int i = 0; i < 48; ++i) {
        // C is disconnected
        l_mult->addInput(ctx->idf("C[%d]", i));
        h_mult->addInput(ctx->idf("C[%d]", i));
        // low mult CASI is disconnected
        l_mult->addInput(ctx->idf("CASI[%d]", i));
        // high mult CASO is disconnected
        h_mult->addOutput(ctx->idf("CASO[%d]", i));
        // low CASO -> high CASI
        l_mult->addOutput(ctx->idf("CASO[%d]", i));
        h_mult->addInput(ctx->idf("CASI[%d]", i));
        l_mult->connectPorts(ctx->idf("CASO[%d]", i), h_mult, ctx->idf("CASI[%d]", i));
    }

    // input A is shared
    ci->movePortBusTo(id_A, 0, 1, l_mult, id_A, 0, 1, 27);
    l_mult->copyPortBusTo(id_A, 0, 1, h_mult, id_A, 0, 1, 27);

    // input D is shared
    ci->movePortBusTo(id_D, 0, 1, l_mult, id_D, 0, 1, 26);
    l_mult->copyPortBusTo(id_D, 0, 1, h_mult, id_D, 0, 1, 26);

    // input PSEL is shared.
    // PSEL is moved as a single scalar port, so it has to be copied as a
    // scalar too (same pattern as PADDSUB below). A bus copy would address
    // PSEL[0]..PSEL[25] instead of PSEL and the high multiplier would not
    // receive the shared net.
    ci->movePortTo(id_PSEL, l_mult, id_PSEL);
    l_mult->copyPortTo(id_PSEL, h_mult, id_PSEL);

    // input PADDSUB is shared
    ci->movePortTo(id_PADDSUB, l_mult, id_PADDSUB);
    l_mult->copyPortTo(id_PADDSUB, h_mult, id_PADDSUB);

    // input B is divided
    ci->movePortBusTo(id_B, 0, 1, l_mult, id_B, 0, 1, 18);
    ci->movePortBusTo(id_B, 18, 1, h_mult, id_B, 0, 1, 18);

    // output DOUT is divided
    // NOTE(review): the high part is moved with width 47 starting at bit 18 —
    // confirm this matches the actual DOUT width of MULT27X36.
    ci->movePortBusTo(id_DOUT, 0, 1, l_mult, id_DOUT, 0, 1, 18);
    ci->movePortBusTo(id_DOUT, 18, 1, h_mult, id_DOUT, 0, 1, 47);

    // Control inputs are shared
    ci->movePortBusTo(id_CLK, 0, 1, l_mult, id_CLK, 0, 1, 2);
    l_mult->copyPortBusTo(id_CLK, 0, 1, h_mult, id_CLK, 0, 1, 2);
    ci->movePortBusTo(id_CE, 0, 1, l_mult, id_CE, 0, 1, 2);
    l_mult->copyPortBusTo(id_CE, 0, 1, h_mult, id_CE, 0, 1, 2);
    ci->movePortBusTo(id_RESET, 0, 1, l_mult, id_RESET, 0, 1, 2);
    l_mult->copyPortBusTo(id_RESET, 0, 1, h_mult, id_RESET, 0, 1, 2);

    // copy params (by const reference: no per-entry copy of the key/value pair)
    for (const auto &param : ci->params) {
        l_mult->setParam(param.first, param.second);
        h_mult->setParam(param.first, param.second);
    }

    // mark as MULT27X36
    // we will catch these attributes during packing and add the missing parameters
    l_mult->setAttr(id_MULT27X36_MAIN, 1);
    h_mult->setAttr(id_MULT27X36_AUX, 1);

    // remove former Mult27x36
    cells_to_remove.push_back(ci->name);
} break;
case ID_MULTALU27X18: {
for (int i = 0; i < 2; ++i) {
ci->renamePort(ctx->idf("CLK[%d]", i), ctx->idf("CLK%d", i));
@ -472,7 +559,7 @@ void GowinPacker::pack_dsp(void)
ci->renamePort(ctx->idf("C[%d]", i), ctx->idf("C%d", i));
}
pass_net_type(ci, id_ACCSEL);
ci->cell_bel_pins.at(id_ACCSEL).clear();
ci->cell_bel_pins[id_ACCSEL].clear();
ci->cell_bel_pins.at(id_ACCSEL).push_back(id_ACCSEL0);
ci->cell_bel_pins.at(id_ACCSEL).push_back(id_ACCSEL1);
@ -500,6 +587,9 @@ void GowinPacker::pack_dsp(void)
mult12x12_ci->constr_x = 0;
mult12x12_ci->constr_y = 0;
mult12x12_ci->constr_z = BelZ::MULT12X12_0_Z - BelZ::MULTALU27X18_Z + i;
if (ctx->verbose) {
log_info(" mark %s multiplier as used by %s\n", mult12x12_name.c_str(ctx), ctx->nameOf(ci));
}
}
// DSP head? This primitive has the ability to form chains using both SO[AB] -> SI[AB] and
@ -861,9 +951,27 @@ void GowinPacker::pack_dsp(void)
default:
log_error("Unsupported DSP type '%s'\n", ci->type.c_str(ctx));
}
};
for (auto &cell : ctx->cells) {
auto ci = cell.second.get();
if (is_dsp(ci)) {
process_dsp_cell(ci);
}
}
// Process new DSPs if any are generated
for (auto &cell : new_dsps) {
IdString name = cell->name;
ctx->cells[name] = std::move(cell);
auto ci = ctx->cells.at(name).get();
process_dsp_cell(ci);
}
for (auto cell : cells_to_remove) {
ctx->cells.erase(cell);
}
// add new cells
for (auto &cell : new_cells) {
if (cell->cluster != ClusterId()) {