Gowin. Implement GW5A DSP. (#1641)

* Gowin. DSP. Implement MULT12x12.

The 5A series DSP differs from previous ones. Many things have been
greatly simplified: there are only two control signals of each type per
cell (2 CLK, 2 CE and 2 RESET), and these signals are now explicitly
specified in the DSP attributes, which makes the automatic assignment
mechanism unnecessary for them.

The DSP occupies three cells instead of nine because the four low-bit
multipliers have been excluded - now there are only two 12x12
multipliers. There will naturally still be clusters, but they will be
simpler and consist of other primitives.

Signed-off-by: YRabbit <rabbit@yrabbit.cyou>

* Gowin. Implement MULTADDALU12X12.

Signed-off-by: YRabbit <rabbit@yrabbit.cyou>

---------

Signed-off-by: YRabbit <rabbit@yrabbit.cyou>
This commit is contained in:
YRabbit 2026-02-20 16:48:22 +10:00 committed by GitHub
parent f1fc47e139
commit 5bbaac8572
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 249 additions and 39 deletions

View File

@ -991,6 +991,16 @@ X(LAST_IN_CHAIN)
X(MULTALU18X18_MODE)
X(MULTADDALU18X18_MODE)
X(MULTALU36X18_MODE)
X(MULT12X12)
X(MULT27X36)
X(MULTALU27X18)
X(MULTADDALU12X12)
X(MULTACC)
X(RESET0)
X(RESET1)
X(ACCSEL)
X(ACCSEL0)
X(ACCSEL1)
// IOB types
X(IBUF)

View File

@ -52,7 +52,6 @@ struct GowinImpl : HimbaechelAPI
bool getClusterPlacement(ClusterId cluster, BelId root_bel,
std::vector<std::pair<CellInfo *, BelId>> &placement) const override;
void configurePlacerHeap(PlacerHeapCfg &cfg) override;
private:
@ -74,6 +73,8 @@ struct GowinImpl : HimbaechelAPI
// dsp info
const NetInfo *dsp_asign = nullptr, *dsp_bsign = nullptr, *dsp_asel = nullptr, *dsp_bsel = nullptr,
*dsp_ce = nullptr, *dsp_clk = nullptr, *dsp_reset = nullptr;
const NetInfo *dsp_5a_clk0 = nullptr, *dsp_5a_clk1 = nullptr, *dsp_5a_ce0 = nullptr, *dsp_5a_ce1 = nullptr,
*dsp_5a_reset0 = nullptr, *dsp_5a_reset1 = nullptr;
bool dsp_soa_reg;
};
std::vector<GowinCellInfo> fast_cell_info;
@ -281,6 +282,9 @@ void GowinImpl::pack()
// We also indicate to the router which Bel's pin to use.
void GowinImpl::adjust_dsp_pin_mapping(void)
{
if (gwu.has_5A_DSP()) {
return;
}
for (auto b2c : dsp_bel2cell) {
BelId bel = b2c.first;
Loc loc = ctx->getBelLocation(bel);
@ -735,6 +739,7 @@ bool GowinImpl::isBelLocationValid(BelId bel, bool explain_invalid) const
case ID_PADD9: /* fall-through */
case ID_PADD18: /* fall-through */
case ID_MULT9X9: /* fall-through */
case ID_MULT12X12: /* fall-through */
case ID_MULT18X18: /* fall-through */
case ID_MULTADDALU18X18: /* fall-through */
case ID_MULTALU18X18: /* fall-through */
@ -867,6 +872,12 @@ void GowinImpl::assign_cell_info()
fc.dsp_asel = get_net(id_ASEL);
fc.dsp_bsel = get_net(id_BSEL);
fc.dsp_soa_reg = ci->params.count(id_SOA_REG) && ci->params.at(id_SOA_REG).as_int64() == 1;
fc.dsp_5a_clk0 = get_net(id_CLK0);
fc.dsp_5a_clk1 = get_net(id_CLK1);
fc.dsp_5a_ce0 = get_net(id_CE0);
fc.dsp_5a_ce1 = get_net(id_CE1);
fc.dsp_5a_reset0 = get_net(id_RESET0);
fc.dsp_5a_reset1 = get_net(id_RESET1);
}
}
}
@ -994,6 +1005,26 @@ bool GowinImpl::dsp_valid(Loc l, IdString bel_type, bool explain_invalid) const
}
}
}
if (bel_type == id_MULT12X12) {
int pair_z = gwu.get_dsp_paired_12(l.z);
const CellInfo *adj_dsp12 = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(l.x, l.y, pair_z)));
if (adj_dsp12 != nullptr) {
const auto &adj_dsp12_data = fast_cell_info.at(adj_dsp12->flat_index);
if ((dsp_data.dsp_5a_clk0 != adj_dsp12_data.dsp_5a_clk0) ||
(dsp_data.dsp_5a_clk1 != adj_dsp12_data.dsp_5a_clk1) ||
(dsp_data.dsp_5a_ce0 != adj_dsp12_data.dsp_5a_ce0) ||
(dsp_data.dsp_5a_ce1 != adj_dsp12_data.dsp_5a_ce1) ||
(dsp_data.dsp_5a_reset0 != adj_dsp12_data.dsp_5a_reset0) ||
(dsp_data.dsp_5a_reset1 != adj_dsp12_data.dsp_5a_reset1)) {
if (explain_invalid) {
log_nonfatal_error("For MULT12X12 primitives the control signals must be same.\n");
}
return false;
}
}
}
// check for control nets "overflow"
BelId dsp_bel = ctx->getBelByLocation(Loc(l.x, l.y, BelZ::DSP_Z));
if (dsp_info.count(dsp_bel)) {
@ -1181,7 +1212,7 @@ bool GowinImpl::getClusterPlacement(ClusterId cluster, BelId root_bel,
{
CellInfo *root_ci = getClusterRootCell(cluster);
if (!root_ci->type.in(id_PADD9, id_MULT9X9, id_PADD18, id_MULT18X18, id_MULTALU18X18, id_MULTALU36X18,
id_MULTADDALU18X18, id_ALU54D)) {
id_MULTADDALU18X18, id_ALU54D, id_MULTADDALU12X12)) {
return HimbaechelAPI::getClusterPlacement(cluster, root_bel, placement);
}
@ -1302,7 +1333,6 @@ void GowinImpl::configurePlacerHeap(PlacerHeapCfg &cfg)
cfg.ioBufTypes.insert(id_GSR);
}
} // namespace
NEXTPNR_NAMESPACE_END

View File

@ -73,7 +73,7 @@ inline bool is_bsram(const CellInfo *cell) { return type_is_bsram(cell->type); }
inline bool type_is_dsp(IdString cell_type)
{
return cell_type.in(id_PADD9, id_PADD18, id_MULT9X9, id_MULT18X18, id_MULT36X36, id_ALU54D, id_MULTALU18X18,
id_MULTALU36X18, id_MULTADDALU18X18);
id_MULTALU36X18, id_MULTADDALU18X18, id_MULT12X12, id_MULTADDALU12X12);
}
inline bool is_dsp(const CellInfo *cell) { return type_is_dsp(cell->type); }
@ -222,6 +222,7 @@ NPNR_PACKED_STRUCT(struct Extra_chip_data_POD {
static constexpr int32_t NEED_SDP_FIX = 2048;
static constexpr int32_t NEED_CFGPINS_INVERSION = 4096;
static constexpr int32_t HAS_I2CCFG = 8192;
static constexpr int32_t HAS_5A_DSP = 16384;
});
} // namespace
@ -329,7 +330,11 @@ enum
CLKDIV_0_Z = 620,
CLKDIV_1_Z = 621,
CLKDIV_2_Z = 622,
CLKDIV_3_Z = 623
CLKDIV_3_Z = 623,
MULT12X12_0_Z = 640,
MULT12X12_1_Z = 641,
MULTADDALU12X12_Z = 642,
};
}

View File

@ -33,6 +33,7 @@ CHIP_NEED_BSRAM_RESET_FIX = 0x400
CHIP_NEED_SDP_FIX = 0x800
CHIP_NEED_CFGPINS_INVERSION = 0x1000
CHIP_HAS_I2CCFG = 0x2000
CHIP_HAS_5A_DSP = 0x4000
# Tile flags
TILE_I3C_CAPABLE_IO = 0x1
@ -136,6 +137,10 @@ CLKDIV_1_Z = 621
CLKDIV_2_Z = 622
CLKDIV_3_Z = 623
MULT12X12_0_Z = 640
MULT12X12_1_Z = 641
MULTADDALU12X12_Z = 642
# =======================================
# Chipdb additional info
# =======================================
@ -1155,6 +1160,64 @@ def create_bsram_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int, tde
tdesc.tiletype = tiletype
return tt
# GW5A series has different DSP
def create_dsp_5a_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int, tdesc: TypeDesc):
    """Create the tile type describing a GW5A-series DSP tile.

    The tile gets two hidden bels ("DSP" and "DSP0"), two MULT12X12 bels and
    one MULTADDALU12X12 bel. Port wires are looked up in the port map of the
    corresponding bel in ``db[y, x]``.

    Ports are always created in the same order (tuples are iterated instead
    of sets), so repeated runs of the generator emit the pins and wires in an
    identical sequence.

    Side effect: ``tdesc.tiletype`` is set to the generated tile type name.

    Returns the newly created tile type.
    """
    typename = "DSP"
    tiletype = f"{typename}_{ttyp}"
    if tdesc.sfx != 0:
        tiletype += f"_{tdesc.sfx}"
    tt = chip.create_tile_type(tiletype)
    tt.extra_data = TileExtraData(chip.strs.id(typename))

    # create big DSP
    dsp = tt.create_bel('DSP', "DSP", DSP_Z)
    dsp.flags = BEL_FLAG_HIDDEN

    # create DSP macro
    dsp = tt.create_bel('DSP0', "DSP", DSP_0_Z)
    dsp.flags = BEL_FLAG_HIDDEN

    # create multipliers
    # The Z constants are referenced directly instead of being resolved by
    # name through eval().
    for idx, mult_z in enumerate((MULT12X12_0_Z, MULT12X12_1_Z)):
        belname = f'MULT12X120{idx}'
        portmap = db[y, x].bels[belname].portmap
        dsp = tt.create_bel(belname, "MULT12X12", mult_z)

        # A tuple (not a set) keeps the A-before-B order fixed across runs.
        for sfx in ('A', 'B'):
            for inp in range(12):
                add_port_wire(tt, dsp, portmap, f"{sfx}{inp}", "DSP_I", PinType.INPUT)
        for inp in range(2):
            add_port_wire(tt, dsp, portmap, f"CE{inp}", "DSP_I", PinType.INPUT)
            add_port_wire(tt, dsp, portmap, f"CLK{inp}", "DSP_I", PinType.INPUT)
            add_port_wire(tt, dsp, portmap, f"RESET{inp}", "DSP_I", PinType.INPUT)
        for outp in range(24):
            add_port_wire(tt, dsp, portmap, f"DOUT{outp}", "DSP_O", PinType.OUTPUT)

    # create MultAddAlu12x12
    belname = 'MULTADDALU12X1200'
    portmap = db[y, x].bels[belname].portmap
    dsp = tt.create_bel(belname, "MULTADDALU12X12", MULTADDALU12X12_Z)

    # Operand inputs are named <A|B><multiplier index><bit>.
    for sfx in ('A', 'B'):
        for mult in range(2):
            for inp in range(12):
                add_port_wire(tt, dsp, portmap, f"{sfx}{mult}{inp}", "DSP_I", PinType.INPUT)
    for inp in range(2):
        add_port_wire(tt, dsp, portmap, f"CE{inp}", "DSP_I", PinType.INPUT)
        add_port_wire(tt, dsp, portmap, f"CLK{inp}", "DSP_I", PinType.INPUT)
        add_port_wire(tt, dsp, portmap, f"RESET{inp}", "DSP_I", PinType.INPUT)
        add_port_wire(tt, dsp, portmap, f"ADDSUB{inp}", "DSP_I", PinType.INPUT)
        add_port_wire(tt, dsp, portmap, f"ACCSEL{inp}", "DSP_I", PinType.INPUT)
    add_port_wire(tt, dsp, portmap, "CASISEL", "DSP_I", PinType.INPUT)
    for outp in range(48):
        add_port_wire(tt, dsp, portmap, f"DOUT{outp}", "DSP_O", PinType.OUTPUT)

    tdesc.tiletype = tiletype
    return tt
# DSP
_mult_inputs = {'ASEL', 'BSEL', 'ASIGN', 'BSIGN'}
def create_dsp_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int, tdesc: TypeDesc):
@ -1167,7 +1230,6 @@ def create_dsp_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int, tdesc
# create big DSP
belname = f'DSP'
portmap = db[y, x].bels[belname].portmap
dsp = tt.create_bel(belname, "DSP", DSP_Z)
dsp.flags = BEL_FLAG_HIDDEN
@ -1687,8 +1749,10 @@ def main():
chip_flags |= CHIP_NEED_SDP_FIX;
if "NEED_CFGPINS_INVERSION" in db.chip_flags:
chip_flags |= CHIP_NEED_CFGPINS_INVERSION;
if "CHIP_HAS_I2CCFG" in db.chip_flags:
if "HAS_I2CCFG" in db.chip_flags:
chip_flags |= CHIP_HAS_I2CCFG;
if "HAS_5A_DSP" in db.chip_flags:
chip_flags |= CHIP_HAS_5A_DSP;
X = db.cols;
Y = db.rows;
@ -1710,6 +1774,7 @@ def main():
pll_tiletypes = db.tile_types['P']
bsram_tiletypes = db.tile_types.get('B', set())
dsp_tiletypes = db.tile_types.get('D', set())
dsp_5a_tiletypes = db.tile_types.get('D5A', set())
# If Apicula does not specify a special location for the global GND and VCC
# sources, place them at X0Y0.
@ -1731,6 +1796,8 @@ def main():
create_tiletype(create_bsram_tiletype, ch, db, x, y, ttyp)
elif ttyp in dsp_tiletypes:
create_tiletype(create_dsp_tiletype, ch, db, x, y, ttyp)
elif ttyp in dsp_5a_tiletypes:
create_tiletype(create_dsp_5a_tiletype, ch, db, x, y, ttyp)
else:
create_tiletype(create_null_tiletype, ch, db, x, y, ttyp)

View File

@ -462,6 +462,12 @@ std::unique_ptr<CellInfo> GowinUtils::create_cell(IdString name, IdString type)
}
// DSP
bool GowinUtils::has_5A_DSP(void) const
{
const Extra_chip_data_POD *extra = reinterpret_cast<const Extra_chip_data_POD *>(ctx->chip_info->extra_data.get());
return extra->chip_flags & Extra_chip_data_POD::HAS_5A_DSP;
}
Loc GowinUtils::get_dsp_next_9_in_chain(Loc from) const
{
Loc res;
@ -499,6 +505,16 @@ Loc GowinUtils::get_dsp_next_macro_in_chain(Loc from) const
return res;
}
Loc GowinUtils::get_dsp_next_in_chain_5a(Loc from) const
{
    // The next DSP in the chain sits in the same row and at the same Z,
    // three columns to the right of `from`.
    return Loc(from.x + 3, from.y, from.z);
}
Loc GowinUtils::get_dsp_next_in_chain(Loc from, IdString dsp_type) const
{
if (dsp_type.in(id_PADD9, id_PADD18, id_MULT9X9, id_MULT18X18)) {
@ -507,6 +523,9 @@ Loc GowinUtils::get_dsp_next_in_chain(Loc from, IdString dsp_type) const
if (dsp_type.in(id_ALU54D, id_MULTALU18X18, id_MULTALU36X18, id_MULTADDALU18X18)) {
return get_dsp_next_macro_in_chain(from);
}
if (dsp_type.in(id_MULTADDALU12X12)) {
return get_dsp_next_in_chain_5a(from);
}
NPNR_ASSERT_FALSE("Unknown DSP cell type.");
}

View File

@ -131,10 +131,12 @@ struct GowinUtils
bool has_spine_enable_nets(void) const;
// DSP
bool has_5A_DSP(void) const;
inline int get_dsp_18_z(int z) const { return z & (~3); }
inline int get_dsp_9_idx(int z) const { return z & 3; }
inline int get_dsp_18_idx(int z) const { return z & 4; }
inline int get_dsp_paired_9(int z) const { return (3 - get_dsp_9_idx(z)) | (z & (~3)); }
inline int get_dsp_paired_12(int z) const
{
    // Z of the other MULT12X12 in the pair: an even z maps to MULT12X12_1_Z,
    // an odd z maps to MULT12X12_1_Z - 1.
    const int low_bit = z & 1;
    return BelZ::MULT12X12_1_Z - low_bit;
}
inline int get_dsp_mult_from_padd(int padd_z) const { return padd_z + 8; }
inline int get_dsp_padd_from_mult(int mult_z) const { return mult_z - 8; }
inline int get_dsp_next_macro(int z) const { return z + 32; }
@ -144,6 +146,7 @@ struct GowinUtils
Loc get_dsp_next_9_in_chain(Loc from) const;
Loc get_dsp_next_macro_in_chain(Loc from) const;
Loc get_dsp_next_in_chain(Loc from, IdString dsp_type) const;
Loc get_dsp_next_in_chain_5a(Loc from) const;
// check bus.
// This is necessary to find the head in the DSP chain - these buses are

View File

@ -3190,6 +3190,20 @@ struct GowinPacker
}
}
} break;
case ID_MULT12X12: {
for (int i = 0; i < 2; ++i) {
ci->renamePort(ctx->idf("CLK[%d]", i), ctx->idf("CLK%d", i));
ci->renamePort(ctx->idf("CE[%d]", i), ctx->idf("CE%d", i));
ci->renamePort(ctx->idf("RESET[%d]", i), ctx->idf("RESET%d", i));
}
for (int i = 0; i < 12; ++i) {
ci->renamePort(ctx->idf("A[%d]", i), ctx->idf("A%d", i));
ci->renamePort(ctx->idf("B[%d]", i), ctx->idf("B%d", i));
}
for (int i = 0; i < 24; ++i) {
ci->renamePort(ctx->idf("DOUT[%d]", i), ctx->idf("DOUT%d", i));
}
} break;
case ID_MULT18X18: {
pass_net_type(ci, id_ASEL);
pass_net_type(ci, id_BSEL);
@ -3511,6 +3525,62 @@ struct GowinPacker
}
}
} break;
case ID_MULTADDALU12X12: {
for (int i = 0; i < 2; ++i) {
ci->renamePort(ctx->idf("CLK[%d]", i), ctx->idf("CLK%d", i));
ci->renamePort(ctx->idf("CE[%d]", i), ctx->idf("CE%d", i));
ci->renamePort(ctx->idf("RESET[%d]", i), ctx->idf("RESET%d", i));
ci->renamePort(ctx->idf("ADDSUB[%d]", i), ctx->idf("ADDSUB%d", i));
}
for (int i = 0; i < 12; ++i) {
ci->renamePort(ctx->idf("A0[%d]", i), ctx->idf("A0%d", i));
ci->renamePort(ctx->idf("B0[%d]", i), ctx->idf("B0%d", i));
ci->renamePort(ctx->idf("A1[%d]", i), ctx->idf("A1%d", i));
ci->renamePort(ctx->idf("B1[%d]", i), ctx->idf("B1%d", i));
}
pass_net_type(ci, id_ACCSEL);
ci->cell_bel_pins.at(id_ACCSEL).clear();
ci->cell_bel_pins.at(id_ACCSEL).push_back(id_ACCSEL0);
ci->cell_bel_pins.at(id_ACCSEL).push_back(id_ACCSEL1);
for (int i = 0; i < 48; ++i) {
ci->renamePort(ctx->idf("DOUT[%d]", i), ctx->idf("DOUT%d", i));
}
// mark 2 mult12x12 as parts of the cluster to prevent
// other multipliers from being placed there
ci->cluster = ci->name;
ci->constr_abs_z = false;
ci->constr_x = 0;
ci->constr_y = 0;
ci->constr_z = 0;
ci->constr_children.clear();
for (int i = 0; i < 2; ++i) {
IdString mult12x12_name = gwu.create_aux_name(ci->name, i * 2);
std::unique_ptr<CellInfo> mult12x12_cell = gwu.create_cell(mult12x12_name, id_DUMMY_CELL);
new_cells.push_back(std::move(mult12x12_cell));
CellInfo *mult12x12_ci = new_cells.back().get();
mult12x12_ci->cluster = ci->name;
mult12x12_ci->constr_abs_z = false;
mult12x12_ci->constr_x = 0;
mult12x12_ci->constr_y = 0;
mult12x12_ci->constr_z = BelZ::MULT12X12_0_Z - BelZ::MULTADDALU12X12_Z + i;
}
// DSP head?
if (gwu.dsp_bus_src(ci, "CASI", 48) == nullptr) {
for (int i = 0; i < 48; ++i) {
ci->disconnectPort(ctx->idf("CASI[%d]", i));
}
dsp_heads.push_back(ci);
if (ctx->verbose) {
log_info(" found a DSP head: %s\n", ctx->nameOf(ci));
}
}
} break;
case ID_MULTADDALU18X18: {
if (ci->params.count(id_MULTADDALU18X18_MODE) == 0) {
ci->setParam(id_MULTADDALU18X18_MODE, 0);
@ -3703,6 +3773,40 @@ struct GowinPacker
}
}
auto make_CAS_chain = [&](CellInfo *head, int wire_num) {
CellInfo *cur_dsp = head;
while (1) {
CellInfo *next_dsp = gwu.dsp_bus_dst(cur_dsp, "CASO", wire_num);
if (next_dsp == nullptr) {
// End of chain
for (int i = 0; i < wire_num; ++i) {
cur_dsp->disconnectPort(ctx->idf("CASO[%d]", i));
}
break;
}
for (int i = 0; i < wire_num; ++i) {
cur_dsp->disconnectPort(ctx->idf("CASO[%d]", i));
next_dsp->disconnectPort(ctx->idf("CASI[%d]", i));
}
cur_dsp->setAttr(id_USE_CASCADE_OUT, 1);
cur_dsp = next_dsp;
cur_dsp->setAttr(id_USE_CASCADE_IN, 1);
if (ctx->verbose) {
log_info(" add %s to the chain.\n", ctx->nameOf(cur_dsp));
}
if (head->cluster == ClusterId()) {
head->cluster = head->name;
}
cur_dsp->cluster = head->name;
head->constr_children.push_back(cur_dsp);
for (auto child : cur_dsp->constr_children) {
child->cluster = head->name;
head->constr_children.push_back(child);
}
cur_dsp->constr_children.clear();
}
};
// DSP chains
for (CellInfo *head : dsp_heads) {
if (ctx->verbose) {
@ -3807,38 +3911,10 @@ struct GowinPacker
case ID_MULTALU18X18: /* fallthrough */
case ID_MULTALU36X18: /* fallthrough */
case ID_ALU54D: {
int wire_num = 55;
CellInfo *cur_dsp = head;
while (1) {
CellInfo *next_dsp_a = gwu.dsp_bus_dst(cur_dsp, "CASO", wire_num);
if (next_dsp_a == nullptr) {
// End of chain
for (int i = 0; i < wire_num; ++i) {
cur_dsp->disconnectPort(ctx->idf("CASO[%d]", i));
}
break;
}
for (int i = 0; i < wire_num; ++i) {
cur_dsp->disconnectPort(ctx->idf("CASO[%d]", i));
next_dsp_a->disconnectPort(ctx->idf("CASI[%d]", i));
}
cur_dsp->setAttr(id_USE_CASCADE_OUT, 1);
cur_dsp = next_dsp_a;
cur_dsp->setAttr(id_USE_CASCADE_IN, 1);
if (ctx->verbose) {
log_info(" add %s to the chain.\n", ctx->nameOf(cur_dsp));
}
if (head->cluster == ClusterId()) {
head->cluster = head->name;
}
cur_dsp->cluster = head->name;
head->constr_children.push_back(cur_dsp);
for (auto child : cur_dsp->constr_children) {
child->cluster = head->name;
head->constr_children.push_back(child);
}
cur_dsp->constr_children.clear();
}
make_CAS_chain(head, 55);
} break;
case ID_MULTADDALU12X12: {
make_CAS_chain(head, 48);
} break;
case ID_MULTADDALU18X18: {
// This primitive has the ability to form chains using both SO[AB] -> SI[AB] and CASO->CASI