From 239f440c3b6760d775426762f28137fb56969525 Mon Sep 17 00:00:00 2001 From: YRabbit Date: Sun, 15 Feb 2026 19:11:40 +1000 Subject: [PATCH] Gowin. DSP. Implement MULT12X12. The 5A series DSP differs from previous ones. Many things have been greatly simplified: there are only two control signals of each type per cell (2 CLK, 2 CE and 2 RESET), and these signals are now explicitly specified in the DSP attributes, which makes the automatic assignment mechanism unnecessary for them. The DSP occupies three cells instead of nine due to the exclusion of the four low-bit multipliers - now there are only two 12x12 multipliers. There will naturally be clusters, but they will be simpler and consist of other primitives. Signed-off-by: YRabbit --- himbaechel/uarch/gowin/constids.inc | 7 ++++ himbaechel/uarch/gowin/gowin.cc | 34 +++++++++++++++- himbaechel/uarch/gowin/gowin.h | 8 +++- himbaechel/uarch/gowin/gowin_arch_gen.py | 50 +++++++++++++++++++++++- himbaechel/uarch/gowin/gowin_utils.cc | 6 +++ himbaechel/uarch/gowin/gowin_utils.h | 2 + himbaechel/uarch/gowin/pack.cc | 14 +++++++ 7 files changed, 115 insertions(+), 6 deletions(-) diff --git a/himbaechel/uarch/gowin/constids.inc b/himbaechel/uarch/gowin/constids.inc index 4a59355e..47bae2ac 100644 --- a/himbaechel/uarch/gowin/constids.inc +++ b/himbaechel/uarch/gowin/constids.inc @@ -991,6 +991,13 @@ X(LAST_IN_CHAIN) X(MULTALU18X18_MODE) X(MULTADDALU18X18_MODE) X(MULTALU36X18_MODE) +X(MULT12X12) +X(MULT27X36) +X(MULTALU27X18) +X(MULTADDALU12X12) +X(MULTACC) +X(RESET0) +X(RESET1) // IOB types X(IBUF) diff --git a/himbaechel/uarch/gowin/gowin.cc b/himbaechel/uarch/gowin/gowin.cc index c9362876..e9698d5f 100644 --- a/himbaechel/uarch/gowin/gowin.cc +++ b/himbaechel/uarch/gowin/gowin.cc @@ -51,7 +51,6 @@ struct GowinImpl : HimbaechelAPI bool getClusterPlacement(ClusterId cluster, BelId root_bel, std::vector> &placement) const override; - void configurePlacerHeap(PlacerHeapCfg &cfg) override; private: @@ -73,6 +72,8 @@ struct GowinImpl : HimbaechelAPI 
// dsp info const NetInfo *dsp_asign = nullptr, *dsp_bsign = nullptr, *dsp_asel = nullptr, *dsp_bsel = nullptr, *dsp_ce = nullptr, *dsp_clk = nullptr, *dsp_reset = nullptr; + const NetInfo *dsp_5a_clk0 = nullptr, *dsp_5a_clk1 = nullptr, *dsp_5a_ce0 = nullptr, *dsp_5a_ce1 = nullptr, + *dsp_5a_reset0 = nullptr, *dsp_5a_reset1 = nullptr; bool dsp_soa_reg; }; std::vector fast_cell_info; @@ -274,6 +275,9 @@ void GowinImpl::pack() // We also indicate to the router which Bel's pin to use. void GowinImpl::adjust_dsp_pin_mapping(void) { + if (gwu.has_5A_DSP()) { + return; + } for (auto b2c : dsp_bel2cell) { BelId bel = b2c.first; Loc loc = ctx->getBelLocation(bel); @@ -728,6 +732,7 @@ bool GowinImpl::isBelLocationValid(BelId bel, bool explain_invalid) const case ID_PADD9: /* fall-through */ case ID_PADD18: /* fall-through */ case ID_MULT9X9: /* fall-through */ + case ID_MULT12X12: /* fall-through */ case ID_MULT18X18: /* fall-through */ case ID_MULTADDALU18X18: /* fall-through */ case ID_MULTALU18X18: /* fall-through */ @@ -860,6 +865,12 @@ void GowinImpl::assign_cell_info() fc.dsp_asel = get_net(id_ASEL); fc.dsp_bsel = get_net(id_BSEL); fc.dsp_soa_reg = ci->params.count(id_SOA_REG) && ci->params.at(id_SOA_REG).as_int64() == 1; + fc.dsp_5a_clk0 = get_net(id_CLK0); + fc.dsp_5a_clk1 = get_net(id_CLK1); + fc.dsp_5a_ce0 = get_net(id_CE0); + fc.dsp_5a_ce1 = get_net(id_CE1); + fc.dsp_5a_reset0 = get_net(id_RESET0); + fc.dsp_5a_reset1 = get_net(id_RESET1); } } } @@ -987,6 +998,26 @@ bool GowinImpl::dsp_valid(Loc l, IdString bel_type, bool explain_invalid) const } } } + + if (bel_type == id_MULT12X12) { + int pair_z = gwu.get_dsp_paired_12(l.z); + const CellInfo *adj_dsp12 = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(l.x, l.y, pair_z))); + if (adj_dsp12 != nullptr) { + const auto &adj_dsp12_data = fast_cell_info.at(adj_dsp12->flat_index); + if ((dsp_data.dsp_5a_clk0 != adj_dsp12_data.dsp_5a_clk0) || + (dsp_data.dsp_5a_clk1 != adj_dsp12_data.dsp_5a_clk1) || + 
(dsp_data.dsp_5a_ce0 != adj_dsp12_data.dsp_5a_ce0) || + (dsp_data.dsp_5a_ce1 != adj_dsp12_data.dsp_5a_ce1) || + (dsp_data.dsp_5a_reset0 != adj_dsp12_data.dsp_5a_reset0) || + (dsp_data.dsp_5a_reset1 != adj_dsp12_data.dsp_5a_reset1)) { + if (explain_invalid) { + log_nonfatal_error("For MULT12X12 primitives the control signals must be same.\n"); + } + return false; + } + } + } + // check for control nets "overflow" BelId dsp_bel = ctx->getBelByLocation(Loc(l.x, l.y, BelZ::DSP_Z)); if (dsp_info.count(dsp_bel)) { @@ -1295,7 +1326,6 @@ void GowinImpl::configurePlacerHeap(PlacerHeapCfg &cfg) cfg.ioBufTypes.insert(id_GSR); } - } // namespace NEXTPNR_NAMESPACE_END diff --git a/himbaechel/uarch/gowin/gowin.h b/himbaechel/uarch/gowin/gowin.h index 7496015a..fbd008cf 100644 --- a/himbaechel/uarch/gowin/gowin.h +++ b/himbaechel/uarch/gowin/gowin.h @@ -73,7 +73,7 @@ inline bool is_bsram(const CellInfo *cell) { return type_is_bsram(cell->type); } inline bool type_is_dsp(IdString cell_type) { return cell_type.in(id_PADD9, id_PADD18, id_MULT9X9, id_MULT18X18, id_MULT36X36, id_ALU54D, id_MULTALU18X18, - id_MULTALU36X18, id_MULTADDALU18X18); + id_MULTALU36X18, id_MULTADDALU18X18, id_MULT12X12); } inline bool is_dsp(const CellInfo *cell) { return type_is_dsp(cell->type); } @@ -222,6 +222,7 @@ NPNR_PACKED_STRUCT(struct Extra_chip_data_POD { static constexpr int32_t NEED_SDP_FIX = 2048; static constexpr int32_t NEED_CFGPINS_INVERSION = 4096; static constexpr int32_t HAS_I2CCFG = 8192; + static constexpr int32_t HAS_5A_DSP = 16384; }); } // namespace @@ -329,7 +330,10 @@ enum CLKDIV_0_Z = 620, CLKDIV_1_Z = 621, CLKDIV_2_Z = 622, - CLKDIV_3_Z = 623 + CLKDIV_3_Z = 623, + + MULT12X12_0_Z = 640, + MULT12X12_1_Z = 641 }; } diff --git a/himbaechel/uarch/gowin/gowin_arch_gen.py b/himbaechel/uarch/gowin/gowin_arch_gen.py index d9c0171f..cc189534 100644 --- a/himbaechel/uarch/gowin/gowin_arch_gen.py +++ b/himbaechel/uarch/gowin/gowin_arch_gen.py @@ -33,6 +33,7 @@ CHIP_NEED_BSRAM_RESET_FIX = 0x400 
CHIP_NEED_SDP_FIX = 0x800 CHIP_NEED_CFGPINS_INVERSION = 0x1000 CHIP_HAS_I2CCFG = 0x2000 +CHIP_HAS_5A_DSP = 0x4000 # Tile flags TILE_I3C_CAPABLE_IO = 0x1 @@ -136,6 +137,9 @@ CLKDIV_1_Z = 621 CLKDIV_2_Z = 622 CLKDIV_3_Z = 623 +MULT12X12_0_Z = 640 +MULT12X12_1_Z = 641 + # ======================================= # Chipdb additional info # ======================================= @@ -1155,6 +1159,44 @@ def create_bsram_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int, tde tdesc.tiletype = tiletype return tt + +# GW5A series has different DSP +def create_dsp_5a_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int, tdesc: TypeDesc): + typename = "DSP" + tiletype = f"{typename}_{ttyp}" + if tdesc.sfx != 0: + tiletype += f"_{tdesc.sfx}" + tt = chip.create_tile_type(tiletype) + tt.extra_data = TileExtraData(chip.strs.id(typename)) + + # create big DSP + belname = f'DSP' + dsp = tt.create_bel(belname, "DSP", DSP_Z) + dsp.flags = BEL_FLAG_HIDDEN + + # create DSP macro + belname = 'DSP0' + dsp = tt.create_bel(belname, "DSP", DSP_0_Z) + dsp.flags = BEL_FLAG_HIDDEN + + for idx in range(2): + belname = f'MULT12X120{idx}' + portmap = db[y, x].bels[belname].portmap + dsp = tt.create_bel(belname, "MULT12X12", eval(f'MULT12X12_{idx}_Z')) + + for sfx in {'A', 'B'}: + for inp in range(12): + add_port_wire(tt, dsp, portmap, f"{sfx}{inp}", "DSP_I", PinType.INPUT) + for inp in range(2): + add_port_wire(tt, dsp, portmap, f"CE{inp}", "DSP_I", PinType.INPUT) + add_port_wire(tt, dsp, portmap, f"CLK{inp}", "DSP_I", PinType.INPUT) + add_port_wire(tt, dsp, portmap, f"RESET{inp}", "DSP_I", PinType.INPUT) + for outp in range(24): + add_port_wire(tt, dsp, portmap, f"DOUT{outp}", "DSP_O", PinType.OUTPUT) + + tdesc.tiletype = tiletype + return tt + # DSP _mult_inputs = {'ASEL', 'BSEL', 'ASIGN', 'BSIGN'} def create_dsp_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int, tdesc: TypeDesc): @@ -1167,7 +1209,6 @@ def create_dsp_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: 
int, tdesc # create big DSP belname = f'DSP' - portmap = db[y, x].bels[belname].portmap dsp = tt.create_bel(belname, "DSP", DSP_Z) dsp.flags = BEL_FLAG_HIDDEN @@ -1687,8 +1728,10 @@ def main(): chip_flags |= CHIP_NEED_SDP_FIX; if "NEED_CFGPINS_INVERSION" in db.chip_flags: chip_flags |= CHIP_NEED_CFGPINS_INVERSION; - if "CHIP_HAS_I2CCFG" in db.chip_flags: + if "HAS_I2CCFG" in db.chip_flags: chip_flags |= CHIP_HAS_I2CCFG; + if "HAS_5A_DSP" in db.chip_flags: + chip_flags |= CHIP_HAS_5A_DSP; X = db.cols; Y = db.rows; @@ -1710,6 +1753,7 @@ def main(): pll_tiletypes = db.tile_types['P'] bsram_tiletypes = db.tile_types.get('B', set()) dsp_tiletypes = db.tile_types.get('D', set()) + dsp_5a_tiletypes = db.tile_types.get('D5A', set()) # If Apicula does not specify a special location for the global GND and VCC # sources, place them at X0Y0. @@ -1731,6 +1775,8 @@ def main(): create_tiletype(create_bsram_tiletype, ch, db, x, y, ttyp) elif ttyp in dsp_tiletypes: create_tiletype(create_dsp_tiletype, ch, db, x, y, ttyp) + elif ttyp in dsp_5a_tiletypes: + create_tiletype(create_dsp_5a_tiletype, ch, db, x, y, ttyp) else: create_tiletype(create_null_tiletype, ch, db, x, y, ttyp) diff --git a/himbaechel/uarch/gowin/gowin_utils.cc b/himbaechel/uarch/gowin/gowin_utils.cc index 390c553f..90385f3f 100644 --- a/himbaechel/uarch/gowin/gowin_utils.cc +++ b/himbaechel/uarch/gowin/gowin_utils.cc @@ -462,6 +462,12 @@ std::unique_ptr GowinUtils::create_cell(IdString name, IdString type) } // DSP +bool GowinUtils::has_5A_DSP(void) const +{ + const Extra_chip_data_POD *extra = reinterpret_cast(ctx->chip_info->extra_data.get()); + return extra->chip_flags & Extra_chip_data_POD::HAS_5A_DSP; +} + Loc GowinUtils::get_dsp_next_9_in_chain(Loc from) const { Loc res; diff --git a/himbaechel/uarch/gowin/gowin_utils.h b/himbaechel/uarch/gowin/gowin_utils.h index 4255b8e3..01d92396 100644 --- a/himbaechel/uarch/gowin/gowin_utils.h +++ b/himbaechel/uarch/gowin/gowin_utils.h @@ -131,10 +131,12 @@ struct 
GowinUtils bool has_spine_enable_nets(void) const; // DSP + bool has_5A_DSP(void) const; inline int get_dsp_18_z(int z) const { return z & (~3); } inline int get_dsp_9_idx(int z) const { return z & 3; } inline int get_dsp_18_idx(int z) const { return z & 4; } inline int get_dsp_paired_9(int z) const { return (3 - get_dsp_9_idx(z)) | (z & (~3)); } + inline int get_dsp_paired_12(int z) const { return BelZ::MULT12X12_1_Z - (z & 1); } inline int get_dsp_mult_from_padd(int padd_z) const { return padd_z + 8; } inline int get_dsp_padd_from_mult(int mult_z) const { return mult_z - 8; } inline int get_dsp_next_macro(int z) const { return z + 32; } diff --git a/himbaechel/uarch/gowin/pack.cc b/himbaechel/uarch/gowin/pack.cc index fd82e126..48ae291f 100644 --- a/himbaechel/uarch/gowin/pack.cc +++ b/himbaechel/uarch/gowin/pack.cc @@ -3190,6 +3190,20 @@ struct GowinPacker } } } break; + case ID_MULT12X12: { + for (int i = 0; i < 2; ++i) { + ci->renamePort(ctx->idf("CLK[%d]", i), ctx->idf("CLK%d", i)); + ci->renamePort(ctx->idf("CE[%d]", i), ctx->idf("CE%d", i)); + ci->renamePort(ctx->idf("RESET[%d]", i), ctx->idf("RESET%d", i)); + } + for (int i = 0; i < 12; ++i) { + ci->renamePort(ctx->idf("A[%d]", i), ctx->idf("A%d", i)); + ci->renamePort(ctx->idf("B[%d]", i), ctx->idf("B%d", i)); + } + for (int i = 0; i < 24; ++i) { + ci->renamePort(ctx->idf("DOUT[%d]", i), ctx->idf("DOUT%d", i)); + } + } break; case ID_MULT18X18: { pass_net_type(ci, id_ASEL); pass_net_type(ci, id_BSEL);