From c84879e4d527c9c60f82c29043cb351e76384a1f Mon Sep 17 00:00:00 2001 From: YRabbit Date: Wed, 19 Mar 2025 17:41:35 +1000 Subject: [PATCH] Gowin. Implement the DLLDLY primitive. (#1464) DLLDLY is the clock delay primitive that adjusts the input clock according to the DLLSTEP signal and outputs the delayed clock. These primitives are associated with clock pins and are "tapped" between the output of the pin's IBUF and the clock networks, leaving the possibility to connect to the original unshifted signal as well, although the latter is not very practical because it is no longer possible to use fast wires. Signed-off-by: YRabbit --- himbaechel/uarch/gowin/constids.inc | 7 +++ himbaechel/uarch/gowin/globals.cc | 79 +++++++++++++++++++----- himbaechel/uarch/gowin/gowin.h | 8 +++ himbaechel/uarch/gowin/gowin_arch_gen.py | 54 +++++++++++++++- himbaechel/uarch/gowin/gowin_utils.cc | 11 ++++ himbaechel/uarch/gowin/gowin_utils.h | 1 + himbaechel/uarch/gowin/pack.cc | 39 ++++++++++++ 7 files changed, 182 insertions(+), 17 deletions(-) diff --git a/himbaechel/uarch/gowin/constids.inc b/himbaechel/uarch/gowin/constids.inc index a7485407..00c020ed 100644 --- a/himbaechel/uarch/gowin/constids.inc +++ b/himbaechel/uarch/gowin/constids.inc @@ -1346,6 +1346,13 @@ X(TREG_LSR_NET) X(NOIOBFF) X(IOBFF) +// DLLDLY +X(DLLDLY) +X(DLLSTEP) +X(DLLDLY_O) +X(DLLDLY_CLKOUT0) +X(DLLDLY_CLKOUT1) + // segments X(LW_TAP) X(LW_TAP_0) diff --git a/himbaechel/uarch/gowin/globals.cc b/himbaechel/uarch/gowin/globals.cc index 5de783e4..ea0626f5 100644 --- a/himbaechel/uarch/gowin/globals.cc +++ b/himbaechel/uarch/gowin/globals.cc @@ -57,21 +57,20 @@ struct GowinGlobalRouter bool not_dsc_pip = dst_name != id_CLKOUT; IdString src_type = ctx->getWireType(src); IdString dst_type = ctx->getWireType(dst); - bool src_valid = not_dsc_pip && src_type.in(id_GLOBAL_CLK, id_IO_O, id_PLL_O, id_HCLK); + bool src_valid = not_dsc_pip && src_type.in(id_GLOBAL_CLK, id_IO_O, id_PLL_O, id_HCLK, id_DLLDLY); bool dst_valid = 
not_dsc_pip && dst_type.in(id_GLOBAL_CLK, id_TILE_CLK, id_PLL_I, id_IO_I, id_HCLK); bool res; - if (src == src_wire && (!src_type.in(id_IO, id_HCLK))) { + if (src == src_wire && (!src_type.in(id_IO_O, id_HCLK, id_DLLDLY_O))) { bool dst_is_spine = dst_name.str(ctx).rfind("SPINE", 0) == 0; res = src_valid && dst_is_spine; } else { res = (src_valid && dst_valid) || (src_valid && is_local(dst_type)) || (is_local(src_type) && dst_valid); } if (ctx->debug && false /*&& res*/) { - log_info("%s <- %s [%s <- %s]\n", ctx->getWireName(ctx->getPipDstWire(pip)).str(ctx).c_str(), - ctx->getWireName(ctx->getPipSrcWire(pip)).str(ctx).c_str(), dst_type.c_str(ctx), - src_type.c_str(ctx)); - log_info("res:%d, src_valid:%d, dst_valid:%d, src local:%d, dst local:%d\n", res, src_valid, dst_valid, + log_info("%s <- %s [%s <- %s]\n", ctx->nameOfWire(ctx->getPipDstWire(pip)), + ctx->nameOfWire(ctx->getPipSrcWire(pip)), dst_type.c_str(ctx), src_type.c_str(ctx)); + log_info(" res:%d, src_valid:%d, dst_valid:%d, src local:%d, dst local:%d\n", res, src_valid, dst_valid, is_local(src_type), is_local(dst_type)); } return res; @@ -103,17 +102,17 @@ struct GowinGlobalRouter // If DQCE is used, then the source can only connect to SPINEs as only they can be switched off/on. 
bool res; if (src == src_wire) { - bool dst_is_spine = dst_name.str(ctx).rfind("SPINE", 0) == 0; + bool dst_is_spine = (dst_name.str(ctx).rfind("SPINE", 0) == 0 || dst_name.str(ctx).rfind("PCLK", 0) == 0 || + dst_name.str(ctx).rfind("LWSPINE", 0) == 0); res = src_valid && dst_is_spine; } else { res = (src_valid && dst_valid) || (src_valid && is_local(dst_type)) || (is_local(src_type) && dst_valid); - if (ctx->debug && false /*res*/) { - log_info("%s <- %s [%s <- %s]\n", ctx->getWireName(ctx->getPipDstWire(pip)).str(ctx).c_str(), - ctx->getWireName(ctx->getPipSrcWire(pip)).str(ctx).c_str(), dst_type.c_str(ctx), - src_type.c_str(ctx)); - log_info("res:%d, src_valid:%d, dst_valid:%d, src local:%d, dst local:%d\n", res, src_valid, dst_valid, - is_local(src_type), is_local(dst_type)); - } + } + if (ctx->debug && false /*res*/) { + log_info("%s <- %s [%s <- %s]\n", ctx->nameOfWire(ctx->getPipDstWire(pip)), + ctx->nameOfWire(ctx->getPipSrcWire(pip)), dst_type.c_str(ctx), src_type.c_str(ctx)); + log_info(" res:%d, src_valid:%d, dst_valid:%d, src local:%d, dst local:%d\n", res, src_valid, dst_valid, + is_local(src_type), is_local(dst_type)); } return res; } @@ -291,6 +290,16 @@ struct GowinGlobalRouter return true; } } + // DLLDLY outputs + if (driver.cell->type == id_DLLDLY) { + if (driver.port.in(id_CLKOUT)) { + if (ctx->debug) { + log_info("%s out:%s:%s\n", driver.cell->type.c_str(ctx), + ctx->getBelName(driver.cell->bel).str(ctx).c_str(), driver.port.c_str(ctx)); + } + return true; + } + } return false; } @@ -384,7 +393,26 @@ struct GowinGlobalRouter continue; } WireId dst = ctx->getPipDstWire(pip); - + IdString dst_name = ctx->getWireName(dst)[1]; + if (dst_name.str(ctx).rfind("PCLK", 0) == 0 || dst_name.str(ctx).rfind("LWSPINE", 0) == 0) { + // step over dummy pip + for (PipId next_pip : ctx->getPipsDownhill(dst)) { + if (ctx->getBoundPipNet(next_pip) != nullptr) { + ctx->unbindPip(pip); + src = dst; + break; + } + } + if (src == dst) { + break; + } + } + } + for 
(PipId pip : ctx->getPipsDownhill(src)) { + if (ctx->getBoundPipNet(pip) == nullptr) { + continue; + } + WireId dst = ctx->getPipDstWire(pip); BelId dqce_bel = gwu.get_dqce_bel(ctx->getWireName(dst)[1]); NPNR_ASSERT(dqce_bel != BelId()); @@ -459,6 +487,25 @@ struct GowinGlobalRouter // "spine" wires. Here we not only check this fact, but also find out // how many and what kind of "spine" wires were used for network // roaming. + for (PipId pip : ctx->getPipsDownhill(src)) { + if (ctx->getBoundPipNet(pip) == nullptr) { + continue; + } + WireId dst = ctx->getPipDstWire(pip); + IdString dst_name = ctx->getWireName(dst)[1]; + if (dst_name.str(ctx).rfind("PCLK", 0) == 0 || dst_name.str(ctx).rfind("LWSPINE", 0) == 0) { + // step over dummy pip + for (PipId next_pip : ctx->getPipsDownhill(dst)) { + if (ctx->getBoundPipNet(next_pip) != nullptr) { + src = dst; + break; + } + } + if (src == dst) { + break; + } + } + } for (PipId pip : ctx->getPipsDownhill(src)) { if (ctx->getBoundPipNet(pip) == nullptr) { continue; @@ -1222,7 +1269,7 @@ struct GowinGlobalRouter continue; } if (ctx->verbose) { - log_info("route clock net '%s'\n", ctx->nameOf(ni)); + log_info("route clock net '%s', src:%s\n", ctx->nameOf(ni), ctx->nameOf(ni->driver.cell)); } route_clk_net(ni); } diff --git a/himbaechel/uarch/gowin/gowin.h b/himbaechel/uarch/gowin/gowin.h index 3e09d8f3..83554210 100644 --- a/himbaechel/uarch/gowin/gowin.h +++ b/himbaechel/uarch/gowin/gowin.h @@ -139,6 +139,11 @@ NPNR_PACKED_STRUCT(struct Spine_bel_POD { int32_t bel_z; }); +NPNR_PACKED_STRUCT(struct Io_dlldly_bel_POD { + int32_t io; + int32_t dlldly; +}); + NPNR_PACKED_STRUCT(struct Wire_bel_POD { int32_t pip_xy; int32_t pip_dst; @@ -181,6 +186,7 @@ NPNR_PACKED_STRUCT(struct Extra_chip_data_POD { RelSlice dqce_bels; RelSlice dcs_bels; RelSlice dhcen_bels; + RelSlice io_dlldly_bels; RelSlice segments; // chip flags static constexpr int32_t HAS_SP32 = 1; @@ -235,6 +241,8 @@ enum MIPIOBUF_Z = 301, MIPIIBUF_Z = 302, + DLLDLY_Z = 
303, // : 305 reserve for 2 DLLDLYs + // The two least significant bits encode Z for 9-bit adders and // multipliers, if they are equal to 0, then we get Z of their common // 18-bit equivalent. diff --git a/himbaechel/uarch/gowin/gowin_arch_gen.py b/himbaechel/uarch/gowin/gowin_arch_gen.py index 97fa077d..7eab4576 100644 --- a/himbaechel/uarch/gowin/gowin_arch_gen.py +++ b/himbaechel/uarch/gowin/gowin_arch_gen.py @@ -65,6 +65,8 @@ EMCU_Z = 300 MIPIOBUF_Z = 301 MIPIIBUF_Z = 302 +DLLDLY_Z = 303 # : 305 reserve for 2 DLLDLYs + DSP_Z = 509 DSP_0_Z = 511 # DSP macro 0 @@ -180,6 +182,18 @@ class SpineBel(BBAStruct): bba.u32(self.bel_y) bba.u32(self.bel_z) +# io -> dlldly bels +@dataclass +class IoBel(BBAStruct): + io: IdString + dlldly: IdString + + def serialise_lists(self, context: str, bba: BBAWriter): + pass + def serialise(self, context: str, bba: BBAWriter): + bba.u32(self.io.index) + bba.u32(self.dlldly.index) + # wire -> bel for DHCEN bels @dataclass class WireBel(BBAStruct): @@ -249,6 +263,7 @@ class ChipExtraData(BBAStruct): dqce_bels: list[SpineBel] = field(default_factory = list) dcs_bels: list[SpineBel] = field(default_factory = list) dhcen_bels: list[WireBel] = field(default_factory = list) + io_dlldly_bels: list[IoBel] = field(default_factory = list) segments: list[Segment] = field(default_factory = list) def create_bottom_io(self): @@ -269,6 +284,8 @@ class ChipExtraData(BBAStruct): def add_dcs_bel(self, spine: str, x: int, y: int, z: int): self.dcs_bels.append(SpineBel(self.strs.id(spine), x, y, z)) + def add_io_dlldly_bel(self, io: str, dlldly: str): + self.io_dlldly_bels.append(IoBel(self.strs.id(io), self.strs.id(dlldly))) def add_segment(self, x: int, seg_idx: int, min_x: int, min_y: int, max_x: int, max_y: int, top_row: int, bottom_row: int, top_wire: str, bottom_wire: str, top_gate_wire: list, bottom_gate_wire: list): new_seg = Segment(x, seg_idx, min_x, min_y, max_x, max_y, top_row, bottom_row, @@ -300,6 +317,9 @@ class ChipExtraData(BBAStruct): 
bba.label(f"{context}_dhcen_bels") for i, t in enumerate(self.dhcen_bels): t.serialise(f"{context}_dhcen_bel{i}", bba) + bba.label(f"{context}_io_dlldly_bels") + for i, t in enumerate(self.io_dlldly_bels): + t.serialise(f"{context}_io_dlldly_bel{i}", bba) bba.label(f"{context}_segments") for i, t in enumerate(self.segments): t.serialise(f"{context}_segment{i}", bba) @@ -311,6 +331,7 @@ class ChipExtraData(BBAStruct): bba.slice(f"{context}_dqce_bels", len(self.dqce_bels)) bba.slice(f"{context}_dcs_bels", len(self.dcs_bels)) bba.slice(f"{context}_dhcen_bels", len(self.dhcen_bels)) + bba.slice(f"{context}_io_dlldly_bels", len(self.io_dlldly_bels)) bba.slice(f"{context}_segments", len(self.segments)) @dataclass @@ -444,6 +465,9 @@ def create_nodes(chip: Chip, db: chipdb): wire_type, node = node_hdr if len(node) < 2: continue + min_wire_name_len = 0 + if node: + min_wire_name_len = len(next(iter(node))[2]) for y, x, wire in node: if wire_type: if not chip.tile_type_at(x, y).has_wire(wire): @@ -453,7 +477,11 @@ def create_nodes(chip: Chip, db: chipdb): new_node = NodeWire(x, y, wire) gl_nodes = global_nodes.setdefault(node_name, []) if new_node not in gl_nodes: - gl_nodes.append(NodeWire(x, y, wire)) + if len(wire) < min_wire_name_len: + min_wire_name_len = len(wire) + gl_nodes.insert(0, new_node) + else: + gl_nodes.append(new_node) for name, node in global_nodes.items(): chip.add_node(node) @@ -462,6 +490,10 @@ def create_switch_matrix(tt: TileType, db: chipdb, x: int, y: int): def get_wire_type(name): if name in {'XD0', 'XD1', 'XD2', 'XD3', 'XD4', 'XD5',}: return "X0" + if name in {"PCLK_DUMMY"}: + return "GLOBAL_CLK" + if name in {"DLLDLY_OUT"}: + return "DLLDLY_O" if name in {'LT00', 'LT10', 'LT20', 'LT30', 'LT02', 'LT13'}: return "LW_TAP" return "" @@ -550,6 +582,9 @@ dcs_bels = {} # map HCLKIN wire -> dhcen bel dhcen_bels = {} +# map io bel -> dlldly bel +io_dlldly_bels = {} + def create_extra_funcs(tt: TileType, db: chipdb, x: int, y: int): if (y, x) not in 
db.extra_func: return @@ -588,6 +623,20 @@ def create_extra_funcs(tt: TileType, db: chipdb, x: int, y: int): tt.add_bel_pin(bel, "CE", wire, PinType.INPUT) pip_xy, pip_dst, pip_src, side = dhcen['pip'] dhcen_bels[pip_xy, pip_dst, pip_src] = (x, y, bel_z, side) + elif func == 'dlldly': + for idx, dlldly in desc.items(): + bel_z = DLLDLY_Z + idx + bel = tt.create_bel(f"DLLDLY{idx}", "DLLDLY", z = bel_z) + for pin, wire in dlldly['in_wires'].items(): + if not tt.has_wire(wire): + tt.create_wire(wire) + tt.add_bel_pin(bel, pin, wire, PinType.INPUT) + for pin, wire in dlldly['out_wires'].items(): + if not tt.has_wire(wire): + tt.create_wire(wire) + tt.add_bel_pin(bel, pin, wire, PinType.OUTPUT) + io_dlldly_bels[f"{dlldly['io_loc']}/{dlldly['io_bel']}"] = f"X{x}Y{y}/DLLDLY{idx}" + elif func == 'dqce': for idx in range(6): bel_z = DQCE_Z + idx @@ -1346,6 +1395,9 @@ def create_extra_data(chip: Chip, db: chipdb, chip_flags: int): # create spine->dcs bel map for spine, bel in dcs_bels.items(): chip.extra_data.add_dcs_bel(spine, bel[0], bel[1], bel[2]) + # create iob->dlldly bel map + for io, dlldly in io_dlldly_bels.items(): + chip.extra_data.add_io_dlldly_bel(io, dlldly) # create segments if hasattr(db, "segments"): for y_x_idx, seg in db.segments.items(): diff --git a/himbaechel/uarch/gowin/gowin_utils.cc b/himbaechel/uarch/gowin/gowin_utils.cc index ce0b9754..628181fc 100644 --- a/himbaechel/uarch/gowin/gowin_utils.cc +++ b/himbaechel/uarch/gowin/gowin_utils.cc @@ -180,6 +180,17 @@ BelId GowinUtils::get_dcs_bel(IdString spine_name) return BelId(); } +BelId GowinUtils::get_dlldly_bel(BelId io_bel) +{ + const Extra_chip_data_POD *extra = reinterpret_cast(ctx->chip_info->extra_data.get()); + for (auto &io : extra->io_dlldly_bels) { + if (IdStringList::parse(ctx, (IdString(io.io)).str(ctx)) == ctx->getBelName(io_bel)) { + return ctx->getBelByName(IdStringList::parse(ctx, (IdString(io.dlldly)).str(ctx))); + } + } + return BelId(); +} + BelId GowinUtils::get_dhcen_bel(WireId 
hclkin_wire, IdString &side) { const Extra_chip_data_POD *extra = reinterpret_cast(ctx->chip_info->extra_data.get()); diff --git a/himbaechel/uarch/gowin/gowin_utils.h b/himbaechel/uarch/gowin/gowin_utils.h index f63a2257..3766ba6f 100644 --- a/himbaechel/uarch/gowin/gowin_utils.h +++ b/himbaechel/uarch/gowin/gowin_utils.h @@ -44,6 +44,7 @@ struct GowinUtils BelId get_dqce_bel(IdString spine_name); BelId get_dcs_bel(IdString spine_name); BelId get_dhcen_bel(WireId hclkin_wire, IdString &side); + BelId get_dlldly_bel(BelId io_bel); // Segments int get_segments_count(void) const; diff --git a/himbaechel/uarch/gowin/pack.cc b/himbaechel/uarch/gowin/pack.cc index b2c67c30..53efc44c 100644 --- a/himbaechel/uarch/gowin/pack.cc +++ b/himbaechel/uarch/gowin/pack.cc @@ -3816,6 +3816,42 @@ struct GowinPacker } } + // =================================== + // DLLDLY + // =================================== + void pack_dlldly() + { + log_info("Pack DLLDLYs...\n"); + + for (auto &cell : ctx->cells) { + auto ci = cell.second.get(); + if (ci->type != id_DLLDLY) + continue; + NetInfo *clkin_net = ci->getPort(id_CLKIN); + NetInfo *clkout_net = ci->getPort(id_CLKOUT); + if (clkin_net == nullptr || clkout_net == nullptr) { + log_error("%s cell has unconnected CLKIN or CLKOUT pins.\n", ctx->nameOf(ci)); + } + CellInfo *clk_src = clkin_net->driver.cell; + if (!is_io(clk_src)) { + log_error("Clock source for DLLDLY %s is not IO: %s.\n", ctx->nameOf(ci), ctx->nameOf(clk_src)); + } + // DLLDLY placement is fixed + BelId io_bel = clk_src->bel; + BelId dlldly_bel = gwu.get_dlldly_bel(io_bel); + if (dlldly_bel == BelId()) { + log_error("Can't use IO %s as input for DLLDLY %s.\n", ctx->nameOf(clk_src), ctx->nameOf(ci)); + } + if (ctx->verbose) { + log_info(" pack %s to use clock pin at %s\n", ctx->nameOf(ci), ctx->nameOfBel(io_bel)); + } + ctx->bindBel(dlldly_bel, ci, STRENGTH_LOCKED); + for (int i = 0; i < 8; ++i) { + ci->renamePort(ctx->idf("DLLSTEP[%d]", i), ctx->idf("DLLSTEP%d", i)); + } + 
} + } + // ========================================= // Create entry points to the clock system // ========================================= @@ -4188,6 +4224,9 @@ struct GowinPacker pack_hclk(); ctx->check(); + pack_dlldly(); + ctx->check(); + pack_bandgap(); ctx->check();