394 lines
19 KiB
Verilog
394 lines
19 KiB
Verilog
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Filename: ddr3_phy.v
|
|
// Project: UberDDR3 - An Open Source DDR3 Controller
|
|
//
|
|
// Purpose: PHY component for the DDR3 controller. Handles the primitives such
|
|
// as IOSERDES, IODELAY, and IOBUF. These generates the signals connected to
|
|
// the DDR3 RAM.
|
|
//
|
|
// Engineer: Angelo C. Jacobo
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Copyright (C) 2023-2025 Angelo Jacobo
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
`default_nettype none
|
|
`timescale 1ps / 1ps
|
|
//`define DEBUG_DQS // uncomment to route the raw DQS to output port for debugging
|
|
|
|
module ddr3_phy_ecp5 #(
|
|
parameter CONTROLLER_CLK_PERIOD = 10_000, //ps, clock period of the controller interface
|
|
DDR3_CLK_PERIOD = 2_500, //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD)
|
|
ROW_BITS = 14, //width of row address
|
|
BA_BITS = 3,
|
|
DQ_BITS = 8,
|
|
LANES = 8,
|
|
DUAL_RANK_DIMM = 0, // enable dual rank DIMM (1 = enable, 0 = disable)
|
|
parameter[0:0] ODELAY_SUPPORTED = 1, //set to 1 when ODELAYE2 is supported
|
|
USE_IO_TERMINATION = 0, //use IOBUF_DCIEN and IOBUFDS_DCIEN when 1
|
|
NO_IOSERDES_LOOPBACK = 1, // don't use IOSERDES loopback for bitslip training
|
|
LATTICE_ECP5 = 1, // 1 = Lattice ECP PHY, 0 = Xilinx PHY
|
|
// The next parameters act more like a localparam (since user does not have to set this manually) but was added here to simplify port declaration
|
|
parameter serdes_ratio = 4, // this controller is fixed as a 4:1 memory controller (CONTROLLER_CLK_PERIOD/DDR3_CLK_PERIOD = 4)
|
|
wb_data_bits = DQ_BITS*LANES*serdes_ratio*2,
|
|
wb_sel_bits = wb_data_bits / 8,
|
|
//4 is the width of a single ddr3 command {cs_n, ras_n, cas_n, we_n} plus 3 (ck_en, odt, reset_n) plus bank bits plus row bits
|
|
cmd_len = 4 + 3 + BA_BITS + ROW_BITS + 2*DUAL_RANK_DIMM
|
|
)(
|
|
input wire i_controller_clk, i_ddr3_clk, i_ref_clk,
|
|
input wire i_ddr3_clk_90, //required only when ODELAY_SUPPORTED is zero
|
|
input wire i_rst_n,
|
|
// Controller Interface
|
|
input wire i_controller_reset,
|
|
input wire[cmd_len*serdes_ratio-1:0] i_controller_cmd,
|
|
input wire i_controller_dqs_tri_control, i_controller_dq_tri_control,
|
|
input wire i_controller_toggle_dqs,
|
|
input wire[wb_data_bits-1:0] i_controller_data,
|
|
input wire[wb_sel_bits-1:0] i_controller_dm,
|
|
input wire[4:0] i_controller_odelay_data_cntvaluein,i_controller_odelay_dqs_cntvaluein,
|
|
input wire[4:0] i_controller_idelay_data_cntvaluein,i_controller_idelay_dqs_cntvaluein,
|
|
input wire[LANES-1:0] i_controller_odelay_data_ld, i_controller_odelay_dqs_ld,
|
|
input wire[LANES-1:0] i_controller_idelay_data_ld, i_controller_idelay_dqs_ld,
|
|
input wire[LANES-1:0] i_controller_bitslip,
|
|
input wire i_controller_write_leveling_calib,
|
|
output wire[DQ_BITS*LANES*8-1:0] o_controller_iserdes_data,
|
|
output wire[LANES*8-1:0] o_controller_iserdes_dqs,
|
|
output wire[LANES*8-1:0] o_controller_iserdes_bitslip_reference,
|
|
output wire o_controller_idelayctrl_rdy,
|
|
// DDR3 I/O Interface
|
|
output wire[DUAL_RANK_DIMM:0] o_ddr3_clk_p,o_ddr3_clk_n,
|
|
output wire o_ddr3_reset_n,
|
|
output wire[DUAL_RANK_DIMM:0] o_ddr3_cke, // CKE
|
|
output wire[DUAL_RANK_DIMM:0] o_ddr3_cs_n, // chip select signal
|
|
output wire o_ddr3_ras_n, // RAS#
|
|
output wire o_ddr3_cas_n, // CAS#
|
|
output wire o_ddr3_we_n, // WE#
|
|
output wire[ROW_BITS-1:0] o_ddr3_addr,
|
|
output wire[BA_BITS-1:0] o_ddr3_ba_addr,
|
|
inout wire[(DQ_BITS*LANES)-1:0] io_ddr3_dq,
|
|
inout wire[(DQ_BITS*LANES)/8-1:0] io_ddr3_dqs, io_ddr3_dqs_n,
|
|
output wire[LANES-1:0] o_ddr3_dm,
|
|
output wire[DUAL_RANK_DIMM:0] o_ddr3_odt, // on-die termination
|
|
// DEBUG PHY
|
|
output wire[(DQ_BITS*LANES)/8-1:0] o_ddr3_debug_read_dqs_p,
|
|
output wire[(DQ_BITS*LANES)/8-1:0] o_ddr3_debug_read_dqs_n
|
|
);
|
|
|
|
// cmd bit assignment
|
|
localparam CMD_CS_N_2 = cmd_len - 1,
|
|
CMD_CS_N = DUAL_RANK_DIMM[0]? cmd_len - 2 : cmd_len - 1,
|
|
CMD_RAS_N = DUAL_RANK_DIMM[0]? cmd_len - 3 : cmd_len - 2,
|
|
CMD_CAS_N = DUAL_RANK_DIMM[0]? cmd_len - 4 : cmd_len - 3,
|
|
CMD_WE_N = DUAL_RANK_DIMM[0]? cmd_len - 5 : cmd_len - 4,
|
|
CMD_ODT = DUAL_RANK_DIMM[0]? cmd_len - 6 : cmd_len - 5,
|
|
CMD_CKE_2 = DUAL_RANK_DIMM[0]? cmd_len - 7 : cmd_len - 6,
|
|
CMD_CKE = DUAL_RANK_DIMM[0]? cmd_len - 8 : cmd_len - 6,
|
|
CMD_RESET_N = DUAL_RANK_DIMM[0]? cmd_len - 9 : cmd_len - 7,
|
|
CMD_BANK_START = BA_BITS + ROW_BITS - 1,
|
|
CMD_ADDRESS_START = ROW_BITS - 1;
|
|
localparam SYNC_RESET_DELAY = 52_000/CONTROLLER_CLK_PERIOD; //52_000 ps of reset pulse width required for IDELAYCTRL
|
|
localparam READ_DATA_DELAY = ( (DDR3_CLK_PERIOD/4)/25 > 127)? 127 : (DDR3_CLK_PERIOD/4)/25,
|
|
WRITE_DQS_DELAY = ( (DDR3_CLK_PERIOD/4)/25 > 127)? 127 : (DDR3_CLK_PERIOD/4)/25,
|
|
READ_DQS_DELAY = ( (DDR3_CLK_PERIOD/4)/25 > 127)? 127 : (DDR3_CLK_PERIOD/4)/25;
|
|
localparam DATA_IDELAY_TAP = ((DDR3_CLK_PERIOD/4))/78.125; // delay incoming data by 90 degrees of DDR3_CLK_PERIOD
|
|
genvar gen_index;
|
|
wire[cmd_len-1:0] oserdes_cmd, //serialized(4:1) i_controller_cmd_slot_x
|
|
cmd;//delayed oserdes_cmd
|
|
wire[(DQ_BITS*LANES)-1:0] oserdes_data, odelay_data, idelay_data, read_dq;
|
|
wire[LANES-1:0] oserdes_dm, odelay_dm;
|
|
wire[LANES-1:0] odelay_dqs_p, odelay_dqs_n, read_dqs, idelay_dqs;
|
|
wire[DQ_BITS*LANES-1:0] oserdes_dq_tri_control;
|
|
wire[LANES-1:0] oserdes_dqs_p, oserdes_dqs_n;
|
|
wire[LANES-1:0] oserdes_dqs_tri_control;
|
|
wire[LANES-1:0] oserdes_bitslip_reference;
|
|
reg[$clog2(SYNC_RESET_DELAY):0] delay_before_release_reset;
|
|
reg sync_rst = 0;
|
|
wire ddr3_clk;
|
|
reg toggle_dqs_q; //past value of i_controller_toggle_dqs
|
|
wire ddr3_clk_delayed;
|
|
wire idelayctrl_rdy;
|
|
wire dci_locked;
|
|
reg[LANES*8-1:0] o_controller_iserdes_bitslip_reference_reg;
|
|
reg[LANES - 1 : 0] shift_bitslip_index;
|
|
|
|
// initial value of bitslip reference
|
|
initial begin
|
|
o_controller_iserdes_bitslip_reference_reg = {LANES{8'b0001_1110}};
|
|
shift_bitslip_index = 0;
|
|
end
|
|
|
|
assign o_controller_idelayctrl_rdy = 1'b1;
|
|
|
|
`ifdef DEBUG_DQS
|
|
assign o_ddr3_debug_read_dqs_p = io_ddr3_dqs;
|
|
assign o_ddr3_debug_read_dqs_n = io_ddr3_dqs_n;
|
|
`else
|
|
assign o_ddr3_debug_read_dqs_p = 0;
|
|
assign o_ddr3_debug_read_dqs_n = 0;
|
|
`endif
|
|
|
|
//synchronous reset
|
|
always @(posedge i_controller_clk) begin
|
|
if(!i_rst_n || i_controller_reset) begin
|
|
sync_rst <= 1'b1;
|
|
delay_before_release_reset <= SYNC_RESET_DELAY[$clog2(SYNC_RESET_DELAY):0];
|
|
toggle_dqs_q <= 0;
|
|
end
|
|
else begin
|
|
delay_before_release_reset <= (delay_before_release_reset == 0)? 0: delay_before_release_reset - 1;
|
|
sync_rst <= !(delay_before_release_reset == 0);
|
|
toggle_dqs_q <= i_controller_toggle_dqs;
|
|
end
|
|
end
|
|
|
|
//PHY cmd
|
|
generate
|
|
for(gen_index = 0; gen_index < cmd_len; gen_index = gen_index + 1) begin
|
|
oserdes_soft #(.LATTICE_ECP5(LATTICE_ECP5)) oserdes_soft_cmd(
|
|
.CLK(i_ddr3_clk), // High speed clock
|
|
.CLKDIV(i_controller_clk), // Divided clock
|
|
.RST(sync_rst), // Active high reset
|
|
.D1(i_controller_cmd[cmd_len*0 + gen_index]),
|
|
.D2(i_controller_cmd[cmd_len*0 + gen_index]),
|
|
.D3(i_controller_cmd[cmd_len*1 + gen_index]),
|
|
.D4(i_controller_cmd[cmd_len*1 + gen_index]),
|
|
.D5(i_controller_cmd[cmd_len*2 + gen_index]),
|
|
.D6(i_controller_cmd[cmd_len*2 + gen_index]),
|
|
.D7(i_controller_cmd[cmd_len*3 + gen_index]),
|
|
.D8(i_controller_cmd[cmd_len*3 + gen_index]),
|
|
.OQ(oserdes_cmd[gen_index]) // Data path output
|
|
);
|
|
end
|
|
endgenerate
|
|
|
|
|
|
assign o_ddr3_cs_n = oserdes_cmd[CMD_CS_N];
|
|
assign o_ddr3_cke = oserdes_cmd[CMD_CKE];
|
|
assign o_ddr3_odt = oserdes_cmd[CMD_ODT];
|
|
assign o_ddr3_ras_n = oserdes_cmd[CMD_RAS_N],
|
|
o_ddr3_cas_n = oserdes_cmd[CMD_CAS_N],
|
|
o_ddr3_we_n = oserdes_cmd[CMD_WE_N],
|
|
o_ddr3_reset_n = oserdes_cmd[CMD_RESET_N],
|
|
o_ddr3_ba_addr = oserdes_cmd[CMD_BANK_START:CMD_ADDRESS_START+1],
|
|
o_ddr3_addr = oserdes_cmd[CMD_ADDRESS_START:0];
|
|
|
|
assign o_ddr3_clk_p = !i_ddr3_clk;
|
|
assign o_ddr3_clk_n = i_ddr3_clk;
|
|
|
|
// PHY data and dm
|
|
generate
|
|
// Output data: oserdes_soft -> BB
|
|
// Input data: BB -> DELAYG -> iserdes_soft
|
|
for(gen_index = 0; gen_index < (DQ_BITS*LANES); gen_index = gen_index + 1) begin
|
|
oserdes_soft #(.LATTICE_ECP5(LATTICE_ECP5)) oserdes_soft_data(
|
|
.CLK(i_ddr3_clk), // High speed clock
|
|
.CLKDIV(i_controller_clk), // Divided clock
|
|
.RST(sync_rst), // Active high reset
|
|
.D1(i_controller_data[gen_index + (DQ_BITS*LANES)*0]),
|
|
.D2(i_controller_data[gen_index + (DQ_BITS*LANES)*1]),
|
|
.D3(i_controller_data[gen_index + (DQ_BITS*LANES)*2]),
|
|
.D4(i_controller_data[gen_index + (DQ_BITS*LANES)*3]),
|
|
.D5(i_controller_data[gen_index + (DQ_BITS*LANES)*4]),
|
|
.D6(i_controller_data[gen_index + (DQ_BITS*LANES)*5]),
|
|
.D7(i_controller_data[gen_index + (DQ_BITS*LANES)*6]),
|
|
.D8(i_controller_data[gen_index + (DQ_BITS*LANES)*7]),
|
|
.OQ(oserdes_data[gen_index]) // Data path output
|
|
);
|
|
|
|
if(LATTICE_ECP5) begin
|
|
BB BB_data (
|
|
.I(oserdes_data[gen_index]),
|
|
.O(read_dq[gen_index]),
|
|
.T(i_controller_dq_tri_control),
|
|
.B(io_ddr3_dq[gen_index])
|
|
);
|
|
|
|
DELAYG #(
|
|
.DEL_MODE("USER_DEFINED"),
|
|
.DEL_VALUE(READ_DATA_DELAY)
|
|
) DELAYG_data
|
|
(
|
|
.A(read_dq[gen_index]),
|
|
.Z(idelay_data[gen_index])
|
|
);
|
|
end // end of LATTICE_ECP5
|
|
else begin // XILINX
|
|
IOBUF IOBUF_data (
|
|
.I(oserdes_data[gen_index]),
|
|
.O(read_dq[gen_index]),
|
|
.T(i_controller_dq_tri_control),
|
|
.IO(io_ddr3_dq[gen_index])
|
|
);
|
|
|
|
IDELAYE2 #(
|
|
.DELAY_SRC("IDATAIN"), // Delay input (IDATAIN, DATAIN)
|
|
.HIGH_PERFORMANCE_MODE("TRUE"), //Reduced jitter ("TRUE"), Reduced power ("FALSE")
|
|
.IDELAY_TYPE("FIXED"), //FIXED, VARIABLE, VAR_LOAD, VAR_LOAD_PIPE
|
|
.IDELAY_VALUE(DATA_IDELAY_TAP > 31? 31 : DATA_IDELAY_TAP), //Input delay tap setting (0-31)
|
|
.PIPE_SEL("FALSE"), //Select pipelined mode, FALSE, TRUE
|
|
.REFCLK_FREQUENCY(200.0), //IDELAYCTRL clock input frequency in MHz (190.0-210.0).
|
|
.SIGNAL_PATTERN("DATA") //DATA, CLOCK input signal
|
|
)
|
|
IDELAYE2_data (
|
|
.CNTVALUEOUT(), // 5-bit output: Counter value output
|
|
.DATAOUT(idelay_data[gen_index]), // 1-bit output: Delayed data output
|
|
.C(i_controller_clk), // 1-bit input: Clock input
|
|
.CE(1'b0), // 1-bit input: Active high enable increment/decrement input
|
|
.CINVCTRL(1'b0),// 1-bit input: Dynamic clock inversion input
|
|
.CNTVALUEIN(0), // 5-bit input: Counter value input
|
|
.DATAIN(0), //1-bit input: Internal delay data input
|
|
.IDATAIN(read_dq[gen_index]), // 1-bit input: Data input from the I/O
|
|
.INC(1'b0), // 1-bit input: Increment / Decrement tap delay input
|
|
.LD(0), // 1-bit input: Load IDELAY_VALUE input
|
|
.LDPIPEEN(1'b0), // 1-bit input: Enable PIPELINE register to load data input
|
|
.REGRST(1'b0) // 1-bit input: Active-high reset tap-delay input
|
|
);
|
|
|
|
end // end of XILINX
|
|
|
|
iserdes_soft #(.LATTICE_ECP5(LATTICE_ECP5)) iserdes_soft_data(
|
|
.CLK(i_ddr3_clk), // High speed clock
|
|
.CLKDIV(i_controller_clk), // Divided clock
|
|
.RST(sync_rst), // Active high reset
|
|
.Q1(o_controller_iserdes_data[(DQ_BITS*LANES)*0 + gen_index]),
|
|
.Q2(o_controller_iserdes_data[(DQ_BITS*LANES)*1 + gen_index]),
|
|
.Q3(o_controller_iserdes_data[(DQ_BITS*LANES)*2 + gen_index]),
|
|
.Q4(o_controller_iserdes_data[(DQ_BITS*LANES)*3 + gen_index]),
|
|
.Q5(o_controller_iserdes_data[(DQ_BITS*LANES)*4 + gen_index]),
|
|
.Q6(o_controller_iserdes_data[(DQ_BITS*LANES)*5 + gen_index]),
|
|
.Q7(o_controller_iserdes_data[(DQ_BITS*LANES)*6 + gen_index]),
|
|
.Q8(o_controller_iserdes_data[(DQ_BITS*LANES)*7 + gen_index]),
|
|
.D(idelay_data[gen_index]), // Data path output
|
|
.bitslip(i_controller_bitslip[gen_index/8])
|
|
);
|
|
end
|
|
|
|
// data mask: oserdes -> o_ddr3_dm
|
|
for(gen_index = 0; gen_index < LANES; gen_index = gen_index + 1) begin
|
|
oserdes_soft #(.LATTICE_ECP5(LATTICE_ECP5)) oserdes_soft_dm(
|
|
.CLK(i_ddr3_clk), // High speed clock
|
|
.CLKDIV(i_controller_clk), // Divided clock
|
|
.RST(sync_rst), // Active high reset
|
|
.D1(i_controller_dm[gen_index + LANES*0]),
|
|
.D2(i_controller_dm[gen_index + LANES*1]),
|
|
.D3(i_controller_dm[gen_index + LANES*2]),
|
|
.D4(i_controller_dm[gen_index + LANES*3]),
|
|
.D5(i_controller_dm[gen_index + LANES*4]),
|
|
.D6(i_controller_dm[gen_index + LANES*5]),
|
|
.D7(i_controller_dm[gen_index + LANES*6]),
|
|
.D8(i_controller_dm[gen_index + LANES*7]),
|
|
.OQ(o_ddr3_dm[gen_index]) // Data path output
|
|
);
|
|
end
|
|
|
|
// dqs output: oserdes_soft -> DELAYG -> BB
|
|
// dqs input: BB -> DELAYG -> iserdes_soft
|
|
for(gen_index = 0; gen_index < LANES; gen_index = gen_index + 1) begin
|
|
if(LATTICE_ECP5) begin
|
|
// oserdes_soft #(.LATTICE_ECP5(LATTICE_ECP5)) oserdes_soft_dqs_p(
|
|
// .CLK(i_ddr3_clk), // High speed clock
|
|
// .CLKDIV(i_controller_clk), // Divided clock
|
|
// .RST(sync_rst), // Active high reset
|
|
// .D1(1'b1),
|
|
// .D2(1'b0),
|
|
// .D3(1'b1),
|
|
// .D4(1'b0),
|
|
// .D5(1'b1),
|
|
// .D6(1'b0),
|
|
// .D7(1'b1),
|
|
// .D8(1'b0),
|
|
// .OQ(oserdes_dqs_p[gen_index]) // Data path output
|
|
// );
|
|
|
|
// oserdes_soft #(.LATTICE_ECP5(LATTICE_ECP5)) oserdes_soft_dqs_n(
|
|
// .CLK(i_ddr3_clk), // High speed clock
|
|
// .CLKDIV(i_controller_clk), // Divided clock
|
|
// .RST(sync_rst), // Active high reset
|
|
// .D1(1'b0), //the last part will still have half dqs series
|
|
// .D2(1'b1),
|
|
// .D3(1'b0),
|
|
// .D4(1'b1),
|
|
// .D5(1'b0),
|
|
// .D6(1'b1),
|
|
// .D7(1'b0),
|
|
// .D8(1'b1),
|
|
// .OQ(oserdes_dqs_n[gen_index]) // Data path output
|
|
// );
|
|
|
|
// DELAYG #(
|
|
// .DEL_MODE("USER_DEFINED")
|
|
// ,.DEL_VALUE(WRITE_DQS_DELAY)
|
|
// ) DELAYG_odqs_p
|
|
// (
|
|
// .A(oserdes_dqs_p[gen_index]),
|
|
// .Z(odelay_dqs_p[gen_index])
|
|
// );
|
|
|
|
// DELAYG #(
|
|
// .DEL_MODE("USER_DEFINED")
|
|
// ,.DEL_VALUE(WRITE_DQS_DELAY)
|
|
// ) DELAYG_odqs_n
|
|
// (
|
|
// .A(oserdes_dqs_n[gen_index]),
|
|
// .Z(odelay_dqs_n[gen_index])
|
|
// );
|
|
|
|
BB BB_dqs_p (
|
|
.I(i_ddr3_clk_90),
|
|
.O(),
|
|
.T(i_controller_dqs_tri_control),
|
|
.B(io_ddr3_dqs[gen_index])
|
|
);
|
|
BB BB_dqs_n (
|
|
.I(!i_ddr3_clk_90),
|
|
.O(),
|
|
.T(i_controller_dqs_tri_control),
|
|
.B(io_ddr3_dqs_n[gen_index])
|
|
);
|
|
end // end of LATTICE_ECP5
|
|
else begin // XILINX
|
|
IOBUF IOBUF_dqs_p(
|
|
.I(i_ddr3_clk_90),
|
|
.O(),
|
|
.T(i_controller_dqs_tri_control),
|
|
.IO(io_ddr3_dqs[gen_index])
|
|
);
|
|
|
|
IOBUF IOBUF_dqs_n(
|
|
.I(!i_ddr3_clk_90),
|
|
.O(),
|
|
.T(i_controller_dqs_tri_control),
|
|
.IO(io_ddr3_dqs_n[gen_index])
|
|
);
|
|
end // end of XILINX
|
|
|
|
end
|
|
endgenerate
|
|
|
|
generate
|
|
if(!LATTICE_ECP5) begin
|
|
IDELAYCTRL IDELAYCTRL_inst (
|
|
.RDY(), // 1-bit output: Ready output
|
|
.REFCLK(i_ref_clk), // 1-bit input: Reference clock input.The frequency of REFCLK must be 200 MHz to guarantee the tap-delay value specified in the applicable data sheet.
|
|
.RST(sync_rst) // 1-bit input: Active high reset input, To ,Minimum Reset pulse width is 52ns
|
|
);
|
|
end
|
|
endgenerate
|
|
endmodule
|