From 4fdaace899c335f6ad3942fb3763e6d35b828c3a Mon Sep 17 00:00:00 2001 From: AngeloJacobo Date: Mon, 2 Dec 2024 11:28:21 +0800 Subject: [PATCH 1/3] add dual-rank feature (PHY ongoing changes) --- rtl/ddr3_controller.v | 206 ++++++++++++++++++++++-------- testbench/ddr3.sv | 2 +- testbench/ddr3_dimm_micron_sim.sv | 46 +++---- 3 files changed, 181 insertions(+), 73 deletions(-) diff --git a/rtl/ddr3_controller.v b/rtl/ddr3_controller.v index fdb7db1..994c4e8 100644 --- a/rtl/ddr3_controller.v +++ b/rtl/ddr3_controller.v @@ -68,17 +68,18 @@ module ddr3_controller #( SECOND_WISHBONE = 0, //set to 1 if 2nd wishbone is needed WB_ERROR = 0, // set to 1 to support Wishbone error (asserts at ECC double bit error) SKIP_INTERNAL_TEST = 1, // skip built-in self test (would require >2 seconds of internal test right after calibration) + DUAL_RANK_DIMM = 0, // enable dual rank DIMM parameter[1:0] ECC_ENABLE = 0, // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC ) (only change when you know what you are doing) parameter[1:0] DIC = 2'b00, //Output Driver Impedance Control (2'b00 = RZQ/6, 2'b01 = RZQ/7, RZQ = 240ohms) (only change when you know what you are doing) parameter[2:0] RTT_NOM = 3'b011, //RTT Nominal (3'b000 = disabled, 3'b001 = RZQ/4, 3'b010 = RZQ/2 , 3'b011 = RZQ/6, RZQ = 240ohms) parameter // The next parameters act more like a localparam (since user does not have to set this manually) but was added here to simplify port declaration serdes_ratio = 4, // this controller is fixed as a 4:1 memory controller (CONTROLLER_CLK_PERIOD/DDR3_CLK_PERIOD = 4) wb_data_bits = DQ_BITS*LANES*serdes_ratio*2, - wb_addr_bits = ROW_BITS + COL_BITS + BA_BITS - $clog2(serdes_ratio*2), + wb_addr_bits = ROW_BITS + COL_BITS + BA_BITS - $clog2(serdes_ratio*2) + DUAL_RANK_DIMM, wb_sel_bits = wb_data_bits / 8, wb2_sel_bits = WB2_DATA_BITS / 8, //4 is the width of a single ddr3 command {cs_n, ras_n, cas_n, we_n} plus 3 (ck_en, odt, reset_n) plus bank bits plus row bits - cmd_len = 4 + 3 + BA_BITS + ROW_BITS, + cmd_len = 4 + 3 + BA_BITS + ROW_BITS + DUAL_RANK_DIMM, lanes_clog2 = $clog2(LANES) == 0? 1: $clog2(LANES), parameter[1:0] row_bank_col = (ECC_ENABLE == 3)? 2 : 1, // memory address mapping: 0 {bank, row, col} , 1 = {row, bank, col} , 2 = {bank[2:1]. row, bank[0], col} FOR ECC parameter[0:0] ECC_TEST = 0 @@ -118,7 +119,7 @@ module ddr3_controller #( (* mark_debug = "true" *) input wire[LANES*serdes_ratio*2 - 1:0] i_phy_iserdes_dqs, input wire[LANES*serdes_ratio*2 - 1:0] i_phy_iserdes_bitslip_reference, input wire i_phy_idelayctrl_rdy, - output wire[cmd_len*serdes_ratio-1:0] o_phy_cmd, + output wire[(cmd_len+DUAL_RANK_DIMM)*serdes_ratio-1:0] o_phy_cmd, output reg o_phy_dqs_tri_control, o_phy_dq_tri_control, output wire o_phy_toggle_dqs, output wire[wb_data_bits-1:0] o_phy_data, @@ -168,15 +169,32 @@ module ddr3_controller #( // ddr3 command partitioning /* verilator lint_off UNUSEDPARAM */ - localparam CMD_CS_N = cmd_len - 1, - CMD_RAS_N = cmd_len - 2, - CMD_CAS_N= cmd_len - 3, - CMD_WE_N = cmd_len - 4, - CMD_ODT = cmd_len - 5, - CMD_CKE = cmd_len - 6, - CMD_RESET_N = cmd_len - 7, - CMD_BANK_START = BA_BITS + ROW_BITS - 1, - CMD_ADDRESS_START = ROW_BITS - 1; + generate + if(DUAL_RANK_DIMM) begin + localparam CMD_CS_N_2 = cmd_len - 1, + CMD_CS_N = cmd_len - 2, + CMD_RAS_N = cmd_len - 3, + CMD_CAS_N= cmd_len - 4, + CMD_WE_N = cmd_len - 5, + CMD_ODT = cmd_len - 6, + CMD_CKE = cmd_len - 7, + CMD_RESET_N = cmd_len - 8, + CMD_BANK_START = BA_BITS + ROW_BITS - 1, + CMD_ADDRESS_START = ROW_BITS - 1, + end + else begin + localparam CMD_CS_N = cmd_len - 1, + CMD_RAS_N = cmd_len - 2, + CMD_CAS_N= cmd_len - 3, + CMD_WE_N = cmd_len - 4, + CMD_ODT = cmd_len - 5, + CMD_CKE = cmd_len - 6, + CMD_RESET_N = cmd_len - 7, + CMD_BANK_START = BA_BITS + ROW_BITS - 1, + CMD_ADDRESS_START = ROW_BITS - 1, + end + endgenerate + /* verilator lint_on UNUSEDPARAM */ localparam READ_SLOT = get_slot(CMD_RD), WRITE_SLOT = get_slot(CMD_WR), @@ -412,9 +430,9 @@ module ddr3_controller #( reg stage2_update = 1; reg stage2_stall = 0; reg stage1_stall = 0; - reg[(1< 10 has different format from <= 10 + cmd_d[WRITE_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_WR[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0],{{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[9:0]}; + end end - else begin // COL_BITS > 10 has different format from <= 10 - cmd_d[WRITE_SLOT] = {1'b0, CMD_WR[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank,{{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[9:0]}; + else begin + if(COL_BITS <= 10) begin + cmd_d[WRITE_SLOT] = {1'b0, CMD_WR[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank,{{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_col[9:0]}; + end + else begin // COL_BITS > 10 has different format from <= 10 + cmd_d[WRITE_SLOT] = {1'b0, CMD_WR[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank,{{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[9:0]}; + end end //turn on odt at same time as write cmd cmd_d[0][CMD_ODT] = cmd_odt; @@ -1549,19 +1600,30 @@ module ddr3_controller #( end delay_before_read_counter_d[stage2_bank] = READ_TO_READ_DELAY; delay_before_write_counter_d[stage2_bank] = READ_TO_WRITE_DELAY + 1; //temporary solution since its possible odt to go high already while reading previously - for(index=0; index < (1< 10 has different format from <= 10 + cmd_d[READ_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_RD[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], {{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[9:0]}; + end end - else begin // COL_BITS > 10 has different format from <= 10 - cmd_d[READ_SLOT] = {1'b0, CMD_RD[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, {{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[9:0]}; + else begin + if(COL_BITS <= 10) begin + cmd_d[READ_SLOT] = {1'b0, CMD_RD[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_col[9:0]}; + end + else begin // COL_BITS > 10 has different format from <= 10 + cmd_d[READ_SLOT] = {1'b0, CMD_RD[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, {{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[9:0]}; + end end + //turn off odt at same time as read cmd cmd_d[0][CMD_ODT] = cmd_odt; cmd_d[1][CMD_ODT] = cmd_odt; @@ -1582,7 +1644,12 @@ module ddr3_controller #( delay_before_write_counter_d[stage2_bank] = ACTIVATE_TO_WRITE_DELAY; end //issue activate command - cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank , stage2_row}; + if(DUAL_RANK_DIMM) begin + cmd_d[ACTIVATE_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], stage2_row}; + end + else begin + cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank , stage2_row}; + end //update bank status and active row bank_status_d[stage2_bank] = 1'b1; bank_active_row_d[stage2_bank] = stage2_row; @@ -1593,7 +1660,12 @@ module ddr3_controller #( //set-up delay before activate delay_before_activate_counter_d[stage2_bank] = PRECHARGE_TO_ACTIVATE_DELAY; //issue precharge command - cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[9:0] } }; + if(DUAL_RANK_DIMM) begin + cmd_d[PRECHARGE_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[9:0] } }; + end + else begin + cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[9:0] } }; + end //update bank status and active row bank_status_d[stage2_bank] = 1'b0; end @@ -1615,8 +1687,13 @@ module ddr3_controller #( // Thus stage 1 anticipate makes sure smooth burst operation that jumps banks if(bank_status_q[stage1_next_bank] && bank_active_row_q[stage1_next_bank] != stage1_next_row && delay_before_precharge_counter_q[stage1_next_bank] ==0 && !precharge_slot_busy) begin //set-up delay before read and write - delay_before_activate_counter_d[stage1_next_bank] = PRECHARGE_TO_ACTIVATE_DELAY; - cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage1_next_row[9:0] } }; + delay_before_activate_counter_d[stage1_next_bank] = PRECHARGE_TO_ACTIVATE_DELAY; + if(DUAL_RANK_DIMM) begin + cmd_d[PRECHARGE_SLOT] = {!stage1_next_bank[BA_BITS], stage1_next_bank[BA_BITS], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage1_next_row[9:0] } }; + end + else begin + cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage1_next_row[9:0] } }; + end bank_status_d[stage1_next_bank] = 1'b0; end //end of anticipate precharge @@ -1630,7 +1707,12 @@ module ddr3_controller #( if(delay_before_write_counter_d[stage1_next_bank] <= ACTIVATE_TO_WRITE_DELAY) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated delay_before_write_counter_d[stage1_next_bank] = ACTIVATE_TO_WRITE_DELAY; end - cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0] , cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank , stage1_next_row}; + if(DUAL_RANK_DIMM) begin + cmd_d[ACTIVATE_SLOT] = {!stage1_next_bank[BA_BITS], stage1_next_bank[BA_BITS], CMD_ACT[2:0] , cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank , stage1_next_row}; + end + else begin + cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0] , cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank , stage1_next_row}; + end bank_status_d[stage1_next_bank] = 1'b1; bank_active_row_d[stage1_next_bank] = stage1_next_row; end //end of anticipate activate @@ -2027,6 +2109,7 @@ module ddr3_controller #( write_by_byte_counter <= 0; initial_calibration_done <= 1'b0; final_calibration_done <= 1'b0; + reset_after_rank_1 <= 1'b0; for(index = 0; index < LANES; index = index + 1) begin added_read_pipe[index] <= 0; data_start_index[index] <= 0; @@ -2052,7 +2135,8 @@ module ddr3_controller #( /* verilator lint_on WIDTH */ idelay_data_cntvaluein_prev <= idelay_data_cntvaluein[lane]; reset_from_calibrate <= 0; - + reset_after_rank_1 <= 0; // reset for dual rank + if(wb2_update) begin odelay_data_cntvaluein[wb2_write_lane] <= wb2_phy_odelay_data_ld[wb2_write_lane]? wb2_phy_odelay_data_cntvaluein : odelay_data_cntvaluein[wb2_write_lane]; odelay_dqs_cntvaluein[wb2_write_lane] <= wb2_phy_odelay_dqs_ld[wb2_write_lane]? wb2_phy_odelay_dqs_cntvaluein : odelay_dqs_cntvaluein[wb2_write_lane]; @@ -2609,7 +2693,13 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin calib_stb <= 0; if(train_delay == 0) begin state_calibrate <= DONE_CALIBRATE; - final_calibration_done <= 1'b1; + if(DUAL_RANK_DIMM) begin + final_calibration_done <= current_rank; // calibration is only done after calibration of 2nd rank + reset_after_rank_1 <= !current_rank; // reset only if current rank is 1st rank + end + else begin + final_calibration_done <= 1'b1; + end end end @@ -2651,7 +2741,23 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin write_test_address_counter <= 0; end end - end + end + + generate + if(DUAL_RANK_DIMM) begin + // logic for current_rank to track if rank 1 or rank 2 is being calibrated + always @(posedge i_controller_clk) begin + if(sync_rst_controller && !reset_after_rank_1) begin // dont reset at reset_after_rank_1 + current_rank <= 1'b0; // start at rank 1 + end + else begin + if(reset_after_rank_1) begin + current_rank <= 1'b1; // switch to 2nd rank after reset + end + end + end + endgenerate + assign issue_read_command = (state_calibrate == MPR_READ && delay_before_read_data == 0); assign o_phy_odelay_data_cntvaluein = odelay_data_cntvaluein[lane]; assign o_phy_odelay_dqs_cntvaluein = odelay_dqs_cntvaluein[lane]; diff --git a/testbench/ddr3.sv b/testbench/ddr3.sv index c6f1fba..db292bf 100644 --- a/testbench/ddr3.sv +++ b/testbench/ddr3.sv @@ -96,7 +96,7 @@ `timescale 1ps / 1ps `define den8192Mb `define sg125 -`define x16 +`define x8 `default_nettype wire module ddr3 ( diff --git a/testbench/ddr3_dimm_micron_sim.sv b/testbench/ddr3_dimm_micron_sim.sv index 3640673..82de706 100644 --- a/testbench/ddr3_dimm_micron_sim.sv +++ b/testbench/ddr3_dimm_micron_sim.sv @@ -31,8 +31,8 @@ `define sg125 `define x16 //`define USE_CLOCK_WIZARD -`define TWO_LANES_x8 -//`define EIGHT_LANES_x8 +//`define TWO_LANES_x8 +`define EIGHT_LANES_x8 `define RAM_8Gb module ddr3_dimm_micron_sim; @@ -57,7 +57,7 @@ module ddr3_dimm_micron_sim; `ifdef EIGHT_LANES_x8 localparam BYTE_LANES = 8, - ODELAY_SUPPORTED = 1; + ODELAY_SUPPORTED = 0; `endif @@ -95,7 +95,7 @@ module ddr3_dimm_micron_sim; wire[$bits(ddr3_top.io_ddr3_dq)-1:0] dq; wire[$bits(ddr3_top.io_ddr3_dqs)-1:0] dqs; wire[$bits(ddr3_top.io_ddr3_dqs_n)-1:0] dqs_n; - wire o_ddr3_clk_p, o_ddr3_clk_n; + wire[1:0] o_ddr3_clk_p, o_ddr3_clk_n; integer index; // Wishbone 2 (PHY) inputs reg i_wb2_cyc; //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled) @@ -201,11 +201,11 @@ ddr3_top #( .o_wb2_ack(o_wb2_ack), //1 = read/write request has completed .o_wb2_data(o_wb2_data), //read data, for a 4:1 controller data width is 8 times the number of pins on the device // PHY Interface (to be added later) - .o_ddr3_clk_p(o_ddr3_clk_p), - .o_ddr3_clk_n(o_ddr3_clk_n), - .o_ddr3_cke(ck_en[0]), // CKE - .o_ddr3_cs_n(cs_n[0]), // chip select signal - .o_ddr3_odt(odt[0]), // on-die termination + .o_ddr3_clk_p(o_ddr3_clk_p[1]), + .o_ddr3_clk_n(o_ddr3_clk_n[1]), + .o_ddr3_cke(ck_en[1]), // CKE + .o_ddr3_cs_n(cs_n[1]), // chip select signal + .o_ddr3_odt(odt[1]), // on-die termination .o_ddr3_ras_n(ras_n), // RAS# .o_ddr3_cas_n(cas_n), // CAS# .o_ddr3_we_n(we_n), // WE# @@ -225,8 +225,8 @@ ddr3_top #( // 1 lane DDR3 ddr3 ddr3_0( .rst_n(reset_n), - .ck(o_ddr3_clk_p), - .ck_n(o_ddr3_clk_n), + .ck(o_ddr3_clk_p[0]), + .ck_n(o_ddr3_clk_n[0]), .cke(ck_en[0]), .cs_n(cs_n[0]), .ras_n(ras_n), @@ -241,30 +241,32 @@ ddr3_top #( .tdqs_n(), .odt(odt[0]) ); - assign ck_en[1]=0, - cs_n[1]=1, - odt[1]=0; `endif `ifdef EIGHT_LANES_x8 // DDR3 Device ddr3_module ddr3_module( .reset_n(reset_n), - .ck(o_ddr3_clk_p), - .ck_n(o_ddr3_clk_n), - .cke(ck_en), - .s_n(cs_n), + .ck(o_ddr3_clk_p), //[1:0] + .ck_n(o_ddr3_clk_n), //[1:0] + .cke(ck_en), //[1:0] + .s_n(cs_n), //[1:0] .ras_n(ras_n), .cas_n(cas_n), .we_n(we_n), .ba(ba_addr), .addr(addr), - .odt(odt), + .odt(odt), //[1:0] .dqs({ddr3_dm[0], ddr3_dm,ddr3_dm[0],dqs}), //ddr3_module uses last 8 MSB [16:9] as datamask .dqs_n(dqs_n), .dq(dq) ); + assign ck_en[0]=0, + cs_n[0]=1, + odt[0]=0; `endif + + reg[ddr3_top.ddr3_controller_inst.wb_data_bits-1:0] orig_phy_data; // Force change for ECC tests // Uncommented since there is ECC_TEST parameter inside ddr3_controller to test ECC @@ -945,9 +947,9 @@ ddr3_top #( reg[31:0] time_now; reg[3:0] repeats = 0; //display commands issued - always @(posedge o_ddr3_clk_p) begin - if(!cs_n[0]) begin //command is center-aligned to positive edge of clock, a valid command always has low cs_n - case({cs_n[0], ras_n, cas_n, we_n}) + always @(posedge o_ddr3_clk_p[1]) begin + if(!cs_n[1]) begin //command is center-aligned to positive edge of clock, a valid command always has low cs_n + case({cs_n[1], ras_n, cas_n, we_n}) 4'b0000: command_used = "MRS"; 4'b0001: command_used = "REF"; 4'b0010: command_used = "PRE"; From 7367182640a8ff9531814a331635ad58ea242abf Mon Sep 17 00:00:00 2001 From: AngeloJacobo Date: Fri, 20 Dec 2024 18:56:21 +0800 Subject: [PATCH 2/3] dual rank enabled is now passing formal and simulation! --- rtl/ddr3_controller.v | 332 +++++++++++++++++------------- rtl/ddr3_phy.v | 126 ++++++++---- rtl/ddr3_top.v | 20 +- testbench/ddr3_dimm_micron_sim.sv | 62 +++--- 4 files changed, 333 insertions(+), 207 deletions(-) diff --git a/rtl/ddr3_controller.v b/rtl/ddr3_controller.v index 994c4e8..27d91cd 100644 --- a/rtl/ddr3_controller.v +++ b/rtl/ddr3_controller.v @@ -63,12 +63,12 @@ module ddr3_controller #( AUX_WIDTH = 16, //width of aux line (must be >= 4) WB2_ADDR_BITS = 7, //width of 2nd wishbone address bus WB2_DATA_BITS = 32, //width of 2nd wishbone data bus + DUAL_RANK_DIMM = 0, // enable dual rank DIMM (1 = enable, 0 = disable) parameter[0:0] MICRON_SIM = 0, //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) ODELAY_SUPPORTED = 1, //set to 1 when ODELAYE2 is supported SECOND_WISHBONE = 0, //set to 1 if 2nd wishbone is needed WB_ERROR = 0, // set to 1 to support Wishbone error (asserts at ECC double bit error) SKIP_INTERNAL_TEST = 1, // skip built-in self test (would require >2 seconds of internal test right after calibration) - DUAL_RANK_DIMM = 0, // enable dual rank DIMM parameter[1:0] ECC_ENABLE = 0, // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC ) (only change when you know what you are doing) parameter[1:0] DIC = 2'b00, //Output Driver Impedance Control (2'b00 = RZQ/6, 2'b01 = RZQ/7, RZQ = 240ohms) (only change when you know what you are doing) parameter[2:0] RTT_NOM = 3'b011, //RTT Nominal (3'b000 = disabled, 3'b001 = RZQ/4, 3'b010 = RZQ/2 , 3'b011 = RZQ/6, RZQ = 240ohms) @@ -79,7 +79,7 @@ module ddr3_controller #( wb_sel_bits = wb_data_bits / 8, wb2_sel_bits = WB2_DATA_BITS / 8, //4 is the width of a single ddr3 command {cs_n, ras_n, cas_n, we_n} plus 3 (ck_en, odt, reset_n) plus bank bits plus row bits - cmd_len = 4 + 3 + BA_BITS + ROW_BITS + DUAL_RANK_DIMM, + cmd_len = 4 + 3 + BA_BITS + ROW_BITS + 2*DUAL_RANK_DIMM, lanes_clog2 = $clog2(LANES) == 0? 1: $clog2(LANES), parameter[1:0] row_bank_col = (ECC_ENABLE == 3)? 2 : 1, // memory address mapping: 0 {bank, row, col} , 1 = {row, bank, col} , 2 = {bank[2:1]. row, bank[0], col} FOR ECC parameter[0:0] ECC_TEST = 0 @@ -119,7 +119,7 @@ module ddr3_controller #( (* mark_debug = "true" *) input wire[LANES*serdes_ratio*2 - 1:0] i_phy_iserdes_dqs, input wire[LANES*serdes_ratio*2 - 1:0] i_phy_iserdes_bitslip_reference, input wire i_phy_idelayctrl_rdy, - output wire[(cmd_len+DUAL_RANK_DIMM)*serdes_ratio-1:0] o_phy_cmd, + output wire[cmd_len*serdes_ratio-1:0] o_phy_cmd, output reg o_phy_dqs_tri_control, o_phy_dq_tri_control, output wire o_phy_toggle_dqs, output wire[wb_data_bits-1:0] o_phy_data, @@ -169,31 +169,17 @@ module ddr3_controller #( // ddr3 command partitioning /* verilator lint_off UNUSEDPARAM */ - generate - if(DUAL_RANK_DIMM) begin - localparam CMD_CS_N_2 = cmd_len - 1, - CMD_CS_N = cmd_len - 2, - CMD_RAS_N = cmd_len - 3, - CMD_CAS_N= cmd_len - 4, - CMD_WE_N = cmd_len - 5, - CMD_ODT = cmd_len - 6, - CMD_CKE = cmd_len - 7, - CMD_RESET_N = cmd_len - 8, - CMD_BANK_START = BA_BITS + ROW_BITS - 1, - CMD_ADDRESS_START = ROW_BITS - 1, - end - else begin - localparam CMD_CS_N = cmd_len - 1, - CMD_RAS_N = cmd_len - 2, - CMD_CAS_N= cmd_len - 3, - CMD_WE_N = cmd_len - 4, - CMD_ODT = cmd_len - 5, - CMD_CKE = cmd_len - 6, - CMD_RESET_N = cmd_len - 7, - CMD_BANK_START = BA_BITS + ROW_BITS - 1, - CMD_ADDRESS_START = ROW_BITS - 1, - end - endgenerate + localparam CMD_CS_N_2 = cmd_len - 1, + CMD_CS_N = DUAL_RANK_DIMM[0]? cmd_len - 2 : cmd_len - 1, + CMD_RAS_N = DUAL_RANK_DIMM[0]? cmd_len - 3 : cmd_len - 2, + CMD_CAS_N = DUAL_RANK_DIMM[0]? cmd_len - 4 : cmd_len - 3, + CMD_WE_N = DUAL_RANK_DIMM[0]? cmd_len - 5 : cmd_len - 4, + CMD_ODT = DUAL_RANK_DIMM[0]? cmd_len - 6 : cmd_len - 5, + CMD_CKE_2 = DUAL_RANK_DIMM[0]? cmd_len - 7 : cmd_len - 6, + CMD_CKE = DUAL_RANK_DIMM[0]? cmd_len - 8 : cmd_len - 6, + CMD_RESET_N = DUAL_RANK_DIMM[0]? cmd_len - 9 : cmd_len - 7, + CMD_BANK_START = BA_BITS + ROW_BITS - 1, + CMD_ADDRESS_START = ROW_BITS - 1; /* verilator lint_on UNUSEDPARAM */ localparam READ_SLOT = get_slot(CMD_RD), @@ -274,7 +260,7 @@ module ddr3_controller #( localparam tXSDLL_tRFC = tXSDLL - ps_to_cycles(tRFC); // cycles (controller) Time before refresh after exit from self-refresh localparam tCKE = max(3, ps_to_nCK(7500) ); // nCK CKE minimum pulse width localparam tCKESR = nCK_to_cycles(tCKE + 1)+ 5; // cycles (controller) Minimum time that the DDR3 SDRAM must remain in Self-Refresh mode is tCKESR - localparam tCPDED = 1; // cycle (tCPDED is at most 2nCK but we make it to 1cycle or 4nCK) Command pass disable delay , required cycles of NOP after CKE low + localparam tCPDED = 5; // cycle (tCPDED is at most 2nCK but we make it to 1cycle or 4nCK) Command pass disable delay , required cycles of NOP after CKE low /*********************************************************************************************************************************************/ @@ -430,7 +416,7 @@ module ddr3_controller #( reg stage2_update = 1; reg stage2_stall = 0; reg stage1_stall = 0; - reg[(1<<(BA_BITS+DUAL_RANK_DIMM)-1:0] bank_status_q, bank_status_d; //bank_status[bank_number]: determine current state of bank (1=active , 0=idle) + reg[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] bank_status_q, bank_status_d; //bank_status[bank_number]: determine current state of bank (1=active , 0=idle) //bank_active_row[bank_number] = stores the active row address in the specified bank reg[ROW_BITS-1:0] bank_active_row_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], bank_active_row_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0]; @@ -508,7 +494,7 @@ module ddr3_controller #( o_phy_bitslip = 0; end reg cmd_odt_q = 0, cmd_odt, cmd_reset_n; - (* mark_debug = "true" *) reg cmd_ck_en; + (* mark_debug = "true" *) reg[DUAL_RANK_DIMM:0] cmd_ck_en, prev_cmd_ck_en; reg o_wb_stall_q = 1, o_wb_stall_d, o_wb_stall_calib = 1; reg precharge_slot_busy; reg activate_slot_busy; @@ -602,7 +588,7 @@ module ddr3_controller #( reg[LANES-1:0] wb2_phy_idelay_dqs_ld; (* mark_debug ="true" *)reg[LANES-1:0] write_level_fail = 0; reg[lanes_clog2-1:0] wb2_write_lane; - reg sync_rst_wb2 = 0, sync_rst_controller = 0; + reg sync_rst_wb2 = 0, sync_rst_controller = 0, current_rank_rst = 0; reg reset_from_wb2 = 0, reset_from_calibrate = 0, reset_from_test = 0, repeat_test = 0; reg reset_after_rank_1 = 0; // reset after calibration rank 1 to switch to rank 2 reg current_rank = 0; @@ -775,13 +761,13 @@ module ddr3_controller #( 5'd23: read_rom_instruction = {5'b01111, CMD_PRE, ps_to_cycles(tRP)}; // 23. All banks must be precharged (A10-AP = high) and idle for a minimum of the precharge time tRP(min) before the Self-Refresh Command can be applied. - 5'd24: read_rom_instruction = {5'b01001, CMD_NOP, tCPDED[DELAY_SLOT_WIDTH-1:0]}; - // 24. CKE must go low to enter self-refresh, tCPDED cycles of NOP are required before CMD_SREF_EN - - 5'd25: read_rom_instruction = {5'b01001, CMD_SREF_EN, tCKESR[DELAY_SLOT_WIDTH-1:0]}; - // 25. Self-refresh entry + 5'd24: read_rom_instruction = {5'b01001, CMD_SREF_EN, tCKESR[DELAY_SLOT_WIDTH-1:0]}; + // 24. Self-refresh entry // JEDEC Standard No. 79-3E Page 79: The minimum time that the DDR3 SDRAM must remain in Self-Refresh mode is tCKESR + 5'd25: read_rom_instruction = {5'b01001, CMD_NOP, tCPDED[DELAY_SLOT_WIDTH-1:0]}; + // 25. tCPDED cycles of NOP are required after CKE low + 5'd26: read_rom_instruction = {5'b01011, CMD_SREF_XT, tXSDLL_tRFC[DELAY_SLOT_WIDTH-1:0]}; // 26. From 25 (Self-refresh entry), wait until user-self_refresh is disabled then wait for tXSDLL - tRFC before going to 20 (Refresh) // JEDEC Standard No. 79-3E Page 79: Before a command that requires a locked DLL can be applied, a delay of at least tXSDLL must be satisfied. @@ -797,9 +783,10 @@ module ddr3_controller #( /******************************************* Reset Sequence ROM Controller *******************************************/ always @(posedge i_controller_clk) begin sync_rst_controller <= !i_rst_n || reset_from_wb2 || reset_from_calibrate || reset_from_test || reset_after_rank_1; + current_rank_rst <= !i_rst_n || reset_from_wb2 || reset_from_calibrate || reset_from_test; sync_rst_wb2 <= !i_rst_n; end - assign o_phy_reset = sync_rst_controller; + assign o_phy_reset = current_rank_rst; // PHY will not reset when transitioning from rank 0 to rank 1 always @(posedge i_controller_clk) begin if(sync_rst_controller) begin @@ -827,7 +814,7 @@ module ddr3_controller #( //delay_counter of 1 means we will need to update the delay_counter next clock cycle (delay_counter of zero) so we need to retrieve //now the next instruction. The same thing needs to be done when current instruction does not need the timer delay. - if(delay_counter == 1 || !instruction[USE_TIMER]/* || skip_reset_seq_delay*/) begin + if( ((delay_counter == 1) && !pause_counter) || !instruction[USE_TIMER]/* || skip_reset_seq_delay*/) begin delay_counter_is_zero <= 1; instruction <= read_rom_instruction(instruction_address); if(instruction_address == 5'd22) begin // if user_self_refresh is disabled, wrap back to 19 (Precharge All before Refresh) @@ -852,7 +839,6 @@ module ddr3_controller #( instruction <= read_rom_instruction(instruction_address); end - //instruction[RST_DONE] is non-persistent thus we need to register it once it goes high reset_done <= instruction[RST_DONE]? 1'b1:reset_done; end @@ -861,6 +847,12 @@ module ddr3_controller #( // register user-enabled self-refresh always @(posedge i_controller_clk) begin user_self_refresh_q <= i_user_self_refresh && (user_self_refresh_q || (instruction_address != 5'd26)) && final_calibration_done; //will not go high again if already at instruction_address 26 (self-refresh exit), only go high when calibration is done + if(DUAL_RANK_DIMM[0]) begin // if dual rank enabled, then enable self refresh right after completing calibration + if(state_calibrate == FINISH_READ) begin + user_self_refresh_q <= 1'b1; + end + end + end /*********************************************************************************************************************************************/ @@ -994,7 +986,7 @@ module ddr3_controller #( // if ECC write, then we are writing ECC for previous address // if ECC read, then we are reading ECC for current address stage2_col <= ecc_col_addr_prev; - stage2_bank <= ecc_bank_addr_prev; + stage2_bank[BA_BITS-1:0] <= ecc_bank_addr_prev; stage2_row <= ecc_row_addr_prev; ecc_col_addr_prev <= ecc_col_addr; ecc_bank_addr_prev <= ecc_bank_addr; @@ -1009,7 +1001,7 @@ module ddr3_controller #( // if ECC write, then we are writing ECC for previous address // if ECC read, then we are reading ECC for current address stage2_col <= ecc_stage1_stall? (stage1_we? ecc_col_addr_prev : ecc_col_addr) : stage1_col; - stage2_bank <= ecc_stage1_stall? (stage1_we? ecc_bank_addr_prev : ecc_bank_addr) : stage1_bank; + stage2_bank[BA_BITS-1:0] <= ecc_stage1_stall? (stage1_we? ecc_bank_addr_prev : ecc_bank_addr) : stage1_bank[BA_BITS-1:0]; stage2_row <= ecc_stage1_stall? (stage1_we? ecc_row_addr_prev : ecc_row_addr) : stage1_row; ecc_col_addr_prev <= ecc_col_addr; ecc_bank_addr_prev <= ecc_bank_addr; @@ -1058,11 +1050,12 @@ module ddr3_controller #( end if(row_bank_col == 1) begin // memory address mapping: {row, bank, col} - if(DUAL_RANK_DIMM) begin - stage1_bank[BA_BITS] = i_wb_addr[ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)]; // msb determines rank + if(DUAL_RANK_DIMM[0]) begin + stage1_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= i_wb_addr[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank + stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= wb_addr_plus_anticipate[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank end stage1_row <= i_wb_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address - stage1_bank <= i_wb_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address + stage1_bank[BA_BITS-1:0] <= i_wb_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address stage1_col <= { i_wb_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) //stage1_next_bank will not increment unless stage1_next_col //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated @@ -1070,14 +1063,14 @@ module ddr3_controller #( //current column with a margin dictated by //MARGIN_BEFORE_ANTICIPATE /* verilator lint_off WIDTH */ - {stage1_next_row , stage1_next_bank} <= wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); + {stage1_next_row , stage1_next_bank[BA_BITS-1:0]} <= wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); //anticipated next row and bank to be accessed /* verilator lint_on WIDTH */ stage1_data <= i_wb_data; end else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col} - stage1_bank <= i_wb_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address + stage1_bank[BA_BITS-1:0] <= i_wb_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address stage1_row <= i_wb_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address stage1_col <= { i_wb_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) //stage1_next_row will not increment unless stage1_next_col @@ -1132,11 +1125,12 @@ module ddr3_controller #( end if(row_bank_col == 1) begin // memory address mapping: {row, bank, col} - if(DUAL_RANK_DIMM) begin - stage1_bank[BA_BITS] = calib_addr[ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)]; // msb determines rank + if(DUAL_RANK_DIMM[0]) begin + stage1_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= current_rank; // rank depends on current_rank + stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= current_rank; // rank depends on current_rank end stage1_row <= calib_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address - stage1_bank <= calib_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address + stage1_bank[BA_BITS-1:0] <= calib_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address stage1_col <= { calib_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned) //stage1_next_bank will not increment unless stage1_next_col //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated @@ -1144,13 +1138,13 @@ module ddr3_controller #( //current column with a margin dictated by //MARGIN_BEFORE_ANTICIPATE /* verilator lint_off WIDTH */ - {stage1_next_row , stage1_next_bank} <= calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); + {stage1_next_row , stage1_next_bank[BA_BITS-1:0] } <= calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); //anticipated next row and bank to be accessed /* verilator lint_on WIDTH */ stage1_data <= calib_data; end else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col} - stage1_bank <= calib_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address + stage1_bank[BA_BITS-1:0] <= calib_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address stage1_row <= calib_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address stage1_col <= { calib_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned) //stage1_next_row will not increment unless stage1_next_col @@ -1429,8 +1423,21 @@ module ddr3_controller #( stage2_ecc_write_data_mask_d = stage2_ecc_write_data_mask_q; write_ecc_stored_to_mem_d = write_ecc_stored_to_mem_q; cmd_odt = cmd_odt_q || write_calib_odt; - cmd_ck_en = instruction[CLOCK_EN]; - cmd_reset_n = instruction[RESET_N]; + // logic for clock enable + if(DUAL_RANK_DIMM[0]) begin + if(current_rank) begin // if already on rank 1 + cmd_ck_en[0] = final_calibration_done? instruction[CLOCK_EN] : 1'b0; // rank 0 is on self-refresh (clock en disabled) if calibration is not yet done for rank 1 + cmd_ck_en[DUAL_RANK_DIMM] = instruction[CLOCK_EN]; // rank 1 follows current instruction + end + else begin // if on rank 0 + cmd_ck_en[0] = instruction[CLOCK_EN]; // rank 0 follows current instruction + cmd_ck_en[DUAL_RANK_DIMM] = 1'b0; // rank 1 is idle + end + end + else begin + cmd_ck_en[0] = instruction[CLOCK_EN]; + end + cmd_reset_n = instruction[RESET_N] || (DUAL_RANK_DIMM[0] && current_rank); // if dual rank enabled and current rank is 1 then reset_n does not need to assert again (already asserted on rank 0) stage1_stall = 1'b0; stage2_stall = 1'b0; ecc_stage2_stall = 1'b0; @@ -1446,32 +1453,40 @@ module ddr3_controller #( end //set PRECHARGE_SLOT as reset instruction, the remainings are NOP (MSB is high) //delay_counter_is_zero high signifies start of new reset instruction (the time when the command must be issued) - cmd_d[PRECHARGE_SLOT] = {(!delay_counter_is_zero), instruction[DDR3_CMD_START-1:DDR3_CMD_END] | {3{(!delay_counter_is_zero)}} , cmd_odt, instruction[CLOCK_EN], instruction[RESET_N], + cmd_d[PRECHARGE_SLOT][cmd_len-1-DUAL_RANK_DIMM:0] = {(!delay_counter_is_zero), instruction[DDR3_CMD_START-1:DDR3_CMD_END] | {3{(!delay_counter_is_zero)}} , cmd_odt, cmd_ck_en, cmd_reset_n, instruction[MRS_BANK_START:(MRS_BANK_START-BA_BITS+1)], instruction[ROW_BITS-1:0]}; cmd_d[PRECHARGE_SLOT][10] = instruction[A10_CONTROL]; - cmd_d[READ_SLOT] = {(!issue_read_command), CMD_RD[2:0] | {3{(!issue_read_command)}}, cmd_odt, cmd_ck_en, cmd_reset_n, {(ROW_BITS+BA_BITS){1'b0}}}; // issued during MPR reads (address does not matter) - cmd_d[ACTIVATE_SLOT] = {1'b0, 3'b111 , cmd_odt, cmd_ck_en, cmd_reset_n, {(ROW_BITS+BA_BITS){1'b0}}}; // always NOP by default + cmd_d[READ_SLOT][cmd_len-1-DUAL_RANK_DIMM:0] = {(!issue_read_command), CMD_RD[2:0] | {3{(!issue_read_command)}}, cmd_odt, cmd_ck_en, cmd_reset_n, {(ROW_BITS+BA_BITS){1'b0}}}; // issued during MPR reads (address does not matter) + cmd_d[ACTIVATE_SLOT][cmd_len-1-DUAL_RANK_DIMM:0] = {1'b0, 3'b111 , cmd_odt, cmd_ck_en, cmd_reset_n, {(ROW_BITS+BA_BITS){1'b0}}}; // always NOP by default + if(PRECHARGE_SLOT != 0) begin // if precharge slot is not the 0th slot, then all slots before precharge will have the previous value of cmd_ck_en + for(index = 0; index < PRECHARGE_SLOT; index=index+1) begin // slots before + if(DUAL_RANK_DIMM[0]) begin + cmd_d[index][CMD_CKE_2] = prev_cmd_ck_en[DUAL_RANK_DIMM]; + end + cmd_d[index][CMD_CKE] = prev_cmd_ck_en[0]; + end + end // extra slot is created when READ and WRITE slots are the same // this remaining slot should be NOP by default if(WRITE_SLOT == READ_SLOT) begin - cmd_d[REMAINING_SLOT] = {1'b0, 3'b111 , cmd_odt, cmd_ck_en, cmd_reset_n, {(ROW_BITS+BA_BITS){1'b0}}}; // always NOP by default + cmd_d[REMAINING_SLOT][cmd_len-1-DUAL_RANK_DIMM:0] = {1'b0, 3'b111 , cmd_odt, cmd_ck_en, cmd_reset_n, {(ROW_BITS+BA_BITS){1'b0}}}; // always NOP by default end // if read and write slot is not shared, the write slot should be NOP by default else begin - cmd_d[WRITE_SLOT] = {1'b0, 3'b111, cmd_odt, cmd_ck_en, cmd_reset_n, {(ROW_BITS+BA_BITS){1'b0}}}; // always NOP by default + cmd_d[WRITE_SLOT][cmd_len-1-DUAL_RANK_DIMM:0] = {1'b0, 3'b111, cmd_odt, cmd_ck_en, cmd_reset_n, {(ROW_BITS+BA_BITS){1'b0}}}; // always NOP by default end ///////////////////////////////////////////////////////////////////////////////////////// // if dual rank is enabled, last 2 bits are {cs_2, cs_1} - if(DUAL_RANK_DIMM) begin - cmd_d[PRECHARGE_SLOT][cmd_len-1:cmd_len-2]= {!current_rank || !delay_counter_is_zero , current_rank || !delay_counter_is_zero}; // reset sequence is done per rank + if(DUAL_RANK_DIMM[0]) begin + cmd_d[PRECHARGE_SLOT][cmd_len-1:cmd_len-2]= {!current_rank || !delay_counter_is_zero , (current_rank && !final_calibration_done) || !delay_counter_is_zero}; // reset sequence is done per rank cmd_d[READ_SLOT][cmd_len-1:cmd_len-2] = {!current_rank || !issue_read_command , current_rank || !issue_read_command}; // MPR is done per rank cmd_d[ACTIVATE_SLOT][cmd_len-1:cmd_len-2] = 2'b11; // NOP by default if(WRITE_SLOT == READ_SLOT) begin - cmd_d[REMAINING_SLOT][cmd_len-1:cmd_len-2] = 2'b11 // always NOP by default + cmd_d[REMAINING_SLOT][cmd_len-1:cmd_len-2] = 2'b11; // always NOP by default end // if read and write slot is not shared, the write slot should be NOP by default else begin - cmd_d[WRITE_SLOT][cmd_len-1:cmd_len-2] = 2'b11 // always NOP by default + cmd_d[WRITE_SLOT][cmd_len-1:cmd_len-2] = 2'b11; // always NOP by default end end ///////////////////////////////////////////////////////////////////////////////////////// @@ -1538,13 +1553,13 @@ module ddr3_controller #( end delay_before_write_counter_d[stage2_bank] = WRITE_TO_WRITE_DELAY; //issue read command - if(DUAL_RANK_DIMM) begin + if(DUAL_RANK_DIMM[0]) begin if(COL_BITS <= 10) begin // if stage2_bank[BA_BITS] high then request is for 2nd rank, if low then for 1st rank - cmd_d[WRITE_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_WR[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0],{{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_col[9:0]}; + cmd_d[WRITE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_WR[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0],{{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_col[(DUAL_RANK_DIMM[0]? 9 : 8):0]}; end else begin // COL_BITS > 10 has different format from <= 10 - cmd_d[WRITE_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_WR[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0],{{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[9:0]}; + cmd_d[WRITE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_WR[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0],{{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[(DUAL_RANK_DIMM[0]? 9 : 8):0]}; end end else begin @@ -1607,12 +1622,12 @@ module ddr3_controller #( shift_reg_read_pipe_d[READ_ACK_PIPE_WIDTH-1] = {stage2_aux, !ecc_req_stage2}; // ack is sent to shift_reg which will be shifted until the wb ack output //issue read command - if(DUAL_RANK_DIMM) begin + if(DUAL_RANK_DIMM[0]) begin if(COL_BITS <= 10) begin - cmd_d[READ_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_RD[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_col[9:0]}; + cmd_d[READ_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_RD[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_col[(DUAL_RANK_DIMM[0]? 9 : 8):0]}; end else begin // COL_BITS > 10 has different format from <= 10 - cmd_d[READ_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_RD[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], {{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[9:0]}; + cmd_d[READ_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_RD[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], {{ROW_BITS-32'd12}{1'b0}} , stage2_col[(COL_BITS <= 10) ? 0 : 10] , 1'b0 , stage2_col[(DUAL_RANK_DIMM[0]? 9 : 8):0]}; end end else begin @@ -1644,8 +1659,8 @@ module ddr3_controller #( delay_before_write_counter_d[stage2_bank] = ACTIVATE_TO_WRITE_DELAY; end //issue activate command - if(DUAL_RANK_DIMM) begin - cmd_d[ACTIVATE_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], stage2_row}; + if(DUAL_RANK_DIMM[0]) begin + cmd_d[ACTIVATE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], stage2_row[(DUAL_RANK_DIMM[0]? ROW_BITS-1 : ROW_BITS-2):0]}; end else begin cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank , stage2_row}; @@ -1660,8 +1675,8 @@ module ddr3_controller #( //set-up delay before activate delay_before_activate_counter_d[stage2_bank] = PRECHARGE_TO_ACTIVATE_DELAY; //issue precharge command - if(DUAL_RANK_DIMM) begin - cmd_d[PRECHARGE_SLOT] = {!stage2_bank[BA_BITS], stage2_bank[BA_BITS], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[9:0] } }; + if(DUAL_RANK_DIMM[0]) begin + cmd_d[PRECHARGE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[DUAL_RANK_DIMM[0]? 9 : 8:0] } }; end else begin cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[9:0] } }; @@ -1688,8 +1703,8 @@ module ddr3_controller #( if(bank_status_q[stage1_next_bank] && bank_active_row_q[stage1_next_bank] != stage1_next_row && delay_before_precharge_counter_q[stage1_next_bank] ==0 && !precharge_slot_busy) begin //set-up delay before read and write delay_before_activate_counter_d[stage1_next_bank] = PRECHARGE_TO_ACTIVATE_DELAY; - if(DUAL_RANK_DIMM) begin - cmd_d[PRECHARGE_SLOT] = {!stage1_next_bank[BA_BITS], stage1_next_bank[BA_BITS], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage1_next_row[9:0] } }; + if(DUAL_RANK_DIMM[0]) begin + cmd_d[PRECHARGE_SLOT] = {!stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage1_next_row[(DUAL_RANK_DIMM[0]? 9 : 8):0] } }; end else begin cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage1_next_row[9:0] } }; @@ -1707,8 +1722,8 @@ module ddr3_controller #( if(delay_before_write_counter_d[stage1_next_bank] <= ACTIVATE_TO_WRITE_DELAY) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated delay_before_write_counter_d[stage1_next_bank] = ACTIVATE_TO_WRITE_DELAY; end - if(DUAL_RANK_DIMM) begin - cmd_d[ACTIVATE_SLOT] = {!stage1_next_bank[BA_BITS], stage1_next_bank[BA_BITS], CMD_ACT[2:0] , cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank , stage1_next_row}; + if(DUAL_RANK_DIMM[0]) begin + cmd_d[ACTIVATE_SLOT] = {!stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_ACT[2:0] , cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank[BA_BITS-1:0] , stage1_next_row[(DUAL_RANK_DIMM[0]? ROW_BITS-1 : ROW_BITS-2):0]}; end else begin cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0] , cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank , stage1_next_row}; @@ -1813,6 +1828,17 @@ module ddr3_controller #( end end //end of always block assign o_phy_cmd = {cmd_d[3], cmd_d[2], cmd_d[1], cmd_d[0]}; + + // register previous value of cmd_ck_en + always @(posedge i_controller_clk) begin + if(sync_rst_controller) begin + prev_cmd_ck_en <= 0; + end + else begin + prev_cmd_ck_en <= cmd_ck_en; + end + end + /*********************************************************************************************************************************************/ /******************************************************* Align Read Data from ISERDES *******************************************************/ @@ -2623,10 +2649,10 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap calib_aux <= 2; calib_sel <= {wb_sel_bits{1'b1}}; calib_we <= 1; - calib_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) ] - <= write_test_address_counter[ROW_BITS-1:0]; - calib_addr[(BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : 0] - <= write_test_address_counter[wb_addr_bits-1:ROW_BITS]; + calib_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1 + DUAL_RANK_DIMM) : (BA_BITS + COL_BITS- $clog2(serdes_ratio*2) + DUAL_RANK_DIMM) ] + <= write_test_address_counter[ROW_BITS-1:0]; // store row + calib_addr[(BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1 + DUAL_RANK_DIMM) : 0] + <= write_test_address_counter[wb_addr_bits-1:ROW_BITS]; // store bank + col calib_data <= {wb_sel_bits{write_test_address_counter[7:0]}}; if(MICRON_SIM) begin //if(write_test_address_counter[wb_addr_bits-1:0] == 1500) begin //inject error @@ -2651,10 +2677,10 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap calib_stb <= 1; calib_aux <= 3; calib_we <= 0; - calib_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) ] - <= read_test_address_counter[ROW_BITS-1:0]; - calib_addr[(BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : 0] - <= read_test_address_counter[wb_addr_bits-1:ROW_BITS]; + calib_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1 + DUAL_RANK_DIMM) : (BA_BITS + COL_BITS- $clog2(serdes_ratio*2) + DUAL_RANK_DIMM) ] + <= read_test_address_counter[ROW_BITS-1:0]; // row + calib_addr[(BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1 + DUAL_RANK_DIMM) : 0] + <= read_test_address_counter[wb_addr_bits-1:ROW_BITS]; // bank + col read_test_address_counter <= read_test_address_counter + 1; if(MICRON_SIM) begin if(read_test_address_counter == 199) begin //MUST END AT ODD NUMBER since ALTERNATE_WRITE_READ must start at even @@ -2692,12 +2718,17 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin FINISH_READ: begin calib_stb <= 0; if(train_delay == 0) begin - state_calibrate <= DONE_CALIBRATE; - if(DUAL_RANK_DIMM) begin - final_calibration_done <= current_rank; // calibration is only done after calibration of 2nd rank - reset_after_rank_1 <= !current_rank; // reset only if current rank is 1st rank + if(DUAL_RANK_DIMM[0]) begin + if(instruction_address == 26) begin // only once self-refresh is waiting for exit will current rank is done + final_calibration_done <= current_rank; // calibration is only done after calibration of 2nd rank + reset_after_rank_1 <= !current_rank; // reset only if current rank is 1st rank + if(current_rank) begin + state_calibrate <= DONE_CALIBRATE; + end + end end else begin + state_calibrate <= DONE_CALIBRATE; final_calibration_done <= 1'b1; end end @@ -2744,10 +2775,10 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin end generate - if(DUAL_RANK_DIMM) begin + if(DUAL_RANK_DIMM[0]) begin : dual_rank_mux // logic for current_rank to track if rank 1 or rank 2 is being calibrated always @(posedge i_controller_clk) begin - if(sync_rst_controller && !reset_after_rank_1) begin // dont reset at reset_after_rank_1 + if(current_rank_rst) begin // dont reset at reset_after_rank_1 current_rank <= 1'b0; // start at rank 1 end else begin @@ -2756,6 +2787,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin end end end + end endgenerate assign issue_read_command = (state_calibrate == MPR_READ && delay_before_read_data == 0); @@ -3437,6 +3469,26 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin $display("STAGE2_DATA_DEPTH = %0d", STAGE2_DATA_DEPTH); $display("READ_ACK_PIPE_WIDTH = %0d\n", READ_ACK_PIPE_WIDTH); + $display("\nDDR3 TOP PARAMETERS:\n-----------------------------"); + $display("CONTROLLER_CLK_PERIOD = %0d", CONTROLLER_CLK_PERIOD); + $display("DDR3_CLK_PERIOD = %0d", DDR3_CLK_PERIOD); + $display("ROW_BITS = %0d", ROW_BITS); + $display("COL_BITS = %0d", COL_BITS); + $display("BA_BITS = %0d", BA_BITS); + $display("BYTE_LANES = %0d", LANES); + $display("AUX_WIDTH = %0d", AUX_WIDTH); + $display("WB2_ADDR_BITS = %0d", WB2_ADDR_BITS); + $display("WB2_DATA_BITS = %0d", WB2_DATA_BITS); + $display("MICRON_SIM = %0d", MICRON_SIM); + $display("ODELAY_SUPPORTED = %0d", ODELAY_SUPPORTED); + $display("SECOND_WISHBONE = %0d", SECOND_WISHBONE); + $display("WB_ERROR = %0d", WB_ERROR); + $display("SKIP_INTERNAL_TEST = %0d", SKIP_INTERNAL_TEST); + $display("ECC_ENABLE = %0d", ECC_ENABLE); + $display("DIC = %0d", DIC); + $display("RTT_NOM = %0d", RTT_NOM); + $display("DUAL_RANK_DIMM = %0d", DUAL_RANK_DIMM); + $display("End of DDR3 TOP PARAMETERS\n-----------------------------"); end `endif @@ -3575,9 +3627,9 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin reg[4:0] f_index_1; reg[F_TEST_CMD_DATA_WIDTH - 1:0] f_write_data; reg f_write_fifo = 0, f_read_fifo = 0; - reg[ROW_BITS-1:0] f_bank_active_row[(1<= tCCD); end - if(cmd_d[READ_SLOT][CMD_CS_N:CMD_WE_N] == 4'b0101) begin //READ - f_read_time_stamp[cmd_d[READ_SLOT][CMD_BANK_START:CMD_ADDRESS_START+1]] <= f_timer + READ_SLOT; + if(cmd_d[READ_SLOT][CMD_CS_N-1:CMD_WE_N] == 3'b101) begin //READ + f_read_time_stamp[{!cmd_d[READ_SLOT][CMD_CS_N_2] && DUAL_RANK_DIMM , cmd_d[READ_SLOT][CMD_BANK_START:CMD_ADDRESS_START+1]}] <= f_timer + READ_SLOT; //Check tCCD (read-to-read delay) assert((f_timer+READ_SLOT) - f_read_time_stamp[bank_const] >= tCCD); end diff --git a/rtl/ddr3_phy.v b/rtl/ddr3_phy.v index 1a1b26c..f2b52cd 100644 --- a/rtl/ddr3_phy.v +++ b/rtl/ddr3_phy.v @@ -39,6 +39,7 @@ module ddr3_phy #( BA_BITS = 3, DQ_BITS = 8, LANES = 8, + DUAL_RANK_DIMM = 0, // enable dual rank DIMM (1 = enable, 0 = disable) parameter[0:0] ODELAY_SUPPORTED = 1, //set to 1 when ODELAYE2 is supported USE_IO_TERMINATION = 0, //use IOBUF_DCIEN and IOBUFDS_DCIEN when 1 NO_IOSERDES_LOOPBACK = 1, // don't use IOSERDES loopback for bitslip training @@ -47,7 +48,7 @@ module ddr3_phy #( wb_data_bits = DQ_BITS*LANES*serdes_ratio*2, wb_sel_bits = wb_data_bits / 8, //4 is the width of a single ddr3 command {cs_n, ras_n, cas_n, we_n} plus 3 (ck_en, odt, reset_n) plus bank bits plus row bits - cmd_len = 4 + 3 + BA_BITS + ROW_BITS + cmd_len = 4 + 3 + BA_BITS + ROW_BITS + 2*DUAL_RANK_DIMM )( input wire i_controller_clk, i_ddr3_clk, i_ref_clk, input wire i_ddr3_clk_90, //required only when ODELAY_SUPPORTED is zero @@ -70,10 +71,10 @@ module ddr3_phy #( output wire[LANES*8-1:0] o_controller_iserdes_bitslip_reference, output wire o_controller_idelayctrl_rdy, // DDR3 I/O Interface - output wire o_ddr3_clk_p,o_ddr3_clk_n, + output wire[DUAL_RANK_DIMM:0] o_ddr3_clk_p,o_ddr3_clk_n, output wire o_ddr3_reset_n, - output wire o_ddr3_cke, // CKE - output wire o_ddr3_cs_n, // chip select signal + output wire[DUAL_RANK_DIMM:0] o_ddr3_cke, // CKE + output wire[DUAL_RANK_DIMM:0] o_ddr3_cs_n, // chip select signal output wire o_ddr3_ras_n, // RAS# output wire o_ddr3_cas_n, // CAS# output wire o_ddr3_we_n, // WE# @@ -82,22 +83,24 @@ module ddr3_phy #( inout wire[(DQ_BITS*LANES)-1:0] io_ddr3_dq, inout wire[(DQ_BITS*LANES)/8-1:0] io_ddr3_dqs, io_ddr3_dqs_n, output wire[LANES-1:0] o_ddr3_dm, - output wire o_ddr3_odt, // on-die termination + output wire[DUAL_RANK_DIMM:0] o_ddr3_odt, // on-die termination // DEBUG PHY output wire[(DQ_BITS*LANES)/8-1:0] o_ddr3_debug_read_dqs_p, output wire[(DQ_BITS*LANES)/8-1:0] o_ddr3_debug_read_dqs_n ); // cmd bit assignment - localparam CMD_CS_N = cmd_len - 1, - CMD_RAS_N = cmd_len - 2, - CMD_CAS_N= cmd_len - 3, - CMD_WE_N = cmd_len - 4, - CMD_ODT = cmd_len - 5, - CMD_CKE = cmd_len - 6, - CMD_RESET_N = cmd_len - 7, - CMD_BANK_START = BA_BITS + ROW_BITS - 1, - CMD_ADDRESS_START = ROW_BITS - 1; + localparam CMD_CS_N_2 = cmd_len - 1, + CMD_CS_N = DUAL_RANK_DIMM[0]? cmd_len - 2 : cmd_len - 1, + CMD_RAS_N = DUAL_RANK_DIMM[0]? cmd_len - 3 : cmd_len - 2, + CMD_CAS_N = DUAL_RANK_DIMM[0]? cmd_len - 4 : cmd_len - 3, + CMD_WE_N = DUAL_RANK_DIMM[0]? cmd_len - 5 : cmd_len - 4, + CMD_ODT = DUAL_RANK_DIMM[0]? cmd_len - 6 : cmd_len - 5, + CMD_CKE_2 = DUAL_RANK_DIMM[0]? cmd_len - 7 : cmd_len - 6, + CMD_CKE = DUAL_RANK_DIMM[0]? cmd_len - 8 : cmd_len - 6, + CMD_RESET_N = DUAL_RANK_DIMM[0]? cmd_len - 9 : cmd_len - 7, + CMD_BANK_START = BA_BITS + ROW_BITS - 1, + CMD_ADDRESS_START = ROW_BITS - 1; localparam SYNC_RESET_DELAY = $rtoi($ceil(52_000/CONTROLLER_CLK_PERIOD)); //52_000 ps of reset pulse width required for IDELAYCTRL //cmd needs to be center-aligned to the positive edge of the //ddr3_clk. This means cmd needs to be delayed by half the ddr3 @@ -225,12 +228,25 @@ module ddr3_phy #( end endgenerate - assign o_ddr3_cs_n = oserdes_cmd[CMD_CS_N], - o_ddr3_ras_n = oserdes_cmd[CMD_RAS_N], + // cs[1] when DUAL_RANK_DIMM enabled + generate + if(DUAL_RANK_DIMM) begin + assign o_ddr3_cs_n[1] = oserdes_cmd[CMD_CS_N_2]; + assign o_ddr3_cs_n[0] = oserdes_cmd[CMD_CS_N]; + assign o_ddr3_cke[1] = oserdes_cmd[CMD_CKE_2]; + assign o_ddr3_cke[0] = oserdes_cmd[CMD_CKE]; + assign o_ddr3_odt[1] = oserdes_cmd[CMD_ODT]; + assign o_ddr3_odt[0] = oserdes_cmd[CMD_ODT]; + end + else begin + assign o_ddr3_cs_n = oserdes_cmd[CMD_CS_N]; + assign o_ddr3_cke = oserdes_cmd[CMD_CKE]; + assign o_ddr3_odt = oserdes_cmd[CMD_ODT]; + end + endgenerate + assign o_ddr3_ras_n = oserdes_cmd[CMD_RAS_N], o_ddr3_cas_n = oserdes_cmd[CMD_CAS_N], o_ddr3_we_n = oserdes_cmd[CMD_WE_N], - o_ddr3_odt = oserdes_cmd[CMD_ODT], - o_ddr3_cke = oserdes_cmd[CMD_CKE], o_ddr3_reset_n = oserdes_cmd[CMD_RESET_N], o_ddr3_ba_addr = oserdes_cmd[CMD_BANK_START:CMD_ADDRESS_START+1], o_ddr3_addr = oserdes_cmd[CMD_ADDRESS_START:0]; @@ -308,26 +324,64 @@ module ddr3_phy #( .ODATAIN(ddr3_clk), // 1-bit input: Output delay data input .REGRST(1'b0) // 1-bit input: Active-high reset tap-delay input ); - - // OBUFDS: Differential Output Buffer - // 7 Series - // Xilinx HDL Libraries Guide, version 13.4 - OBUFDS OBUFDS_inst ( - .O(o_ddr3_clk_p), // Diff_p output (connect directly to top-level port) - .OB(o_ddr3_clk_n), // Diff_n output (connect directly to top-level port) - .I(ddr3_clk_delayed) // Buffer input - ); - // End of OBUFDS_inst instantiation + // if dual rank enabled, then there will be two clk + if(DUAL_RANK_DIMM) begin + // OBUFDS: Differential Output Buffer + // 7 Series + // Xilinx HDL Libraries Guide, version 13.4 + OBUFDS OBUFDS0_inst ( + .O(o_ddr3_clk_p[0]), // Diff_p output (connect directly to top-level port) + .OB(o_ddr3_clk_n[0]), // Diff_n output (connect directly to top-level port) + .I(ddr3_clk_delayed) // Buffer input + ); + OBUFDS OBUFDS1_inst ( + .O(o_ddr3_clk_p[1]), // Diff_p output (connect directly to top-level port) + .OB(o_ddr3_clk_n[1]), // Diff_n output (connect directly to top-level port) + .I(ddr3_clk_delayed) // Buffer input + ); + // End of OBUFDS_inst instantiation + end + else begin + // OBUFDS: Differential Output Buffer + // 7 Series + // Xilinx HDL Libraries Guide, version 13.4 + OBUFDS OBUFDS_inst ( + .O(o_ddr3_clk_p), // Diff_p output (connect directly to top-level port) + .OB(o_ddr3_clk_n), // Diff_n output (connect directly to top-level port) + .I(ddr3_clk_delayed) // Buffer input + ); + // End of OBUFDS_inst instantiation + end end else begin //ODELAY is not supported - // OBUFDS: Differential Output Buffer - // 7 Series - // Xilinx HDL Libraries Guide, version 13.4 - OBUFDS OBUFDS_inst ( - .O(o_ddr3_clk_p), // Diff_p output (connect directly to top-level port) - .OB(o_ddr3_clk_n), // Diff_n output (connect directly to top-level port) - .I(!i_ddr3_clk) // Buffer input - ); + + // if dual rank enabled, then there will be two clk + if(DUAL_RANK_DIMM) begin + // OBUFDS: Differential Output Buffer + // 7 Series + // Xilinx HDL Libraries Guide, version 13.4 + OBUFDS OBUFDS0_inst ( + .O(o_ddr3_clk_p[1]), // Diff_p output (connect directly to top-level port) + .OB(o_ddr3_clk_n[1]), // Diff_n output (connect directly to top-level port) + .I(!i_ddr3_clk) // Buffer input + ); + OBUFDS OBUFDS1_inst ( + .O(o_ddr3_clk_p[0]), // Diff_p output (connect directly to top-level port) + .OB(o_ddr3_clk_n[0]), // Diff_n output (connect directly to top-level port) + .I(!i_ddr3_clk) // Buffer input + ); + // End of OBUFDS_inst instantiation + end + else begin + // OBUFDS: Differential Output Buffer + // 7 Series + // Xilinx HDL Libraries Guide, version 13.4 + OBUFDS OBUFDS_inst ( + .O(o_ddr3_clk_p), // Diff_p output (connect directly to top-level port) + .OB(o_ddr3_clk_n), // Diff_n output (connect directly to top-level port) + .I(!i_ddr3_clk) // Buffer input + ); + end end diff --git a/rtl/ddr3_top.v b/rtl/ddr3_top.v index 1a36040..63d6ad8 100644 --- a/rtl/ddr3_top.v +++ b/rtl/ddr3_top.v @@ -40,6 +40,7 @@ module ddr3_top #( AUX_WIDTH = 4, //width of aux line (must be >= 4) WB2_ADDR_BITS = 7, //width of 2nd wishbone address bus WB2_DATA_BITS = 32, //width of 2nd wishbone data bus + DUAL_RANK_DIMM = 0, // enable dual rank DIMM (1 = enable, 0 = disable) parameter[0:0] MICRON_SIM = 0, //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) ODELAY_SUPPORTED = 0, //set to 1 when ODELAYE2 is supported SECOND_WISHBONE = 0, //set to 1 if 2nd wishbone for debugging is needed @@ -52,12 +53,12 @@ module ddr3_top #( parameter // The next parameters act more like a localparam (since user does not have to set this manually) but was added here to simplify port declaration DQ_BITS = 8, //device width (fixed to 8, if DDR3 is x16 then BYTE_LANES will be 2 while ) serdes_ratio = 4, // this controller is fixed as a 4:1 memory controller (CONTROLLER_CLK_PERIOD/DDR3_CLK_PERIOD = 4) - wb_addr_bits = ROW_BITS + COL_BITS + BA_BITS - $clog2(serdes_ratio*2), + wb_addr_bits = ROW_BITS + COL_BITS + BA_BITS - $clog2(serdes_ratio*2) + DUAL_RANK_DIMM, wb_data_bits = DQ_BITS*BYTE_LANES*serdes_ratio*2, wb_sel_bits = wb_data_bits / 8, wb2_sel_bits = WB2_DATA_BITS / 8, //4 is the width of a single ddr3 command {cs_n, ras_n, cas_n, we_n} plus 3 (ck_en, odt, reset_n) plus bank bits plus row bits - cmd_len = 4 + 3 + BA_BITS + ROW_BITS + cmd_len = 4 + 3 + BA_BITS + ROW_BITS + 2*DUAL_RANK_DIMM ) ( input wire i_controller_clk, i_ddr3_clk, i_ref_clk, //i_controller_clk = CONTROLLER_CLK_PERIOD, i_ddr3_clk = DDR3_CLK_PERIOD, i_ref_clk = 200MHz @@ -92,10 +93,10 @@ module ddr3_top #( output wire[WB2_DATA_BITS - 1:0] o_wb2_data, //read data // // DDR3 I/O Interface - output wire o_ddr3_clk_p, o_ddr3_clk_n, + output wire[DUAL_RANK_DIMM:0] o_ddr3_clk_p, o_ddr3_clk_n, output wire o_ddr3_reset_n, - output wire o_ddr3_cke, // CKE - output wire o_ddr3_cs_n, // chip select signal + output wire[DUAL_RANK_DIMM:0] o_ddr3_cke, // CKE + output wire[DUAL_RANK_DIMM:0] o_ddr3_cs_n, // chip select signal output wire o_ddr3_ras_n, // RAS# output wire o_ddr3_cas_n, // CAS# output wire o_ddr3_we_n, // WE# @@ -104,7 +105,7 @@ module ddr3_top #( inout wire[(DQ_BITS*BYTE_LANES)-1:0] io_ddr3_dq, inout wire[BYTE_LANES-1:0] io_ddr3_dqs, io_ddr3_dqs_n, output wire[BYTE_LANES-1:0] o_ddr3_dm, - output wire o_ddr3_odt, // on-die termination + output wire[DUAL_RANK_DIMM:0] o_ddr3_odt, // on-die termination // // Done Calibration pin output wire o_calib_complete, @@ -254,7 +255,8 @@ ddr3_top #( .WB_ERROR(WB_ERROR), // set to 1 to support Wishbone error (asserts at ECC double bit error) .SKIP_INTERNAL_TEST(SKIP_INTERNAL_TEST), // skip built-in self test (would require >2 seconds of internal test right after calibration) .DIC(DIC), //Output Driver Impedance Control (2'b00 = RZQ/6, 2'b01 = RZQ/7, RZQ = 240ohms) - .RTT_NOM(RTT_NOM) //RTT Nominal (3'b000 = disabled, 3'b001 = RZQ/4, 3'b010 = RZQ/2 , 3'b011 = RZQ/6, RZQ = 240ohms) + .RTT_NOM(RTT_NOM), //RTT Nominal (3'b000 = disabled, 3'b001 = RZQ/4, 3'b010 = RZQ/2 , 3'b011 = RZQ/6, RZQ = 240ohms) + .DUAL_RANK_DIMM(DUAL_RANK_DIMM) // enable dual rank DIMM (1 = enable, 0 = disable) ) ddr3_controller_inst ( .i_controller_clk(i_controller_clk), //i_controller_clk has period of CONTROLLER_CLK_PERIOD .i_rst_n(i_rst_n), //200MHz input clock @@ -323,7 +325,8 @@ ddr3_top #( .LANES(BYTE_LANES), //8 lanes of DQ .CONTROLLER_CLK_PERIOD(CONTROLLER_CLK_PERIOD), //ps, period of clock input to this DDR3 controller module .DDR3_CLK_PERIOD(DDR3_CLK_PERIOD), //ps, period of clock input to DDR3 RAM device - .ODELAY_SUPPORTED(ODELAY_SUPPORTED) + .ODELAY_SUPPORTED(ODELAY_SUPPORTED), //set to 1 when ODELAYE2 is supported + .DUAL_RANK_DIMM(DUAL_RANK_DIMM) // enable dual rank DIMM (1 = enable, 0 = disable) ) ddr3_phy_inst ( .i_controller_clk(i_controller_clk), .i_ddr3_clk(i_ddr3_clk), @@ -393,6 +396,7 @@ ddr3_top #( $display("DIC = %0d", DIC); $display("RTT_NOM = %0d", RTT_NOM); $display("SELF_REFRESH = %0d", SELF_REFRESH); + $display("DUAL_RANK_DIMM = %0d", DUAL_RANK_DIMM); $display("End of DDR3 TOP PARAMETERS\n-----------------------------"); end diff --git a/testbench/ddr3_dimm_micron_sim.sv b/testbench/ddr3_dimm_micron_sim.sv index 82de706..a89b564 100644 --- a/testbench/ddr3_dimm_micron_sim.sv +++ b/testbench/ddr3_dimm_micron_sim.sv @@ -62,10 +62,13 @@ module ddr3_dimm_micron_sim; localparam CONTROLLER_CLK_PERIOD = 10_000, //ps, period of clock input to this DDR3 controller module - DDR3_CLK_PERIOD = 2500, //ps, period of clock input to DDR3 RAM device + DDR3_CLK_PERIOD = 2500,//ps, period of clock input to DDR3 RAM device AUX_WIDTH = 16, // AUX lines ECC_ENABLE = 0, // ECC enable - SELF_REFRESH = 2'b11; + SELF_REFRESH = 2'b11, + DUAL_RANK_DIMM = 1, + TEST_SELF_REFRESH = 1; + reg i_controller_clk, i_ddr3_clk, i_ref_clk, i_ddr3_clk_90; reg i_rst_n; @@ -167,7 +170,8 @@ ddr3_top #( .ECC_ENABLE(ECC_ENABLE), // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC ) .WB_ERROR(1), // set to 1 to support Wishbone error (asserts at ECC double bit error) .SKIP_INTERNAL_TEST(0), // skip built-in self test (would require >2 seconds of internal test right after calibration) - .SELF_REFRESH(SELF_REFRESH) // 0 = use i_user_self_refresh input, 1 = Self-refresh mode is enabled after 64 controller clock cycles of no requests, 2 = 128 cycles, 3 = 256 cycles + .SELF_REFRESH(SELF_REFRESH), // 0 = use i_user_self_refresh input, 1 = Self-refresh mode is enabled after 64 controller clock cycles of no requests, 2 = 128 cycles, 3 = 256 cycles + .DUAL_RANK_DIMM(DUAL_RANK_DIMM) // enable dual rank DIMM (1 = enable, 0 = disable) ) ddr3_top ( //clock and reset @@ -201,11 +205,11 @@ ddr3_top #( .o_wb2_ack(o_wb2_ack), //1 = read/write request has completed .o_wb2_data(o_wb2_data), //read data, for a 4:1 controller data width is 8 times the number of pins on the device // PHY Interface (to be added later) - .o_ddr3_clk_p(o_ddr3_clk_p[1]), - .o_ddr3_clk_n(o_ddr3_clk_n[1]), - .o_ddr3_cke(ck_en[1]), // CKE - .o_ddr3_cs_n(cs_n[1]), // chip select signal - .o_ddr3_odt(odt[1]), // on-die termination + .o_ddr3_clk_p(o_ddr3_clk_p[DUAL_RANK_DIMM:0]), + .o_ddr3_clk_n(o_ddr3_clk_n[DUAL_RANK_DIMM:0]), + .o_ddr3_cke(ck_en[DUAL_RANK_DIMM:0]), // CKE + .o_ddr3_cs_n(cs_n[DUAL_RANK_DIMM:0]), // chip select signal + .o_ddr3_odt(odt[DUAL_RANK_DIMM:0]), // on-die termination .o_ddr3_ras_n(ras_n), // RAS# .o_ddr3_cas_n(cas_n), // CAS# .o_ddr3_we_n(we_n), // WE# @@ -261,9 +265,15 @@ ddr3_top #( .dqs_n(dqs_n), .dq(dq) ); - assign ck_en[0]=0, - cs_n[0]=1, - odt[0]=0; + generate + if(!DUAL_RANK_DIMM) begin // if dual rank disabled then rank 1 is idle + assign ck_en[1]=0, + cs_n[1]=1, + odt[1]=0, + o_ddr3_clk_p[1]=0, + o_ddr3_clk_n[1]=0; + end + endgenerate `endif @@ -730,17 +740,19 @@ ddr3_top #( end task self_refresh; - if(SELF_REFRESH == 2'b00) begin - // test self refresh - @(posedge i_controller_clk) - i_user_self_refresh = 1; - #40_000_000; //40_000 ns of self-refresh - @(posedge i_controller_clk) - i_user_self_refresh = 0; - end - else begin - #10_000_000; // 10_000 ns of rest - end + if(TEST_SELF_REFRESH) begin + if(SELF_REFRESH == 2'b00) begin + // test self refresh + @(posedge i_controller_clk) + i_user_self_refresh = 1; + #40_000_000; //40_000 ns of self-refresh + @(posedge i_controller_clk) + i_user_self_refresh = 0; + end + else begin + #10_000_000; // 10_000 ns of rest + end + end endtask //check read data @@ -947,9 +959,9 @@ ddr3_top #( reg[31:0] time_now; reg[3:0] repeats = 0; //display commands issued - always @(posedge o_ddr3_clk_p[1]) begin - if(!cs_n[1]) begin //command is center-aligned to positive edge of clock, a valid command always has low cs_n - case({cs_n[1], ras_n, cas_n, we_n}) + always @(posedge o_ddr3_clk_p[0]) begin + if(!cs_n[0]) begin //command is center-aligned to positive edge of clock, a valid command always has low cs_n + case({cs_n[0], ras_n, cas_n, we_n}) 4'b0000: command_used = "MRS"; 4'b0001: command_used = "REF"; 4'b0010: command_used = "PRE"; From 3b2ef2afa82a2a50b75fdf33c2e947197ceb4b0d Mon Sep 17 00:00:00 2001 From: AngeloJacobo Date: Sat, 21 Dec 2024 18:24:12 +0800 Subject: [PATCH 3/3] odt[1] generated by separate oserdes to make it routable --- rtl/ddr3_controller.v | 4 ++-- rtl/ddr3_phy.v | 52 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/rtl/ddr3_controller.v b/rtl/ddr3_controller.v index 27d91cd..b090d86 100644 --- a/rtl/ddr3_controller.v +++ b/rtl/ddr3_controller.v @@ -406,7 +406,7 @@ module ddr3_controller #( /************************************************************* Registers and Wires *************************************************************/ integer index; - reg[4:0] instruction_address = 0; //address for accessing rom instruction + (* mark_debug ="true" *) reg[4:0] instruction_address = 0; //address for accessing rom instruction reg[27:0] instruction = INITIAL_RESET_INSTRUCTION; //instruction retrieved from reset instruction rom reg[ DELAY_COUNTER_WIDTH - 1:0] delay_counter = INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0]; //counter used for delays reg delay_counter_is_zero = (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0] == 0); //counter is now zero so retrieve next delay @@ -601,7 +601,7 @@ module ddr3_controller #( wire db_err_o; wire[wb_data_bits - 1:0] o_wb_data_q_decoded; /* verilator lint_on UNDRIVEN */ - reg user_self_refresh_q; // registered i_user_self_refresh + (* mark_debug ="true" *) reg user_self_refresh_q; // registered i_user_self_refresh // initial block for all regs initial begin diff --git a/rtl/ddr3_phy.v b/rtl/ddr3_phy.v index f2b52cd..5211865 100644 --- a/rtl/ddr3_phy.v +++ b/rtl/ddr3_phy.v @@ -226,6 +226,55 @@ module ddr3_phy #( // End of OSERDESE2_inst instantiation end + + if(DUAL_RANK_DIMM) begin // if dual rank enabled, odt_2 and odt_1 will be generated separately + // OSERDESE2: Output SERial/DESerializer with bitslip + //7 Series + // Xilinx HDL Libraries Guide, version 13.4 + OSERDESE2 #( + .DATA_RATE_OQ("SDR"), // DDR, SDR + .DATA_RATE_TQ("SDR"), // DDR, SDR + .DATA_WIDTH(4), // Parallel data width (2-8,10,14) + .INIT_OQ(1'b0), // Initial value of OQ output (1'b0,1'b1) + .TRISTATE_WIDTH(1) + ) + OSERDESE2_cmd( + .OFB(), // 1-bit output: Feedback path for data + .OQ(o_ddr3_odt[1]), // 1-bit output: Data path output + .CLK(i_ddr3_clk), // 1-bit input: High speed clock + .CLKDIV(i_controller_clk), // 1-bit input: Divided clock + // D1 - D8: 1-bit (each) input: Parallel data inputs (1-bit each) + .D1(i_controller_cmd[cmd_len*0 + CMD_ODT]), + .D2(i_controller_cmd[cmd_len*1 + CMD_ODT]), + .D3(i_controller_cmd[cmd_len*2 + CMD_ODT]), + .D4(i_controller_cmd[cmd_len*3 + CMD_ODT]), + .OCE(1'b1), // 1-bit input: Output data clock enable + .RST(sync_rst), // 1-bit input: Reset + // unused signals but were added here to make vivado happy + .SHIFTOUT1(), // SHIFTOUT1 / SHIFTOUT2: 1-bit (each) output: Data output expansion (1-bit each) + .SHIFTOUT2(), + .TBYTEOUT(), // 1-bit output: Byte group tristate + .TFB(), // 1-bit output: 3-state control + .TQ(), // 1-bit output: 3-state control + .D5(), + .D6(), + .D7(), + .D8(), + // SHIFTIN1 / SHIFTIN2: 1-bit (each) input: Data input expansion (1-bit each) + .SHIFTIN1(0), + .SHIFTIN2(0), + // T1 - T4: 1-bit (each) input: Parallel 3-state inputs + .T1(0), + .T2(0), + .T3(0), + .T4(0), + .TBYTEIN(0), + // 1-bit input: Byte group tristate + .TCE(0) + // 1-bit input: 3-state clock enable + ); + // End of OSERDESE2_inst instantiation + end endgenerate // cs[1] when DUAL_RANK_DIMM enabled @@ -235,8 +284,9 @@ module ddr3_phy #( assign o_ddr3_cs_n[0] = oserdes_cmd[CMD_CS_N]; assign o_ddr3_cke[1] = oserdes_cmd[CMD_CKE_2]; assign o_ddr3_cke[0] = oserdes_cmd[CMD_CKE]; - assign o_ddr3_odt[1] = oserdes_cmd[CMD_ODT]; assign o_ddr3_odt[0] = oserdes_cmd[CMD_ODT]; + // o_ddr3_odt[1] will be generated directly by a separate OSERDES + // if odt[1] and odt[0] uses same output from oserdes, one of them will be unroutable end else begin assign o_ddr3_cs_n = oserdes_cmd[CMD_CS_N];