diff --git a/rtl/ddr3_controller.v b/rtl/ddr3_controller.v index 20ecfd0..a5fd6d7 100644 --- a/rtl/ddr3_controller.v +++ b/rtl/ddr3_controller.v @@ -8,14 +8,14 @@ // - High (sustained) data throughput. Sequential writes should be able to continue without interruption -`define FORMAL_COVER //change delay in reset sequence to fit in cover statement +//`define FORMAL_COVER //change delay in reset sequence to fit in cover statement `define COVER_DELAY 3 //fixed delay used in formal cover for reset sequence `default_nettype none // THESE DEFINES WILL BE MODIFIED AS PARAMETERS LATER ON `define DDR3_1600_11_11_11 // DDR3-1600 (11-11-11) speed bin -`define RAM_1Gb //DDR3 Capacity +`define RAM_8Gb //DDR3 Capacity //`define RAM_2Gb //`define RAM_4Gb //`define RAM_8Gb @@ -123,59 +123,17 @@ module ddr3_controller #( /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////// SET MODE REGISTERS ////////////////////////////////////////////////////////////// - - // MR2 (JEDEC DDR3 doc pg. 30) - localparam[2:0] PASR = 3'b000; //Partial Array Self-Refresh: Full Array - localparam[2:0] CWL = 3'b011; //CAS write Latency: 8 (1.5 ns > tCK(avg) >= 1.25 ns) CREATE A FUNCTION FOR THIS - localparam[0:0] ASR = 1'b1; //Auto Self-Refresh: on - localparam[0:0] SRT = 1'b0; //Self-Refresh Temperature Range:0 (If ASR = 1, SRT bit must be set to 0) - localparam[1:0] RTT_WR = 2'b00; //Dynamic ODT: off - localparam[2:0] MR2_SEL = 3'b010; //Selected Mode Register - localparam[18:0] MR2 = {MR2_SEL, 5'b00000, RTT_WR, 1'b0, SRT, ASR, CWL, PASR}; - - // MR3 (JEDEC DDR3 doc pg. 
32) - localparam[1:0] MPR_LOC = 2'b00; //Data location for MPR Reads: Predefined Pattern 0_1_0_1_0_1_0_1 - localparam[0:0] MPR_EN = 1'b0; //MPR Enable: Enable MPR reads and calibration during initialization - localparam[2:0] MR3_SEL = 3'b011; //MPR Selected - localparam[18:0] MR3_EN = {MR3_SEL, 13'b0_0000_0000_0000, 1'b1, MPR_LOC}; - localparam[18:0] MR3_DIS = {MR3_SEL, 13'b0_0000_0000_0000, 1'b0, MPR_LOC}; - localparam[ROW_BITS+BA_BITS-1:0] MR3_RD_ADDR = 0; - - // MR1 (JEDEC DDR3 doc pg. 27) - localparam DLL_EN = 1'b0; //DLL Enable/Disable: Enabled(0) - localparam[1:0] DIC = 2'b00; //Output Driver Impedance Control (IS THIS THE SAME WITH RTT_NOM???????????? Search later) - localparam[2:0] RTT_NOM = 3'b011; //RTT Nominal: 40ohms (RQZ/6) is the impedance of the PCB trace - localparam[0:0] WL_EN = 1'b0; //Write Leveling Enable: Disabled - localparam[1:0] AL = 2'b00; //Additive Latency: Disabled - localparam[0:0] TDQS = 1'b0; //Termination Data Strobe: Disabled (provides additional termination resistance outputs. When the TDQS function is disabled, the DM function is provided (vice-versa).TDQS function is only available for X8 DRAM and must be disabled for X4 and X16. - localparam[0:0] QOFF = 1'b0; //Output Buffer Control: Enabled - localparam[2:0] MR1_SEL = 3'b001; //Selected Mode Register - localparam[18:0] MR1 = {MR1_SEL, 3'b000, QOFF, TDQS, 1'b0, RTT_NOM[2], 1'b0, WL_EN, RTT_NOM[1], DIC[1], AL, RTT_NOM[0], DIC[0], DLL_EN}; - - //MR0 (JEDEC DDR3 doc pg. 24) - localparam[1:0] BL = 2'b00; //Burst Length: 8 (Fixed) - localparam[3:0] CL = 4'b1100; //CAS Read Latency: 10, can support DDR-1600 speedbin 8-8-8, 9-9-9, and 10-10-10 (Check JEDEC DDR doc pg. 
162) CREATE A FUNCTION FOR THIS - localparam[0:0] RBT = 1'b0; //Read Burst Type: Nibble Sequential - localparam[0:0] DLL_RST = 1'b1; //DLL Reset: Yes (this is self-clearing and must be applied after DLL enable) - localparam[2:0] WR = WRA_mode_register_value($ceil(tWR/DDR3_CLK_PERIOD)); //Write recovery for autoprecharge ( - localparam[0:0] PPD = 1'b0; //DLL Control for Precharge PD: Slow exit (DLL off) - localparam[2:0] MR0_SEL = 3'b000; - localparam[18:0] MR0 = {MR0_SEL, 3'b000, PPD, WR, DLL_RST, 1'b0, CL[3:1], RBT, CL[0], BL}; - - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - /////////////////////////////////////////////////////////// TIMING PARAMETERS //////////////////////////////////////////////////////////////////////////////////// - - localparam POWER_ON_RESET_HIGH = 200_000; // 200us reset must be active at initialization + localparam DELAY_SLOT_WIDTH = 19; //Bitwidth of the delay slot and mode register slot on the reset/refresh rom will be at the same size as the Mode Register + localparam POWER_ON_RESET_HIGH = 200_000; // 200us reset must be active at initialization localparam INITIAL_CKE_LOW = 500_000; // 500us cke must be low before activating `ifdef DDR3_1600_11_11_11 //DDR3-1600 (11-11-11) speed bin - localparam tRAS = 35.0; // ns Minimum Active to Precharge command time - localparam tRC = 48.750; //ns Active to Active/Auto Refresh command time + //localparam tRAS = 35.0; // ns Minimum Active to Precharge command time + //localparam tRC = 48.750; //ns Active to Active/Auto Refresh command time localparam tRCD = 13.750; // ns Active to Read/Write command time - localparam tRP = 13.750; // ns Precharge command period + localparam tRP = 13.750; // ns Precharge command period `endif @@ -193,19 +151,68 @@ module ddr3_controller #( localparam tMRD = 4; // nCK Mode Register Set command cycle time localparam tWR = 15.0; // ns Write Recovery Time localparam tWTR 
= max(nCK_to_ns(4), 7.5); //ns Delay from start of internal write transaction to internal read command - localparam tDLLK = 512.0; //nCK DLL Locking time + //localparam tDLLK = 512.0; //nCK DLL Locking time + localparam[DELAY_SLOT_WIDTH - 1:0] tWLMRD = nCK_to_cycles(40); // nCK First DQS/DQS# rising edge after write leveling mode is programmed + localparam tWLO = 7.5; //ns Write leveling output delay + localparam tWLOE = 2; localparam tRTP = max(nCK_to_ns(4), 7.5); //ns Internal Command to PRECHARGE Command delay localparam tCCD = 4; //nCK CAS to CAS command delay localparam[DELAY_SLOT_WIDTH - 1:0] tMOD = max(nCK_to_cycles(12), ns_to_cycles(15)); //cycles (controller) Mode Register Set command update delay localparam[DELAY_SLOT_WIDTH - 1:0] tZQinit = max(nCK_to_cycles(512), ns_to_cycles(640));//cycles (controller) Power-up and RESET calibration time - localparam[DELAY_SLOT_WIDTH - 1:0] tZQoper = max(nCK_to_cycles(256), ns_to_cycles(320)); //cycles (controller) Normal operation Full calibration time - localparam CL_nCK = 10; - localparam CWL_nCK = 8; + //localparam[DELAY_SLOT_WIDTH - 1:0] tZQoper = max(nCK_to_cycles(256), ns_to_cycles(320)); //cycles (controller) Normal operation Full calibration time + localparam CL_nCK = 11; //create a function for this + localparam CWL_nCK = 8; //create a function for this localparam DELAY_MAX_VALUE = ns_to_cycles(INITIAL_CKE_LOW); //Largest possible delay needed by the reset and refresh sequence localparam DELAY_COUNTER_WIDTH= $clog2(DELAY_MAX_VALUE); //Bitwidth needed by the maximum possible delay, this will be the delay counter width - localparam DELAY_SLOT_WIDTH = 19; //Bitwidth of the delay slot and mode register slot on the reset/refresh rom will be at the same size as the Mode Register localparam READ_CAL_DELAY = 100; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + + 
////////////////////////////////////////////////////////////// SET MODE REGISTERS ////////////////////////////////////////////////////////////// + + // MR2 (JEDEC DDR3 doc pg. 30) + localparam[2:0] PASR = 3'b000; //Partial Array Self-Refresh: Full Array + localparam[2:0] CWL = 3'b011; //CAS write Latency: 8 (1.5 ns > tCK(avg) >= 1.25 ns) CREATE A FUNCTION FOR THIS + localparam[0:0] ASR = 1'b1; //Auto Self-Refresh: on + localparam[0:0] SRT = 1'b0; //Self-Refresh Temperature Range:0 (If ASR = 1, SRT bit must be set to 0) + localparam[1:0] RTT_WR = 2'b00; //Dynamic ODT: off + localparam[2:0] MR2_SEL = 3'b010; //Selected Mode Register + localparam[18:0] MR2 = {MR2_SEL, 5'b00000, RTT_WR, 1'b0, SRT, ASR, CWL, PASR}; + + // MR3 (JEDEC DDR3 doc pg. 32) + localparam[1:0] MPR_LOC = 2'b00; //Data location for MPR Reads: Predefined Pattern 0_1_0_1_0_1_0_1 + localparam[0:0] MPR_EN = 1'b1; //MPR Enable: Enable MPR reads and calibration during initialization + localparam[0:0] MPR_DIS = 1'b0; //MPR Disable: MPR reads off (value loaded by MR3_MPR_DIS) + localparam[2:0] MR3_SEL = 3'b011; //MPR Selected + localparam[18:0] MR3_MPR_EN = {MR3_SEL, 13'b0_0000_0000_0000, MPR_EN, MPR_LOC}; + localparam[18:0] MR3_MPR_DIS = {MR3_SEL, 13'b0_0000_0000_0000, MPR_DIS, MPR_LOC}; + localparam[ROW_BITS+BA_BITS-1:0] MR3_RD_ADDR = 0; + + // MR1 (JEDEC DDR3 doc pg. 27) + localparam DLL_EN = 1'b0; //DLL Enable/Disable: Enabled(0) + localparam[1:0] DIC = 2'b00; //Output Driver Impedance Control (IS THIS THE SAME WITH RTT_NOM???????????? Search later) + localparam[2:0] RTT_NOM = 3'b011; //RTT Nominal: 40ohms (RQZ/6) is the impedance of the PCB trace + localparam[0:0] WL_EN = 1'b1; //Write Leveling Enable: Enabled (value loaded by MR1_WL_EN) + localparam[0:0] WL_DIS = 1'b0; //Write Leveling Enable: Disabled + localparam[1:0] AL = 2'b00; //Additive Latency: Disabled + localparam[0:0] TDQS = 1'b1; //Termination Data Strobe: Enabled; NOTE(review): value changed from 1'b0 (Disabled), confirm this is intended (provides additional termination resistance outputs. 
When the TDQS function is disabled, the DM function is provided (vice-versa).TDQS function is only available for X8 DRAM and must be disabled for X4 and X16. + localparam[0:0] QOFF = 1'b0; //Output Buffer Control: Enabled + localparam[2:0] MR1_SEL = 3'b001; //Selected Mode Register + localparam[18:0] MR1_WL_EN = {MR1_SEL, 3'b000, QOFF, TDQS, 1'b0, RTT_NOM[2], 1'b0, WL_EN, RTT_NOM[1], DIC[1], AL, RTT_NOM[0], DIC[0], DLL_EN}; + localparam[18:0] MR1_WL_DIS = {MR1_SEL, 3'b000, QOFF, TDQS, 1'b0, RTT_NOM[2], 1'b0, WL_DIS, RTT_NOM[1], DIC[1], AL, RTT_NOM[0], DIC[0], DLL_EN}; + + //MR0 (JEDEC DDR3 doc pg. 24) + localparam[1:0] BL = 2'b00; //Burst Length: 8 (Fixed) + localparam[3:0] CL = 4'b1110; //CAS Read Latency: 11, must agree with CL_nCK and the DDR3-1600 (11-11-11) speed bin (Check JEDEC DDR doc pg. 162) CREATE A FUNCTION FOR THIS + localparam[0:0] RBT = 1'b0; //Read Burst Type: Nibble Sequential + localparam[0:0] DLL_RST = 1'b1; //DLL Reset: Yes (this is self-clearing and must be applied after DLL enable) + localparam[2:0] WR = WRA_mode_register_value($ceil(tWR/DDR3_CLK_PERIOD)); //Write recovery for autoprecharge ( + localparam[0:0] PPD = 1'b0; //DLL Control for Precharge PD: Slow exit (DLL off) + localparam[2:0] MR0_SEL = 3'b000; + localparam[18:0] MR0 = {MR0_SEL, 3'b000, PPD, WR, DLL_RST, 1'b0, CL[3:1], RBT, CL[0], BL}; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + localparam PRE_STALL_DELAY = 10; @@ -229,11 +236,11 @@ module ddr3_controller #( function [27:0] read_rom_instruction(input[5:0] instruction_address); case(instruction_address) - 5'd0: read_rom_instruction = {5'b01000 , CMD_NOP , ns_to_cycles(POWER_ON_RESET_HIGH)}; + 5'd0: read_rom_instruction = {5'b01000 , CMD_NOP , ns_to_cycles(/*POWER_ON_RESET_HIGH*/200)}; //NOTE(review): literal 200 is 200 ns, not the 200 us of POWER_ON_RESET_HIGH (200_000); looks like a simulation shortcut, restore before running on hardware //0. RESET# needs to be maintained low for minimum 200us with power-up initialization. 
CKE is pulled //“Low” anytime before RESET# being de-asserted (min. time 10 ns). . - 5'd1: read_rom_instruction = {5'b01001 , CMD_NOP, ns_to_cycles(POWER_ON_RESET_HIGH/*INITIAL_CKE_LOW*/)}; + 5'd1: read_rom_instruction = {5'b01001 , CMD_NOP, ns_to_cycles(/*INITIAL_CKE_LOW*/500)}; //NOTE(review): literal 500 is 500 ns, not the 500 us of INITIAL_CKE_LOW (500_000); looks like a simulation shortcut, restore before running on hardware //1. After RESET# is de-asserted, wait for another 500 us until CKE becomes active. During this time, the //DRAM will start internal state initialization; this will be done independently of external clocks. // .... Also, a NOP or Deselect command must be registered (with tIS set up time to clock) before @@ -245,62 +252,68 @@ module ddr3_controller #( 5'd3: read_rom_instruction = {{2'b00,MR2[10], 2'b11}, CMD_MRS, MR2}; //3. Issue MRS command to load MR2. - 5'd4: read_rom_instruction = {5'b01011, CMD_NOP, nCK_to_cycles(tMRD)}; - //4. Delay of tMRD between MRS commands + 5'd4: read_rom_instruction = {{2'b00,MR3_MPR_DIS[10], 2'b11}, CMD_MRS, MR3_MPR_DIS}; + //4. All banks must first be in the idle state (all banks precharged and tRP met) before doing MPR calibration, thus issue first disabled MR3 - 5'd5: read_rom_instruction = {{2'b00,MR3_DIS[10], 2'b11}, CMD_MRS, MR3_DIS}; - //5. All banks must first be in the idle state (all banks precharged and tRP met) before doing MPR calibration, thus issue first disabled MR3 - - 5'd6: read_rom_instruction = {5'b01011, CMD_NOP, nCK_to_cycles(tMRD)}; - //6. Delay of tMRD between MRS commands + 5'd5: read_rom_instruction = {{2'b00,MR1_WL_DIS[10], 2'b11}, CMD_MRS, MR1_WL_DIS}; + //5. Issue MRS command to load MR1, enable DLL,and disable WL. - 5'd7: read_rom_instruction = {{2'b00,MR1[10], 2'b11}, CMD_MRS, MR1}; - //7. Issue MRS command to load MR1 and enable DLL. + 5'd6: read_rom_instruction = {{2'b00,MR0[10], 2'b11}, CMD_MRS, MR0}; + //6. Issue MRS command to load MR0 and reset DLL. - 5'd8: read_rom_instruction = {5'b01011, CMD_NOP, nCK_to_cycles(tMRD)}; - //8. 
Delay of tMRD between MRS commands - - 5'd9: read_rom_instruction = {{2'b00,MR0[10], 2'b11}, CMD_MRS, MR0}; - //9. Issue MRS command to load MR0 and reset DLL. - - 5'd10: read_rom_instruction = {5'b01011, CMD_NOP, tMOD}; - //10. Delay of tMOD between MRS command to a non-MRS command excluding NOP and DES + 5'd7: read_rom_instruction = {5'b01011, CMD_NOP, tMOD}; + //7. Delay of tMOD between MRS command to a non-MRS command excluding NOP and DES - 5'd11: read_rom_instruction = {5'b01011, CMD_ZQC, tZQinit}; - //11. ZQ Calibration command is used to calibrate DRAM Ron & ODT values. ZQCL command triggers the calibration engine + 5'd8: read_rom_instruction = {5'b01111, CMD_ZQC, tZQinit}; + //8. ZQ Calibration command is used to calibrate DRAM Ron & ODT values. ZQCL command triggers the calibration engine //inside the DRAM and, once calibration is achieved, the calibrated values area transferred from the calibration engine to //DRAM IO, which gets reflected as updated output driver // Precharge all banks before enabling MPR - 5'd12: read_rom_instruction = {5'b01111, CMD_PRE, ns_to_cycles(tRP)}; - //12. All banks must be precharged (A10-AP = high) and idle for a minimum of the precharge time tRP(min) before the Refresh Command can be applied. + 5'd9: read_rom_instruction = {5'b01111, CMD_PRE, ns_to_cycles(tRP)}; + //9. All banks must be precharged (A10-AP = high) and idle for a minimum of the precharge time tRP(min) before the Refresh Command can be applied. - 5'd13: read_rom_instruction = {{2'b00,MR3_EN[10], 2'b11}, CMD_MRS, MR3_EN}; - //13. Issue MRS command to load MR3. Prior to enabling the MPR for read calibration, all banks must be in the idle state (all banks + 5'd10: read_rom_instruction = {{2'b00,MR3_MPR_EN[10], 2'b11}, CMD_MRS, MR3_MPR_EN}; + //10. Issue MRS command to load MR3. Prior to enabling the MPR for read calibration, all banks must be in the idle state (all banks // precharged and tRP met). 
Once the MPR is enabled, any subsequent RD or RDA commands will be redirected to the MultiPurpose Register. - 5'd14: read_rom_instruction = {5'b01011, CMD_NOP, tMOD}; - //14. Delay of tMOD between MRS command to a non-MRS command excluding NOP and DES + 5'd11: read_rom_instruction = {5'b01011, CMD_NOP, tMOD}; + //11. Delay of tMOD between MRS command to a non-MRS command excluding NOP and DES - 5'd15: read_rom_instruction = {5'b01011, CMD_NOP, DELAY_MAX_VALUE[DELAY_SLOT_WIDTH-1:0]}; - //15. Delay for read/write calibration + 5'd12: read_rom_instruction = {5'b01011, CMD_NOP, {(DELAY_SLOT_WIDTH){1'b1}}}; + //12. Delay for read calibration - 5'd16: read_rom_instruction = {5'b01011, CMD_NOP, tMOD}; - //16. Delay for read/write calibration + 5'd13: read_rom_instruction = {{2'b00,MR3_MPR_DIS[10], 2'b11}, CMD_MRS, MR3_MPR_DIS}; + //13. Disable MPR after read calibration + + 5'd14: read_rom_instruction = {{2'b00,MR1_WL_EN[10], 2'b11}, CMD_MRS, MR1_WL_EN}; + //14. Issue MRS command to load MR1, and enable WL. + + 5'd15: read_rom_instruction = {5'b01011, CMD_NOP, tWLMRD}; + //15. Delay of tWLMRD after write leveling mode is programmed, before the first DQS/DQS# rising edge + + 5'd16: read_rom_instruction = {5'b01011, CMD_NOP, {(DELAY_SLOT_WIDTH){1'b1}}}; + //16. Delay for write calibration + + 5'd17: read_rom_instruction = {{2'b00,MR1_WL_DIS[10], 2'b11}, CMD_MRS, MR1_WL_DIS}; + //17. Issue MRS command to load MR1, and disable WL. + + 5'd18: read_rom_instruction = {5'b01011, CMD_NOP, tMOD}; + //18. Delay of tMOD between MRS command to a non-MRS command excluding NOP and DES // Perform first refresh and any subsequent refresh (so instruction 12 to 15 will be re-used for the refresh sequence) - 5'd17: read_rom_instruction = {5'b01111, CMD_PRE, ns_to_cycles(tRP)}; - //12. All banks must be precharged (A10-AP = high) and idle for a minimum of the precharge time tRP(min) before the Refresh Command can be applied. + 5'd19: read_rom_instruction = {5'b01111, CMD_PRE, ns_to_cycles(tRP)}; + //19. 
All banks must be precharged (A10-AP = high) and idle for a minimum of the precharge time tRP(min) before the Refresh Command can be applied. - 5'd18: read_rom_instruction = {5'b01011, CMD_REF, ns_to_cycles(tRFC)}; - //13. A delay between the Refresh Command and the next valid command, except NOP or DES, must be greater than or equal to the minimum + 5'd20: read_rom_instruction = {5'b01011, CMD_REF, ns_to_cycles(tRFC)}; + //20. A delay between the Refresh Command and the next valid command, except NOP or DES, must be greater than or equal to the minimum //Refresh cycle time tRFC(min) - 5'd19: read_rom_instruction = {5'b11011, CMD_NOP, ns_to_cycles(tREFI)}; - //14. Reset ends now. The refresh interval also starts to count. + 5'd21: read_rom_instruction = {5'b11011, CMD_NOP, ns_to_cycles(tREFI)}; + //21. Reset ends now. The refresh interval also starts to count. - 5'd20: read_rom_instruction = {5'b01011, CMD_NOP, PRE_STALL_DELAY[DELAY_SLOT_WIDTH-1:0]}; - // 15. Extra delay needed before starting the refresh sequence. (this already sets the wishbone stall high to make sure no user request is on-going when refresh seqeunce starts) + 5'd22: read_rom_instruction = {5'b01011, CMD_NOP, PRE_STALL_DELAY[DELAY_SLOT_WIDTH-1:0]}; + // 22. Extra delay needed before starting the refresh sequence. 
(this already sets the wishbone stall high to make sure no user request is on-going when refresh seqeunce starts) default: read_rom_instruction = {5'b00011, CMD_NOP, {(DELAY_SLOT_WIDTH){1'b0}}}; endcase @@ -316,7 +329,7 @@ module ddr3_controller #( reg reset_done = 0; //high if reset has already finished reg skip_reset_seq_delay = 0; //flag to skip delay and go to next reset instruction wire issue_read_command; - reg issue_write_command = 0; + wire issue_write_command; always @(posedge i_controller_clk, negedge i_rst_n) begin if(!i_rst_n) begin instruction_address <= 0; @@ -333,7 +346,7 @@ module ddr3_controller #( `else //if(instruction[DELAY_COUNTER_WIDTH - 1:0] > `COVER_DELAY) delay_counter <= `COVER_DELAY; //use fixed low value delay to cover the whole reset seqeunce using formal verification //else delay_counter <= instruction[DELAY_COUNTER_WIDTH - 1:0] ; //use delay from rom if that is smaller than the COVER_DELAY macro - if(instruction[DELAY_COUNTER_WIDTH - 1:0]!= DELAY_MAX_VALUE) delay_counter <= 20; + if(instruction[DELAY_COUNTER_WIDTH - 1:0]!= DELAY_MAX_VALUE) delay_counter <= 3; else delay_counter <= instruction[DELAY_COUNTER_WIDTH - 1:0]; `endif //RECEIVE THE COMMANDS @@ -347,7 +360,7 @@ module ddr3_controller #( if(delay_counter == 1 || !instruction[USE_TIMER] || skip_reset_seq_delay) begin delay_counter_is_zero <= 1; instruction <= read_rom_instruction(instruction_address); - instruction_address <= (instruction_address == 5'd20)? 5'd17:instruction_address+1; //instruction_address 15 must wrap back to instruction_address 12 for the refresh sequence + instruction_address <= (instruction_address == 5'd22)? 
5'd19:instruction_address+1; //instruction_address 22 must wrap back to instruction_address 19 for the refresh sequence end //we are now on the middle of a delay else delay_counter_is_zero <=0; @@ -369,7 +382,7 @@ module ddr3_controller #( localparam WRITE_TO_READ_DELAY = find_delay((CWL_nCK + 3'd4 + ns_to_nCK(tWTR)), WRITE_SLOT, READ_SLOT); //4 localparam WRITE_TO_PRECHARGE_DELAY = find_delay((CWL_nCK + 3'd4 + ns_to_nCK(tWR)), WRITE_SLOT, PRECHARGE_SLOT); //5 localparam WRITE_TO_ODT_OFF = find_delay((CWL_nCK + 3'd4 + ns_to_nCK(tWR)), WRITE_SLOT, PRECHARGE_SLOT); //5 - + //MARGIN_BEFORE_ANTICIPATE is the number of columns before the column //end when the anticipate can start //the worst case scenario is when the anticipated bank needs to be precharged @@ -379,6 +392,8 @@ module ddr3_controller #( localparam MARGIN_BEFORE_ANTICIPATE = PRECHARGE_TO_ACTIVATE_DELAY + ACTIVATE_TO_WRITE_DELAY + WRITE_TO_PRECHARGE_DELAY; localparam STAGE2_DATA_DEPTH = ($rtoi($floor((CWL_nCK - (3 - WRITE_SLOT + 1))/4.0 ))) + 1; //this is always >= 1 localparam READ_DELAY = $rtoi($floor((CL_nCK - (3 - READ_SLOT + 1))/4.0 )); + localparam DELAY_BEFORE_WRITE_LEVEL_FEEDBACK = STAGE2_DATA_DEPTH + ns_to_cycles(tWLO+tWLOE) + 10; //plus 10 controller clocks for possible bus latency and + //the delay for receiving feedback DQ from IOBUF -> IDELAY -> ISERDES reg[(1<= 2 + START_WRITE_LEVEL = 7, + WAIT_FOR_FEEDBACK = 8, + ISSUE_WRITE_1 = 9, + ISSUE_WRITE_2 = 10, + ISSUE_READ = 11, + READ_DATA = 12, + ANALYZE_DATA = 13, + DONE_CALIBRATE = 14; + + localparam STORED_DQS_SIZE = 5, //must be >= 2 + REPEAT_DQS_ANALYZE = 5; // repeat DQS read to find the accurate starting position of DQS wire[(DQ_BITS*LANES)-1:0] oserdes_data, odelay_data, idelay_data, read_dq; wire[LANES-1:0] odelay_dqs, read_dqs, idelay_dqs; @@ -482,16 +512,19 @@ module ddr3_controller #( wire test_OFB; reg[LANES-1:0] bitslip; - reg[3:0] state_calibrate; - reg[REPEAT_DQS*8-1:0] dqs_store = 0; - reg[$clog2(REPEAT_DQS):0] dqs_count_repeat = 
0; - reg[$clog2(REPEAT_DQS*8)-1:0] dqs_start_index = 0; - reg[$clog2(REPEAT_DQS*8)-1:0] dqs_target_index = 0; + reg[$clog2(DONE_CALIBRATE):0] state_calibrate; + reg[STORED_DQS_SIZE*8-1:0] dqs_store = 0; + reg[$clog2(STORED_DQS_SIZE):0] dqs_count_repeat = 0; + reg[$clog2(STORED_DQS_SIZE*8)-1:0] dqs_start_index = 0; + reg[$clog2(STORED_DQS_SIZE*8)-1:0] dqs_start_index_stored = 0; + reg[$clog2(STORED_DQS_SIZE*8)-1:0] dqs_target_index = 0; + reg[$clog2(REPEAT_DQS_ANALYZE):0] dqs_start_index_repeat=0; reg[1:0] train_delay; reg[CMD_LEN-1:0] cmd_reset_seq[3:0]; reg[3:0] delay_before_read_data = 0; + reg[$clog2(DELAY_BEFORE_WRITE_LEVEL_FEEDBACK):0] delay_before_write_level_feedback = 0; reg initial_dqs = 0; - reg[$clog2(LANES)-1:0] lane = 0; + reg[$clog2(DQ_BITS*LANES):0] lane = 0; reg[7:0] dqs_bitslip_arrangement = 0; reg[3:0] added_read_pipe_max = 0; reg[3:0] added_read_pipe[LANES - 1:0]; @@ -502,10 +535,24 @@ module ddr3_controller #( reg[15:0] delay_read_pipe[1:0]; //delay when each lane will retrieve iserdes_data reg[wb_data_bits - 1:0] o_wb_data_q[1:0]; //store data retrieved from iserdes_data to be sent to o_wb_data reg[15:0] o_wb_ack_read_q; + + reg write_calib_stb = 0; + reg write_calib_we = 0; + reg[3:0] write_calib_col = 0; + reg[63:0] write_calib_data = 0; + reg write_calib_odt = 0; + reg write_calib_dqs = 0; + reg write_calib_dq = 0; + reg prev_write_level_feedback = 1; + reg[63:0] read_data_store = 0; + reg[127:0] write_pattern = 0; + reg[$clog2(64):0] data_start_index = 0; + reg calib_stall; //process request transaction always @(posedge i_controller_clk, negedge i_rst_n) begin if(!i_rst_n ) begin o_wb_stall <= 1'b1; + calib_stall <= 1'b1; //set stage 1 to 0 stage1_pending <= 0; stage1_we <= 0; @@ -523,8 +570,9 @@ module ddr3_controller #( stage2_row <= 0; delay_before_odt_off_q <= 0; delay_before_read_ack_q <= 0; - write_dqs_q <= 0; - write_dqs <= 0; + cmd_odt_q <= 0; + stage2_data_unaligned <= 0; + unaligned_data <= 0; //set delay counters to 0 
for(index=0; index<(1< shiftreg(CWL) -> OSERDES(DDR) -> ODELAY -> RAM end - + + // when not in refresh, transaction can only be processed when i_wb_cyc is high and not stall if(i_wb_cyc && !o_wb_stall) begin //stage1 will not do the request (pending low) when the @@ -613,14 +663,19 @@ module ddr3_controller #( {stage1_next_row , stage1_next_bank, stage1_next_col[COL_BITS-1:$clog2(serdes_ratio*2)] } <= i_wb_addr + MARGIN_BEFORE_ANTICIPATE; //anticipated next row and bank to be accessed stage1_data <= i_wb_data; end - + else if(write_calib_stb) begin + stage1_pending <= write_calib_stb;//actual request flag + stage1_we <= write_calib_we; //write-enable + stage1_col <= write_calib_col; //column address (n-burst word-aligned) + stage1_bank <= 0; //bank_address + stage1_row <= 0; //row_address + {stage1_next_row , stage1_next_bank, stage1_next_col[COL_BITS-1:$clog2(serdes_ratio*2)] } <= 0; //anticipated next row and bank to be accessed + stage1_data <= write_calib_data; + end + {unaligned_data, stage2_data[0]} <= (stage2_data_unaligned << data_start_index) | unaligned_data; for(index = 1; index <= STAGE2_DATA_DEPTH; index = index+1) begin stage2_data[index] <= stage2_data[index-1]; end - - for(index = 1; index <= STAGE2_DATA_DEPTH+1; index = index+1) begin - write_dqs[index] <= write_dqs[index-1]; - end end end @@ -630,14 +685,34 @@ module ddr3_controller #( shift_reg_read_pipe_q <= 0; index_read_pipe <= 0; index_wb_data <= 0; + write_dqs_val <= 0; + write_dqs_q <= 0; + write_dqs <= 0; + write_dq_q <= 0; + write_dq <= 0; for(index = 0; index < 2; index = index + 1) begin delay_read_pipe[index] <= 0; end for(index = 0; index < 2; index = index + 1) begin o_wb_data_q[index] <= 0; end + end else begin + write_dqs_val <= write_dqs_d || write_dqs_q[0]; + write_dqs_q[0] <= write_dqs_d; + write_dqs_q[1] <= write_dqs_q[0]; + write_dqs[0] <= write_dqs_d || write_dqs_q[1] || write_dqs_q[0]; //high for 3 clk cycles + + write_dq_q <= write_dq_d; + write_dq[0] <= write_dq_d || 
write_dq_q; //high for 2 clk cycles + for(index = 1; index <= STAGE2_DATA_DEPTH+1; index = index+1) begin //increase by 1 to accomodate postamble + write_dqs[index] <= write_dqs[index-1]; + end + for(index = 1; index <= STAGE2_DATA_DEPTH+1; index = index+1) begin //increase by 1 to accomodate postamble + write_dq[index] <= write_dq[index-1]; + end + shift_reg_read_pipe_q <= shift_reg_read_pipe_d; for(index = 0; index < 2; index = index + 1) begin delay_read_pipe[index] <= (delay_read_pipe[index] >> 1); @@ -649,30 +724,30 @@ module ddr3_controller #( for(index = 0; index < LANES; index = index + 1) begin //if(delay_before_read_ack_q == (added_read_pipe_max - added_read_pipe[index] + 1)) begin //same lane if(delay_read_pipe[0][added_read_pipe_max != added_read_pipe[index]]) begin //same lane - o_wb_data_q[0][(64*0 + 8*index) +: 8] <= iserdes_data[(64*0 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[0][(64*1 + 8*index) +: 8] <= iserdes_data[(64*1 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[0][(64*2 + 8*index) +: 8] <= iserdes_data[(64*2 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[0][(64*3 + 8*index) +: 8] <= iserdes_data[(64*3 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[0][(64*4 + 8*index) +: 8] <= iserdes_data[(64*4 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[0][(64*5 + 8*index) +: 8] <= iserdes_data[(64*5 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[0][(64*6 + 8*index) +: 8] <= iserdes_data[(64*6 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[0][(64*7 + 8*index) +: 8] <= iserdes_data[(64*7 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[0][((DQ_BITS*LANES)*0 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*0 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[0][((DQ_BITS*LANES)*1 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*1 + 8*index) +: 8]; //update each lane of the burst + 
o_wb_data_q[0][((DQ_BITS*LANES)*2 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*2 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[0][((DQ_BITS*LANES)*3 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*3 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[0][((DQ_BITS*LANES)*4 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*4 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[0][((DQ_BITS*LANES)*5 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*5 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[0][((DQ_BITS*LANES)*6 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*6 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[0][((DQ_BITS*LANES)*7 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*7 + 8*index) +: 8]; //update each lane of the burst end if(delay_read_pipe[1][added_read_pipe_max != added_read_pipe[index]]) begin - o_wb_data_q[1][(64*0 + 8*index) +: 8] <= iserdes_data[(64*0 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[1][(64*1 + 8*index) +: 8] <= iserdes_data[(64*1 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[1][(64*2 + 8*index) +: 8] <= iserdes_data[(64*2 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[1][(64*3 + 8*index) +: 8] <= iserdes_data[(64*3 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[1][(64*4 + 8*index) +: 8] <= iserdes_data[(64*4 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[1][(64*5 + 8*index) +: 8] <= iserdes_data[(64*5 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[1][(64*6 + 8*index) +: 8] <= iserdes_data[(64*6 + 8*index) +: 8]; //update each lane of the burst - o_wb_data_q[1][(64*7 + 8*index) +: 8] <= iserdes_data[(64*7 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[1][((DQ_BITS*LANES)*0 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*0 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[1][((DQ_BITS*LANES)*1 + 8*index) +: 8] <= 
iserdes_data[((DQ_BITS*LANES)*1 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[1][((DQ_BITS*LANES)*2 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*2 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[1][((DQ_BITS*LANES)*3 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*3 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[1][((DQ_BITS*LANES)*4 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*4 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[1][((DQ_BITS*LANES)*5 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*5 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[1][((DQ_BITS*LANES)*6 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*6 + 8*index) +: 8]; //update each lane of the burst + o_wb_data_q[1][((DQ_BITS*LANES)*7 + 8*index) +: 8] <= iserdes_data[((DQ_BITS*LANES)*7 + 8*index) +: 8]; //update each lane of the burst end if(o_wb_ack_read_q[0]) begin index_wb_data <= !index_wb_data; end - for(index = 0; index < 16; index = index + 1) begin + for(index = 0; index < 15; index = index + 1) begin o_wb_ack_read_q[index] <= o_wb_ack_read_q[index+1]; end o_wb_ack_read_q[added_read_pipe_max] <= shift_reg_read_pipe_q[0]; @@ -698,33 +773,34 @@ module ddr3_controller #( // //Pipeline Stages: // wishbone inputs --> stage1 --> stage2 --> cmd - reg cmd_odt, cmd_ck_en, cmd_reset_n; - + always @* begin - cmd_odt = 0; - cmd_ck_en = 1; - cmd_reset_n = 1; + cmd_odt = cmd_odt_q || write_calib_odt; + cmd_ck_en = instruction[CLOCK_EN]; + cmd_reset_n = instruction[RESET_N]; o_wb_ack_d = 0; //ack goes high for every r/w request o_wb_stall_d = 0; //wb_stall going high is determined on stage 1 (higher priority), wb_stall going low is determined at stage2 (lower priority) pipe_stall = 0; //pipe_stall will follow i_wb_stall(so stall when stage 2 needs delay) but goes low after actual read/write request (move pipe forward when stage2 finishes request) precharge_slot_busy = 0; //flag that determines if stage 2 is 
issuing precharge (thus stage 1 cannot issue precharge) activate_slot_busy = 0; //flag that determines if stage 2 is issuing activate (thus stage 1 cannot issue activate) - write_dqs_d = 0; + write_dqs_d = write_calib_dqs; + write_dq_d = write_calib_dq; for(index=0; index < (1<= 5); + CALIBRATE_DQS: if(dqs_start_index_stored == dqs_target_index) begin + added_read_pipe[lane] = dqs_target_index[$clog2(STORED_DQS_SIZE*8)-1:3] + (dqs_target_index[2:0] >= 5); dqs_bitslip_arrangement <= 16'b0011_1100_0011_1100 >> dqs_target_index[2:0]; state_calibrate <= BITSLIP_DQS_TRAIN_2; end @@ -1514,27 +1630,93 @@ module ddr3_controller #( BITSLIP_DQS_TRAIN_2: if(train_delay == 0) begin //train again the ISERDES to capture the DQ correctly if(test_Q[lane] == dqs_bitslip_arrangement) begin - if(lane == 7) begin - skip_reset_seq_delay = 1; - state_calibrate <= DONE_CALIBRATE; + if(lane == LANES - 1) begin + skip_reset_seq_delay <= 1; + lane <= 0; + prev_write_level_feedback <= 1'b1; + state_calibrate <= START_WRITE_LEVEL; end else begin lane <= lane + 1; - added_read_pipe_max <= added_read_pipe_max > added_read_pipe[lane]? added_read_pipe_max:added_read_pipe[lane]; state_calibrate <= BITSLIP_DQS_TRAIN_1; end + added_read_pipe_max <= added_read_pipe_max > added_read_pipe[lane]? 
added_read_pipe_max:added_read_pipe[lane]; end else begin bitslip[lane] <= 1; train_delay <= 3; end end + + START_WRITE_LEVEL: if(instruction_address == 17) begin + write_calib_dqs <= 1'b1; + write_calib_odt <= 1'b1; + delay_before_write_level_feedback <= DELAY_BEFORE_WRITE_LEVEL_FEEDBACK; + state_calibrate <= WAIT_FOR_FEEDBACK; + end + + WAIT_FOR_FEEDBACK: if(delay_before_write_level_feedback == 0) begin + prev_write_level_feedback <= iserdes_data[lane<<3]; + if({prev_write_level_feedback, iserdes_data[lane<<3]} == 2'b01) begin + if(lane == LANES - 1) begin + write_calib_odt <= 0; + skip_reset_seq_delay <= 1; + state_calibrate <= ISSUE_WRITE_1; + end + else begin + lane <= lane + 1; + prev_write_level_feedback <= 1'b1; + state_calibrate <= START_WRITE_LEVEL; + end + end + else begin + odelay_ce[lane] <= 1'b1; + odelay_inc[lane] <= 1'b1; + state_calibrate <= START_WRITE_LEVEL; + end + end + + ISSUE_WRITE_1: if(instruction_address == 22) begin + write_calib_stb <= 1;//actual request flag + write_calib_we <= 1; //write-enable + write_calib_col <= 0; + write_calib_data <= 64'h77_66_55_44_33_22_11_00; + state_calibrate <= ISSUE_WRITE_2; + end + ISSUE_WRITE_2: begin + write_calib_stb <= 1;//actual request flag + write_calib_we <= 1; //write-enable + write_calib_col <= 8; + write_calib_data <= 64'hff_ee_dd_cc_bb_aa_99_88; + state_calibrate <= ISSUE_READ; + end + + ISSUE_READ: if(!o_wb_stall_d) begin + write_calib_stb <= 1;//actual request flag + write_calib_we <= 0; //write-enable + state_calibrate <= READ_DATA; + end + + READ_DATA: if(o_wb_ack_read_q[0]) begin + read_data_store <= o_wb_data; + state_calibrate <= ANALYZE_DATA; + data_start_index <= 0; + write_pattern <= 128'hff_ee_dd_cc_bb_aa_99_88_77_66_55_44_33_22_11_00; + end + + ANALYZE_DATA: if(write_pattern[data_start_index +: 64] == read_data_store) begin + state_calibrate <= DONE_CALIBRATE; + end + else begin + data_start_index <= data_start_index + 8; + end DONE_CALIBRATE: state_calibrate <= DONE_CALIBRATE; 
endcase end end assign issue_read_command = (state_calibrate == MPR_READ); + assign issue_write_command = 0; //////////////////////////////////////////////////////////////////////// End of PHY Interface //////////////////////////////////////////////////////////////////////// @@ -1575,7 +1757,7 @@ module ddr3_controller #( function[2:0] WRA_mode_register_value(input integer WRA); //WR_min (write recovery for autoprecharge) in clock cycles is calculated by dividing tWR(in ns) by tCK(in ns) and rounding up to the next integer. //The WR value in the mode register must be programmed to be equal or larger than WRmin. - case(WRA) + case(WRA+1) 1,2,3,4,5: WRA_mode_register_value = 3'b001; 6: WRA_mode_register_value = 3'b010; 7: WRA_mode_register_value = 3'b011;