// Background: // This DDR3 controller will be used with a DDR3-1600 with Kintex 7 FPGA Board (XC7K160T-3FFG676E). // The goal will be to: // - Run this at 1600Mbps (Maximum Physical Interface (PHY) Rate for a 4:1 // memory controller based on "DC and AC Switching Characteristics" for Kintex 7) // - Parameterize everything // - Interface should be (nearly) bus agnostic // - High (sustained) data throughput. Sequential writes should be able to continue without interruption `define MICRON_SIM //simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) //`define FORMAL_COVER //change delay in reset sequence to fit in cover statement //`define COVER_DELAY 3 //fixed delay used in formal cover for reset sequence `default_nettype none // THESE DEFINES WILL BE MODIFIED AS PARAMETERS LATER ON `define DDR3_1600_11_11_11 // DDR3-1600 (11-11-11) speed bin `define RAM_8Gb //DDR3 Capacity //`define RAM_2Gb //`define RAM_4Gb //`define RAM_8Gb `define x8 //DDR3 organization (DQ bus width) //`define x4 //`define x16 module ddr3_controller #( parameter ROW_BITS = 14, //width of row address COL_BITS = 10, //width of column address BA_BITS = 3, //width of bank address DQ_BITS = 8, //width of DQ LANES = 8, //8 lanes of DQ CONTROLLER_CLK_PERIOD = 5, //ns, period of clock input to this DDR3 controller module DDR3_CLK_PERIOD = 1.25, //ns, period of clock input to DDR3 RAM device OPT_LOWPOWER = 1, //1 = low power, 0 = low logic OPT_BUS_ABORT = 1, //1 = can abort bus, 0 = no abort (i_wb_cyc will be ignored, ideal for an AXI implementation which cannot abort transaction) // The next parameters act more like a localparam (since user does not have to set this manually) but was added here to simplify port declaration serdes_ratio = $rtoi(CONTROLLER_CLK_PERIOD/DDR3_CLK_PERIOD), wb_addr_bits = ROW_BITS + COL_BITS + BA_BITS - $clog2(DQ_BITS*(serdes_ratio)*2 / 8), wb_data_bits = DQ_BITS*LANES*serdes_ratio*2, wb_sel_bits = wb_data_bits / 8, //4 is the width of a single ddr3 
command {cs_n, ras_n, cas_n, we_n} plus 3 (ck_en, odt, reset_n) plus bank bits plus row bits cmd_len = 4 + 3 + BA_BITS + ROW_BITS ) ( input wire i_controller_clk, //i_controller_clk has period of CONTROLLER_CLK_PERIOD input wire i_rst_n, //active-low asynchronous reset (sampled on negedge in the always blocks of this module) // Wishbone inputs input wire i_wb_cyc, //bus cycle active (1 = normal operation, 0 = all ongoing transactions are to be cancelled) input wire i_wb_stb, //request a transfer input wire i_wb_we, //write-enable (1 = write, 0 = read) input wire[wb_addr_bits - 1:0] i_wb_addr, //burst-addressable {row,bank,col} input wire[wb_data_bits - 1:0] i_wb_data, //write data, for a 4:1 controller data width is 8 times the number of pins on the device input wire[wb_sel_bits - 1:0] i_wb_sel, //byte strobe for write (1 = write the byte) input wire i_aux, //for AXI-interface compatibility (given upon strobe) // Wishbone outputs output reg o_wb_stall, //1 = busy, cannot accept requests output wire o_wb_ack, //1 = read/write request has completed output wire[wb_data_bits - 1:0] o_wb_data, //read data, for a 4:1 controller data width is 8 times the number of pins on the device output reg o_aux, //for AXI-interface compatibility (returned upon ack) // PHY interface input wire[DQ_BITS*LANES*8-1:0] i_phy_iserdes_data, input wire[LANES*8-1:0] i_phy_iserdes_dqs, input wire[LANES*8-1:0] i_phy_iserdes_bitslip_reference, input wire i_phy_idelayctrl_rdy, output wire[cmd_len*serdes_ratio-1:0] o_phy_cmd, output wire o_phy_dqs_tri_control, o_phy_dq_tri_control, output wire o_phy_toggle_dqs, output wire[wb_data_bits-1:0] o_phy_data, output wire[4:0] o_phy_odelay_data_cntvaluein, o_phy_odelay_dqs_cntvaluein, output wire[4:0] o_phy_idelay_data_cntvaluein, o_phy_idelay_dqs_cntvaluein, output reg[LANES-1:0] o_phy_odelay_data_ld, o_phy_odelay_dqs_ld, output reg[LANES-1:0] o_phy_idelay_data_ld, o_phy_idelay_dqs_ld, output reg[LANES-1:0] o_phy_bitslip ); /************************************************************* Command Parameters
*************************************************************/ //DDR3 commands {cs_n, ras_n, cas_n, we_n} (JEDEC DDR3 doc pg. 33 ) localparam[3:0]CMD_MRS = 4'b0000, // Mode Register Set CMD_REF = 4'b0001, // Refresh CMD_PRE = 4'b0010, // Precharge (A10-AP: 0 = Single Bank Precharge, 1 = Precharge All Banks) CMD_ACT = 4'b0011, // Bank Activate CMD_WR = 4'b0100, // Write (A10-AP: 0 = no Auto-Precharge) (A12-BC#: 1 = Burst Length 8) CMD_RD = 4'b0101, //Read (A10-AP: 0 = no Auto-Precharge) (A12-BC#: 1 = Burst Length 8) CMD_NOP = 4'b0111, // No Operation CMD_DES = 4'b1000, // Deselect command performs the same function as No Operation command (JEDEC DDR3 doc pg. 34 NOTE 11) CMD_ZQC = 4'b0110; // ZQ Calibration (A10-AP: 0 = ZQ Calibration Short, 1 = ZQ Calibration Long) localparam RST_DONE = 27, // Command bit that determines if the reset sequence has already finished. non-persistent (only needs to be toggled once), REF_IDLE = 27, // No refresh is about to start and no ongoing refresh. (same bit as RST_DONE) USE_TIMER = 26, // Command bit that determines if timer will be used (if delay is zero, USE_TIMER must be LOW) A10_CONTROL = 25, //Command bit that determines if A10 AutoPrecharge will be high CLOCK_EN = 24, //Clock-enable to DDR3 RESET_N = 23, //Reset_n to DDR3 DDR3_CMD_START = 22, //Start of DDR3 command slot DDR3_CMD_END = 19, //end of DDR3 command slot MRS_BANK_START = 18; //start of bank value in MRS value // ddr3_metadata partitioning localparam CMD_CS_N = cmd_len - 1, CMD_RAS_N = cmd_len - 2, CMD_CAS_N= cmd_len - 3, CMD_WE_N = cmd_len - 4, CMD_ODT = cmd_len - 5, CMD_CKE_EN = cmd_len - 6, CMD_RESET_N = cmd_len - 7, CMD_BANK_START = BA_BITS + ROW_BITS - 1, CMD_ADDRESS_START = ROW_BITS - 1; localparam READ_SLOT = get_slot(CMD_RD), WRITE_SLOT = get_slot(CMD_WR), ACTIVATE_SLOT = get_slot(CMD_ACT), PRECHARGE_SLOT = get_slot(CMD_PRE); //cmd needs to be center-aligned to the positive edge of the //ddr3_clk. This means cmd needs to be delayed by half the ddr3 //clk period.
Subtract by 600ps to include the IODELAY insertion //delay. Divide by a delay resolution of 78.125ps per tap to get //the needed tap value. localparam CMD_INITIAL_ODELAY_TAP = ((DDR3_CLK_PERIOD*1000/2) - 600)/78.125; // Data does not have to be delayed (DQS is the one that has to be // delayed and center-aligned to the center eye of data) localparam DATA_INITIAL_ODELAY_TAP = 0; //DQS needs to be edge-aligned to the center eye of the data. //This means DQS needs to be delayed by a quarter of the ddr3 //clk period relative to the data. Subtract by 600ps to include //the IODELAY insertion delay. Divide by a delay resolution of //78.125ps per tap to get the needed tap value. Then add the tap //value used in data to have the delay relative to the data. NOTE(review): unlike CMD_INITIAL_ODELAY_TAP, the expression below does not subtract 600ps - confirm which is intended. localparam DQS_INITIAL_ODELAY_TAP = ((DDR3_CLK_PERIOD*1000/4))/78.125 + DATA_INITIAL_ODELAY_TAP; //Incoming DQS should be 90 degree delayed relative to incoming data localparam DATA_INITIAL_IDELAY_TAP = 0; //600ps delay localparam DQS_INITIAL_IDELAY_TAP = ((DDR3_CLK_PERIOD*1000/4))/78.125 + DATA_INITIAL_IDELAY_TAP; /*********************************************************************************************************************************************/ /********************************************************** Timing Parameters ***********************************************************************************/ localparam DELAY_SLOT_WIDTH = 19; //Bitwidth of the delay slot and mode register slot on the reset/refresh rom will be at the same size as the Mode Register localparam POWER_ON_RESET_HIGH = 200_000; // 200us reset must be active at initialization localparam INITIAL_CKE_LOW = 500_000; // 500us (expressed in ns, it is passed to ns_to_cycles) cke must be low before activating `ifdef DDR3_1600_11_11_11 //DDR3-1600 (11-11-11) speed bin localparam tRCD = 13.750; // ns Active to Read/Write command time localparam tRP = 13.750; // ns Precharge command period `endif `ifdef RAM_1Gb localparam tRFC = 110.0; // ns Refresh command to ACT or REF `elsif RAM_2Gb localparam
tRFC = 160.0; // ns Refresh command to ACT or REF `elsif RAM_4Gb localparam tRFC = 300.0; // ns Refresh command to ACT or REF `else localparam tRFC = 350.0; // ns Refresh command to ACT or REF `endif localparam tREFI = 7800; //ns Average periodic refresh interval localparam tXPR = max(5*DDR3_CLK_PERIOD,tRFC+10); // ns Exit Reset from CKE HIGH to a valid command localparam tMRD = 4; // nCK Mode Register Set command cycle time localparam tWR = 15.0; // ns Write Recovery Time localparam tWTR = max(nCK_to_ns(4), 7.5); //ns Delay from start of internal write transaction to internal read command localparam[DELAY_SLOT_WIDTH - 1:0] tWLMRD = nCK_to_cycles(40); // nCK First DQS/DQS# rising edge after write leveling mode is programmed localparam tWLO = 7.5; //ns Write leveling output delay localparam tWLOE = 2; localparam tRTP = max(nCK_to_ns(4), 7.5); //ns Internal Command to PRECHARGE Command delay localparam tCCD = 4; //nCK CAS to CAS command delay localparam[DELAY_SLOT_WIDTH - 1:0] tMOD = max(nCK_to_cycles(12), ns_to_cycles(15)); //cycles (controller) Mode Register Set command update delay localparam[DELAY_SLOT_WIDTH - 1:0] tZQinit = max(nCK_to_cycles(512), ns_to_cycles(640));//cycles (controller) Power-up and RESET calibration time localparam CL_nCK = 6; //create a function for this localparam CWL_nCK = 5; //create a function for this localparam DELAY_MAX_VALUE = ns_to_cycles(INITIAL_CKE_LOW); //Largest possible delay needed by the reset and refresh sequence localparam DELAY_COUNTER_WIDTH= $clog2(DELAY_MAX_VALUE); //Bitwidth needed by the maximum possible delay, this will be the delay counter width localparam READ_CAL_DELAY = 100; localparam PRE_STALL_DELAY = 10; /*********************************************************************************************************************************************/ /********************************************************** Computed Delay Parameters **********************************************************/ localparam 
PRECHARGE_TO_ACTIVATE_DELAY = find_delay(ns_to_nCK(tRP), PRECHARGE_SLOT, ACTIVATE_SLOT); //3 localparam ACTIVATE_TO_WRITE_DELAY = find_delay(ns_to_nCK(tRCD), ACTIVATE_SLOT, WRITE_SLOT); //3 localparam ACTIVATE_TO_READ_DELAY = find_delay(ns_to_nCK(tRCD), ACTIVATE_SLOT, READ_SLOT); //2 localparam READ_TO_WRITE_DELAY = find_delay((CL_nCK + tCCD + 3'd2 - CWL_nCK), READ_SLOT, WRITE_SLOT); //2 localparam READ_TO_READ_DELAY = 0; localparam READ_TO_PRECHARGE_DELAY = find_delay(ns_to_nCK(tRTP), READ_SLOT, PRECHARGE_SLOT); //1 localparam WRITE_TO_WRITE_DELAY = 0; localparam WRITE_TO_READ_DELAY = find_delay((CWL_nCK + 3'd4 + ns_to_nCK(tWTR)), WRITE_SLOT, READ_SLOT); //4 localparam WRITE_TO_PRECHARGE_DELAY = find_delay((CWL_nCK + 3'd4 + ns_to_nCK(tWR)), WRITE_SLOT, PRECHARGE_SLOT); //5 //MARGIN_BEFORE_ANTICIPATE is the number of columns before the column //end when the anticipate can start //the worst case scenario is when the anticipated bank needs to be precharged //thus the margin must satisfy tRP (for precharge) and tRCD (for activate). //Also, the worst case is when the anticipated bank still has a leftover //WRITE_TO_PRECHARGE_DELAY pending, so that delay is included in the margin as well.
localparam MARGIN_BEFORE_ANTICIPATE = PRECHARGE_TO_ACTIVATE_DELAY + ACTIVATE_TO_WRITE_DELAY + WRITE_TO_PRECHARGE_DELAY; localparam STAGE2_DATA_DEPTH = ($rtoi($floor((CWL_nCK - (3 - WRITE_SLOT + 1))/4.0 ))) + 1; //this is always >= 1 localparam READ_DELAY = $rtoi($floor((CL_nCK - (3 - READ_SLOT + 1))/4.0 )); localparam DELAY_BEFORE_WRITE_LEVEL_FEEDBACK = STAGE2_DATA_DEPTH + ns_to_cycles(tWLO+tWLOE) + 10; //plus 10 controller clocks for possible bus latency and //the delay for receiving feedback DQ from IOBUF -> IDELAY -> ISERDES /*********************************************************************************************************************************************/ /********************************************************** Read/Write Calibration Parameters **********************************************************/ localparam IDLE = 0, BITSLIP_DQS_TRAIN_1 = 1, MPR_READ = 2, COLLECT_DQS = 3, ANALYZE_DQS = 4, CALIBRATE_DQS = 5, BITSLIP_DQS_TRAIN_2 = 6, START_WRITE_LEVEL = 7, WAIT_FOR_FEEDBACK = 8, ISSUE_WRITE_1 = 9, ISSUE_WRITE_2 = 10, ISSUE_READ = 11, READ_DATA = 12, ANALYZE_DATA = 13, DONE_CALIBRATE = 14; localparam STORED_DQS_SIZE = 5, //must be >= 2 REPEAT_DQS_ANALYZE = 1; // repeat DQS read to find the accurate starting position of DQS /*********************************************************************************************************************************************/ /************************************************************* Set Mode Registers Parameters *************************************************************/ // MR2 (JEDEC DDR3 doc pg. 
30) localparam[2:0] PASR = 3'b000; //Partial Array Self-Refresh: Full Array localparam[2:0] CWL = 3'b000; //CAS write Latency: NOTE(review): this comment used to say 8 (1.5 ns > tCK(avg) >= 1.25 ns), but CWL_nCK is 5 and 3'b000 appears to be the MR2 code for CWL = 5 - confirm against the JEDEC MR2 table. CREATE A FUNCTION FOR THIS localparam[0:0] ASR = 1'b1; //Auto Self-Refresh: on localparam[0:0] SRT = 1'b0; //Self-Refresh Temperature Range:0 (If ASR = 1, SRT bit must be set to 0) localparam[1:0] RTT_WR = 2'b00; //Dynamic ODT: off localparam[2:0] MR2_SEL = 3'b010; //Selected Mode Register localparam[18:0] MR2 = {MR2_SEL, 5'b00000, RTT_WR, 1'b0, SRT, ASR, CWL, PASR}; // MR3 (JEDEC DDR3 doc pg. 32) localparam[1:0] MPR_LOC = 2'b00; //Data location for MPR Reads: Predefined Pattern 0_1_0_1_0_1_0_1 localparam[0:0] MPR_EN = 1'b1; //MPR Enable: Enable MPR reads and calibration during initialization localparam[0:0] MPR_DIS = 1'b0; //MPR Enable: Disable MPR reads (value used to build MR3_MPR_DIS) localparam[2:0] MR3_SEL = 3'b011; //MPR Selected localparam[18:0] MR3_MPR_EN = {MR3_SEL, 13'b0_0000_0000_0000, MPR_EN, MPR_LOC}; localparam[18:0] MR3_MPR_DIS = {MR3_SEL, 13'b0_0000_0000_0000, MPR_DIS, MPR_LOC}; localparam[ROW_BITS+BA_BITS-1:0] MR3_RD_ADDR = 0; // MR1 (JEDEC DDR3 doc pg. 27) localparam DLL_EN = 1'b0; //DLL Enable/Disable: Enabled(0) localparam[1:0] DIC = 2'b00; //Output Driver Impedance Control (IS THIS THE SAME WITH RTT_NOM???????????? Search later) localparam[2:0] RTT_NOM = 3'b011; //RTT Nominal: 40ohms (RZQ/6) is the impedance of the PCB trace localparam[0:0] WL_EN = 1'b1; //Write Leveling Enable: Enabled (value used to build MR1_WL_EN) localparam[0:0] WL_DIS = 1'b0; //Write Leveling Enable: Disabled localparam[1:0] AL = 2'b00; //Additive Latency: Disabled localparam[0:0] TDQS = 1'b1; //Termination Data Strobe: Disabled (provides additional termination resistance outputs. //When the TDQS function is disabled, the DM function is provided (vice-versa).TDQS function is only //available for X8 DRAM and must be disabled for X4 and X16. NOTE(review): the value is 1'b1 while this comment says Disabled - presumably 1 enables TDQS in MR1, confirm the intended setting.
localparam[0:0] QOFF = 1'b0; //Output Buffer Control: Enabled localparam[2:0] MR1_SEL = 3'b001; //Selected Mode Register localparam[18:0] MR1_WL_EN = {MR1_SEL, 3'b000, QOFF, TDQS, 1'b0, RTT_NOM[2], 1'b0, WL_EN, RTT_NOM[1], DIC[1], AL, RTT_NOM[0], DIC[0], DLL_EN}; localparam[18:0] MR1_WL_DIS = {MR1_SEL, 3'b000, QOFF, TDQS, 1'b0, RTT_NOM[2], 1'b0, WL_DIS, RTT_NOM[1], DIC[1], AL, RTT_NOM[0], DIC[0], DLL_EN}; //MR0 (JEDEC DDR3 doc pg. 24) localparam[1:0] BL = 2'b00; //Burst Length: 8 (Fixed) localparam[3:0] CL = 4'b0100; //CAS Read Latency: NOTE(review): this comment used to say 10 (can support DDR-1600 speedbin 8-8-8, 9-9-9, and 10-10-10), but CL_nCK is 6 and 4'b0100 appears to be the MR0 code for CL = 6 - confirm (Check JEDEC DDR doc pg. 162) CREATE A FUNCTION FOR THIS localparam[0:0] RBT = 1'b0; //Read Burst Type: Nibble Sequential localparam[0:0] DLL_RST = 1'b1; //DLL Reset: Yes (this is self-clearing and must be applied after DLL enable) localparam[2:0] WR = WRA_mode_register_value($ceil(tWR/DDR3_CLK_PERIOD)); //Write recovery for autoprecharge (derived from tWR expressed in DDR3 clock cycles) localparam[0:0] PPD = 1'b0; //DLL Control for Precharge PD: Slow exit (DLL off) localparam[2:0] MR0_SEL = 3'b000; localparam[18:0] MR0 = {MR0_SEL, 3'b000, PPD, WR, DLL_RST, 1'b0, CL[3:1], RBT, CL[0], BL}; /*********************************************************************************************************************************************/ localparam INITIAL_RESET_INSTRUCTION = {5'b01000 , CMD_NOP , { {(DELAY_SLOT_WIDTH-3){1'b0}} , 3'd5} }; /************************************************************* Registers and Wires *************************************************************/ reg[4:0] instruction_address = 0; //address for accessing rom instruction reg[27:0] instruction = INITIAL_RESET_INSTRUCTION; //instruction retrieved from reset instruction rom reg[ DELAY_COUNTER_WIDTH - 1:0] delay_counter = INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0]; //counter used for delays reg delay_counter_is_zero = (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0] == 0); //counter is now zero so retrieve next delay reg reset_done =
0; //high if reset has already finished reg skip_reset_seq_delay = 0; //flag to skip delay and go to next reset instruction wire issue_read_command; wire issue_write_command; reg[(1< `COVER_DELAY) delay_counter <= `COVER_DELAY; else delay_counter <= instruction[DELAY_COUNTER_WIDTH - 1:0] ; //use delay from rom if that is smaller than the COVER_DELAY macro `else delay_counter <= instruction[DELAY_COUNTER_WIDTH - 1:0]; //retrieve delay value of current instruction, we count to zero thus minus 1 `endif end //else: decrement delay counter when current instruction needs delay else if(instruction[USE_TIMER]) delay_counter <= delay_counter - 1; //delay_counter of 1 means we will need to update the delay_counter next clock cycle (delay_counter of zero) so we need to retrieve //now the next instruction. The same thing needs to be done when current instruction does not need the timer delay. if(delay_counter == 1 || !instruction[USE_TIMER] || skip_reset_seq_delay) begin delay_counter_is_zero <= 1; instruction <= read_rom_instruction(instruction_address); instruction_address <= (instruction_address == 5'd23)? 5'd19:instruction_address+1; //wrap back of address to repeat refresh sequence end //we are now on the middle of a delay else delay_counter_is_zero <=0; //instruction[RST_DONE] is non-persistent thus we need to register it once it goes high reset_done <= instruction[RST_DONE]? 
1'b1:reset_done; end end /*********************************************************************************************************************************************/ /******************************************************* Track Bank Status and Issue Command *******************************************************/ //process request transaction always @(posedge i_controller_clk, negedge i_rst_n) begin if(!i_rst_n ) begin o_wb_stall <= 1'b1; //set stage 1 to 0 stage1_pending <= 0; stage1_we <= 0; stage1_col <= 0; stage1_bank <= 0; stage1_row <= 0; stage1_next_bank <= 0; stage1_next_row <= 0; stage1_next_col <= 0; stage1_data <= 0; //set stage2 to 0 stage2_pending <= 0; stage2_we <= 0; stage2_col <= 0; stage2_bank <= 0; stage2_row <= 0; cmd_odt_q <= 0; stage2_data_unaligned <= 0; for(index=0; index shiftreg(CWL) -> OSERDES(DDR) -> ODELAY -> RAM end // when not in refresh, transaction can only be processed when i_wb_cyc is high and not stall if(i_wb_cyc && !o_wb_stall) begin //stage1 will not do the request (pending low) when the //request is on the same bank as the current request. This //will ensure stage1 bank will be different from stage2 bank stage1_pending <= i_wb_stb;//actual request flag stage1_we <= i_wb_we; //write-enable stage1_col <= { i_wb_addr[(COL_BITS- $clog2(serdes_ratio*2)-1):0], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) stage1_bank <= i_wb_addr[(BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2))]; //bank_address stage1_row <= i_wb_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address //stage1_next_bank will not increment unless stage1_next_col //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
Thus, anticipated //precharge and activate will happen only at the end of the //current column with a margin dictated by //MARGIN_BEFORE_ANTICIPATE {stage1_next_row , stage1_next_bank, stage1_next_col[COL_BITS-1:$clog2(serdes_ratio*2)] } <= i_wb_addr + MARGIN_BEFORE_ANTICIPATE; //anticipated next row and bank to be accessed stage1_data <= i_wb_data; end else if(write_calib_stb) begin stage1_pending <= write_calib_stb;//actual request flag stage1_we <= write_calib_we; //write-enable stage1_col <= write_calib_col; //column address (n-burst word-aligned) stage1_bank <= 0; //bank_address stage1_row <= 0; //row_address {stage1_next_row , stage1_next_bank, stage1_next_col[COL_BITS-1:$clog2(serdes_ratio*2)] } <= 0; //anticipated next row and bank to be accessed stage1_data <= write_calib_data; end for(index = 0; index < LANES; index = index + 1) begin {unaligned_data[index], { stage2_data[0][((DQ_BITS*LANES)*7 + 8*index) +: 8], stage2_data[0][((DQ_BITS*LANES)*6 + 8*index) +: 8], stage2_data[0][((DQ_BITS*LANES)*5 + 8*index) +: 8], stage2_data[0][((DQ_BITS*LANES)*4 + 8*index) +: 8], stage2_data[0][((DQ_BITS*LANES)*3 + 8*index) +: 8], stage2_data[0][((DQ_BITS*LANES)*2 + 8*index) +: 8], stage2_data[0][((DQ_BITS*LANES)*1 + 8*index) +: 8], stage2_data[0][((DQ_BITS*LANES)*0 + 8*index) +: 8] }} <= ( {stage2_data_unaligned[((DQ_BITS*LANES)*7 + 8*index) +: 8], stage2_data_unaligned[((DQ_BITS*LANES)*6 + 8*index) +: 8], stage2_data_unaligned[((DQ_BITS*LANES)*5 + 8*index) +: 8], stage2_data_unaligned[((DQ_BITS*LANES)*4 + 8*index) +: 8], stage2_data_unaligned[((DQ_BITS*LANES)*3 + 8*index) +: 8], stage2_data_unaligned[((DQ_BITS*LANES)*2 + 8*index) +: 8],stage2_data_unaligned[((DQ_BITS*LANES)*1 + 8*index) +: 8],stage2_data_unaligned[((DQ_BITS*LANES)*0 + 8*index) +: 8] } << data_start_index[index]) | unaligned_data[index]; end //{unaligned_data, stage2_data[0]} <= (stage2_data_unaligned << data_start_index) | unaligned_data; for(index = 1; index < STAGE2_DATA_DEPTH; index = index+1) begin 
stage2_data[index] <= stage2_data[index-1]; end end end assign o_phy_data = stage2_data[STAGE2_DATA_DEPTH-1]; // DIAGRAM FOR ALL RELEVANT TIMING PARAMETERS: // // tRTP // ------------------------------------------------------------- // | tCCD | // | -----> Read ---------> Read // v | ^ | // Precharge ------> Activate -------->| | tWTR | tRTW // ^ tRP tRCD | | v // | ------> Write -------> Write // | tCCD | // ------------------------------------------------------------- // tWR (after data burst) //note: all delays after write counts only after the data burst (except for write-to-write tCCD) // //Pipeline Stages: // wishbone inputs --> stage1 --> stage2 --> cmd always @* begin cmd_odt = cmd_odt_q || write_calib_odt; cmd_ck_en = instruction[CLOCK_EN]; cmd_reset_n = instruction[RESET_N]; o_wb_stall_d = 0; //wb_stall going high is determined on stage 1 (higher priority), wb_stall going low is determined at stage2 (lower priority) pipe_stall = 0; //pipe_stall will follow i_wb_stall(so stall when stage 2 needs delay) but goes low after actual read/write request (move pipe forward when stage2 finishes request) precharge_slot_busy = 0; //flag that determines if stage 2 is issuing precharge (thus stage 1 cannot issue precharge) activate_slot_busy = 0; //flag that determines if stage 2 is issuing activate (thus stage 1 cannot issue activate) write_dqs_d = write_calib_dqs; write_dq_d = write_calib_dq; for(index=0; index < (1<>1; //if there is a pending request, issue the appropriate commands if(stage2_pending) begin o_wb_stall_d = o_wb_stall; pipe_stall = o_wb_stall; //right row is already active so go straight to read/write if(bank_status_q[stage2_bank] && bank_active_row_q[stage2_bank] == stage2_row) begin //read/write operation //write request if(stage2_we && delay_before_write_counter_q[stage2_bank] == 0) begin o_wb_stall_d = 0; pipe_stall = 0; //move pipeline forward since write access is already done cmd_odt = 1'b1; //set-up delay before precharge, read, and write 
delay_before_precharge_counter_d[stage2_bank] = WRITE_TO_PRECHARGE_DELAY; for(index=0; index < (1<> 1); end if(shift_reg_read_pipe_q[1]) begin //delay is over and data is now starting to release from iserdes BUT NOT YET ALIGNED index_read_pipe <= !index_read_pipe; //control which delay_read_pipe would get updated (we have 3 pipe to store read data)ss delay_read_pipe[index_read_pipe][added_read_pipe_max] <= 1'b1; //update delay_read_pipe end for(index = 0; index < LANES; index = index + 1) begin if(delay_read_pipe[0][added_read_pipe_max != added_read_pipe[index]]) begin //same lane o_wb_data_q[0][((DQ_BITS*LANES)*0 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*0 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[0][((DQ_BITS*LANES)*1 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*1 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[0][((DQ_BITS*LANES)*2 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*2 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[0][((DQ_BITS*LANES)*3 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*3 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[0][((DQ_BITS*LANES)*4 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*4 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[0][((DQ_BITS*LANES)*5 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*5 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[0][((DQ_BITS*LANES)*6 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*6 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[0][((DQ_BITS*LANES)*7 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*7 + 8*index) +: 8]; //update each lane of the burst end if(delay_read_pipe[1][added_read_pipe_max != added_read_pipe[index]]) begin o_wb_data_q[1][((DQ_BITS*LANES)*0 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*0 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[1][((DQ_BITS*LANES)*1 + 8*index) +: 8] <= 
i_phy_iserdes_data[((DQ_BITS*LANES)*1 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[1][((DQ_BITS*LANES)*2 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*2 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[1][((DQ_BITS*LANES)*3 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*3 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[1][((DQ_BITS*LANES)*4 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*4 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[1][((DQ_BITS*LANES)*5 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*5 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[1][((DQ_BITS*LANES)*6 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*6 + 8*index) +: 8]; //update each lane of the burst o_wb_data_q[1][((DQ_BITS*LANES)*7 + 8*index) +: 8] <= i_phy_iserdes_data[((DQ_BITS*LANES)*7 + 8*index) +: 8]; //update each lane of the burst end if(o_wb_ack_read_q[0]) begin index_wb_data <= !index_wb_data; end /* for(index = 0; index < 15; index = index + 1) begin o_wb_ack_read_q[index] <= o_wb_ack_read_q[index+1]; end */ o_wb_ack_read_q <= o_wb_ack_read_q >> 1; o_wb_ack_read_q[added_read_pipe_max] <= shift_reg_read_pipe_q[0]; end end end assign o_wb_ack = o_wb_ack_read_q[0]; assign o_wb_data = o_wb_data_q[index_wb_data]; assign o_phy_dqs_tri_control = !write_dqs[STAGE2_DATA_DEPTH]; assign o_phy_dq_tri_control = !write_dq[STAGE2_DATA_DEPTH+1]; assign o_phy_toggle_dqs = write_dqs_val[STAGE2_DATA_DEPTH-2]; /*********************************************************************************************************************************************/ /******************************************************* Read/Write Calibration Sequence *******************************************************/ always @(posedge i_controller_clk, negedge i_rst_n) begin if(!i_rst_n) begin state_calibrate <= IDLE; train_delay <= 0; dqs_store <= 0; dqs_count_repeat <= 0; dqs_start_index <= 0; 
dqs_target_index <= 0; dqs_target_index_orig <= 0; o_phy_bitslip <= 0; initial_dqs <= 1; lane <= 0; dqs_bitslip_arrangement <= 0; write_calib_dqs <= 0; write_calib_dq <= 0; write_calib_odt <= 0; prev_write_level_feedback <= 1; write_calib_stb <= 0;//actual request flag write_calib_we <= 0; //write-enable write_calib_col <= 0; write_calib_data <= 0; skip_reset_seq_delay <= 0; read_data_store <= 0; write_pattern <= 0; added_read_pipe_max <= 0; dqs_start_index_stored <= 0; dqs_start_index_repeat <= 0; delay_before_write_level_feedback <= 0; delay_before_read_data <= 0; for(index = 0; index < LANES; index = index + 1) begin added_read_pipe[index] <= 0; data_start_index[index] <= 0; odelay_data_cntvaluein[index] = DATA_INITIAL_ODELAY_TAP; odelay_dqs_cntvaluein[index] = DQS_INITIAL_ODELAY_TAP; idelay_data_cntvaluein[index] = DATA_INITIAL_IDELAY_TAP; idelay_dqs_cntvaluein[index] = DQS_INITIAL_IDELAY_TAP; end end else begin write_calib_stb <= 0;//actual request flag write_calib_we <= 0; //write-enable write_calib_col <= 0; write_calib_data <= 0; skip_reset_seq_delay <= 0; write_calib_dqs <= 0; write_calib_dq <= 0; train_delay <= (train_delay==0)? 0:(train_delay - 1); delay_before_read_data <= (delay_before_read_data == 0)? 0: delay_before_read_data - 1; delay_before_write_level_feedback <= (delay_before_write_level_feedback == 0)? 0: delay_before_write_level_feedback - 1; o_phy_bitslip <= 0; o_phy_odelay_data_ld <= 0; o_phy_odelay_dqs_ld <= 0; o_phy_idelay_data_ld <= 0; o_phy_idelay_dqs_ld <= 0; idelay_data_cntvaluein_prev <= idelay_data_cntvaluein[lane]; // increase cntvalue every load to prepare for possible next load odelay_data_cntvaluein[lane] <= o_phy_odelay_data_ld[lane]? odelay_data_cntvaluein[lane] + 1: odelay_data_cntvaluein[lane]; odelay_dqs_cntvaluein[lane] <= o_phy_odelay_dqs_ld[lane]? odelay_dqs_cntvaluein[lane] + 1: odelay_dqs_cntvaluein[lane]; idelay_data_cntvaluein[lane] <= o_phy_idelay_data_ld[lane]? 
idelay_data_cntvaluein[lane] + 1: idelay_data_cntvaluein[lane]; idelay_dqs_cntvaluein[lane] <= o_phy_idelay_dqs_ld[lane]? idelay_dqs_cntvaluein[lane] + 1: idelay_dqs_cntvaluein[lane]; if(initial_dqs) begin dqs_target_index <= dqs_target_index_value; dq_target_index <= dqs_target_index_value; dqs_target_index_orig <= dqs_target_index_value; end if(idelay_dqs_cntvaluein[lane] == 0) begin //go back to previous odd dqs_target_index <= dqs_target_index_orig - 2; end if(idelay_data_cntvaluein[lane] == 0 && idelay_data_cntvaluein_prev == 31) begin dq_target_index <= dqs_target_index_orig - 2; end // FSM case(state_calibrate) IDLE: if(i_phy_idelayctrl_rdy && instruction_address == 13) begin //we are now inside instruction 15 with maximum delay state_calibrate <= BITSLIP_DQS_TRAIN_1; lane <= 0; o_phy_odelay_data_ld <= {LANES{1'b1}}; o_phy_odelay_dqs_ld <= {LANES{1'b1}}; o_phy_idelay_data_ld <= {LANES{1'b1}}; o_phy_idelay_dqs_ld <= {LANES{1'b1}}; end BITSLIP_DQS_TRAIN_1: if(train_delay == 0) begin /* Bitslip cannot be asserted for two consecutive CLKDIV cycles; Bitslip must be deasserted for at least one CLKDIV cycle between two Bitslip assertions.The user logic should wait for at least two CLKDIV cycles in SDR mode or three CLKDIV cycles in DDR mode, before analyzing the received data pattern and potentially issuing another Bitslip command. If the ISERDESE2 is reset, the Bitslip logic is also reset and returns back to its initial state. 
*/ if(i_phy_iserdes_bitslip_reference[lane*LANES +: 8] == 8'b0111_1000) begin //initial arrangement state_calibrate <= MPR_READ; initial_dqs <= 1; dqs_start_index_repeat <= 0; dqs_start_index_stored <= 0; end else begin o_phy_bitslip[lane] <= 1; train_delay <= 3; end end MPR_READ: begin //align the incoming DQS during reads to the controller clock //skip_reset_seq_delay = 1; //issue_read_command = 1; delay_before_read_data <= READ_DELAY + 1 + 2 + 1 /*- 1*/; ///1=issue command delay (OSERDES delay), 2 = ISERDES delay state_calibrate <= COLLECT_DQS; dqs_count_repeat <= 0; end COLLECT_DQS: if(delay_before_read_data == 0) begin dqs_store <= {i_phy_iserdes_dqs[LANES*lane +: 8], dqs_store[(STORED_DQS_SIZE*8-1):8]}; dqs_count_repeat = dqs_count_repeat + 1; if(dqs_count_repeat == STORED_DQS_SIZE) begin state_calibrate <= ANALYZE_DQS; dqs_start_index_stored <= dqs_start_index; dqs_start_index <= 0; end end ANALYZE_DQS: if(dqs_store[dqs_start_index +: 10] == 10'b01_01_01_01_00) begin dqs_start_index_repeat <= (dqs_start_index == dqs_start_index_stored)? 
dqs_start_index_repeat + 1: 0; //increase dqs_start_index_repeat when index is the same as before if(dqs_start_index_repeat == REPEAT_DQS_ANALYZE) begin //the same index appeared REPEAT_DQS_ANALYZE times in a row, thus can proceed to CALIBRATE_DQS initial_dqs <= 0; dqs_start_index_repeat <= 0; state_calibrate <= CALIBRATE_DQS; end else begin state_calibrate <= MPR_READ; end end else begin dqs_start_index <= dqs_start_index + 1; end CALIBRATE_DQS: if(dqs_start_index_stored == dqs_target_index) begin added_read_pipe[lane] <= dq_target_index[$clog2(STORED_DQS_SIZE*8)-1:3] + (dq_target_index[2:0] >= 5); dqs_bitslip_arrangement <= 16'b0011_1100_0011_1100 >> dq_target_index[2:0]; state_calibrate <= BITSLIP_DQS_TRAIN_2; end else begin o_phy_idelay_data_ld[lane] <= 1; o_phy_idelay_dqs_ld[lane] <= 1; state_calibrate <= MPR_READ; end BITSLIP_DQS_TRAIN_2: if(train_delay == 0) begin //train again the ISERDES to capture the DQ correctly if(i_phy_iserdes_bitslip_reference[lane*LANES +: 8] == dqs_bitslip_arrangement) begin if(lane == LANES - 1) begin skip_reset_seq_delay <= 1; lane <= 0; prev_write_level_feedback <= 1'b1; state_calibrate <= START_WRITE_LEVEL; end else begin lane <= lane + 1; state_calibrate <= BITSLIP_DQS_TRAIN_1; end added_read_pipe_max <= added_read_pipe_max > added_read_pipe[lane]? 
added_read_pipe_max:added_read_pipe[lane]; end else begin o_phy_bitslip[lane] <= 1; train_delay <= 3; end end START_WRITE_LEVEL: if(instruction_address == 17) begin write_calib_dqs <= 1'b1; write_calib_odt <= 1'b1; delay_before_write_level_feedback <= DELAY_BEFORE_WRITE_LEVEL_FEEDBACK; state_calibrate <= WAIT_FOR_FEEDBACK; end WAIT_FOR_FEEDBACK: if(delay_before_write_level_feedback == 0) begin prev_write_level_feedback <= i_phy_iserdes_data[lane<<3]; if({prev_write_level_feedback, i_phy_iserdes_data[lane<<3]} == 2'b01) begin if(lane == LANES - 1) begin write_calib_odt <= 0; skip_reset_seq_delay <= 1; lane <= 0; state_calibrate <= ISSUE_WRITE_1; end else begin lane <= lane + 1; prev_write_level_feedback <= 1'b1; state_calibrate <= START_WRITE_LEVEL; end end else begin o_phy_odelay_data_ld[lane] <= 1; o_phy_odelay_dqs_ld[lane] <= 1; state_calibrate <= START_WRITE_LEVEL; end end ISSUE_WRITE_1: if(instruction_address == 22) begin write_calib_stb <= 1;//actual request flag write_calib_we <= 1; //write-enable write_calib_col <= 0; write_calib_data <= { {LANES{8'h91}}, {LANES{8'h77}}, {LANES{8'h29}}, {LANES{8'h8c}}, {LANES{8'hd0}}, {LANES{8'had}}, {LANES{8'h51}}, {LANES{8'hc1}} }; state_calibrate <= ISSUE_WRITE_2; end ISSUE_WRITE_2: begin write_calib_stb <= 1;//actual request flag write_calib_we <= 1; //write-enable write_calib_col <= 8; write_calib_data <= { {LANES{8'h80}}, {LANES{8'hdb}}, {LANES{8'hcf}}, {LANES{8'hd2}}, {LANES{8'h75}}, {LANES{8'hf1}}, {LANES{8'h2c}}, {LANES{8'h3d}} }; state_calibrate <= ISSUE_READ; end ISSUE_READ: if(!o_wb_stall_d) begin write_calib_stb <= 1;//actual request flag write_calib_we <= 0; //write-enable state_calibrate <= READ_DATA; end READ_DATA: if(o_wb_ack_read_q[0]) begin read_data_store <= o_wb_data; state_calibrate <= ANALYZE_DATA; data_start_index[lane] <= 0; // Possible Patterns (strong autocorrel stat) //0x80dbcfd275f12c3d //0x9177298cd0ad51c1 //0x01b79fa4ebe2587b //0x22ee5319a15aa382 write_pattern <= 
128'h80dbcfd275f12c3d_9177298cd0ad51c1; end //ANALYZE_DATA: if(write_pattern[data_start_index[lane] +: 64] == read_data_store[lane*DQ_BITS*8 +: DQ_BITS*8]) begin ANALYZE_DATA: if(write_pattern[data_start_index[lane] +: 64] == {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }) begin if(lane == LANES - 1) begin state_calibrate <= DONE_CALIBRATE; end else begin lane <= lane + 1; data_start_index[lane+1] <= 0; end end else begin data_start_index[lane] <= data_start_index[lane] + 8; end DONE_CALIBRATE: state_calibrate <= DONE_CALIBRATE; endcase end end assign issue_read_command = (state_calibrate == MPR_READ); assign issue_write_command = 0; assign o_phy_odelay_data_cntvaluein = odelay_data_cntvaluein[lane]; assign o_phy_odelay_dqs_cntvaluein = odelay_dqs_cntvaluein[lane]; assign o_phy_idelay_data_cntvaluein = idelay_data_cntvaluein[lane]; assign o_phy_idelay_dqs_cntvaluein = idelay_dqs_cntvaluein[lane]; assign dqs_target_index_value = dqs_start_index_stored[0]? 
dqs_start_index_stored + 2: dqs_start_index_stored + 1;

    /*********************************************************************************************************************************************/


    /******************************************************* Functions *******************************************************/
    // Convert a time in nanoseconds to a number of controller clock cycles
    // (referenced to CONTROLLER_CLK_PERIOD), rounding up.
    // NOTE: the argument is declared `integer`, so a fractional actual (e.g. 14.5) is converted
    // to an integer at the call, BEFORE the division and $ceil are applied.
    function [DELAY_SLOT_WIDTH - 1:0] ns_to_cycles (input integer ns); //output is set at same length as a MRS command (19 bits) to maximize the time slot
        ns_to_cycles = $rtoi($ceil(ns*1.0/CONTROLLER_CLK_PERIOD)); //Without $rtoi: YOSYS ERROR: Non-constant expression in constant function
    endfunction

    // Convert nCK (number of DDR3 clock cycles) to a number of controller clock cycles
    // (one controller cycle spans serdes_ratio DDR3 cycles), rounding up.
    function [DELAY_SLOT_WIDTH - 1:0] nCK_to_cycles (input integer nCK);
        //Without $rtoi: YOSYS ERROR: syntax error, unexpected TOK_REAL
        nCK_to_cycles = $rtoi($ceil(nCK*1.0/serdes_ratio)) ;
    endfunction

    // Convert a time in nanoseconds to a number of DDR3 clock cycles
    // (referenced to DDR3_CLK_PERIOD), rounding up.
    // NOTE: same integer-argument caveat as ns_to_cycles(): a fractional actual is converted
    // to an integer before the division.
    function [DELAY_SLOT_WIDTH - 1:0] ns_to_nCK (input integer ns);
        ns_to_nCK = $rtoi($ceil(ns*1.0/DDR3_CLK_PERIOD)); //Without $rtoi: YOSYS ERROR: Non-constant expression in constant function
    endfunction

    // Convert nCK (number of DDR3 clock cycles) to nanoseconds
    // (nCK multiplied by DDR3_CLK_PERIOD), rounding up to a whole nanosecond.
    function [DELAY_SLOT_WIDTH - 1:0] nCK_to_ns (input integer nCK);
        nCK_to_ns = $rtoi($ceil(nCK*1.0*DDR3_CLK_PERIOD)); //Without $rtoi: YOSYS ERROR: Non-constant expression in constant function
    endfunction

    // functions used to infer some localparam values
    // Returns the larger of the two integers (returns a when they are equal).
    function integer max(input integer a, input integer b);
        if(a >= b) max = a;
        else max = b;
    endfunction

    //Find the 3-bit value for the Mode Register 0 WR (Write recovery for auto-precharge)
    function[2:0] WRA_mode_register_value(input integer WRA);
        //WR_min (write recovery for autoprecharge) in clock cycles is calculated by dividing tWR(in ns) by tCK(in ns) and rounding up to the next integer.
        //The WR value in the mode register must be programmed to be equal or larger than WRmin.
        //NOTE(review): the lookup is done on WRA+1, i.e. one cycle is added to the argument before
        //it is mapped to an encoding - confirm this extra cycle is intended.
        case(WRA+1)
            1,2,3,4,5: WRA_mode_register_value = 3'b001;
            6: WRA_mode_register_value = 3'b010;
            7: WRA_mode_register_value = 3'b011;
            8: WRA_mode_register_value = 3'b100;
            9,10: WRA_mode_register_value = 3'b101;
            11,12: WRA_mode_register_value = 3'b110;
            13,14: WRA_mode_register_value = 3'b111;
            15,16: WRA_mode_register_value = 3'b000;
            default: begin
                WRA_mode_register_value = 3'b000; //defaulting to largest write recovery cycles: 16 cycles
            end
        endcase
    endfunction

    // Returns the 2-bit command slot (0-3) assigned to the given command.
    // Read and write slots are derived from CL_nCK and CWL_nCK; the activate slot is derived
    // from the later of the two plus tRCD and then moved off any slot already taken; the
    // precharge command gets the first slot (searching 0, 3, 2, 1) not used by the others.
    // For a cmd other than the four listed below, the return value is left unassigned.
    function[1:0] get_slot (input[3:0] cmd); //cmd can either be CMD_PRE,CMD_ACT, CMD_WR, CMD_RD
        integer slot_number;
        integer delay;
        integer read_slot, write_slot, anticipate_activate_slot, anticipate_precharge_slot;
        begin
            // find read command slot number
            // (step the 2-bit slot backwards once per nCK of CAS latency, wrapping modulo 4)
            delay = CL_nCK;
            for(slot_number = 0 ; delay != 0 ; delay = delay - 1) begin
                slot_number[1:0] = slot_number[1:0] - 1'b1;
            end
            read_slot = slot_number[1:0];

            // find write command slot number
            // (same backwards walk, using the CAS write latency)
            delay = CWL_nCK;
            for(slot_number = 0 ; delay != 0; delay = delay - 1) begin
                slot_number[1:0] = slot_number[1:0] - 1'b1;
            end
            write_slot = slot_number[1:0];

            // find anticipate activate command slot number
            // (start from read_slot when CL_nCK > CWL_nCK, otherwise from write_slot, then walk back tRCD)
            if(CL_nCK > CWL_nCK) slot_number = read_slot;
            else slot_number = write_slot;
            delay = ns_to_nCK(tRCD);
            for(slot_number = slot_number; delay != 0; delay = delay - 1) begin
                slot_number[1:0] = slot_number[1:0] - 1'b1;
            end
            anticipate_activate_slot = slot_number[1:0];
            // if computed anticipate_activate_slot is same with either write_slot or read_slot,
            // decrement slot number until it no longer collides with either of them
            while(anticipate_activate_slot[1:0] == write_slot[1:0] || anticipate_activate_slot[1:0] == read_slot[1:0]) begin
                anticipate_activate_slot[1:0] = anticipate_activate_slot[1:0] - 1'b1;
            end

            //the remaining slot will be for precharge command
            anticipate_precharge_slot = 0;
            while(anticipate_precharge_slot == write_slot || anticipate_precharge_slot == read_slot || anticipate_precharge_slot == anticipate_activate_slot) begin
                anticipate_precharge_slot[1:0] = anticipate_precharge_slot[1:0] - 1'b1;
            end

            case(cmd)
                CMD_RD: get_slot = read_slot;
                CMD_WR: get_slot = write_slot;
                CMD_ACT: get_slot = anticipate_activate_slot;
                CMD_PRE: get_slot = anticipate_precharge_slot;
            endcase
        end
    endfunction

    //find the delay to be used by delay_before_xxxx_counter.
    // - delay_nCK = delay required between the two commands in DDR3 clock cycles
    // - start_slot = slot number of the first command
    // - end_slot = slot number of the second command
    // returns the number of controller clock cycles to satisfy the delay required between the two commands
    // (the smallest k for which (4 - start_slot) + end_slot + 4*k is at least delay_nCK)
    function integer find_delay(input integer delay_nCK, input integer start_slot, input integer end_slot);
        integer k; //error: variable declaration assignments are only allowed at the module level
        begin
            k = 0;
            while( ((4 - start_slot) + end_slot + 4*k) < delay_nCK) begin
                k = k + 1;
            end
            find_delay = k;
        end
    endfunction
    /*********************************************************************************************************************************************/


`ifndef YOSYS
    ///YOSYS: System task `$display' called with invalid/unsupported format specifier
    // Simulation-only self-test: prints "<call> = <expected> = <actual>" for the conversion
    // functions. The expected values written in the messages are for the parameter values in use
    // when they were written (presumably CONTROLLER_CLK_PERIOD = 5, DDR3_CLK_PERIOD = 1.25 - confirm).
    initial begin
        $display("Test ns_to_cycles() function:");
        $display("\tns_to_cycles(15) = 3 = %0d [exact]", ns_to_cycles(15) );
        $display("\tns_to_cycles(14.5) = 3 = %0d [round-off]", ns_to_cycles(14.5) );
        $display("\tns_to_cycles(11) = 3 = %0d [round-up]\n", ns_to_cycles(11) );

        // labels below name the function that is actually called
        $display("Test nCK_to_cycles() function:");
        $display("\tnCK_to_cycles(16) = 4 = %0d [exact]", nCK_to_cycles(16) );
        $display("\tnCK_to_cycles(15) = 4 = %0d [round-off]", nCK_to_cycles(15) );
        $display("\tnCK_to_cycles(13) = 4 = %0d [round-up]\n", nCK_to_cycles(13) );

        $display("Test ns_to_nCK() function:");
        $display("\tns_to_nCK(15) = 12 = %0d [exact]", ns_to_nCK(15) );
        $display("\tns_to_nCK(14.875) = 12 = %0d [round-off]", ns_to_nCK(14.875) );
$display("\tns_to_nCK(13.875) = 12 = %0d [round-up]", ns_to_nCK(13.875) );
        $display("\tns_to_nCK(tRCD) = 11 = %0d [WRONG]", ns_to_nCK(tRCD));
        $display("\ttRTP = 7.5 = %f ", tRTP);
        // ns_to_nCK() returns an integer vector, so it is printed with %0d rather than %f
        $display("\tns_to_nCK(tRTP) = 6 = %0d [WRONG]\n", ns_to_nCK(tRTP) );

        // labels name the function and argument actually used in each call
        // (this group was previously printed twice; the verbatim duplicate is removed)
        $display("Test nCK_to_ns() function:");
        $display("\tnCK_to_ns(4) = 5 = %0d [exact]", nCK_to_ns(4) );
        $display("\tnCK_to_ns(3) = 4 = %0d [round-off]", nCK_to_ns(3) );
        $display("\tnCK_to_ns(5) = 7 = %0d [round-up]\n", nCK_to_ns(5) );

        // the operands are integer literals, so the division is an integer division
        // performed before $floor() is applied
        $display("Test $floor() function:");
        $display("\t$floor(5/2) = 2.5 = %0d", $floor(5/2) );
        $display("\t$floor(9/4) = 2.25 = %0d", $floor(9/4) );
        $display("\t$floor(8/4) = 2 = %0d", $floor(8/4) );
        $display("\t$floor(9/5) = 1.8 = %0d\n", $floor(9/5) );

        // derived widths and parameters
        $display("\nDELAY_COUNTER_WIDTH = %0d", DELAY_COUNTER_WIDTH);
        $display("DELAY_SLOT_WIDTH = %0d", DELAY_SLOT_WIDTH);
        //$display("$bits(instruction):%0d - $bits(CMD_MRS):%0d - $bits(MR0):%0d = 5 = %0d", $bits(instruction), $bits(CMD_MRS) , $bits(MR0), ($bits(instruction) - $bits(CMD_MRS) - $bits(MR0)));
        $display("serdes_ratio = %0d",serdes_ratio);
        $display("wb_addr_bits = %0d",wb_addr_bits);
        $display("wb_data_bits = %0d",wb_data_bits);
        $display("wb_sel_bits = %0d\n\n",wb_sel_bits);
        //$display("request_row_width = %0d = %0d", ROW_BITS, $bits(i_wb_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) ]));
        //$display("request_col_width = %0d = %0d", COL_BITS, $bits({ i_wb_addr[(COL_BITS- $clog2(serdes_ratio*2)-1):0], {{$clog2(serdes_ratio*2)}{1'b0}} }));
        //$display("request_bank_width = %0d = %0d", BA_BITS, $bits(i_wb_addr[(BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2))]));
$display("READ_SLOT = %0d", READ_SLOT);
        $display("WRITE_SLOT = %0d", WRITE_SLOT);
        $display("ACTIVATE_SLOT = %0d", ACTIVATE_SLOT);
        $display("PRECHARGE_SLOT = %0d", PRECHARGE_SLOT);

        // intermediate timing terms, in DDR3 clock cycles
        $display("\n\nDELAYS:");
        $display("\tns_to_nCK(tRCD): %0d", ns_to_nCK(tRCD));
        $display("\tns_to_nCK(tRP): %0d", ns_to_nCK(tRP));
        $display("\tns_to_nCK(tRTP): %0d", ns_to_nCK(tRTP));
        $display("\ttCCD: %0d", tCCD);
        $display("\t(CL_nCK + tCCD + 3'd2 - CWL_nCK): %0d", (CL_nCK + tCCD + 3'd2 - CWL_nCK));
        $display("\t(CWL_nCK + 3'd4 + ns_to_nCK(tWR)): %0d", (CWL_nCK + 3'd4 + ns_to_nCK(tWR)));
        $display("\t(CWL_nCK + 3'd4 + ns_to_nCK(tWTR)): %0d", (CWL_nCK + 3'd4 + ns_to_nCK(tWTR)));
        $display("\t$signed(4'b1100)>>>4: %b", $signed(4'b1100) >>> 4);

        // command-to-command delays; the number written in each message is the value the author expected
        $display("\n\nPRECHARGE_TO_ACTIVATE_DELAY = 3 = %0d", PRECHARGE_TO_ACTIVATE_DELAY);
        $display("ACTIVATE_TO_WRITE_DELAY = 3 = %0d", ACTIVATE_TO_WRITE_DELAY);
        $display("ACTIVATE_TO_READ_DELAY = 2 = %0d", ACTIVATE_TO_READ_DELAY);
        $display("READ_TO_WRITE_DELAY = 2 = %0d", READ_TO_WRITE_DELAY);
        $display("READ_TO_READ_DELAY = 0 = %0d", READ_TO_READ_DELAY);
        $display("READ_TO_PRECHARGE_DELAY = 1 =%0d", READ_TO_PRECHARGE_DELAY);
        $display("WRITE_TO_WRITE_DELAY = 0 = %0d", WRITE_TO_WRITE_DELAY);
        $display("WRITE_TO_READ_DELAY = 4 = %0d", WRITE_TO_READ_DELAY);
        $display("WRITE_TO_PRECHARGE_DELAY = 5 = %0d", WRITE_TO_PRECHARGE_DELAY);
    end
`endif

`ifdef FORMAL
    // the proof starts with reset asserted (i_rst_n low in the initial state)
    initial assume(!i_rst_n);

    // static sanity checks on the mode-register words and on field widths
    always @* begin
        //assert(tMOD + tZQinit > nCK_to_cycles(tDLLK)); //Initialization sequence requires that tDLLK is satisfied after MRS to mode register 0 and ZQ calibration
        assert(MR0[18] != 1'b1); //bit 18 of every Mode Register word must never be one
        assert(MR1_WL_EN[18] != 1'b1); //(as this is used for A10-AP control for non-MRS
        assert(MR1_WL_DIS[18] != 1'b1); //(as this is used for A10-AP control for non-MRS
        assert(MR2[18] != 1'b1); //commands in the reset sequence)
        assert(MR3_MPR_EN[18] != 1'b1);
        assert(MR3_MPR_DIS[18] != 1'b1);
        assert(DELAY_COUNTER_WIDTH <= $bits(MR0)); //bitwidth of mode register should be enough for the delay counter
        assert(($bits(instruction) - $bits(CMD_MRS) - $bits(MR0)) == 5 ); //sanity checking to ensure 5 bits is allotted for extra instruction {reset_finished, use_timer , stay_command , cke , reset_n }
        assert(DELAY_SLOT_WIDTH >= DELAY_COUNTER_WIDTH); //width occupied by delay timer slot on the reset rom must be able to occupy the maximum possible delay value on the reset sequence
    end

    // f_past_valid is 0 only in the first clock cycle; it guards assertions that use $past()
    reg f_past_valid = 0;
    always @(posedge i_controller_clk) f_past_valid <= 1;


    //The idea below is sourced from https://zipcpu.com/formal/2019/11/18/genuctrlr.html
    //We will form a packet of information describing each instruction as it goes through the pipeline and make assertions along the way.
    //2-stage Pipeline: f_addr (update address) -> f_read (read instruction from rom)
    reg[$bits(instruction_address) - 1: 0] f_addr = 0, f_read = 0 ;
    reg[$bits(instruction) - 1:0] f_read_inst = INITIAL_RESET_INSTRUCTION;

    //pipeline stage logic: f_addr (update address) -> f_read (read instruction from rom)
    always @(posedge i_controller_clk, negedge i_rst_n) begin
        if(!i_rst_n) begin
            f_addr <= 0;
            f_read <= 0;
        end
        else if((delay_counter == 1 || !instruction[USE_TIMER]) /*&& !reset_done*/ )begin //move the pipeline forward when counter is about to go zero and we are not yet at end of reset sequence
            f_addr <= (f_addr == 15)? 12:f_addr + 1; //address 15 wraps back to address 12
            f_read <= f_addr;
        end
    end

    // assert f_addr and f_read as shadows of next and current instruction address
    always @* begin
        assert(f_addr == instruction_address); //f_addr is the shadow of instruction_address (thus f_addr is the address of NEXT instruction)
        f_read_inst = read_rom_instruction(f_read); //f_read is the address of CURRENT instruction
        assert(f_read_inst == read_rom_instruction(f_read)); // needed for induction to make sure the engine will not create his own instruction
        if(f_addr == 0) begin
            f_read_inst = INITIAL_RESET_INSTRUCTION; //will only happen at the very start: f_addr (0) -> f_read (0) where we are reading the initial reset instruction and not the rom
        end
        assert(f_read_inst == instruction); // f_read_inst is the shadow of current instruction
    end

    // main assertions for the reset sequence
    always @(posedge i_controller_clk) begin
        // while in reset, or in the first cycle after reset: everything must be at its initial value
        if(!i_rst_n || !$past(i_rst_n)) begin
            assert(f_addr == 0);
            assert(f_read == 0);
            assert(instruction_address == 0);
            assert(delay_counter == (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0]));
            assert(delay_counter_is_zero == (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0] == 0));
        end
        else if(f_past_valid) begin
            //if counter is zero previously and current instruction needs timer delay, then this cycle should now have the new updated counter value
            if( $past(delay_counter_is_zero) && $past(f_read_inst[USE_TIMER]) /*&& !$past(reset_done)*/)
            `ifndef FORMAL_COVER
                assert(delay_counter == (f_read_inst[DELAY_COUNTER_WIDTH - 1:0]));
            `else
                //use fixed low value delay to cover the whole reset sequence using formal verification
                if(instruction[DELAY_COUNTER_WIDTH - 1:0] > `COVER_DELAY) assert(delay_counter == `COVER_DELAY);
                //use delay from rom if that is smaller than the COVER_DELAY macro
                else assert(delay_counter == f_read_inst[DELAY_COUNTER_WIDTH - 1:0]);
            `endif

            //delay_counter_is_zero can be high when counter is zero and current instruction needs delay
            if($past(f_read_inst[USE_TIMER]) /*&& !$past(reset_done)*/)
assert( delay_counter_is_zero == (delay_counter == 0) );
            //delay_counter_is_zero will go high this cycle when we received a don't-use-timer instruction
            else if(!$past(f_read_inst[USE_TIMER]) /*&& !$past(reset_done)*/) assert(delay_counter_is_zero);

            //we are on the middle of a delay thus all values must remain constant while only delay_counter changes (decrement)
            if(!delay_counter_is_zero) begin
                assert(f_addr == $past(f_addr));
                assert(f_read == $past(f_read));
                assert(f_read_inst == $past(f_read_inst));
            end

            //if delay is not yet zero and timer delay is enabled, then delay_counter should decrement
            if(!$past(delay_counter_is_zero) && $past(f_read_inst[USE_TIMER])) begin
                assert(delay_counter == $past(delay_counter) - 1);
                assert(delay_counter < $past(delay_counter) ); //just to make sure delay_counter will never overflow back to all 1's
            end

            //sanity checking for the comment "delay_counter will be zero AT NEXT CLOCK CYCLE when counter is now one"
            if($past(delay_counter) == 1) begin
                assert(delay_counter == 0 && delay_counter_is_zero);
            end

            //assert the relationship between the stages FOR RESET SEQUENCE
            if(!reset_done) begin
                if(f_addr == 0) begin
                    assert(f_read == 0); //will only happen at the very start: f_addr (0) -> f_read (0)
                end
                else if(f_read == 0) begin
                    assert(f_addr <= 1); //will only happen at the very first two cycles: f_addr (1) -> f_read (0) or f_addr (0) -> f_read (0)
                end
                //else if($past(reset_done)) assert(f_read == $past(f_read)); //reset instruction does not repeat after reaching end address thus it must saturate when pipeline reaches end
                else begin
                    assert(f_read + 1 == f_addr); //address increments continuously
                end
                assert($past(f_read) <= 14); //only instruction address 0-to-13 is for reset sequence (reset_done is asserted at address 14)
            end

            //assert the relationship between the stages FOR REFRESH SEQUENCE
            else begin
                if(f_read == 15) assert(f_addr == 12); //if current instruction is 15, then next instruction must be at 12 (instruction address wraps from 15 to 12)
                else if(f_addr == 12) assert(f_read == 15); //if next instruction is at 12, then current instruction must be at 15 (instruction address wraps from 15 to 12)
                else assert(f_read + 1 == f_addr); //if there is no need to wrap around, then instruction address must increment
                assert((f_read >= 12 && f_read <= 15) ); //refresh sequence is only on instruction address 12, 13, 14, and 15
            end

            // reset_done must retain high when it was already asserted once
            if($past(reset_done)) assert(reset_done);

            // reset is already done at address 14 and up
            if($past(f_read) >= 14 ) assert(reset_done);

            //if reset is done, the REF_IDLE must only be high at instruction address 14 (on the middle of tREFI)
            if(reset_done && f_read_inst[REF_IDLE]) assert(f_read == 14);
        end
    end

    // assertions on the instructions stored on the rom
    (*anyconst*) reg[$bits(instruction_address) - 1: 0] f_const_addr;
    wire[$bits(instruction) - 1:0] a= read_rom_instruction(f_const_addr); //retrieve an instruction based on engine's choice
    always @* begin
        //there MUST BE no instruction which USE_TIMER is high but delay is zero since it can cause the logic to lock-up (delay must be at least 1)
        if(a[USE_TIMER]) begin
            assert( a[DELAY_COUNTER_WIDTH - 1:0] > 0);
        end
    end

    //cover statements
    `ifdef FORMAL_COVER
        reg[3:0] f_count_refreshes = 0; //count how many refresh cycles had already passed
        always @(posedge i_controller_clk) begin
            // non-blocking: this register is read by the cover block below on the same clock edge,
            // so a blocking assignment here would make the value seen there order-dependent
            if($past(f_read) == 15 && f_read == 12) f_count_refreshes <= f_count_refreshes + 1; //every time address wrap around refresh is completed
        end
        always @(posedge i_controller_clk) begin
            cover(f_count_refreshes == 5);
            //cover($past(instruction[RST_DONE]) && !instruction[RST_DONE] && i_rst_n); //MUST FAIL: find an instance where RST_DONE will go low after it already goes high (except when i_rst_n is activated)
        end
    `endif

    always @* begin
        //make sure each command has distinct slot number (except for read/write which can have the same or different slot number)
        //Each pair is compared on its own: a chained "a != b != c" is parsed as "(a != b) != c",
        //which compares a 1-bit result against a slot number and does not test pairwise distinctness.
        assert(WRITE_SLOT != ACTIVATE_SLOT);
        assert(WRITE_SLOT != PRECHARGE_SLOT);
        assert(READ_SLOT != ACTIVATE_SLOT);
        assert(READ_SLOT != PRECHARGE_SLOT);
        assert(ACTIVATE_SLOT != PRECHARGE_SLOT);
        //make sure slot number for read command is correct
    end

    //create a formal assertion that says during refresh ack should be low always
    //make an assertion that there will be no request pending before actual refresh starts at instruction 4'd12

    // Scripted Wishbone stimulus: each entry is {we (bit 24), address (bits 23:0)}.
    // The address is written as {14-bit, 3-bit, 7-bit} fields - presumably {row, bank, column word}; confirm against the address decode.
    // Only the entries assigned in the initial block below are initialized.
    reg[24:0] f_wb_inputs[31:0];
    reg[4:0] f_index = 0;          //index of the request currently presented on the bus
    reg[5:0] f_counter = 0;        //number of consecutive cycles the current request has been stalled
    reg[9:0] f_reset_counter = 0;

    initial begin
        /*
        f_wb_inputs[0] = {1'b0, {14'd0,3'd1, 7'd0}}; //read
        f_wb_inputs[1] = {1'b0, {14'd0,3'd1, 7'd1}}; //read on same bank (tCCD)
        f_wb_inputs[2] = {1'b1, {14'd0,3'd1, 7'd2}}; //write on same bank (tRTW)
        f_wb_inputs[3] = {1'b1, {14'd0,3'd1, 7'd3}}; //write on same bank (tCCD)
        f_wb_inputs[4] = {1'b0, {14'd0,3'd2, 7'd0}}; //read on different bank
        f_wb_inputs[5] = {1'b1, {14'd0,3'd2, 7'd1}}; //write on same bank (tRTW)
        f_wb_inputs[6] = {1'b1, {14'd0,3'd1, 7'd4}}; //write on different bank (already activated)
        f_wb_inputs[7] = {1'b1, {14'd0,3'd1, 7'd5}}; //write (tCCD)
        f_wb_inputs[8] = {1'b1, {14'd1,3'd2, 7'd0}}; //write on different bank (already activated but wrong row)
        f_wb_inputs[9] = {1'b1, {14'd1,3'd2, 7'd1}}; //write (tCCD)
        f_wb_inputs[10] = {1'b1, {14'd1,3'd2, 7'd2}}; //write (tCCD)
        f_wb_inputs[11] = {1'b0, {14'd2,3'd2, 7'd0}}; //read (same bank but wrong row so precharge first)
        f_wb_inputs[12] = {1'b0, {14'd2,3'd2, 7'd1}}; //read (tCCD)
        f_wb_inputs[13] = {1'b0, {14'd2,3'd2, 7'd2}}; //read (tCCD)
        */
        /*
        f_wb_inputs[0] = {1'b0, {14'd0,3'd1, 7'd0}}; //read
        f_wb_inputs[1] = {1'b0, {14'd0,3'd1, 7'd1}}; //read on same bank (tCCD)
        f_wb_inputs[2] = {1'b1, {14'd0,3'd2, 7'd0}}; //write on the anticipated bank
        f_wb_inputs[3] = {1'b1, {14'd0,3'd2, 7'd1}}; //write on same bank (tCCD)
        f_wb_inputs[4] = {1'b0, {14'd0,3'd3, 7'd0}}; //read on the anticipated bank
        f_wb_inputs[5] = {1'b0, {14'd0,3'd3, 7'd1}}; //read on same bank (tCCD)
        f_wb_inputs[6] = {1'b1, {14'd0,3'd7, 7'd0}}; //write on the un-anticipated idle bank (activate first)
        f_wb_inputs[7] = {1'b1, {14'd0,3'd1, 7'd1}}; //write on the un-anticipated active bank and row (write)
        f_wb_inputs[8] = {1'b1, {14'd1,3'd7, 7'd0}}; //write on the un-anticipated active bank but wrong row (precharge first)
        */
        /*
        f_wb_inputs[0] = {1'b0, {14'd0,3'd1, 7'd0}}; //read
        f_wb_inputs[1] = {1'b0, {14'd0,3'd1, 7'd1}}; //read
        f_wb_inputs[2] = {1'b0, {14'd0,3'd1, 7'd2}}; //read
        f_wb_inputs[3] = {1'b0, {14'd0,3'd1, 7'd3}}; //read
        f_wb_inputs[4] = {1'b0, {14'd0,3'd1, 7'd4}}; //read
        f_wb_inputs[5] = {1'b0, {14'd0,3'd1, 7'd5}}; //read
        f_wb_inputs[6] = {1'b0, {14'd0,3'd1, 7'd6}}; //write
        f_wb_inputs[7] = {1'b0, {14'd0,3'd1, 7'd7}}; //write
        f_wb_inputs[8] = {1'b0, {14'd0,3'd1, 7'd8}}; //write
        f_wb_inputs[9] = {1'b0, {14'd0,3'd1, 7'd9}}; //write
        f_wb_inputs[10] = {1'b0, {14'd0,3'd1, 7'd10}}; //write
        f_wb_inputs[11] = {1'b0, {14'd0,3'd1, 7'd11}}; //write
        */
        // Active scenario: every entry has we = 0 (a read). Entries 0-7 step the lowest field
        // from 120 to 127 with the middle field at 1; entries 8-10 continue with the middle
        // field at 2 and the lowest field restarting at 0.
        f_wb_inputs[0] = {1'b0, {14'd1,3'd1, 7'd120}}; //read
        f_wb_inputs[1] = {1'b0, {14'd1,3'd1, 7'd121}}; //read
        f_wb_inputs[2] = {1'b0, {14'd1,3'd1, 7'd122}}; //read
        f_wb_inputs[3] = {1'b0, {14'd1,3'd1, 7'd123}}; //read
        f_wb_inputs[4] = {1'b0, {14'd1,3'd1, 7'd124}}; //read
        f_wb_inputs[5] = {1'b0, {14'd1,3'd1, 7'd125}}; //read
        f_wb_inputs[6] = {1'b0, {14'd1,3'd1, 7'd126}}; //read
        f_wb_inputs[7] = {1'b0, {14'd1,3'd1, 7'd127}}; //read
        f_wb_inputs[8] = {1'b0, {14'd1,3'd2, 7'd0}}; //read (middle field changes from 1 to 2)
        f_wb_inputs[9] = {1'b0, {14'd1,3'd2, 7'd1}}; //read
        f_wb_inputs[10] = {1'b0, {14'd1,3'd2, 7'd2}}; //read
    end

    always @(posedge i_controller_clk) begin
        // advance to the next scripted request whenever the controller is not stalling
        if(!o_wb_stall) begin
            f_index <= f_index + 1;
            f_counter <= 0;
        end
        else begin
            f_counter <= f_counter + 1;
        end

        // non-blocking assignments, like every other register updated in this clocked block
        if(o_wb_stall && i_rst_n) begin
            f_reset_counter <= f_reset_counter + 1;
        end
        else f_reset_counter <= 10;
    end

    always @* begin
        // a request is always being presented, taken from the scripted table
        assume(i_wb_cyc == 1);
        assume(i_wb_stb == 1);
        if(f_past_valid) assume(i_rst_n); //reset is released after the first clock cycle
        assume(i_wb_we == f_wb_inputs[f_index][24]);
        assume(i_wb_addr == f_wb_inputs[f_index][23:0]);
        cover(f_index == 12);
        //cover(f_reset_counter == 10);
    end
`endif
endmodule