diff --git a/.gitignore b/.gitignore index 1d106e5..2d86c08 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,8 @@ example_demo/build_logs* testbench/ddr3_dimm_micron_sim_behav.wcfg testbench/icarus_sim/*.log testbench/icarus_sim/uberddr3_sim +*jobid +.caas* # But do not ignore testbench/xsim/test_*.log !testbench/xsim/test_*.log diff --git a/example_demo/alinx_ax7103b/caas.conf b/example_demo/alinx_ax7103b/caas.conf new file mode 100644 index 0000000..940110f --- /dev/null +++ b/example_demo/alinx_ax7103b/caas.conf @@ -0,0 +1,11 @@ +[project] +Backend = openxc7 +Part = xc7a100tfgg484-2 +Top = ax7103_ddr3 +Constraint = ax7103_ddr3.xdc +Sources = *.v + +[caas] +Server = https://caas.symbioticeda.com:18888/ + + diff --git a/example_demo/alinx_ax7103b/ddr3_controller.v b/example_demo/alinx_ax7103b/ddr3_controller.v new file mode 120000 index 0000000..abadf6a --- /dev/null +++ b/example_demo/alinx_ax7103b/ddr3_controller.v @@ -0,0 +1 @@ +../../rtl/ddr3_controller.v \ No newline at end of file diff --git a/example_demo/alinx_ax7103b/ddr3_phy.v b/example_demo/alinx_ax7103b/ddr3_phy.v new file mode 120000 index 0000000..6fd8666 --- /dev/null +++ b/example_demo/alinx_ax7103b/ddr3_phy.v @@ -0,0 +1 @@ +../../rtl/ddr3_phy.v \ No newline at end of file diff --git a/example_demo/alinx_ax7103b/ddr3_top.v b/example_demo/alinx_ax7103b/ddr3_top.v new file mode 120000 index 0000000..5595b49 --- /dev/null +++ b/example_demo/alinx_ax7103b/ddr3_top.v @@ -0,0 +1 @@ +../../rtl/ddr3_top.v \ No newline at end of file diff --git a/example_demo/alinx_ax7325b/ax7325b_ddr3.v b/example_demo/alinx_ax7325b/ax7325b_ddr3.v index aee1839..e53de04 100644 --- a/example_demo/alinx_ax7325b/ax7325b_ddr3.v +++ b/example_demo/alinx_ax7325b/ax7325b_ddr3.v @@ -164,20 +164,22 @@ // DDR3 Controller ddr3_top #( - .CONTROLLER_CLK_PERIOD(12_000), //ps, clock period of the controller interface - .DDR3_CLK_PERIOD(3_000), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD) + 
.CONTROLLER_CLK_PERIOD(10_000), //ps, clock period of the controller interface + .DDR3_CLK_PERIOD(2_500), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD) .ROW_BITS(15), //width of row address .COL_BITS(10), //width of column address .BA_BITS(3), //width of bank address - .BYTE_LANES(8), //number of DDR3 modules to be controlled - .AUX_WIDTH(16), //width of aux line (must be >= 4) + .BYTE_LANES(2), //number of DDR3 modules to be controlled + .AUX_WIDTH(4), //width of aux line (must be >= 4) .WB2_ADDR_BITS(32), //width of 2nd wishbone address bus .WB2_DATA_BITS(32), //width of 2nd wishbone data bus .MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) .ODELAY_SUPPORTED(1), //set to 1 when ODELAYE2 is supported .SECOND_WISHBONE(0), //set to 1 if 2nd wishbone is needed .ECC_ENABLE(0), // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC ) - .WB_ERROR(0) // set to 1 to support Wishbone error (asserts at ECC double bit error) + .WB_ERROR(0), // set to 1 to support Wishbone error (asserts at ECC double bit error) + .BIST_MODE(1), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w) + .SPEED_BIN(1) // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11) ) ddr3_top ( //clock and reset @@ -216,7 +218,7 @@ .o_ddr3_clk_n(ddr3_ck_n), .o_ddr3_reset_n(ddr3_reset_n), .o_ddr3_cke(ddr3_cke), // CKE - .o_ddr3_cs_n(ddr3_cs_n[0]), // chip select signal (controls rank 1 only) + .o_ddr3_cs_n(ddr3_cs_n), // chip select signal (controls rank 1 only) .o_ddr3_ras_n(ddr3_ras_n), // RAS# .o_ddr3_cas_n(ddr3_cas_n), // CAS# .o_ddr3_we_n(ddr3_we_n), // WE# diff --git a/example_demo/alinx_ax7325b/ax7325b_ddr3.xdc b/example_demo/alinx_ax7325b/ax7325b_ddr3.xdc index af4fa0a..e7f67aa 100644 --- 
a/example_demo/alinx_ax7325b/ax7325b_ddr3.xdc +++ b/example_demo/alinx_ax7325b/ax7325b_ddr3.xdc @@ -632,25 +632,25 @@ set_property PACKAGE_PIN AD4 [get_ports {ddr3_dm[7]}] # PadFunction: IO_L15P_T2_DQS_32 set_property VCCAUX_IO HIGH [get_ports {ddr3_dqs_p[0]}] set_property SLEW FAST [get_ports {ddr3_dqs_p[0]}] -set_property IOSTANDARD SSTL15 [get_ports {ddr3_dqs_p[0]}] +set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddr3_dqs_p[0]}] set_property PACKAGE_PIN Y19 [get_ports {ddr3_dqs_p[0]}] # PadFunction: IO_L15N_T2_DQS_32 set_property VCCAUX_IO HIGH [get_ports {ddr3_dqs_n[0]}] set_property SLEW FAST [get_ports {ddr3_dqs_n[0]}] -set_property IOSTANDARD SSTL15 [get_ports {ddr3_dqs_n[0]}] +set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddr3_dqs_n[0]}] set_property PACKAGE_PIN Y18 [get_ports {ddr3_dqs_n[0]}] # PadFunction: IO_L9P_T1_DQS_32 set_property VCCAUX_IO HIGH [get_ports {ddr3_dqs_p[1]}] set_property SLEW FAST [get_ports {ddr3_dqs_p[1]}] -set_property IOSTANDARD SSTL15 [get_ports {ddr3_dqs_p[1]}] +set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddr3_dqs_p[1]}] set_property PACKAGE_PIN AJ18 [get_ports {ddr3_dqs_p[1]}] # PadFunction: IO_L9N_T1_DQS_32 set_property VCCAUX_IO HIGH [get_ports {ddr3_dqs_n[1]}] set_property SLEW FAST [get_ports {ddr3_dqs_n[1]}] -set_property IOSTANDARD SSTL15 [get_ports {ddr3_dqs_n[1]}] +set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddr3_dqs_n[1]}] set_property PACKAGE_PIN AK18 [get_ports {ddr3_dqs_n[1]}] # PadFunction: IO_L3P_T0_DQS_32 diff --git a/example_demo/alinx_ax7325b/ax7325b_ddr3_openxc7.bit b/example_demo/alinx_ax7325b/ax7325b_ddr3_openxc7.bit index 4ecf631..eba2433 100644 Binary files a/example_demo/alinx_ax7325b/ax7325b_ddr3_openxc7.bit and b/example_demo/alinx_ax7325b/ax7325b_ddr3_openxc7.bit differ diff --git a/example_demo/alinx_ax7325b/caas.conf b/example_demo/alinx_ax7325b/caas.conf new file mode 100644 index 0000000..1450f8d --- /dev/null +++ b/example_demo/alinx_ax7325b/caas.conf @@ -0,0 +1,11 @@ +[project] +Backend 
= openxc7 +Part = xc7k325tffg900-2 +Top = ax7325b_ddr3 +Constraint = ax7325b_ddr3.xdc +Sources = *.v + +[caas] +Server = https://caas.symbioticeda.com:18888/ + + diff --git a/example_demo/alinx_ax7325b/clk_wiz.v b/example_demo/alinx_ax7325b/clk_wiz.v index d7efd42..ac6519a 100644 --- a/example_demo/alinx_ax7325b/clk_wiz.v +++ b/example_demo/alinx_ax7325b/clk_wiz.v @@ -23,18 +23,18 @@ module clk_wiz .COMPENSATION ("INTERNAL"), .STARTUP_WAIT ("FALSE"), .DIVCLK_DIVIDE (1), - .CLKFBOUT_MULT (5), // 200 MHz * 5 = 1000 MHz + .CLKFBOUT_MULT (8), // 200 MHz * 8 = 1600 MHz .CLKFBOUT_PHASE (0.000), - .CLKOUT0_DIVIDE (12), // 1000 MHz / 12 = 83.333 MHz + .CLKOUT0_DIVIDE (12), // 1600 MHz / 12 = 133.333 MHz .CLKOUT0_PHASE (0.000), .CLKOUT0_DUTY_CYCLE (0.500), - .CLKOUT1_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 0 phase + .CLKOUT1_DIVIDE (3), // 1600 MHz / 3 = 533.333 MHz, 0 phase .CLKOUT1_PHASE (0.000), .CLKOUT1_DUTY_CYCLE (0.500), - .CLKOUT2_DIVIDE (5), // 1000 MHz / 5 = 200 MHz + .CLKOUT2_DIVIDE (8), // 1600 MHz / 8 = 200 MHz .CLKOUT2_PHASE (0.000), .CLKOUT2_DUTY_CYCLE (0.500), - .CLKOUT3_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 90 phase + .CLKOUT3_DIVIDE (3), // 1600 MHz / 3 = 533.333 MHz, 90 phase .CLKOUT3_PHASE (90.000), .CLKOUT3_DUTY_CYCLE (0.500), .CLKIN1_PERIOD (5.000) // 200 MHz input diff --git a/example_demo/alinx_ax7325b/ddr3_controller.v b/example_demo/alinx_ax7325b/ddr3_controller.v new file mode 120000 index 0000000..abadf6a --- /dev/null +++ b/example_demo/alinx_ax7325b/ddr3_controller.v @@ -0,0 +1 @@ +../../rtl/ddr3_controller.v \ No newline at end of file diff --git a/example_demo/alinx_ax7325b/ddr3_phy.v b/example_demo/alinx_ax7325b/ddr3_phy.v new file mode 120000 index 0000000..6fd8666 --- /dev/null +++ b/example_demo/alinx_ax7325b/ddr3_phy.v @@ -0,0 +1 @@ +../../rtl/ddr3_phy.v \ No newline at end of file diff --git a/example_demo/alinx_ax7325b/ddr3_top.v b/example_demo/alinx_ax7325b/ddr3_top.v new file mode 120000 index 0000000..5595b49 --- /dev/null 
+++ b/example_demo/alinx_ax7325b/ddr3_top.v @@ -0,0 +1 @@ +../../rtl/ddr3_top.v \ No newline at end of file diff --git a/example_demo/arty_s7/arty_ddr3.v b/example_demo/arty_s7/arty_ddr3.v index 3d6e2d3..e6cba3a 100644 --- a/example_demo/arty_s7/arty_ddr3.v +++ b/example_demo/arty_s7/arty_ddr3.v @@ -180,14 +180,16 @@ .COL_BITS(10), //width of column address .BA_BITS(3), //width of bank address .BYTE_LANES(2), //number of DDR3 modules to be controlled - .AUX_WIDTH(16), //width of aux line (must be >= 4) + .AUX_WIDTH(4), //width of aux line (must be >= 4) .WB2_ADDR_BITS(32), //width of 2nd wishbone address bus .WB2_DATA_BITS(32), //width of 2nd wishbone data bus .MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) .ODELAY_SUPPORTED(0), //set to 1 when ODELAYE2 is supported .SECOND_WISHBONE(0), //set to 1 if 2nd wishbone is needed .ECC_ENABLE(0), // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC ) - .WB_ERROR(0) // set to 1 to support Wishbone error (asserts at ECC double bit error) + .WB_ERROR(0), // set to 1 to support Wishbone error (asserts at ECC double bit error) + .BIST_MODE(1), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w) + .SPEED_BIN(1) // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11) ) ddr3_top ( //clock and reset diff --git a/example_demo/arty_s7/arty_ddr3_openxc7.bit b/example_demo/arty_s7/arty_ddr3_openxc7.bit index 0934e48..eef956d 100644 Binary files a/example_demo/arty_s7/arty_ddr3_openxc7.bit and b/example_demo/arty_s7/arty_ddr3_openxc7.bit differ diff --git a/example_demo/arty_s7/caas.conf b/example_demo/arty_s7/caas.conf new file mode 100644 index 0000000..02fc88e --- /dev/null +++ b/example_demo/arty_s7/caas.conf @@ -0,0 +1,11 @@ +[project] +Backend = openxc7 +Part = 
xc7s50csga324-1 +Top = arty_ddr3 +Constraint = arty_ddr3.xdc +Sources = *.v + +[caas] +Server = https://caas.symbioticeda.com:18888/ + + diff --git a/example_demo/arty_s7/clk_wiz.v b/example_demo/arty_s7/clk_wiz.v index 7ce38cc..2b18760 100644 --- a/example_demo/arty_s7/clk_wiz.v +++ b/example_demo/arty_s7/clk_wiz.v @@ -22,18 +22,18 @@ module clk_wiz .COMPENSATION ("INTERNAL"), .STARTUP_WAIT ("FALSE"), .DIVCLK_DIVIDE (1), - .CLKFBOUT_MULT (10), // 100 MHz * 10 = 1000 MHz + .CLKFBOUT_MULT (12), // 100 MHz * 12 = 1200 MHz .CLKFBOUT_PHASE (0.000), - .CLKOUT0_DIVIDE (12), // 1000 MHz / 12 = 83.333 MHz + .CLKOUT0_DIVIDE (12), // 1200 MHz / 12 = 100 MHz .CLKOUT0_PHASE (0.000), .CLKOUT0_DUTY_CYCLE (0.500), - .CLKOUT1_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz + .CLKOUT1_DIVIDE (3), // 1200 MHz / 3 = 400 MHz .CLKOUT1_PHASE (0.000), .CLKOUT1_DUTY_CYCLE (0.500), - .CLKOUT2_DIVIDE (5), // 1000 MHz / 5 = 200 MHz + .CLKOUT2_DIVIDE (6), // 1200 MHz / 6 = 200 MHz .CLKOUT2_PHASE (0.000), .CLKOUT2_DUTY_CYCLE (0.500), - .CLKOUT3_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 90 phase + .CLKOUT3_DIVIDE (3), // 1200 MHz / 3 = 400 MHz, 90 phase .CLKOUT3_PHASE (90.000), .CLKOUT3_DUTY_CYCLE (0.500), .CLKIN1_PERIOD (10.000) // 100 MHz input diff --git a/example_demo/arty_s7/ddr3_controller.v b/example_demo/arty_s7/ddr3_controller.v new file mode 120000 index 0000000..abadf6a --- /dev/null +++ b/example_demo/arty_s7/ddr3_controller.v @@ -0,0 +1 @@ +../../rtl/ddr3_controller.v \ No newline at end of file diff --git a/example_demo/arty_s7/ddr3_phy.v b/example_demo/arty_s7/ddr3_phy.v new file mode 120000 index 0000000..6fd8666 --- /dev/null +++ b/example_demo/arty_s7/ddr3_phy.v @@ -0,0 +1 @@ +../../rtl/ddr3_phy.v \ No newline at end of file diff --git a/example_demo/arty_s7/ddr3_top.v b/example_demo/arty_s7/ddr3_top.v new file mode 120000 index 0000000..5595b49 --- /dev/null +++ b/example_demo/arty_s7/ddr3_top.v @@ -0,0 +1 @@ +../../rtl/ddr3_top.v \ No newline at end of file diff --git 
a/example_demo/enclustra_kx2_st1/caas.conf b/example_demo/enclustra_kx2_st1/caas.conf new file mode 100644 index 0000000..8b356f0 --- /dev/null +++ b/example_demo/enclustra_kx2_st1/caas.conf @@ -0,0 +1,11 @@ +[project] +Backend = openxc7 +Part = xc7k160tffg676-2 +Top = enclustra_ddr3 +Constraint = enclustra_ddr3.xdc +Sources = ./ddr3_top.v,./ddr3_controller.v,./ddr3_phy.v,./enclustra_ddr3.v,./uart_rx.v,./uart_tx.v,./clk_wiz.v + +[caas] +Server = https://caas.symbioticeda.com:18888/ + + diff --git a/example_demo/enclustra_kx2_st1/ddr3_controller.v b/example_demo/enclustra_kx2_st1/ddr3_controller.v new file mode 120000 index 0000000..abadf6a --- /dev/null +++ b/example_demo/enclustra_kx2_st1/ddr3_controller.v @@ -0,0 +1 @@ +../../rtl/ddr3_controller.v \ No newline at end of file diff --git a/example_demo/enclustra_kx2_st1/ddr3_phy.v b/example_demo/enclustra_kx2_st1/ddr3_phy.v new file mode 120000 index 0000000..6fd8666 --- /dev/null +++ b/example_demo/enclustra_kx2_st1/ddr3_phy.v @@ -0,0 +1 @@ +../../rtl/ddr3_phy.v \ No newline at end of file diff --git a/example_demo/enclustra_kx2_st1/ddr3_top.v b/example_demo/enclustra_kx2_st1/ddr3_top.v new file mode 120000 index 0000000..5595b49 --- /dev/null +++ b/example_demo/enclustra_kx2_st1/ddr3_top.v @@ -0,0 +1 @@ +../../rtl/ddr3_top.v \ No newline at end of file diff --git a/example_demo/nexys_video/caas.conf b/example_demo/nexys_video/caas.conf new file mode 100644 index 0000000..b8a5b68 --- /dev/null +++ b/example_demo/nexys_video/caas.conf @@ -0,0 +1,11 @@ +[project] +Backend = openxc7 +Part = xc7a200tsbg484-1 +Top = nexysvideo_ddr3 +Constraint = nexysvideo_ddr3.xdc +Sources = *.v + +[caas] +Server = https://caas.symbioticeda.com:18888/ + + diff --git a/example_demo/nexys_video/ddr3_controller.v b/example_demo/nexys_video/ddr3_controller.v new file mode 120000 index 0000000..abadf6a --- /dev/null +++ b/example_demo/nexys_video/ddr3_controller.v @@ -0,0 +1 @@ +../../rtl/ddr3_controller.v \ No newline at end of file diff 
--git a/example_demo/nexys_video/ddr3_phy.v b/example_demo/nexys_video/ddr3_phy.v new file mode 120000 index 0000000..6fd8666 --- /dev/null +++ b/example_demo/nexys_video/ddr3_phy.v @@ -0,0 +1 @@ +../../rtl/ddr3_phy.v \ No newline at end of file diff --git a/example_demo/nexys_video/ddr3_top.v b/example_demo/nexys_video/ddr3_top.v new file mode 120000 index 0000000..5595b49 --- /dev/null +++ b/example_demo/nexys_video/ddr3_top.v @@ -0,0 +1 @@ +../../rtl/ddr3_top.v \ No newline at end of file diff --git a/example_demo/orangecrab_ecp5/caas.conf b/example_demo/orangecrab_ecp5/caas.conf new file mode 100644 index 0000000..acd5242 --- /dev/null +++ b/example_demo/orangecrab_ecp5/caas.conf @@ -0,0 +1,11 @@ +[project] +Backend = ecp5 +Part = lfe5u-85f-8mg285c-csfbga285 +Top = orangecrab_ecp5_ddr3 +Constraint = orangecrab_ecp5_ddr3.pcf +Sources = *.v + +[caas] +Server = https://caas.symbioticeda.com:18888/ + + diff --git a/example_demo/orangecrab_ecp5/ddr3_controller.v b/example_demo/orangecrab_ecp5/ddr3_controller.v new file mode 120000 index 0000000..abadf6a --- /dev/null +++ b/example_demo/orangecrab_ecp5/ddr3_controller.v @@ -0,0 +1 @@ +../../rtl/ddr3_controller.v \ No newline at end of file diff --git a/example_demo/orangecrab_ecp5/ddr3_phy_ecp5.v b/example_demo/orangecrab_ecp5/ddr3_phy_ecp5.v new file mode 120000 index 0000000..9f6600e --- /dev/null +++ b/example_demo/orangecrab_ecp5/ddr3_phy_ecp5.v @@ -0,0 +1 @@ +../../rtl/ecp5_phy/ddr3_phy_ecp5.v \ No newline at end of file diff --git a/example_demo/orangecrab_ecp5/ddr3_top.v b/example_demo/orangecrab_ecp5/ddr3_top.v new file mode 100644 index 0000000..1e117bc --- /dev/null +++ b/example_demo/orangecrab_ecp5/ddr3_top.v @@ -0,0 +1,474 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Filename: ddr3_top.v +// Project: UberDDR3 - An Open Source DDR3 Controller +// +// Purpose: Top module which instantiates the ddr3_controller and ddr3_phy modules +// Use this as top module 
for instantiating UberDDR3 with Wishbone Interface. +// +// Engineer: Angelo C. Jacobo +// +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (C) 2023-2025 Angelo Jacobo +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see https://www.gnu.org/licenses/. +// +//////////////////////////////////////////////////////////////////////////////// + +`default_nettype none +`timescale 1ps / 1ps +`define LATTICE_ECP5_PHY + +module ddr3_top #( + parameter CONTROLLER_CLK_PERIOD = 12_000, //ps, clock period of the controller interface + DDR3_CLK_PERIOD = 3_000, //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD) + ROW_BITS = 14, //width of row address + COL_BITS = 10, //width of column address + BA_BITS = 3, //width of bank address + BYTE_LANES = 2, //number of byte lanes of DDR3 RAM + AUX_WIDTH = 4, //width of aux line (must be >= 4) + WB2_ADDR_BITS = 7, //width of 2nd wishbone address bus + WB2_DATA_BITS = 32, //width of 2nd wishbone data bus + DUAL_RANK_DIMM = 0, // enable dual rank DIMM (1 = enable, 0 = disable) + // DDR3 timing parameter values + parameter SPEED_BIN = 3, // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11) + SDRAM_CAPACITY = 5, // 0 = 256Mb, 1 = 512Mb, 2 = 1Gb, 3 = 2Gb, 4 = 4Gb, 5 = 8Gb, 6 = 16Gb + TRCD = 13_750, // ps Active to Read/Write command time (only used if SPEED_BIN = 0) + TRP = 
13_750, // ps Precharge command period (only used if SPEED_BIN = 0) + TRAS = 35_000, // ps ACT to PRE command period (only used if SPEED_BIN = 0) + parameter[0:0] MICRON_SIM = 0, //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) + ODELAY_SUPPORTED = 0, //set to 1 when ODELAYE2 is supported + SECOND_WISHBONE = 0, //set to 1 if 2nd wishbone for debugging is needed + DLL_OFF = 0, // 1 = DLL off for low frequency ddr3 clock (< 125MHz) + WB_ERROR = 0, // set to 1 to support Wishbone error (asserts at ECC double bit error) + parameter[1:0] BIST_MODE = 1, // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w) + parameter[1:0] ECC_ENABLE = 0, // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC ) + parameter[1:0] DIC = 2'b00, //Output Driver Impedance Control (2'b00 = RZQ/6, 2'b01 = RZQ/7, RZQ = 240ohms) (only change when you know what you are doing) + parameter[2:0] RTT_NOM = 3'b011, //RTT Nominal (3'b000 = disabled, 3'b001 = RZQ/4, 3'b010 = RZQ/2 , 3'b011 = RZQ/6, RZQ = 240ohms) (only change when you know what you are doing) + parameter[1:0] SELF_REFRESH = 2'b00, // 0 = use i_user_self_refresh input, 1 = Self-refresh mode is enabled after 64 controller clock cycles of no requests, 2 = 128 cycles, 3 = 256 cycles + parameter // The next parameters act more like localparams (the user does not have to set them manually) but were added here to simplify port declaration + DQ_BITS = 8, //device width (fixed to 8; if DDR3 is x16 then BYTE_LANES will be 2 while DQ_BITS stays 8) + serdes_ratio = 4, // this controller is fixed as a 4:1 memory controller (CONTROLLER_CLK_PERIOD/DDR3_CLK_PERIOD = 4) + wb_addr_bits = ROW_BITS + COL_BITS + BA_BITS - $clog2(serdes_ratio*2) + DUAL_RANK_DIMM, + wb_data_bits = DQ_BITS*BYTE_LANES*serdes_ratio*2, + wb_sel_bits = wb_data_bits / 8, + wb2_sel_bits = WB2_DATA_BITS / 8, 
+ //4 is the width of a single ddr3 command {cs_n, ras_n, cas_n, we_n} plus 3 (ck_en, odt, reset_n) plus bank bits plus row bits + cmd_len = 4 + 3 + BA_BITS + ROW_BITS + 2*DUAL_RANK_DIMM + ) + ( + input wire i_controller_clk, i_ddr3_clk, i_ref_clk, //i_controller_clk = CONTROLLER_CLK_PERIOD, i_ddr3_clk = DDR3_CLK_PERIOD, i_ref_clk = 200MHz + input wire i_ddr3_clk_90, //required only when ODELAY_SUPPORTED is zero + input wire i_rst_n, + // + // Wishbone inputs + input wire i_wb_cyc, //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled) + input wire i_wb_stb, //request a transfer + input wire i_wb_we, //write-enable (1 = write, 0 = read) + input wire[wb_addr_bits - 1:0] i_wb_addr, //burst-addressable {row,bank,col} + input wire[wb_data_bits - 1:0] i_wb_data, //write data, for a 4:1 controller data width is 8 times the number of pins on the device + input wire[wb_sel_bits - 1:0] i_wb_sel, //byte strobe for write (1 = write the byte) + input wire[AUX_WIDTH - 1:0] i_aux, //for AXI-interface compatibility (given upon strobe) + // Wishbone outputs + output wire o_wb_stall, //1 = busy, cannot accept requests + output wire o_wb_ack, //1 = read/write request has completed + output wire o_wb_err, //1 = Error due to ECC double bit error (fixed to 0 if WB_ERROR = 0) + output wire[wb_data_bits - 1:0] o_wb_data, //read data, for a 4:1 controller data width is 8 times the number of pins on the device + output wire[AUX_WIDTH - 1:0] o_aux, //for AXI-interface compatibility (given upon strobe) + // + // Wishbone 2 (PHY) inputs + input wire i_wb2_cyc, //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled) + input wire i_wb2_stb, //request a transfer + input wire i_wb2_we, //write-enable (1 = write, 0 = read) + input wire[WB2_ADDR_BITS - 1:0] i_wb2_addr, // memory-mapped register to be accessed + input wire[WB2_DATA_BITS - 1:0] i_wb2_data, //write data + input wire[wb2_sel_bits - 1:0] i_wb2_sel, //byte strobe for 
write (1 = write the byte) + // Wishbone 2 (Controller) outputs + output wire o_wb2_stall, //1 = busy, cannot accept requests + output wire o_wb2_ack, //1 = read/write request has completed + output wire[WB2_DATA_BITS - 1:0] o_wb2_data, //read data + // + // DDR3 I/O Interface + output wire[DUAL_RANK_DIMM:0] o_ddr3_clk_p, o_ddr3_clk_n, + output wire o_ddr3_reset_n, + output wire[DUAL_RANK_DIMM:0] o_ddr3_cke, // CKE + output wire[DUAL_RANK_DIMM:0] o_ddr3_cs_n, // chip select signal + output wire o_ddr3_ras_n, // RAS# + output wire o_ddr3_cas_n, // CAS# + output wire o_ddr3_we_n, // WE# + output wire[ROW_BITS-1:0] o_ddr3_addr, + output wire[BA_BITS-1:0] o_ddr3_ba_addr, + inout wire[(DQ_BITS*BYTE_LANES)-1:0] io_ddr3_dq, + inout wire[BYTE_LANES-1:0] io_ddr3_dqs, io_ddr3_dqs_n, + output wire[BYTE_LANES-1:0] o_ddr3_dm, + output wire[DUAL_RANK_DIMM:0] o_ddr3_odt, // on-die termination + // + // Done Calibration pin + output wire o_calib_complete, + // Debug outputs + output wire[31:0] o_debug1, +// output wire[31:0] o_debug2, +// output wire[31:0] o_debug3, +// output wire[(DQ_BITS*BYTE_LANES)/8-1:0] o_ddr3_debug_read_dqs_p, +// output wire[(DQ_BITS*BYTE_LANES)/8-1:0] o_ddr3_debug_read_dqs_n + // + // User enabled self-refresh + input wire i_user_self_refresh, + output wire uart_tx + ); + +// Instantiation Template (DEFAULT VALUE IS FOR ARTY S7) +/* +// DDR3 Controller +ddr3_top #( + .CONTROLLER_CLK_PERIOD(12_000), //ps, clock period of the controller interface + .DDR3_CLK_PERIOD(3_000), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD) + .ROW_BITS(14), //width of row address + .COL_BITS(10), //width of column address + .BA_BITS(3), //width of bank address + .BYTE_LANES(2), //number of byte lanes of DDR3 RAM + .AUX_WIDTH(4), //width of aux line (must be >= 4) + .MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) + .ODELAY_SUPPORTED(0), //set to 1 if ODELAYE2 is supported + 
.SECOND_WISHBONE(0), //set to 1 if 2nd wishbone for debugging is needed + .ECC_ENABLE(0), // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC ) + .WB_ERROR(0), // set to 1 to support Wishbone error (asserts at ECC double bit error) + ) ddr3_top + ( + //clock and reset + .i_controller_clk(i_controller_clk), + .i_ddr3_clk(i_ddr3_clk), //i_controller_clk has period of CONTROLLER_CLK_PERIOD, i_ddr3_clk has period of DDR3_CLK_PERIOD + .i_ref_clk(i_ref_clk), // usually set to 200 MHz + .i_ddr3_clk_90(i_ddr3_clk_90), //90 degree phase shifted version i_ddr3_clk (required only when ODELAY_SUPPORTED is zero) + .i_rst_n(!i_rst && clk_locked), + // + // Wishbone inputs + .i_wb_cyc(1), //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled) + .i_wb_stb(i_wb_stb), //request a transfer + .i_wb_we(i_wb_we), //write-enable (1 = write, 0 = read) + .i_wb_addr(i_wb_addr), //burst-addressable {row,bank,col} + .i_wb_data(i_wb_data), //write data, for a 4:1 controller data width is 8 times the number of pins on the device + .i_wb_sel(16'hffff), //byte strobe for write (1 = write the byte) + .i_aux(i_wb_we), //for AXI-interface compatibility (given upon strobe) + // Wishbone outputs + .o_wb_stall(o_wb_stall), //1 = busy, cannot accept requests + .o_wb_ack(o_wb_ack), //1 = read/write request has completed + .o_wb_err(o_wb_err), //1 = Error due to ECC double bit error (fixed to 0 if WB_ERROR = 0) + .o_wb_data(o_wb_data), //read data, for a 4:1 controller data width is 8 times the number of pins on the device + .o_aux(o_aux), + // + // Wishbone 2 (PHY) inputs + .i_wb2_cyc(0), //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled) + .i_wb2_stb(0), //request a transfer + .i_wb2_we(0), //write-enable (1 = write, 0 = read) + .i_wb2_addr(0), //burst-addressable {row,bank,col} + .i_wb2_data(0), //write data, for a 4:1 controller data width is 8 times the number of pins on 
the device + .i_wb2_sel(0), //byte strobe for write (1 = write the byte) + // Wishbone 2 (Controller) outputs + .o_wb2_stall(), //1 = busy, cannot accept requests + .o_wb2_ack(), //1 = read/write request has completed + .o_wb2_data(), //read data, for a 4:1 controller data width is 8 times the number of pins on the device + // + // DDR3 I/O Interface + .o_ddr3_clk_p(ddr3_clk_p), + .o_ddr3_clk_n(ddr3_clk_n), + .o_ddr3_reset_n(ddr3_reset_n), + .o_ddr3_cke(ddr3_cke), + .o_ddr3_cs_n(ddr3_cs_n), // width = number of DDR3 ranks + .o_ddr3_ras_n(ddr3_ras_n), + .o_ddr3_cas_n(ddr3_cas_n), + .o_ddr3_we_n(ddr3_we_n), + .o_ddr3_addr(ddr3_addr), // width = ROW_BITS + .o_ddr3_ba_addr(ddr3_ba), // width = BA_BITS + .io_ddr3_dq(ddr3_dq), // width = BYTE_LANES*8 + .io_ddr3_dqs(ddr3_dqs_p), // width = BYTE_LANES + .io_ddr3_dqs_n(ddr3_dqs_n), // width = BYTE_LANES + .o_ddr3_dm(ddr3_dm), // width = BYTE_LANES + .o_ddr3_odt(ddr3_odt), + // Debug outputs + .o_debug1(), + //////////////////////////////////// + ); +*/ + + // Wire connections between controller and phy + wire[cmd_len*serdes_ratio-1:0] cmd; + wire dqs_tri_control, dq_tri_control; + wire toggle_dqs; + wire[wb_data_bits-1:0] data; + wire[wb_sel_bits-1:0] dm; + wire[BYTE_LANES-1:0] bitslip; + wire[DQ_BITS*BYTE_LANES*8-1:0] iserdes_data; + wire[BYTE_LANES*8-1:0] iserdes_dqs; + wire[BYTE_LANES*8-1:0] iserdes_bitslip_reference; + wire idelayctrl_rdy; + wire[4:0] odelay_data_cntvaluein, odelay_dqs_cntvaluein; + wire[4:0] idelay_data_cntvaluein, idelay_dqs_cntvaluein; + wire[BYTE_LANES-1:0] odelay_data_ld, odelay_dqs_ld; + wire[BYTE_LANES-1:0] idelay_data_ld, idelay_dqs_ld; + wire write_leveling_calib; + wire reset; + + // logic for self-refresh + reg[8:0] refresh_counter = 0; + reg user_self_refresh; + // refresh counter + always @(posedge i_controller_clk) begin + if(i_wb_stb && i_wb_cyc) begin // if there is Wishbone request, then reset counter + refresh_counter <= 0; + end + else if(!o_wb_stall || user_self_refresh) begin // if 
no request (but not stalled) OR already on self-refresh, then increment counter + refresh_counter <= refresh_counter + 1; + end + end + // choose self-refresh options + always @* begin + case(SELF_REFRESH) + 2'b00: user_self_refresh = i_user_self_refresh; // use input i_user_self_refresh (high = enter self-refresh, low = exit self-refresh) + 2'b01: user_self_refresh = refresh_counter[6]; // Self-refresh mode is enabled after 64 controller clock cycles of no requests, then exit Self-refresh after another 64 controller clk cycles + 2'b10: user_self_refresh = refresh_counter[7]; // Self-refresh mode is enabled after 128 controller clock cycles of no requests, then exit Self-refresh after another 128 controller clk cycles + 2'b11: user_self_refresh = refresh_counter[8]; // Self-refresh mode is enabled after 256 controller clock cycles of no requests, then exit Self-refresh after another 256 controller clk cycles + endcase + end + + + + //module instantiations + ddr3_controller #( + .CONTROLLER_CLK_PERIOD(CONTROLLER_CLK_PERIOD), //ps, clock period of the controller interface + .DDR3_CLK_PERIOD(DDR3_CLK_PERIOD), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD) + .ROW_BITS(ROW_BITS), //width of row address + .COL_BITS(COL_BITS), //width of column address + .BA_BITS(BA_BITS), //width of bank address + .DQ_BITS(DQ_BITS), //width of DQ + .LANES(BYTE_LANES), // byte lanes + .AUX_WIDTH(AUX_WIDTH), //width of aux line (must be >= 4) + .WB2_ADDR_BITS(WB2_ADDR_BITS), //width of 2nd wishbone address bus + .WB2_DATA_BITS(WB2_DATA_BITS), //width of 2nd wishbone data bus + .MICRON_SIM(MICRON_SIM), //simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) + .ODELAY_SUPPORTED(ODELAY_SUPPORTED), //set to 1 when ODELAYE2 is supported + .SECOND_WISHBONE(SECOND_WISHBONE), //set to 1 if 2nd wishbone is needed + .ECC_ENABLE(ECC_ENABLE), // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = 
Inline ECC ) + .DLL_OFF(DLL_OFF), // 1 = DLL off for low frequency ddr3 clock (< 125MHz) + .WB_ERROR(WB_ERROR), // set to 1 to support Wishbone error (asserts at ECC double bit error) + .BIST_MODE(BIST_MODE), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w) + .DIC(DIC), //Output Driver Impedance Control (2'b00 = RZQ/6, 2'b01 = RZQ/7, RZQ = 240ohms) + .RTT_NOM(RTT_NOM), //RTT Nominal (3'b000 = disabled, 3'b001 = RZQ/4, 3'b010 = RZQ/2 , 3'b011 = RZQ/6, RZQ = 240ohms) + .DUAL_RANK_DIMM(DUAL_RANK_DIMM), // enable dual rank DIMM (1 = enable, 0 = disable) + .SPEED_BIN(SPEED_BIN), // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11) + .SDRAM_CAPACITY(SDRAM_CAPACITY), // 0 = 256Mb, 1 = 512Mb, 2 = 1Gb, 3 = 2Gb, 4 = 4Gb, 5 = 8Gb, 6 = 16Gb + .TRCD(TRCD), // ps Active to Read/Write command time (only used if SPEED_BIN = 0) + .TRP(TRP), // ps Precharge command period (only used if SPEED_BIN = 0) + .TRAS(TRAS) // ps ACT to PRE command period (only used if SPEED_BIN = 0) + ) ddr3_controller_inst ( + .i_controller_clk(i_controller_clk), //i_controller_clk has period of CONTROLLER_CLK_PERIOD + .i_rst_n(i_rst_n), //reset input (active-low by naming convention) + // Wishbone inputs + .i_wb_cyc(i_wb_cyc), //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled) + .i_wb_stb(i_wb_stb), //request a transfer + .i_wb_we(i_wb_we), //write-enable (1 = write, 0 = read) + .i_wb_addr(i_wb_addr), //burst-addressable {row,bank,col} + .i_wb_data(i_wb_data), //write data, for a 4:1 controller data width is 8 times the number of pins on the device + .i_wb_sel(i_wb_sel), //byte strobe for write (1 = write the byte) + .i_aux(i_aux), //for AXI-interface compatibility (given upon strobe) + // Wishbone outputs + .o_wb_stall(o_wb_stall), //1 = busy, cannot accept requests + .o_wb_ack(o_wb_ack), //1 = read/write request has completed + 
.o_wb_err(o_wb_err), //1 = Error due to ECC double bit error (fixed to 0 if WB_ERROR = 0) + .o_wb_data(o_wb_data), //read data, for a 4:1 controller data width is 8 times the number of pins on the device + .o_aux(o_aux), //for AXI-interface compatibility (returned upon ack) + // Wishbone 2 (PHY) inputs + .i_wb2_cyc(i_wb2_cyc), //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled) + .i_wb2_stb(i_wb2_stb), //request a transfer + .i_wb2_we(i_wb2_we), //write-enable (1 = write, 0 = read) + .i_wb2_addr(i_wb2_addr), // memory-mapped register to be accessed + .i_wb2_data(i_wb2_data), //write data + .i_wb2_sel(i_wb2_sel), //byte strobe for write (1 = write the byte) + // Wishbone 2 (Controller) outputs + .o_wb2_stall(o_wb2_stall), //1 = busy, cannot accept requests + .o_wb2_ack(o_wb2_ack), //1 = read/write request has completed + .o_wb2_data(o_wb2_data), //read data + // + // PHY interface + .i_phy_iserdes_data(iserdes_data), + .i_phy_iserdes_dqs(iserdes_dqs), + .i_phy_iserdes_bitslip_reference(iserdes_bitslip_reference), + .i_phy_idelayctrl_rdy(idelayctrl_rdy), + .o_phy_cmd(cmd), + .o_phy_dqs_tri_control(dqs_tri_control), + .o_phy_dq_tri_control(dq_tri_control), + .o_phy_toggle_dqs(toggle_dqs), + .o_phy_data(data), + .o_phy_dm(dm), + .o_phy_odelay_data_cntvaluein(odelay_data_cntvaluein), + .o_phy_odelay_dqs_cntvaluein(odelay_dqs_cntvaluein), + .o_phy_idelay_data_cntvaluein(idelay_data_cntvaluein), + .o_phy_idelay_dqs_cntvaluein(idelay_dqs_cntvaluein), + .o_phy_odelay_data_ld(odelay_data_ld), + .o_phy_odelay_dqs_ld(odelay_dqs_ld), + .o_phy_idelay_data_ld(idelay_data_ld), + .o_phy_idelay_dqs_ld(idelay_dqs_ld), + .o_phy_bitslip(bitslip), + .o_phy_write_leveling_calib(write_leveling_calib), + .o_phy_reset(reset), + // Done Calibration pin + .o_calib_complete(o_calib_complete), + // Debug outputs + .o_debug1(o_debug1), +// .o_debug2(o_debug2), +// .o_debug3(o_debug3) + // User enabled self-refresh + .i_user_self_refresh(user_self_refresh), 
+ .uart_tx(uart_tx) + ); + `ifndef LATTICE_ECP5_PHY // XILINX PHY + ddr3_phy #( + .ROW_BITS(ROW_BITS), //width of row address + .BA_BITS(BA_BITS), //width of bank address + .DQ_BITS(DQ_BITS), //width of DQ + .LANES(BYTE_LANES), //8 lanes of DQ + .CONTROLLER_CLK_PERIOD(CONTROLLER_CLK_PERIOD), //ps, period of clock input to this DDR3 controller module + .DDR3_CLK_PERIOD(DDR3_CLK_PERIOD), //ps, period of clock input to DDR3 RAM device + .ODELAY_SUPPORTED(ODELAY_SUPPORTED), //set to 1 when ODELAYE2 is supported + .DUAL_RANK_DIMM(DUAL_RANK_DIMM) // enable dual rank DIMM (1 = enable, 0 = disable) + ) ddr3_phy_inst ( + .i_controller_clk(i_controller_clk), + .i_ddr3_clk(i_ddr3_clk), + .i_ref_clk(i_ref_clk), + .i_ddr3_clk_90(i_ddr3_clk_90), + .i_rst_n(i_rst_n), + // Controller Interface + .i_controller_reset(reset), + .i_controller_cmd(cmd), + .i_controller_dqs_tri_control(dqs_tri_control), + .i_controller_dq_tri_control(dq_tri_control), + .i_controller_toggle_dqs(toggle_dqs), + .i_controller_data(data), + .i_controller_dm(dm), + .i_controller_odelay_data_cntvaluein(odelay_data_cntvaluein), + .i_controller_odelay_dqs_cntvaluein(odelay_dqs_cntvaluein), + .i_controller_idelay_data_cntvaluein(idelay_data_cntvaluein), + .i_controller_idelay_dqs_cntvaluein(idelay_dqs_cntvaluein), + .i_controller_odelay_data_ld(odelay_data_ld), + .i_controller_odelay_dqs_ld(odelay_dqs_ld), + .i_controller_idelay_data_ld(idelay_data_ld), + .i_controller_idelay_dqs_ld(idelay_dqs_ld), + .i_controller_bitslip(bitslip), + .i_controller_write_leveling_calib(write_leveling_calib), + .o_controller_iserdes_data(iserdes_data), + .o_controller_iserdes_dqs(iserdes_dqs), + .o_controller_iserdes_bitslip_reference(iserdes_bitslip_reference), + .o_controller_idelayctrl_rdy(idelayctrl_rdy), + // DDR3 I/O Interface + .o_ddr3_clk_p(o_ddr3_clk_p), + .o_ddr3_clk_n(o_ddr3_clk_n), + .o_ddr3_reset_n(o_ddr3_reset_n), + .o_ddr3_cke(o_ddr3_cke), // CKE + .o_ddr3_cs_n(o_ddr3_cs_n), // chip select signal + 
.o_ddr3_ras_n(o_ddr3_ras_n), // RAS# + .o_ddr3_cas_n(o_ddr3_cas_n), // CAS# + .o_ddr3_we_n(o_ddr3_we_n), // WE# + .o_ddr3_addr(o_ddr3_addr), + .o_ddr3_ba_addr(o_ddr3_ba_addr), + .io_ddr3_dq(io_ddr3_dq), + .io_ddr3_dqs(io_ddr3_dqs), + .io_ddr3_dqs_n(io_ddr3_dqs_n), + .o_ddr3_dm(o_ddr3_dm), + .o_ddr3_odt(o_ddr3_odt), // on-die termination + .o_ddr3_debug_read_dqs_p(/*o_ddr3_debug_read_dqs_p*/), + .o_ddr3_debug_read_dqs_n(/*o_ddr3_debug_read_dqs_n*/) + ); + `else // LATTICE ECP5 PHY + ddr3_phy_ecp5 #( + .ROW_BITS(ROW_BITS), //width of row address + .BA_BITS(BA_BITS), //width of bank address + .DQ_BITS(DQ_BITS), //width of DQ + .LANES(BYTE_LANES), //8 lanes of DQ + .CONTROLLER_CLK_PERIOD(CONTROLLER_CLK_PERIOD) //ps, period of clock input to this DDR3 controller module + ) ddr3_phy_inst ( + .i_controller_clk(i_controller_clk), + .i_ddr3_clk(i_ddr3_clk), + .i_ref_clk(i_ref_clk), + .i_ddr3_clk_90(i_ddr3_clk_90), + .i_rst_n(i_rst_n), + // Controller Interface + .i_controller_reset(reset), + .i_controller_cmd(cmd), + .i_controller_dqs_tri_control(dqs_tri_control), + .i_controller_dq_tri_control(dq_tri_control), + .i_controller_toggle_dqs(toggle_dqs), + .i_controller_data(data), + .i_controller_dm(dm), + .i_controller_odelay_data_cntvaluein(odelay_data_cntvaluein), + .i_controller_odelay_dqs_cntvaluein(odelay_dqs_cntvaluein), + .i_controller_idelay_data_cntvaluein(idelay_data_cntvaluein), + .i_controller_idelay_dqs_cntvaluein(idelay_dqs_cntvaluein), + .i_controller_odelay_data_ld(odelay_data_ld), + .i_controller_odelay_dqs_ld(odelay_dqs_ld), + .i_controller_idelay_data_ld(idelay_data_ld), + .i_controller_idelay_dqs_ld(idelay_dqs_ld), + .i_controller_bitslip(bitslip), + .i_controller_write_leveling_calib(write_leveling_calib), + .o_controller_iserdes_data(iserdes_data), + .o_controller_iserdes_dqs(iserdes_dqs), + .o_controller_iserdes_bitslip_reference(iserdes_bitslip_reference), + .o_controller_idelayctrl_rdy(idelayctrl_rdy), + // DDR3 I/O Interface + 
.o_ddr3_clk_p(o_ddr3_clk_p), + .o_ddr3_clk_n(o_ddr3_clk_n), + .o_ddr3_reset_n(o_ddr3_reset_n), + .o_ddr3_cke(o_ddr3_cke), // CKE + .o_ddr3_cs_n(o_ddr3_cs_n), // chip select signal + .o_ddr3_ras_n(o_ddr3_ras_n), // RAS# + .o_ddr3_cas_n(o_ddr3_cas_n), // CAS# + .o_ddr3_we_n(o_ddr3_we_n), // WE# + .o_ddr3_addr(o_ddr3_addr), + .o_ddr3_ba_addr(o_ddr3_ba_addr), + .io_ddr3_dq(io_ddr3_dq), + .io_ddr3_dqs(io_ddr3_dqs), + .io_ddr3_dqs_n(io_ddr3_dqs_n), + .o_ddr3_dm(o_ddr3_dm), + .o_ddr3_odt(o_ddr3_odt), // on-die termination + .o_ddr3_debug_read_dqs_p(/*o_ddr3_debug_read_dqs_p*/), + .o_ddr3_debug_read_dqs_n(/*o_ddr3_debug_read_dqs_n*/) + ); + `endif + + // // display value of parameters for easy debugging + // initial begin + // $display("\nDDR3 TOP PARAMETERS:\n-----------------------------"); + // $display("CONTROLLER_CLK_PERIOD = %0d", CONTROLLER_CLK_PERIOD); + // $display("DDR3_CLK_PERIOD = %0d", DDR3_CLK_PERIOD); + // $display("ROW_BITS = %0d", ROW_BITS); + // $display("COL_BITS = %0d", COL_BITS); + // $display("BA_BITS = %0d", BA_BITS); + // $display("BYTE_LANES = %0d", BYTE_LANES); + // $display("AUX_WIDTH = %0d", AUX_WIDTH); + // $display("WB2_ADDR_BITS = %0d", WB2_ADDR_BITS); + // $display("WB2_DATA_BITS = %0d", WB2_DATA_BITS); + // $display("MICRON_SIM = %0d", MICRON_SIM); + // $display("ODELAY_SUPPORTED = %0d", ODELAY_SUPPORTED); + // $display("SECOND_WISHBONE = %0d", SECOND_WISHBONE); + // $display("WB_ERROR = %0d", WB_ERROR); + // $display("BIST_MODE = %0d", BIST_MODE); + // $display("ECC_ENABLE = %0d", ECC_ENABLE); + // $display("DIC = %0d", DIC); + // $display("RTT_NOM = %0d", RTT_NOM); + // $display("SELF_REFRESH = %0d", SELF_REFRESH); + // $display("DUAL_RANK_DIMM = %0d", DUAL_RANK_DIMM); + // $display("End of DDR3 TOP PARAMETERS\n-----------------------------"); + // end + +endmodule diff --git a/example_demo/orangecrab_ecp5/iserdes_soft.v b/example_demo/orangecrab_ecp5/iserdes_soft.v new file mode 120000 index 0000000..3b5c05f --- /dev/null +++ 
b/example_demo/orangecrab_ecp5/iserdes_soft.v @@ -0,0 +1 @@ +../../rtl/ecp5_phy/iserdes_soft.v \ No newline at end of file diff --git a/example_demo/orangecrab_ecp5/orangecrab_ecp5_ddr3.dfu b/example_demo/orangecrab_ecp5/orangecrab_ecp5_ddr3.dfu index 2b160a4..17d0c60 100644 Binary files a/example_demo/orangecrab_ecp5/orangecrab_ecp5_ddr3.dfu and b/example_demo/orangecrab_ecp5/orangecrab_ecp5_ddr3.dfu differ diff --git a/example_demo/orangecrab_ecp5/oserdes_soft.v b/example_demo/orangecrab_ecp5/oserdes_soft.v new file mode 120000 index 0000000..745696d --- /dev/null +++ b/example_demo/orangecrab_ecp5/oserdes_soft.v @@ -0,0 +1 @@ +../../rtl/ecp5_phy/oserdes_soft.v \ No newline at end of file diff --git a/example_demo/qmtech_kintex_7/caas.conf b/example_demo/qmtech_kintex_7/caas.conf new file mode 100644 index 0000000..f449be9 --- /dev/null +++ b/example_demo/qmtech_kintex_7/caas.conf @@ -0,0 +1,11 @@ +[project] +Backend = openxc7 +Part = xc7k325tffg676-1 +Top = qmtech_kintex7_ddr3 +Constraint = qmtech_kintex7_ddr3.xdc +Sources = *.v + +[caas] +Server = https://caas.symbioticeda.com:18888/ + + diff --git a/example_demo/qmtech_kintex_7/clk_wiz.v b/example_demo/qmtech_kintex_7/clk_wiz.v index 72bc093..e9132b1 100644 --- a/example_demo/qmtech_kintex_7/clk_wiz.v +++ b/example_demo/qmtech_kintex_7/clk_wiz.v @@ -24,16 +24,16 @@ module clk_wiz .DIVCLK_DIVIDE (1), .CLKFBOUT_MULT (20), // 50 MHz * 20 = 1000 MHz .CLKFBOUT_PHASE (0.000), - .CLKOUT0_DIVIDE (12), // 1000 MHz / 12 = 83.333 MHz + .CLKOUT0_DIVIDE (8), // 1000 MHz / 8 = 125 MHz .CLKOUT0_PHASE (0.000), .CLKOUT0_DUTY_CYCLE (0.500), - .CLKOUT1_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz + .CLKOUT1_DIVIDE (2), // 1000 MHz / 2 = 500 MHz .CLKOUT1_PHASE (0.000), .CLKOUT1_DUTY_CYCLE (0.500), .CLKOUT2_DIVIDE (5), // 1000 MHz / 5 = 200 MHz .CLKOUT2_PHASE (0.000), .CLKOUT2_DUTY_CYCLE (0.500), - .CLKOUT3_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 90 phase + .CLKOUT3_DIVIDE (2), // 1000 MHz / 2 = 500 MHz, 90 phase .CLKOUT3_PHASE 
(90.000), .CLKOUT3_DUTY_CYCLE (0.500), .CLKIN1_PERIOD (20.000) // 50 MHz input diff --git a/example_demo/qmtech_kintex_7/ddr3_controller.v b/example_demo/qmtech_kintex_7/ddr3_controller.v new file mode 120000 index 0000000..abadf6a --- /dev/null +++ b/example_demo/qmtech_kintex_7/ddr3_controller.v @@ -0,0 +1 @@ +../../rtl/ddr3_controller.v \ No newline at end of file diff --git a/example_demo/qmtech_kintex_7/ddr3_phy.v b/example_demo/qmtech_kintex_7/ddr3_phy.v new file mode 120000 index 0000000..6fd8666 --- /dev/null +++ b/example_demo/qmtech_kintex_7/ddr3_phy.v @@ -0,0 +1 @@ +../../rtl/ddr3_phy.v \ No newline at end of file diff --git a/example_demo/qmtech_kintex_7/ddr3_top.v b/example_demo/qmtech_kintex_7/ddr3_top.v new file mode 120000 index 0000000..5595b49 --- /dev/null +++ b/example_demo/qmtech_kintex_7/ddr3_top.v @@ -0,0 +1 @@ +../../rtl/ddr3_top.v \ No newline at end of file diff --git a/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3.v b/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3.v index 2e416ac..6df6113 100644 --- a/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3.v +++ b/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3.v @@ -141,8 +141,8 @@ // DDR3 Controller ddr3_top #( - .CONTROLLER_CLK_PERIOD(12_000), //ps, clock period of the controller interface - .DDR3_CLK_PERIOD(3_000), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD) + .CONTROLLER_CLK_PERIOD(10_000), //ps, clock period of the controller interface + .DDR3_CLK_PERIOD(2_500), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD) .ROW_BITS(14), //width of row address .COL_BITS(10), //width of column address .BA_BITS(3), //width of bank address @@ -152,7 +152,9 @@ .WB2_DATA_BITS(32), //width of 2nd wishbone data bus .MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) .ODELAY_SUPPORTED(1), //set to 1 when ODELAYE2 is supported - .SECOND_WISHBONE(0) //set to 1 if 2nd 
wishbone is needed + .SECOND_WISHBONE(0), //set to 1 if 2nd wishbone is needed + .BIST_MODE(1), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w) + .SPEED_BIN(1) // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11) ) ddr3_top ( //clock and reset diff --git a/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3_openxc7.bit b/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3_openxc7.bit index 5d50a5c..c918970 100644 Binary files a/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3_openxc7.bit and b/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3_openxc7.bit differ diff --git a/example_demo/qmtech_wukong/caas.conf b/example_demo/qmtech_wukong/caas.conf new file mode 100644 index 0000000..e422074 --- /dev/null +++ b/example_demo/qmtech_wukong/caas.conf @@ -0,0 +1,11 @@ +[project] +Backend = openxc7 +Part = xc7a100tfgg676-2 +Top = wukong_ddr3 +Constraint = wukong_ddr3.xdc +Sources = *.v + +[caas] +Server = https://caas.symbioticeda.com:18888/ + + diff --git a/example_demo/qmtech_wukong/clk_wiz.v b/example_demo/qmtech_wukong/clk_wiz.v index 72bc093..bc59612 100644 --- a/example_demo/qmtech_wukong/clk_wiz.v +++ b/example_demo/qmtech_wukong/clk_wiz.v @@ -22,18 +22,18 @@ module clk_wiz .COMPENSATION ("INTERNAL"), .STARTUP_WAIT ("FALSE"), .DIVCLK_DIVIDE (1), - .CLKFBOUT_MULT (20), // 50 MHz * 20 = 1000 MHz + .CLKFBOUT_MULT (24), // 50 MHz * 24 = 1200 MHz .CLKFBOUT_PHASE (0.000), - .CLKOUT0_DIVIDE (12), // 1000 MHz / 12 = 83.333 MHz + .CLKOUT0_DIVIDE (12), // 1200 MHz / 12 = 100 MHz .CLKOUT0_PHASE (0.000), .CLKOUT0_DUTY_CYCLE (0.500), - .CLKOUT1_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz + .CLKOUT1_DIVIDE (3), // 1200 MHz / 3 = 400 MHz .CLKOUT1_PHASE (0.000), .CLKOUT1_DUTY_CYCLE (0.500), - .CLKOUT2_DIVIDE (5), // 1000 MHz / 5 = 200 MHz + .CLKOUT2_DIVIDE (6), // 1200 MHz / 6 = 200 MHz .CLKOUT2_PHASE (0.000), .CLKOUT2_DUTY_CYCLE (0.500), 
- .CLKOUT3_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 90 phase + .CLKOUT3_DIVIDE (3), // 1200 MHz / 3 = 400 MHz, 90 phase .CLKOUT3_PHASE (90.000), .CLKOUT3_DUTY_CYCLE (0.500), .CLKIN1_PERIOD (20.000) // 50 MHz input diff --git a/example_demo/qmtech_wukong/ddr3_controller.v b/example_demo/qmtech_wukong/ddr3_controller.v new file mode 120000 index 0000000..abadf6a --- /dev/null +++ b/example_demo/qmtech_wukong/ddr3_controller.v @@ -0,0 +1 @@ +../../rtl/ddr3_controller.v \ No newline at end of file diff --git a/example_demo/qmtech_wukong/ddr3_phy.v b/example_demo/qmtech_wukong/ddr3_phy.v new file mode 120000 index 0000000..6fd8666 --- /dev/null +++ b/example_demo/qmtech_wukong/ddr3_phy.v @@ -0,0 +1 @@ +../../rtl/ddr3_phy.v \ No newline at end of file diff --git a/example_demo/qmtech_wukong/ddr3_top.v b/example_demo/qmtech_wukong/ddr3_top.v new file mode 120000 index 0000000..5595b49 --- /dev/null +++ b/example_demo/qmtech_wukong/ddr3_top.v @@ -0,0 +1 @@ +../../rtl/ddr3_top.v \ No newline at end of file diff --git a/example_demo/qmtech_wukong/wukong_ddr3.v b/example_demo/qmtech_wukong/wukong_ddr3.v index 221f159..862f85a 100644 --- a/example_demo/qmtech_wukong/wukong_ddr3.v +++ b/example_demo/qmtech_wukong/wukong_ddr3.v @@ -156,7 +156,9 @@ .WB2_DATA_BITS(32), //width of 2nd wishbone data bus .MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW) .ODELAY_SUPPORTED(0), //set to 1 when ODELAYE2 is supported - .SECOND_WISHBONE(0) //set to 1 if 2nd wishbone is needed + .SECOND_WISHBONE(0), //set to 1 if 2nd wishbone is needed + .BIST_MODE(1), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w) + .SPEED_BIN(1) // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11) ) ddr3_top ( //clock and reset diff --git a/example_demo/qmtech_wukong/wukong_ddr3_openxc7.bit 
b/example_demo/qmtech_wukong/wukong_ddr3_openxc7.bit index 1ddad45..38fd93e 100644 Binary files a/example_demo/qmtech_wukong/wukong_ddr3_openxc7.bit and b/example_demo/qmtech_wukong/wukong_ddr3_openxc7.bit differ diff --git a/example_demo/sechzig_mx2/caas.conf b/example_demo/sechzig_mx2/caas.conf new file mode 100644 index 0000000..c2b2766 --- /dev/null +++ b/example_demo/sechzig_mx2/caas.conf @@ -0,0 +1,11 @@ +[project] +Backend = openxc7 +Part = xc7a35tftg256-2 +Top = sechzig_mx2_ddr3 +Constraint = sechzig_mx2_ddr3.xdc +Sources = *.v + +[caas] +Server = https://caas.symbioticeda.com:18888/ + + diff --git a/example_demo/sechzig_mx2/ddr3_controller.v b/example_demo/sechzig_mx2/ddr3_controller.v new file mode 120000 index 0000000..abadf6a --- /dev/null +++ b/example_demo/sechzig_mx2/ddr3_controller.v @@ -0,0 +1 @@ +../../rtl/ddr3_controller.v \ No newline at end of file diff --git a/example_demo/sechzig_mx2/ddr3_phy.v b/example_demo/sechzig_mx2/ddr3_phy.v new file mode 120000 index 0000000..6fd8666 --- /dev/null +++ b/example_demo/sechzig_mx2/ddr3_phy.v @@ -0,0 +1 @@ +../../rtl/ddr3_phy.v \ No newline at end of file diff --git a/example_demo/sechzig_mx2/ddr3_top.v b/example_demo/sechzig_mx2/ddr3_top.v new file mode 120000 index 0000000..5595b49 --- /dev/null +++ b/example_demo/sechzig_mx2/ddr3_top.v @@ -0,0 +1 @@ +../../rtl/ddr3_top.v \ No newline at end of file diff --git a/rtl/ddr3_controller.v b/rtl/ddr3_controller.v index 1caab4d..780b216 100644 --- a/rtl/ddr3_controller.v +++ b/rtl/ddr3_controller.v @@ -50,6 +50,7 @@ // `define UART_DEBUG_READ_LEVEL // `define UART_DEBUG_WRITE_LEVEL // `define UART_DEBUG_ALIGN +// `define UART_DEBUG_BIST `ifdef UART_DEBUG_READ_LEVEL @@ -58,6 +59,8 @@ `define UART_DEBUG `elsif UART_DEBUG_ALIGN `define UART_DEBUG +`elsif UART_DEBUG_BIST + `define UART_DEBUG `endif module ddr3_controller #( @@ -295,6 +298,12 @@ module ddr3_controller #( localparam[3:0] WRITE_TO_WRITE_DELAY = 0; localparam[3:0] WRITE_TO_READ_DELAY = 
find_delay((CWL_nCK + 4 + ps_to_nCK(tWTR)), WRITE_SLOT, READ_SLOT); //4 localparam[3:0] WRITE_TO_PRECHARGE_DELAY = find_delay((CWL_nCK + 4 + ps_to_nCK(tWR)), WRITE_SLOT, PRECHARGE_SLOT); //5 + // determines bitwidth of delay counters + localparam MAX_DELAY_BEFORE_PRECHARGE = max(ACTIVATE_TO_PRECHARGE_DELAY, max(WRITE_TO_PRECHARGE_DELAY, READ_TO_PRECHARGE_DELAY)); + localparam MAX_DELAY_BEFORE_ACTIVATE = max(PRECHARGE_TO_ACTIVATE_DELAY, ACTIVATE_TO_ACTIVATE_DELAY); + localparam MAX_DELAY_BEFORE_WRITE = max(ACTIVATE_TO_WRITE_DELAY, max(READ_TO_WRITE_DELAY + 'd1, WRITE_TO_WRITE_DELAY)); + localparam MAX_DELAY_BEFORE_READ = max(ACTIVATE_TO_READ_DELAY, max(WRITE_TO_READ_DELAY + 'd1, READ_TO_READ_DELAY)); + /* verilator lint_on WIDTHEXPAND */ localparam PRE_REFRESH_DELAY = WRITE_TO_PRECHARGE_DELAY + 1; `ifdef FORMAL @@ -338,7 +347,7 @@ module ddr3_controller #( //the delays included the ODELAY and OSERDES when issuing the read command //and the IDELAY and ISERDES when receiving the data (NOTE TO SELF: ELABORATE ON WHY THOSE MAGIC NUMBERS) localparam READ_ACK_PIPE_WIDTH = READ_DELAY + 1 + 2 + 1 + 1 + (DLL_OFF? 2 : 0); // FOr DLL_OFF, phy has no delay thus add delay here - localparam MAX_ADDED_READ_ACK_DELAY = 16; + localparam MAX_ADDED_READ_ACK_DELAY = 2; localparam DELAY_BEFORE_WRITE_LEVEL_FEEDBACK = STAGE2_DATA_DEPTH + ps_to_cycles(tWLO+tWLOE) + 10; //plus 10 controller clocks for possible bus latency and the delay for receiving feedback DQ from IOBUF -> IDELAY -> ISERDES localparam ECC_INFORMATION_BITS = (ECC_ENABLE == 2)? 
max_information_bits(wb_data_bits) : max_information_bits(wb_data_bits/8); @@ -428,11 +437,12 @@ module ddr3_controller #( /************************************************************* Registers and Wires *************************************************************/ integer index; - (* mark_debug ="true" *) reg[4:0] instruction_address = 0; //address for accessing rom instruction - reg[27:0] instruction = INITIAL_RESET_INSTRUCTION; //instruction retrieved from reset instruction rom - reg[ DELAY_COUNTER_WIDTH - 1:0] delay_counter = INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0]; //counter used for delays - reg delay_counter_is_zero = (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0] == 0); //counter is now zero so retrieve next delay - reg reset_done = 0; //high if reset has already finished + (* mark_debug ="true" *) reg[4:0] instruction_address = 0, instruction_address_d; //address for accessing rom instruction + reg[27:0] instruction = INITIAL_RESET_INSTRUCTION, instruction_d; //instruction retrieved from reset instruction rom + reg[ DELAY_COUNTER_WIDTH - 1:0] delay_counter = INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0], delay_counter_d; //counter used for delays + reg delay_counter_is_zero = (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0] == 0), delay_counter_is_zero_d; //counter is now zero so retrieve next delay + reg reset_done = 0, reset_done_d; //high if reset has already finished + reg precharge_all_instruction, precharge_all_instruction_d; reg pause_counter = 0; wire issue_read_command; reg stage2_update = 1; @@ -444,9 +454,9 @@ module ddr3_controller #( // ECC_ENABLE = 3 regs /* verilator lint_off UNUSEDSIGNAL */ - reg[BA_BITS-1:0] ecc_bank_addr = 0, ecc_bank_addr_prev = 0; - reg[ROW_BITS-1:0] ecc_row_addr = 0, ecc_row_addr_prev = 0; - reg[COL_BITS-1:0] ecc_col_addr = 0, ecc_col_addr_prev = 0; + reg[BA_BITS-1:0] ecc_bank_addr = 0, ecc_bank_addr_prev = 0, ecc_bank_addr_d, ecc_bank_addr_prev_d; + reg[ROW_BITS-1:0] ecc_row_addr = 
0, ecc_row_addr_prev = 0, ecc_row_addr_d, ecc_row_addr_prev_d; + reg[COL_BITS-1:0] ecc_col_addr = 0, ecc_col_addr_prev = 0, ecc_col_addr_d, ecc_col_addr_prev_d; reg we_prev; reg stage0_pending = 0; reg[wb_addr_bits - 1:0] stage0_addr = 0; @@ -472,43 +482,43 @@ module ddr3_controller #( reg[wb_sel_bits - 1 : 0] stage2_ecc_write_data_mask_q = 0, stage2_ecc_write_data_mask_d; wire[wb_data_bits/8 - 1 : 0] decoded_parity; wire[wb_data_bits/8 - 1 : 0] encoded_parity; - reg[wb_data_bits/8 - 1 : 0] stage2_encoded_parity = 0; + reg[wb_data_bits/8 - 1 : 0] stage2_encoded_parity = 0, stage2_encoded_parity_d; reg ecc_req_stage2 = 0; /* verilator lint_on UNUSEDSIGNAL */ //pipeline stage 1 regs - reg stage1_pending = 0; - reg[AUX_WIDTH-1:0] stage1_aux = 0; - reg stage1_we = 0; - reg[wb_data_bits - 1:0] stage1_data = 0; + reg stage1_pending = 0, stage1_pending_d; + reg[AUX_WIDTH-1:0] stage1_aux = 0, stage1_aux_d; + reg stage1_we = 0, stage1_we_d; + reg[wb_data_bits - 1:0] stage1_data = 0, stage1_data_d; wire[wb_data_bits - 1:0] stage1_data_mux, stage1_data_encoded; - reg[wb_sel_bits - 1:0] stage1_dm = 0; - reg[COL_BITS-1:0] stage1_col = 0; - reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage1_bank = 0; - reg[ROW_BITS-1:0] stage1_row = 0; - reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage1_next_bank = 0; - reg[ROW_BITS-1:0] stage1_next_row = 0; + reg[wb_sel_bits - 1:0] stage1_dm = 0, stage1_dm_d; + reg[COL_BITS-1:0] stage1_col = 0, stage1_col_d; + reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage1_bank = 0, stage1_bank_d; + reg[ROW_BITS-1:0] stage1_row = 0, stage1_row_d; + reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage1_next_bank = 0, stage1_next_bank_d; + reg[ROW_BITS-1:0] stage1_next_row = 0, stage1_next_row_d; wire[wb_addr_bits-1:0] wb_addr_plus_anticipate, calib_addr_plus_anticipate; //pipeline stage 2 regs - reg stage2_pending = 0; - reg[AUX_WIDTH-1:0] stage2_aux = 0; - reg stage2_we = 0; - reg[wb_sel_bits - 1:0] stage2_dm_unaligned = 0, stage2_dm_unaligned_temp = 0; + reg stage2_pending = 0, stage2_pending_d; + 
reg[AUX_WIDTH-1:0] stage2_aux = 0, stage2_aux_d; + reg stage2_we = 0, stage2_we_d; + reg[wb_sel_bits - 1:0] stage2_dm_unaligned = 0, stage2_dm_unaligned_temp = 0, stage2_dm_unaligned_d, stage2_dm_unaligned_temp_d; reg[wb_sel_bits - 1:0] stage2_dm[STAGE2_DATA_DEPTH-1:0]; - reg[wb_data_bits - 1:0] stage2_data_unaligned = 0, stage2_data_unaligned_temp = 0; + reg[wb_data_bits - 1:0] stage2_data_unaligned = 0, stage2_data_unaligned_temp = 0, stage2_data_unaligned_d, stage2_data_unaligned_temp_d; reg[wb_data_bits - 1:0] stage2_data[STAGE2_DATA_DEPTH-1:0]; reg [DQ_BITS*8 - 1:0] unaligned_data[LANES-1:0]; reg [8 - 1:0] unaligned_dm[LANES-1:0]; - reg[COL_BITS-1:0] stage2_col = 0; - reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage2_bank = 0; - reg[ROW_BITS-1:0] stage2_row = 0; - + reg[COL_BITS-1:0] stage2_col = 0, stage2_col_d; + reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage2_bank = 0, stage2_bank_d; + reg[ROW_BITS-1:0] stage2_row = 0, stage2_row_d; + //delay counter for every banks - reg[3:0] delay_before_precharge_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_precharge_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0]; //delay counters - reg[3:0] delay_before_activate_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_activate_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ; - reg[3:0] delay_before_write_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_write_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ; - reg[3:0] delay_before_read_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] , delay_before_read_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ; + reg[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0] delay_before_precharge_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_precharge_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0]; //delay counters + reg[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0] delay_before_activate_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_activate_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ; + reg[$clog2(MAX_DELAY_BEFORE_WRITE):0] 
delay_before_write_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_write_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ; + reg[$clog2(MAX_DELAY_BEFORE_READ):0] delay_before_read_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] , delay_before_read_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ; //commands to be sent to PHY (4 slots per controller clk cycle) reg[cmd_len-1:0] cmd_d[3:0]; @@ -517,7 +527,7 @@ module ddr3_controller #( end reg cmd_odt_q = 0, cmd_odt, cmd_reset_n; reg[DUAL_RANK_DIMM:0] cmd_ck_en, prev_cmd_ck_en; - reg o_wb_stall_q = 1, o_wb_stall_d, o_wb_stall_calib = 1; + reg o_wb_stall_int_q = 1, o_wb_stall_int_d, o_wb_stall_calib; reg precharge_slot_busy; reg activate_slot_busy; reg[1:0] write_dqs_q; @@ -547,8 +557,8 @@ module ddr3_controller #( /* verilator lint_off UNUSEDSIGNAL */ reg[15:0] dqs_bitslip_arrangement = 0; /* verilator lint_off UNUSEDSIGNAL */ - reg[3:0] added_read_pipe_max = 0; - reg[3:0] added_read_pipe[LANES - 1:0]; + reg added_read_pipe_max = 0; + reg added_read_pipe[LANES - 1:0]; //each lane will have added delay relative to when ISERDES should actually return the data //this make sure that we will wait until the lane with longest delay (added_read_pipe_max) is received before //all lanes are sent to wishbone data @@ -559,7 +569,7 @@ module ddr3_controller #( reg[$clog2(READ_ACK_PIPE_WIDTH-1):0] write_ack_index_q = 1, write_ack_index_d = 1; reg index_read_pipe; //tells which delay_read_pipe will be updated (there are two delay_read_pipe) reg index_wb_data; //tells which o_wb_data_q will be sent to o_wb_data - reg[15:0] delay_read_pipe[1:0]; //delay when each lane will retrieve i_phy_iserdes_data (since different lanes might not be aligned with each other and needs to be retrieved at a different time) + reg[1:0] delay_read_pipe[1:0]; //delay when each lane will retrieve i_phy_iserdes_data (since different lanes might not be aligned with each other and needs to be retrieved at a different time) reg[wb_data_bits - 1:0] 
o_wb_data_q[1:0]; //store data retrieved from i_phy_iserdes_data to be sent to o_wb_data wire[wb_data_bits - 1:0] o_wb_data_q_current; reg[wb_data_bits - 1:0] o_wb_data_q_q; @@ -581,6 +591,7 @@ module ddr3_controller #( reg prev_write_level_feedback = 1; reg[wb_data_bits-1:0] read_data_store = 0; reg[127:0] write_pattern = 0; + reg[63:0] write_pattern_lane = 0; reg[$clog2(64):0] data_start_index[LANES-1:0]; reg[LANES-1:0] lane_write_dq_late = 0; reg[LANES-1:0] lane_read_dq_early = 0; @@ -593,6 +604,7 @@ module ddr3_controller #( reg stored_write_level_feedback = 0; reg[5:0] start_index_check = 0; reg[63:0] read_lane_data = 0; + reg[31:0] read_lane_data_shifted = 0; reg odelay_cntvalue_halfway = 0; reg initial_calibration_done = 0; reg final_calibration_done = 0; @@ -650,9 +662,24 @@ module ddr3_controller #( reg[wb_data_bits-1:0] wrong_data = 0, expected_data=0; wire[wb_data_bits-1:0] correct_data; reg[LANES-1:0] late_dq; + reg stage2_do_wr_or_rd, stage2_do_wr_or_rd_d; + reg stage2_do_wr, stage2_do_wr_d; + reg stage2_do_update_delay_before_precharge_after_wr, stage2_do_update_delay_before_precharge_after_wr_d; + reg stage2_do_rd, stage2_do_rd_d; + reg stage2_do_update_delay_before_precharge_after_rd, stage2_do_update_delay_before_precharge_after_rd_d; + reg stage2_do_act, stage2_do_act_d; + reg stage2_do_update_delay_before_read_after_act, stage2_do_update_delay_before_read_after_act_d; + reg stage2_do_update_delay_before_write_after_act, stage2_do_update_delay_before_write_after_act_d; + reg stage2_do_pre, stage2_do_pre_d; + reg stage1_do_pre, stage1_do_pre_d; + reg stage1_do_act, stage1_do_act_d; + reg force_o_wb_stall_high_q, force_o_wb_stall_high_d; + reg force_o_wb_stall_calib_high_q, force_o_wb_stall_calib_high_d; + reg[1:0] prep_done; + reg write_pattern_matches; + // initial block for all regs initial begin - o_wb_stall = 1; for(index = 0; index < MAX_ADDED_READ_ACK_DELAY; index = index + 1) begin o_wb_ack_read_q[index] = 0; end @@ -838,60 +865,88 @@ module 
ddr3_controller #( always @(posedge i_controller_clk) begin if(sync_rst_controller) begin - instruction_address <= 0; `ifdef FORMAL_COVER instruction_address <= 21; + `else + instruction_address <= 0; `endif instruction <= INITIAL_RESET_INSTRUCTION; delay_counter <= INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0]; delay_counter_is_zero <= (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0] == 0); reset_done <= 1'b0; + precharge_all_instruction <= 1'b0; end else begin - //update counter after reaching zero - if(delay_counter_is_zero) begin - delay_counter <= instruction[DELAY_COUNTER_WIDTH - 1:0]; //retrieve delay value of current instruction, we count to zero thus minus 1 - end - - //else: decrement delay counter when current instruction needs delay - //don't decrement (has infinite time) when last bit of - //delay_counter is 1 (for r/w calibration and prestall delay) - //address will only move forward for these kinds of delay only - //when skip_reset_seq_delay is toggled - else if(instruction[USE_TIMER] /*&& delay_counter != {(DELAY_COUNTER_WIDTH){1'b1}}*/ && !pause_counter && delay_counter != 0) delay_counter <= delay_counter - 1; - - //delay_counter of 1 means we will need to update the delay_counter next clock cycle (delay_counter of zero) so we need to retrieve - //now the next instruction. The same thing needs to be done when current instruction does not need the timer delay. 
- if( ((delay_counter == 1) && !pause_counter) || !instruction[USE_TIMER]/* || skip_reset_seq_delay*/) begin - delay_counter_is_zero <= 1; - instruction <= read_rom_instruction(instruction_address); - if(instruction_address == 5'd22) begin // if user_self_refresh is disabled, wrap back to 19 (Precharge All before Refresh) - instruction_address <= 5'd19; - end - else if(instruction_address == 5'd26) begin // self-refresh exit always wraps back to 20 (Refresh) - instruction_address <= 5'd20; - end - else begin - instruction_address <= instruction_address + 5'd1; // just increment address - end - end - //we are now on the middle of a delay - else begin - delay_counter_is_zero <=0; - end - - if(instruction_address == 5'd22 && user_self_refresh_q) begin // if user_self_refresh is enabled, go straight to 23 - instruction_address <= 23; // go to Precharge All for Self-refresh - delay_counter_is_zero <= 1; - delay_counter <= 0; - instruction <= read_rom_instruction(instruction_address); - end - - //instruction[RST_DONE] is non-persistent thus we need to register it once it goes high - reset_done <= instruction[RST_DONE]? 
1'b1:reset_done; + instruction_address <= instruction_address_d; + instruction <= instruction_d; + delay_counter <= delay_counter_d; + delay_counter_is_zero <= delay_counter_is_zero_d; + reset_done <= reset_done_d; + precharge_all_instruction <= precharge_all_instruction_d; end end + always @* begin + instruction_address_d = instruction_address; + instruction_d = instruction; + delay_counter_d = delay_counter; + delay_counter_is_zero_d = delay_counter_is_zero; + reset_done_d = reset_done; + + //update counter after reaching zero + if(delay_counter_is_zero) begin + //retrieve delay value of current instruction, we count to zero thus minus 1 + delay_counter_d = instruction[DELAY_COUNTER_WIDTH - 1:0]; + end + //else: decrement delay counter when current instruction needs delay + //don't decrement (has infinite time) when last bit of + //delay_counter is 1 (for r/w calibration and prestall delay) + //address will only move forward for these kinds of delay only + //when skip_reset_seq_delay is toggled + else if(instruction[USE_TIMER] /*&& delay_counter != {(DELAY_COUNTER_WIDTH){1'b1}}*/ && !pause_counter && delay_counter != 0) begin + delay_counter_d = delay_counter - 1; + end + + //delay_counter of 1 means we will need to update the delay_counter next clock cycle (delay_counter of zero) so we need to retrieve + //now the next instruction. The same thing needs to be done when current instruction does not need the timer delay. 
+ if( ((delay_counter == 1) && !pause_counter) || !instruction[USE_TIMER]/* || skip_reset_seq_delay*/) begin + delay_counter_is_zero_d = 1; + instruction_d = read_rom_instruction(instruction_address); + if(instruction_address == 5'd22) begin + // if user_self_refresh is disabled, wrap back to 19 (Precharge All before Refresh) + instruction_address_d = 5'd19; + end + else if(instruction_address == 5'd26) begin + // self-refresh exit always wraps back to 20 (Refresh) + instruction_address_d = 5'd20; + end + else begin + // just increment address + instruction_address_d = instruction_address + 5'd1; // just increment address + end + end + //we are now on the middle of a delay + else begin + delay_counter_is_zero_d =0; + end + + // if user_self_refresh is enabled, go straight to 23 + if(instruction_address == 5'd22 && user_self_refresh_q) begin + // go to Precharge All for Self-refresh (23) + instruction_address_d = 23; + delay_counter_is_zero_d = 1; + delay_counter_d = 0; + instruction_d = read_rom_instruction(instruction_address); + end + + //instruction[RST_DONE] is non-persistent thus we need to register it once it goes high + reset_done_d = instruction[RST_DONE]? 
1 : reset_done; + + // instruction is at precharge all (20 or 24) + precharge_all_instruction_d = instruction_address_d == 20 || instruction_address_d == 24; + end + + // register user-enabled self-refresh always @(posedge i_controller_clk) begin user_self_refresh_q <= i_user_self_refresh && (user_self_refresh_q || (instruction_address != 5'd26)) && final_calibration_done; //will not go high again if already at instruction_address 26 (self-refresh exit), only go high when calibration is done @@ -900,7 +955,6 @@ module ddr3_controller #( user_self_refresh_q <= 1'b1; end end - end /*********************************************************************************************************************************************/ @@ -909,9 +963,7 @@ module ddr3_controller #( //process request transaction always @(posedge i_controller_clk) begin if(sync_rst_controller) begin - o_wb_stall <= 1'b1; - o_wb_stall_q <= 1'b1; - o_wb_stall_calib <= 1'b1; + o_wb_stall_int_q <= 1'b1; //set stage 1 to 0 stage1_pending <= 0; stage1_aux <= 0; @@ -957,283 +1009,68 @@ module ddr3_controller #( end //reset bank status and active row for( index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin - bank_status_q[index] <= 0; - bank_active_row_q[index] <= 0; + bank_status_q[index] <= 0; + bank_active_row_q[index] <= 0; end //reset data for(index = 0; index < STAGE2_DATA_DEPTH; index = index+1) begin stage2_data[index] <= 0; stage2_dm[index] <= 0; end + for(index=0; index shiftreg(CWL) -> OSERDES(DDR) -> ODELAY -> RAM - end - if(!ODELAY_SUPPORTED && !DLL_OFF) begin - stage2_data_unaligned <= stage2_data_unaligned_temp; //_temp is for added delay of 1 clock cycle (no ODELAY so no added delay) - stage2_dm_unaligned <= stage2_dm_unaligned_temp; //_temp is for added delay of 1 clock cycle (no ODELAY so no added delay) - end - - if(stage1_update) begin - //stage1 will not do the request (pending low) when the - //request is on the same bank as the current request. 
This - //will ensure stage1 bank will be different from stage2 bank - - // if ECC_ENABLE != 3, then stage1 will always receive wishbone interface - if(ECC_ENABLE != 3) begin - stage1_pending <= i_wb_stb;//actual request flag - stage1_aux <= i_aux; //aux ID for AXI compatibility - stage1_we <= i_wb_we; //write-enable - stage1_dm <= (ECC_ENABLE == 0)? i_wb_sel : {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled - end - // ECC_ENABLE == 3 - else begin // if ECC_ENABLE = 3 (inline ECC), then stage1 will either receive stage0 or wishbone - stage1_pending <= wb_stb_mux;//actual request flag - stage1_aux <= aux_mux; //aux ID for AXI compatibility - stage1_we <= wb_we_mux; //write-enable - stage1_dm <= {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled - end - - if(row_bank_col == 1) begin // memory address mapping: {row, bank, col} - if(DUAL_RANK_DIMM[0]) begin - stage1_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= i_wb_addr[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank - stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= wb_addr_plus_anticipate[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank - end - stage1_row <= i_wb_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address - stage1_bank[BA_BITS-1:0] <= i_wb_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address - stage1_col <= { i_wb_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) - //stage1_next_bank will not increment unless stage1_next_col - //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
Thus, anticipated - //precharge and activate will happen only at the end of the - //current column with a margin dictated by - //MARGIN_BEFORE_ANTICIPATE - /* verilator lint_off WIDTH */ - {stage1_next_row , stage1_next_bank[BA_BITS-1:0]} <= wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); - //anticipated next row and bank to be accessed - /* verilator lint_on WIDTH */ - stage1_data <= i_wb_data; - end - - else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col} - stage1_bank[BA_BITS-1:0] <= i_wb_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address - stage1_row <= i_wb_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address - stage1_col <= { i_wb_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) - //stage1_next_row will not increment unless stage1_next_col - //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
Thus, anticipated - //precharge and activate will happen only at the end of the - //current column with a margin dictated by - //MARGIN_BEFORE_ANTICIPATE - /* verilator lint_off WIDTH */ - {stage1_next_bank, stage1_next_row} <= wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); - //anticipated next row and bank to be accessed - /* verilator lint_on WIDTH */ - stage1_data <= i_wb_data; - end - - else if(row_bank_col == 2) begin // memory address mapping: {bank[2:1], row, bank[0], col} , used for ECC_ENABLE = 3 (Inline ECC) - stage1_bank[2:1] <= wb_addr_mux[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)]; //bank_address - stage1_row <= wb_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1) ]; //row_address - stage1_bank[0] <= wb_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]; - stage1_col <= { wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) - //stage1_next_bank will not increment unless stage1_next_col - //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
This will overwrap every two banks - //MARGIN_BEFORE_ANTICIPATE - /* verilator lint_off WIDTH */ - {stage1_next_bank[2:1], stage1_next_row, stage1_next_bank[0]} <= wb_addr_plus_anticipate >> (COL_BITS - $clog2(serdes_ratio*2)); - //anticipated next row and bank to be accessed - /* verilator lint_on WIDTH */ - // ECC Mapping (Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=0#gid=0) - ecc_bank_addr <= {2'b11,!wb_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]}; - ecc_row_addr <= {1'b1, wb_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1 + 1) ]}; - ecc_col_addr <= { wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) + 1)] , - wb_addr_mux[(BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)] , - wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) - 1) : 3], 3'b000 }; - stage1_data <= wb_data_mux; - end - end - - // request from calibrate FSM will be accepted here - else if(stage1_update_calib) begin - // if ECC_ENABLE != 3, then stage1 will always receive wishbone interface - if(ECC_ENABLE != 3) begin - stage1_pending <= calib_stb;//actual request flag - stage1_aux <= calib_aux; //aux ID for AXI compatibility - stage1_we <= calib_we; //write-enable - stage1_dm <= (ECC_ENABLE == 0)? calib_sel : {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled - end - // ECC_ENABLE == 3 - else begin // if ECC_ENABLE = 3 (inline ECC), then stage1 will either receive stage0 or wishbone - stage1_pending <= calib_stb_mux;//actual request flag - stage1_we <= calib_we_mux; //write-enable - stage1_dm <= {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled - stage1_aux <= calib_aux_mux; //aux ID for AXI compatibility - end - - if(row_bank_col == 1) begin // memory address mapping: {row, bank, col} - if(DUAL_RANK_DIMM[0]) begin - stage1_bank[(DUAL_RANK_DIMM[0]? 
BA_BITS : 0)] <= current_rank; // rank depends on current_rank - stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= current_rank; // rank depends on current_rank - end - stage1_row <= calib_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address - stage1_bank[BA_BITS-1:0] <= calib_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address - stage1_col <= { calib_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned) - //stage1_next_bank will not increment unless stage1_next_col - //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated - //precharge and activate will happen only at the end of the - //current column with a margin dictated by - //MARGIN_BEFORE_ANTICIPATE - /* verilator lint_off WIDTH */ - {stage1_next_row , stage1_next_bank[BA_BITS-1:0] } <= calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); - //anticipated next row and bank to be accessed - /* verilator lint_on WIDTH */ - stage1_data <= calib_data; - end - else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col} - stage1_bank[BA_BITS-1:0] <= calib_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address - stage1_row <= calib_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address - stage1_col <= { calib_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned) - //stage1_next_row will not increment unless stage1_next_col - //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
Thus, anticipated - //precharge and activate will happen only at the end of the - //current column with a margin dictated by - //MARGIN_BEFORE_ANTICIPATE - /* verilator lint_off WIDTH */ - {stage1_next_bank, stage1_next_row} <= calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); - //anticipated next row and bank to be accessed - /* verilator lint_on WIDTH */ - stage1_data <= calib_data; - end - else if(row_bank_col == 2) begin // memory address mapping: {bank[2:1], row, bank[0], col} - stage1_bank[2:1] <= calib_addr_mux[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)]; //bank_address - stage1_row <= calib_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1) ]; //row_address - stage1_bank[0] <= calib_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]; - stage1_col <= { calib_addr_mux[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) - //stage1_next_row will not increment unless stage1_next_col - //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
This will overwrap every two banks - //MARGIN_BEFORE_ANTICIPATE - /* verilator lint_off WIDTH */ - {stage1_next_bank[2:1], stage1_next_row, stage1_next_bank[0]} <= calib_addr_plus_anticipate >> (COL_BITS - $clog2(serdes_ratio*2)); - //anticipated next row and bank to be accessed - /* verilator lint_on WIDTH */ - // ECC Mapping (Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=0#gid=0) - // ECC_BANK = {11,!bank[0]} - // ECC_ROW = {1,row>>1} - // ECC_COL = {row[0],bank[2:1],col>>3}" - ecc_bank_addr <= {2'b11,!calib_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]}; - ecc_row_addr <= {1'b1, calib_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1 + 1) ]}; - ecc_col_addr <= { calib_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) + 1)] , - calib_addr_mux[(BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)] , - calib_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) - 1) : 3], 3'b000 }; - stage1_data <= calib_data_mux; - end - end - // stage2 can have multiple pipelined stages inside it which acts as delay before issuing the write data (after issuing write command) for(index = 0; index < STAGE2_DATA_DEPTH-1; index = index+1) begin - stage2_data[index+1] <= stage2_data[index]; // 0->1, 1->2 + stage2_data[index+1] <= stage2_data[index]; // 0->1, 1->2 stage2_dm[index+1] <= stage2_dm[index]; end @@ -1328,20 +1165,341 @@ module ddr3_controller #( /* verilator lint_on WIDTH */ end // end for else statement (dq is not late for this lane) end // end of for loop to forward stage2_unaligned to stage2 by lane - - //abort any outgoing ack when cyc is low - if(!i_wb_cyc && final_calibration_done) begin - stage2_pending <= 0; - stage1_pending <= 0; + end + end + + always @* begin + // stage 1 + stage1_pending_d = stage1_pending; + stage1_aux_d = stage1_aux; + stage1_we_d = stage1_we; + stage1_dm_d = stage1_dm; + 
stage1_col_d = stage1_col; + stage1_bank_d = stage1_bank; + stage1_row_d = stage1_row; + stage1_next_bank_d = stage1_next_bank; + stage1_next_row_d = stage1_next_row; + stage1_data_d = stage1_data; + // stage 2 + stage2_pending_d = stage2_pending; + stage2_aux_d = stage2_aux; + stage2_we_d = stage2_we; + stage2_col_d = stage2_col; + stage2_bank_d = stage2_bank; + stage2_row_d = stage2_row; + stage2_data_unaligned_d = stage2_data_unaligned; + stage2_data_unaligned_temp_d = stage2_data_unaligned_temp; + stage2_dm_unaligned_d = stage2_dm_unaligned; + stage2_dm_unaligned_temp_d = stage2_dm_unaligned_temp; + if(ECC_ENABLE == 3) begin + ecc_col_addr_prev_d = ecc_col_addr_prev; + ecc_bank_addr_prev_d = ecc_bank_addr_prev; + ecc_row_addr_prev_d = ecc_row_addr_prev; + ecc_bank_addr_d = ecc_bank_addr; + ecc_row_addr_d = ecc_row_addr; + ecc_col_addr_d = ecc_col_addr; + stage2_encoded_parity_d = stage2_encoded_parity; + end + + ///////////////////////////////////////// + // Stage 2 + ///////////////////////////////////////// + //if pipeline is not stalled (or a request is left on the prestall + //delay address 19 or if in calib), move pipeline to stage 2 + if(stage2_update) begin //ITS POSSIBLE ONLY NEXT CLK WILL STALL SUPPOSE TO GO LOW + stage2_pending_d = stage1_pending; + if(ECC_ENABLE != 3) begin + stage1_pending_d = 1'b0; //no request initially unless overridden by the actual stb request + stage2_pending_d = stage1_pending; + stage2_aux_d = stage1_aux; + stage2_we_d = stage1_we; + stage2_col_d = stage1_col; + stage2_bank_d = stage1_bank; + stage2_row_d = stage1_row; + if(ODELAY_SUPPORTED || DLL_OFF) begin + stage2_data_unaligned_d = stage1_data_mux; + stage2_dm_unaligned_d = ~stage1_dm; //inverse each bit (1 must mean "masked" or not written) + end + else begin + stage2_data_unaligned_temp_d = stage1_data_mux; + stage2_dm_unaligned_temp_d = ~stage1_dm; //inverse each bit (1 must mean "masked" or not written) + end + end + // ECC_ENABLE == 3 + else begin + stage1_pending_d 
= ecc_stage1_stall? stage1_pending : 1'b0; //stage1 remains the same for ECC op (no request initially unless overridden by the actual stb request) + // if switching from write to read and ECC is not yet written then do a write first to store those ECC bits + if(!stage1_we && stage2_we && stage1_pending && !write_ecc_stored_to_mem_d && initial_calibration_done) begin + stage2_we_d = 1'b1; + // if ecc_stage1_stall, stage2 will start ECC write/read operation + // if ECC write, then we are writing ECC for previous address + // if ECC read, then we are reading ECC for current address + stage2_col_d = ecc_col_addr_prev; + stage2_bank_d[BA_BITS-1:0] = ecc_bank_addr_prev; + stage2_row_d = ecc_row_addr_prev; + ecc_col_addr_prev_d = ecc_col_addr; + ecc_bank_addr_prev_d = ecc_bank_addr; + ecc_row_addr_prev_d = ecc_row_addr; + // For ECC requests, 2MSB of aux determines type of ECC request (read = 2'10, write = 2'b11) + stage2_aux_d = { 1'b1, 1'b1, 3'b000, {(AUX_WIDTH-5){1'b1}} }; + end + // else pass stage 1 to stage 2 + else begin + stage2_we_d = stage1_we; + // if ecc_stage1_stall, stage2 will start ECC write/read operation + // if ECC write, then we are writing ECC for previous address + // if ECC read, then we are reading ECC for current address + stage2_col_d = ecc_stage1_stall? (stage1_we? ecc_col_addr_prev : ecc_col_addr) : stage1_col; + stage2_bank_d[BA_BITS-1:0] = ecc_stage1_stall? (stage1_we? ecc_bank_addr_prev : ecc_bank_addr) : stage1_bank[BA_BITS-1:0]; + stage2_row_d = ecc_stage1_stall? (stage1_we? ecc_row_addr_prev : ecc_row_addr) : stage1_row; + ecc_col_addr_prev_d = ecc_col_addr; + ecc_bank_addr_prev_d = ecc_bank_addr; + ecc_row_addr_prev_d = ecc_row_addr; + // For ECC requests, 2MSB of aux determines type of ECC request (read = 2'10, write = 2'b11) + // For non-ECC request (MSB is 0), next 3MSB is allotted for the column (burst position to know position of encoded parity ECC bits) + stage2_aux_d = ecc_stage1_stall? 
{ 1'b1, !stage1_we, 3'b000, {(AUX_WIDTH-5){1'b1}} } : {1'b0, !stage1_we, stage1_col[5:3], stage1_aux[AUX_WIDTH-6:0]}; + end + // store parity code for stage1_data + stage2_encoded_parity_d = encoded_parity; + if(ODELAY_SUPPORTED || DLL_OFF) begin + stage2_data_unaligned_d = stage1_data_mux; + stage2_dm_unaligned_d = ecc_stage1_stall? ~stage2_ecc_write_data_mask_d : ~stage1_dm; //inverse each bit (1 must mean "masked" or not written) + end + else begin + stage2_data_unaligned_temp_d = stage1_data_mux; + stage2_dm_unaligned_temp_d = ecc_stage1_stall? ~stage2_ecc_write_data_mask_d : ~stage1_dm; //inverse each bit (1 must mean "masked" or not written) + end + end + // pipeline: stage2_data -> shiftreg(CWL) -> OSERDES(DDR) -> ODELAY -> RAM + end + + if(!ODELAY_SUPPORTED && !DLL_OFF) begin + //_temp is for added delay of 1 clock cycle (no ODELAY so no added delay) + stage2_data_unaligned_d = stage2_data_unaligned_temp; + stage2_dm_unaligned_d = stage2_dm_unaligned_temp; + end + + ///////////////////////////////////////// + // Stage 1 + ///////////////////////////////////////// + if(stage1_update && reset_done) begin + //stage1 will not do the request (pending low) when the + //request is on the same bank as the current request. This + //will ensure stage1 bank will be different from stage2 bank + + // if ECC_ENABLE != 3, then stage1 will always receive wishbone interface + if(ECC_ENABLE != 3) begin + stage1_pending_d = i_wb_stb;//actual request flag + stage1_aux_d = i_aux; //aux ID for AXI compatibility + stage1_we_d = i_wb_we; //write-enable + stage1_dm_d = (ECC_ENABLE == 0)? 
i_wb_sel : {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled + end + // ECC_ENABLE == 3 + else begin // if ECC_ENABLE = 3 (inline ECC), then stage1 will either receive stage0 or wishbone + stage1_pending_d = wb_stb_mux;//actual request flag + stage1_aux_d = aux_mux; //aux ID for AXI compatibility + stage1_we_d = wb_we_mux; //write-enable + stage1_dm_d = {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled + end + + if(row_bank_col == 1) begin // memory address mapping: {row, bank, col} + if(DUAL_RANK_DIMM[0]) begin + stage1_bank_d[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] = i_wb_addr[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank + stage1_next_bank_d[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] = wb_addr_plus_anticipate[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank + end + stage1_row_d = i_wb_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address + stage1_bank_d[BA_BITS-1:0] = i_wb_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address + stage1_col_d = { i_wb_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) + //stage1_next_bank will not increment unless stage1_next_col + //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
Thus, anticipated + //precharge and activate will happen only at the end of the + //current column with a margin dictated by + //MARGIN_BEFORE_ANTICIPATE + /* verilator lint_off WIDTH */ + {stage1_next_row_d , stage1_next_bank_d[BA_BITS-1:0]} = wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); + //anticipated next row and bank to be accessed + /* verilator lint_on WIDTH */ + stage1_data_d = i_wb_data; + end + + else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col} + stage1_bank_d[BA_BITS-1:0] = i_wb_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address + stage1_row_d = i_wb_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address + stage1_col_d = { i_wb_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) + //stage1_next_row will not increment unless stage1_next_col + //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
Thus, anticipated + //precharge and activate will happen only at the end of the + //current column with a margin dictated by + //MARGIN_BEFORE_ANTICIPATE + /* verilator lint_off WIDTH */ + {stage1_next_bank_d, stage1_next_row_d} = wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); + //anticipated next row and bank to be accessed + /* verilator lint_on WIDTH */ + stage1_data_d = i_wb_data; + end + + else if(row_bank_col == 2) begin // memory address mapping: {bank[2:1], row, bank[0], col} , used for ECC_ENABLE = 3 (Inline ECC) + stage1_bank_d[2:1] = wb_addr_mux[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)]; //bank_address + stage1_row_d = wb_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1) ]; //row_address + stage1_bank_d[0] = wb_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]; + stage1_col_d = { wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) + //stage1_next_bank will not increment unless stage1_next_col + //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
This will overwrap every two banks + //MARGIN_BEFORE_ANTICIPATE + /* verilator lint_off WIDTH */ + {stage1_next_bank_d[2:1], stage1_next_row_d, stage1_next_bank_d[0]} = wb_addr_plus_anticipate >> (COL_BITS - $clog2(serdes_ratio*2)); + //anticipated next row and bank to be accessed + /* verilator lint_on WIDTH */ + // ECC Mapping (Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=0#gid=0) + ecc_bank_addr_d = {2'b11,!wb_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]}; + ecc_row_addr_d = {1'b1, wb_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1 + 1) ]}; + ecc_col_addr_d = { wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) + 1)] , + wb_addr_mux[(BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)] , + wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) - 1) : 3], 3'b000 }; + stage1_data_d = wb_data_mux; end end - end - always @* begin - for(index = 0; index < LANES; index = index + 1) begin - late_dq[index] = (lane_write_dq_late[index] && (data_start_index[index] != 0)) && (STAGE2_DATA_DEPTH > 1); + + // request from calibrate FSM will be accepted here + else if(stage1_update_calib && reset_done) begin + // if ECC_ENABLE != 3, then stage1 will always receive wishbone interface + if(ECC_ENABLE != 3) begin + stage1_pending_d = calib_stb;//actual request flag + stage1_aux_d = calib_aux; //aux ID for AXI compatibility + stage1_we_d = calib_we; //write-enable + stage1_dm_d = (ECC_ENABLE == 0)? 
calib_sel : {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled + end + // ECC_ENABLE == 3 + else begin // if ECC_ENABLE = 3 (inline ECC), then stage1 will either receive stage0 or wishbone + stage1_pending_d = calib_stb_mux;//actual request flag + stage1_we_d = calib_we_mux; //write-enable + stage1_dm_d = {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled + stage1_aux_d = calib_aux_mux; //aux ID for AXI compatibility + end + + if(row_bank_col == 1) begin // memory address mapping: {row, bank, col} + if(DUAL_RANK_DIMM[0]) begin + stage1_bank_d[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] = current_rank; // rank depends on current_rank + stage1_next_bank_d[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] = current_rank; // rank depends on current_rank + end + stage1_row_d = calib_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address + stage1_bank_d[BA_BITS-1:0] = calib_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address + stage1_col_d = { calib_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned) + //stage1_next_bank will not increment unless stage1_next_col + //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
Thus, anticipated + //precharge and activate will happen only at the end of the + //current column with a margin dictated by + //MARGIN_BEFORE_ANTICIPATE + /* verilator lint_off WIDTH */ + {stage1_next_row_d , stage1_next_bank_d[BA_BITS-1:0] } = calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); + //anticipated next row and bank to be accessed + /* verilator lint_on WIDTH */ + stage1_data_d = calib_data; + end + else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col} + stage1_bank_d[BA_BITS-1:0] = calib_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address + stage1_row_d = calib_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address + stage1_col_d = { calib_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned) + //stage1_next_row will not increment unless stage1_next_col + //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
Thus, anticipated + //precharge and activate will happen only at the end of the + //current column with a margin dictated by + //MARGIN_BEFORE_ANTICIPATE + /* verilator lint_off WIDTH */ + {stage1_next_bank_d, stage1_next_row_d} = calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2)); + //anticipated next row and bank to be accessed + /* verilator lint_on WIDTH */ + stage1_data_d = calib_data; + end + else if(row_bank_col == 2) begin // memory address mapping: {bank[2:1], row, bank[0], col} + stage1_bank_d[2:1] = calib_addr_mux[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)]; //bank_address + stage1_row_d = calib_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1) ]; //row_address + stage1_bank_d[0] = calib_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]; + stage1_col_d = { calib_addr_mux[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned) + //stage1_next_row will not increment unless stage1_next_col + //overwraps due to MARGIN_BEFORE_ANTICIPATE. 
This will overwrap every two banks + //MARGIN_BEFORE_ANTICIPATE + /* verilator lint_off WIDTH */ + {stage1_next_bank_d[2:1], stage1_next_row_d, stage1_next_bank_d[0]} = calib_addr_plus_anticipate >> (COL_BITS - $clog2(serdes_ratio*2)); + //anticipated next row and bank to be accessed + /* verilator lint_on WIDTH */ + // ECC Mapping (Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=0#gid=0) + // ECC_BANK = {11,!bank[0]} + // ECC_ROW = {1,row>>1} + // ECC_COL = {row[0],bank[2:1],col>>3}" + ecc_bank_addr_d = {2'b11,!calib_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]}; + ecc_row_addr_d = {1'b1, calib_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1 + 1) ]}; + ecc_col_addr_d = { calib_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) + 1)] , + calib_addr_mux[(BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)] , + calib_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) - 1) : 3], 3'b000 }; + stage1_data_d = calib_data_mux; + end + end + + //abort any outgoing ack when cyc is low + if(!i_wb_cyc && final_calibration_done) begin + stage2_pending_d = 0; + stage1_pending_d = 0; end end + always @(posedge i_controller_clk) begin + for(index = 0; index < LANES; index = index + 1) begin + late_dq[index] <= (lane_write_dq_late[index] && (data_start_index[index] != 0)) && (STAGE2_DATA_DEPTH > 1); + end + end + + // Logic for registering the conditions used for the 2-stage pipeline logic + // to cut the timing path and achieve higher max frequency + always @(posedge i_controller_clk) begin + if(sync_rst_controller) begin + stage2_do_wr_or_rd <= 0; + stage2_do_wr <= 0; + stage2_do_update_delay_before_precharge_after_wr <= 0; + stage2_do_rd <= 0; + stage2_do_update_delay_before_precharge_after_rd <= 0; + stage2_do_act <= 0; + stage2_do_update_delay_before_read_after_act <= 0; +
stage2_do_update_delay_before_write_after_act <= 0; + stage2_do_pre <= 0; + + stage1_do_pre <= 0; + stage1_do_act <= 0; + end + else begin + // stage 2 conditions + stage2_do_wr_or_rd <= stage2_do_wr_or_rd_d; + stage2_do_wr <= stage2_do_wr_d; + stage2_do_update_delay_before_precharge_after_wr <= stage2_do_update_delay_before_precharge_after_wr_d; + stage2_do_rd <= stage2_do_rd_d; + stage2_do_update_delay_before_precharge_after_rd <= stage2_do_update_delay_before_precharge_after_rd_d; + stage2_do_act <= stage2_do_act_d; + stage2_do_update_delay_before_read_after_act <= stage2_do_update_delay_before_read_after_act_d; + stage2_do_update_delay_before_write_after_act <= stage2_do_update_delay_before_write_after_act_d; + stage2_do_pre <= stage2_do_pre_d; + // stage 1 conditions + stage1_do_pre <= stage1_do_pre_d; + stage1_do_act <= stage1_do_act_d; + end + end + + always @* begin + // stage 2 conditions + stage2_do_wr_or_rd_d = bank_status_d[stage2_bank_d] && bank_active_row_d[stage2_bank_d] == stage2_row_d; + stage2_do_wr_d = stage2_we_d && delay_before_write_counter_d[stage2_bank_d] == 0; + stage2_do_update_delay_before_precharge_after_wr_d = delay_before_precharge_counter_d[stage2_bank_d] <= WRITE_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0]; + stage2_do_rd_d = !stage2_we_d && delay_before_read_counter_d[stage2_bank_d] == 0; + stage2_do_update_delay_before_precharge_after_rd_d = delay_before_precharge_counter_d[stage2_bank_d] <= READ_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0]; + stage2_do_act_d = !bank_status_d[stage2_bank_d] && delay_before_activate_counter_d[stage2_bank_d] == 0; + stage2_do_update_delay_before_read_after_act_d = delay_before_read_counter_d[stage2_bank_d] <= ACTIVATE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0]; + stage2_do_update_delay_before_write_after_act_d = delay_before_write_counter_d[stage2_bank_d] <= ACTIVATE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0]; + stage2_do_pre_d = bank_status_d[stage2_bank_d] && 
bank_active_row_d[stage2_bank_d] != stage2_row_d && delay_before_precharge_counter_d[stage2_bank_d] == 0 ; + // stage 1 conditions + stage1_do_pre_d = bank_status_d[stage1_next_bank_d] && bank_active_row_d[stage1_next_bank_d] != stage1_next_row_d && delay_before_precharge_counter_d[stage1_next_bank_d] == 0; + stage1_do_act_d = !bank_status_d[stage1_next_bank_d] && delay_before_activate_counter_d[stage1_next_bank_d] == 0; + end + + // generate signals to be received by stage1 generate if(ECC_ENABLE == 3) begin : ecc_3_pipeline_control @@ -1351,7 +1509,7 @@ module ddr3_controller #( // AND ecc_stage1_stall low (if high then stage2 will have ECC operation while stage1 remains) assign stage0_update = ((i_wb_cyc && !o_wb_stall) || (!final_calibration_done && !o_wb_stall_calib)) && ecc_stage1_stall; // stage0 is only used when ECC will be inserted next cycle (stage1 must remain) assign stage1_update = ( (i_wb_cyc && !o_wb_stall) || (stage0_pending && !ecc_stage2_stall) ) && !ecc_stage1_stall; - assign stage1_update_calib = ( ((state_calibrate != DONE_CALIBRATE) && !o_wb_stall_calib) || (stage0_pending && !ecc_stage2_stall) ) && !ecc_stage1_stall; + assign stage1_update_calib = ( ((!final_calibration_done) && !o_wb_stall_calib) || (stage0_pending && !ecc_stage2_stall) ) && !ecc_stage1_stall; /* verilator lint_off WIDTH */ assign wb_addr_plus_anticipate = wb_addr_mux + MARGIN_BEFORE_ANTICIPATE; // wb_addr_plus_anticipate determines if it is near the end of column by checking if it jumps to next row assign calib_addr_plus_anticipate = calib_addr_mux + MARGIN_BEFORE_ANTICIPATE; // just same as wb_addr_plus_anticipate but while doing calibration @@ -1541,7 +1699,6 @@ module ddr3_controller #( stage2_stall = 1'b0; ecc_stage2_stall = 1'b0; stage2_update = 1'b1; //always update stage 2 UNLESS it has a pending request (stage2_pending high) - // o_wb_stall_d = 1'b0; //wb_stall going high is determined on stage 1 (higher priority), wb_stall going low is determined at stage2 (lower
priority) precharge_slot_busy = 0; //flag that determines if stage 2 is issuing precharge (thus stage 1 cannot issue precharge) activate_slot_busy = 0; //flag that determines if stage 2 is issuing activate (thus stage 1 cannot issue activate) write_dqs_d = write_calib_dqs; @@ -1614,15 +1771,60 @@ module ddr3_controller #( //USE _d in ALL //if there is a pending request, issue the appropriate commands if(stage2_pending) begin - stage2_stall = 1; //initially high when stage 2 is pending ecc_stage2_stall = 1; stage2_update = 0; + //bank is not idle but wrong row is activated so do precharge + if(stage2_do_pre) begin + precharge_slot_busy = 1'b1; + //set-up delay before activate + delay_before_activate_counter_d[stage2_bank] = PRECHARGE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0]; + //issue precharge command + if(DUAL_RANK_DIMM[0]) begin + cmd_d[PRECHARGE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[DUAL_RANK_DIMM[0]? 
9 : 8:0] } }; + end + else begin + cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[9:0] } }; + end + //update bank status and active row + bank_status_d[stage2_bank] = 1'b0; + end + + //bank is idle so activate it + else if(stage2_do_act) begin + activate_slot_busy = 1'b1; + // must meet TRRD (activate to activate delay) + for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the activate to activate delay applies to all banks + if(delay_before_activate_counter_q[index] <= ACTIVATE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0]) begin // if delay is > ACTIVATE_TO_ACTIVATE_DELAY, then updating it to the lower delay will cause the previous delay to be violated + delay_before_activate_counter_d[index] = ACTIVATE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0]; + end + end + + delay_before_precharge_counter_d[stage2_bank] = ACTIVATE_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0]; + + //set-up delay before read and write + if(stage2_do_update_delay_before_read_after_act) begin // if current delay is > ACTIVATE_TO_READ_DELAY, then updating it to the lower delay will cause the previous delay to be violated + delay_before_read_counter_d[stage2_bank] = ACTIVATE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0]; + end + if(stage2_do_update_delay_before_write_after_act) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated + delay_before_write_counter_d[stage2_bank] = ACTIVATE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0]; + end + //issue activate command + if(DUAL_RANK_DIMM[0]) begin + cmd_d[ACTIVATE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], stage2_row[(DUAL_RANK_DIMM[0]? 
ROW_BITS-1 : ROW_BITS-2):0]}; + end + else begin + cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank , stage2_row}; + end + //update bank status and active row + bank_status_d[stage2_bank] = 1'b1; + bank_active_row_d[stage2_bank] = stage2_row; + end + //right row is already active so go straight to read/write - if(bank_status_q[stage2_bank] && bank_active_row_q[stage2_bank] == stage2_row) begin //read/write operation + else if(stage2_do_wr_or_rd) begin //read/write operation //write request - if(stage2_we && delay_before_write_counter_q[stage2_bank] == 0) begin - stage2_stall = 0; + if(stage2_do_wr) begin ecc_stage2_stall = 0; stage2_update = 1; cmd_odt = 1'b1; @@ -1645,7 +1847,7 @@ module ddr3_controller #( // where the transaction can continue regardless when ack returns //set-up delay before precharge, read, and write - if(delay_before_precharge_counter_q[stage2_bank] <= WRITE_TO_PRECHARGE_DELAY) begin + if(stage2_do_update_delay_before_precharge_after_wr) begin //it is possible that the delay_before_precharge is //set to tRAS (activate to precharge delay). And if we //overwrite delay_before_precharge, we might overwrite @@ -1653,12 +1855,12 @@ module ddr3_controller #( //tRAS requirement. Thus, we must first check if the //delay_before_precharge is set to a value not more //than the WRITE_TO_PRECHARGE_DELAY - delay_before_precharge_counter_d[stage2_bank] = WRITE_TO_PRECHARGE_DELAY; + delay_before_precharge_counter_d[stage2_bank] = WRITE_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0]; end for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the write to read delay applies to all banks (odt must be turned off properly before reading) - delay_before_read_counter_d[index] = WRITE_TO_READ_DELAY + 1; //NOTE TO SELF: why plus 1? + delay_before_read_counter_d[index] = WRITE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0] + 'd1; //NOTE TO SELF: why plus 1? 
end - delay_before_write_counter_d[stage2_bank] = WRITE_TO_WRITE_DELAY; + delay_before_write_counter_d[stage2_bank] = WRITE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0]; //issue read command if(DUAL_RANK_DIMM[0]) begin if(COL_BITS <= 10) begin @@ -1711,19 +1913,17 @@ module ddr3_controller #( end //read request - else if(!stage2_we && delay_before_read_counter_q[stage2_bank]==0) begin - stage2_stall = 0; + else if(stage2_do_rd) begin ecc_stage2_stall = 0; stage2_update = 1; cmd_odt = 1'b0; //set-up delay before precharge, read, and write - if(delay_before_precharge_counter_q[stage2_bank] <= READ_TO_PRECHARGE_DELAY) begin - delay_before_precharge_counter_d[stage2_bank] = READ_TO_PRECHARGE_DELAY; + if(stage2_do_update_delay_before_precharge_after_rd) begin + delay_before_precharge_counter_d[stage2_bank] = READ_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0]; end - delay_before_read_counter_d[stage2_bank] = READ_TO_READ_DELAY; - delay_before_write_counter_d[stage2_bank] = READ_TO_WRITE_DELAY + 1; //temporary solution since its possible odt to go high already while reading previously + delay_before_read_counter_d[stage2_bank] = READ_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0]; for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the read to write delay applies to all banks (odt must be turned on properly before writing and this delay is for ODT to settle) - delay_before_write_counter_d[index] = READ_TO_WRITE_DELAY + 1; // NOTE TO SELF: why plus 1? + delay_before_write_counter_d[index] = READ_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0] + 'd1; // NOTE TO SELF: why plus 1? 
temporary solution since its possible odt to go high already while reading previously end // don't acknowledge if ECC request // higher shift_read_pipe means the earlier it will check data received from i_phy_iserdes_data @@ -1755,52 +1955,6 @@ module ddr3_controller #( cmd_d[3][CMD_ODT] = cmd_odt; end end - - //bank is idle so activate it - else if(!bank_status_q[stage2_bank] && delay_before_activate_counter_q[stage2_bank] == 0) begin - activate_slot_busy = 1'b1; - // must meet TRRD (activate to activate delay) - for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the activate to activate delay applies to all banks - if(delay_before_activate_counter_q[index] <= ACTIVATE_TO_ACTIVATE_DELAY) begin // if delay is > ACTIVATE_TO_ACTIVATE_DELAY, then updating it to the lower delay will cause the previous delay to be violated - delay_before_activate_counter_d[index] = ACTIVATE_TO_ACTIVATE_DELAY; - end - end - - delay_before_precharge_counter_d[stage2_bank] = ACTIVATE_TO_PRECHARGE_DELAY; - - //set-up delay before read and write - if(delay_before_read_counter_q[stage2_bank] <= ACTIVATE_TO_READ_DELAY) begin // if current delay is > ACTIVATE_TO_READ_DELAY, then updating it to the lower delay will cause the previous delay to be violated - delay_before_read_counter_d[stage2_bank] = ACTIVATE_TO_READ_DELAY; - end - if(delay_before_write_counter_q[stage2_bank] <= ACTIVATE_TO_WRITE_DELAY) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated - delay_before_write_counter_d[stage2_bank] = ACTIVATE_TO_WRITE_DELAY; - end - //issue activate command - if(DUAL_RANK_DIMM[0]) begin - cmd_d[ACTIVATE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], stage2_row[(DUAL_RANK_DIMM[0]? 
ROW_BITS-1 : ROW_BITS-2):0]}; - end - else begin - cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank , stage2_row}; - end - //update bank status and active row - bank_status_d[stage2_bank] = 1'b1; - bank_active_row_d[stage2_bank] = stage2_row; - end - //bank is not idle but wrong row is activated so do precharge - else if(bank_status_q[stage2_bank] && bank_active_row_q[stage2_bank] != stage2_row && delay_before_precharge_counter_q[stage2_bank] ==0) begin - precharge_slot_busy = 1'b1; - //set-up delay before activate - delay_before_activate_counter_d[stage2_bank] = PRECHARGE_TO_ACTIVATE_DELAY; - //issue precharge command - if(DUAL_RANK_DIMM[0]) begin - cmd_d[PRECHARGE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[DUAL_RANK_DIMM[0]? 9 : 8:0] } }; - end - else begin - cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[9:0] } }; - end - //update bank status and active row - bank_status_d[stage2_bank] = 1'b0; - end end //end of stage 2 pending // pending request on stage 1 @@ -1819,9 +1973,9 @@ module ddr3_controller #( //issue Activate and Precharge on the CURRENT bank. 
Else, stage //1 will issue Activate and Precharge for the NEXT bank // Thus stage 1 anticipate makes sure smooth burst operation that jumps banks - if(bank_status_q[stage1_next_bank] && bank_active_row_q[stage1_next_bank] != stage1_next_row && delay_before_precharge_counter_q[stage1_next_bank] ==0 && !precharge_slot_busy) begin + if(stage1_do_pre && !precharge_slot_busy) begin //set-up delay before read and write - delay_before_activate_counter_d[stage1_next_bank] = PRECHARGE_TO_ACTIVATE_DELAY; + delay_before_activate_counter_d[stage1_next_bank] = PRECHARGE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0]; if(DUAL_RANK_DIMM[0]) begin cmd_d[PRECHARGE_SLOT] = {!stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage1_next_row[(DUAL_RANK_DIMM[0]? 9 : 8):0] } }; end @@ -1832,22 +1986,22 @@ module ddr3_controller #( end //end of anticipate precharge //anticipated bank is idle so do activate - else if(!bank_status_q[stage1_next_bank] && delay_before_activate_counter_q[stage1_next_bank] == 0 && !activate_slot_busy) begin + else if(stage1_do_act && !activate_slot_busy) begin // must meet TRRD (activate to activate delay) for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the activate to activate delay applies to all banks - if(delay_before_activate_counter_d[index] <= ACTIVATE_TO_ACTIVATE_DELAY) begin // if delay is > ACTIVATE_TO_ACTIVATE_DELAY, then updating it to the lower delay will cause the previous delay to be violated - delay_before_activate_counter_d[index] = ACTIVATE_TO_ACTIVATE_DELAY; + if(delay_before_activate_counter_d[index] <= ACTIVATE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0]) begin // if delay is > ACTIVATE_TO_ACTIVATE_DELAY, then updating it to the lower delay will cause the previous delay to be violated + delay_before_activate_counter_d[index] = 
ACTIVATE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0]; end end - delay_before_precharge_counter_d[stage1_next_bank] = ACTIVATE_TO_PRECHARGE_DELAY; + delay_before_precharge_counter_d[stage1_next_bank] = ACTIVATE_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0]; //set-up delay before read and write - if(delay_before_read_counter_d[stage1_next_bank] <= ACTIVATE_TO_READ_DELAY) begin // if current delay is > ACTIVATE_TO_READ_DELAY, then updating it to the lower delay will cause the previous delay to be violated - delay_before_read_counter_d[stage1_next_bank] = ACTIVATE_TO_READ_DELAY; + if(delay_before_read_counter_d[stage1_next_bank] <= ACTIVATE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0]) begin // if current delay is > ACTIVATE_TO_READ_DELAY, then updating it to the lower delay will cause the previous delay to be violated + delay_before_read_counter_d[stage1_next_bank] = ACTIVATE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0]; end - if(delay_before_write_counter_d[stage1_next_bank] <= ACTIVATE_TO_WRITE_DELAY) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated - delay_before_write_counter_d[stage1_next_bank] = ACTIVATE_TO_WRITE_DELAY; + if(delay_before_write_counter_d[stage1_next_bank] <= ACTIVATE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0]) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated + delay_before_write_counter_d[stage1_next_bank] = ACTIVATE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0]; end if(DUAL_RANK_DIMM[0]) begin cmd_d[ACTIVATE_SLOT] = {!stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_ACT[2:0] , cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank[BA_BITS-1:0] , stage1_next_row[(DUAL_RANK_DIMM[0]? 
ROW_BITS-1 : ROW_BITS-2):0]}; @@ -1862,35 +2016,36 @@ module ddr3_controller #( end //end of stage1 anticipate end - // control stage 1 stall - if(stage1_pending) begin //raise stall only if stage2 will still be busy next clock - // Stage1 bank and row will determine if transaction will be - // stalled (bank is idle OR wrong row is active). - if(!bank_status_d[stage1_bank] || (bank_status_d[stage1_bank] && bank_active_row_d[stage1_bank] != stage1_row)) begin - stage1_stall = 1; + // control stage 1 stall in advance + if(stage1_pending) begin // raise stall only if stage2 will still be busy next clock + // stall stage 1 by default if there is pending request on stage 1 + stage1_stall = 1; + + if(bank_status_d[stage1_bank] && bank_active_row_d[stage1_bank] == stage1_row) begin + // if write request and delay before write is already met then deassert stall + if(stage1_we && delay_before_write_counter_d[stage1_bank] == 0) begin + stage1_stall = 0; + end + // if read request and delay before read is already met then deassert stall + else if(!stage1_we && delay_before_read_counter_d[stage1_bank] == 0) begin + stage1_stall = 0; + end end - else if(!stage1_we && delay_before_read_counter_d[stage1_bank] != 0) begin // if read request but delay before read is not yet met then stall - stage1_stall = 1; - end - else if(stage1_we && delay_before_write_counter_d[stage1_bank] != 0) begin // if write request but delay before write is not yet met then stall - stage1_stall = 1; - end - //different request type will need a delay of more than 1 clk cycle so stall the pipeline - //if(stage1_we != stage2_we) begin - // stage1_stall = 1; - //end end - //control stage 2 stall + //control stage 2 stall in advance if(stage2_pending) begin - //control stage2 stall in advance - if(bank_status_d[stage2_bank] && bank_active_row_d[stage2_bank] == stage2_row) begin //read/write operation - //write request - if(stage2_we && delay_before_write_counter_d[stage2_bank] == 0) begin // if write request 
and delay before write is already met then deassert stall + // by default, stage 2 stall deasserts once conditions for write/read command is met + stage2_stall = !(stage2_do_wr_or_rd && (stage2_do_wr || stage2_do_rd)); + // equivalent to: if(bank_status_d[stage2_bank] && bank_active_row_d[stage2_bank] == stage2_row) + // can start read/write operation if right row is active on the bank + if(stage2_do_act || stage2_do_wr_or_rd) begin + // if write request and delay before write is already met then deassert stall + if(stage2_we && delay_before_write_counter_d[stage2_bank] == 0) begin stage2_stall = 0; //to low stall next stage, but not yet at this stage end - //read request - else if(!stage2_we && delay_before_read_counter_d[stage2_bank]==0) begin // if read request and delay before read is already met then deassert stall + // if read request and delay before read is already met then deassert stall + else if(!stage2_we && delay_before_read_counter_d[stage2_bank]==0) begin stage2_stall = 0; end end @@ -1904,66 +2059,77 @@ module ddr3_controller #( // a way that it will only stall next clock cycle if the pipeline will be full on the next clock cycle. 
// Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=668378527#gid=668378527 // Old: https://1drv.ms/x/s!AhWdq9CipeVagSqQXPwRmXhDgttL?e=vVYIxE&nav=MTVfezAwMDAwMDAwLTAwMDEtMDAwMC0wMDAwLTAwMDAwMDAwMDAwMH0 - // if(o_wb_stall_q) o_wb_stall_d = stage2_stall; - // else if( (!i_wb_stb && final_calibration_done) || (!calib_stb && state_calibrate != DONE_CALIBRATE) ) o_wb_stall_d = 0; - // else if(!stage1_pending) o_wb_stall_d = stage2_stall; - // else o_wb_stall_d = stage1_stall; + // if(o_wb_stall_int_q) o_wb_stall_int_d = stage2_stall; + // else if( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) ) o_wb_stall_int_d = 0; + // else if(!stage1_pending) o_wb_stall_int_d = stage2_stall; + // else o_wb_stall_int_d = stage1_stall; - // if( !o_wb_stall_q && !i_wb_stb ) o_wb_stall_d = 1'b0; - // else if(ecc_stage1_stall) o_wb_stall_d = 1'b1; - // else if(stage0_pending) o_wb_stall_d = ecc_stage2_stall || stage1_stall; + // if( !o_wb_stall_int_q && !i_wb_stb ) o_wb_stall_int_d = 1'b0; + // else if(ecc_stage1_stall) o_wb_stall_int_d = 1'b1; + // else if(stage0_pending) o_wb_stall_int_d = ecc_stage2_stall || stage1_stall; // else begin - // if(o_wb_stall_q) o_wb_stall_d = stage2_stall; - // else o_wb_stall_d = stage1_stall; + // if(o_wb_stall_int_q) o_wb_stall_int_d = stage2_stall; + // else o_wb_stall_int_d = stage1_stall; // end // pipeline control for ECC_ENABLE != 3 + if(ECC_ENABLE != 3) begin if(!i_wb_cyc && final_calibration_done) begin - o_wb_stall_d = 0; + o_wb_stall_int_d = 0; end - else if(!o_wb_stall_q && ( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) )) begin - o_wb_stall_d = 0; + else if(!o_wb_stall_int_q && ( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) )) begin + o_wb_stall_int_d = 0; end - else if(o_wb_stall_q || !stage1_pending) begin - o_wb_stall_d = stage2_stall; + else if(o_wb_stall_int_q 
|| !stage1_pending) begin + o_wb_stall_int_d = stage2_stall; end else begin - o_wb_stall_d = stage1_stall; + o_wb_stall_int_d = stage1_stall; end end // pipeline control for ECC_ENABLE = 3 else begin if(!i_wb_cyc && final_calibration_done) begin - o_wb_stall_d = 1'b0; + o_wb_stall_int_d = 1'b0; end else if(ecc_stage1_stall) begin - o_wb_stall_d = 1'b1; + o_wb_stall_int_d = 1'b1; end - else if(!o_wb_stall_q && ( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) )) begin - o_wb_stall_d = 1'b0; + else if(!o_wb_stall_int_q && ( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) )) begin + o_wb_stall_int_d = 1'b0; end else if(stage0_pending) begin - o_wb_stall_d = !stage2_update || stage1_stall; + o_wb_stall_int_d = !stage2_update || stage1_stall; end else begin - if(o_wb_stall_q || !stage1_pending) begin - o_wb_stall_d = stage2_stall; + if(o_wb_stall_int_q || !stage1_pending) begin + o_wb_stall_int_d = stage2_stall; end else begin - o_wb_stall_d = stage1_stall; + o_wb_stall_int_d = stage1_stall; end end end end //end of always block - + + always @* begin + force_o_wb_stall_high_d = !final_calibration_done || !instruction[REF_IDLE]; + force_o_wb_stall_calib_high_d = !instruction[REF_IDLE]; + o_wb_stall = o_wb_stall_int_q || force_o_wb_stall_high_q; + o_wb_stall_calib = o_wb_stall_int_q || force_o_wb_stall_calib_high_q; + end // register previous value of cmd_ck_en always @(posedge i_controller_clk) begin if(sync_rst_controller) begin prev_cmd_ck_en <= 0; + force_o_wb_stall_high_q <= 0; + force_o_wb_stall_calib_high_q <= 0; end else begin prev_cmd_ck_en <= cmd_ck_en; + force_o_wb_stall_high_q <= force_o_wb_stall_high_d; + force_o_wb_stall_calib_high_q <= force_o_wb_stall_calib_high_d; end end @@ -2251,6 +2417,9 @@ module ddr3_controller #( pause_counter <= 0; read_data_store <= 0; write_pattern <= 0; + write_pattern_lane <= 0; + read_lane_data_shifted <= 0; + write_pattern_matches <= 0; added_read_pipe_max <= 
0; dqs_start_index_stored <= 0; dqs_start_index_repeat <= 0; @@ -2271,6 +2440,7 @@ module ddr3_controller #( lane_read_dq_early <= 0; shift_read_pipe <= 0; bitslip_counter <= 0; + prep_done <= 0; `ifdef UART_DEBUG uart_start_send <= 0; uart_text <= 0; @@ -2303,6 +2473,7 @@ module ddr3_controller #( idelay_data_cntvaluein_prev <= idelay_data_cntvaluein[lane]; reset_from_calibrate <= 0; reset_after_rank_1 <= 0; // reset for dual rank + prep_done <= 0; if(wb2_update) begin odelay_data_cntvaluein[wb2_write_lane] <= wb2_phy_odelay_data_ld[wb2_write_lane]? wb2_phy_odelay_data_cntvaluein : odelay_data_cntvaluein[wb2_write_lane]; @@ -2315,7 +2486,7 @@ module ddr3_controller #( o_phy_idelay_dqs_ld <= wb2_phy_idelay_dqs_ld; lane <= wb2_write_lane; end - else if(state_calibrate != DONE_CALIBRATE) begin + else if(!final_calibration_done) begin // increase cntvalue every load to prepare for possible next load odelay_data_cntvaluein[lane] <= o_phy_odelay_data_ld[lane]? odelay_data_cntvaluein[lane] + 1: odelay_data_cntvaluein[lane]; odelay_dqs_cntvaluein[lane] <= o_phy_odelay_dqs_ld[lane]? odelay_dqs_cntvaluein[lane] + 1: odelay_dqs_cntvaluein[lane]; @@ -2507,8 +2678,8 @@ module ddr3_controller #( CALIBRATE_DQS: if(dqs_start_index_stored == dqs_target_index) begin // dq_target_index still stores the original dqs_target_index_value. The bit size of dq_target_index is just enough // to count the bits in dqs_store (the received 8 DQS stored STORED_DQS_SIZE times) - added_read_pipe[lane] <= { {( 4 - ($clog2(STORED_DQS_SIZE*8) - (3+1)) ){1'b0}} , dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1:(3+1)] } - + { 3'b0 , (dq_target_index[lane][3:0] >= (5+8)) }; + added_read_pipe[lane] <= |({ {( 4 - ($clog2(STORED_DQS_SIZE*8) - (3+1)) ){1'b0}} , dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1:(3+1)] } + + { 3'b0 , (dq_target_index[lane][3:0] >= (5+8)) })? 
'd1 : 'd0; // added_read_pipe can just be 1 or 0 // if target_index is > 13, then a 1 CONTROLLLER_CLK cycle delay (4 ddr3_clk cycles) is added on that particular lane (due to trace delay) // added_read_pipe[lane] <= dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1 : (4)] + ( dq_target_index[lane][3:0] >= 13 ) ; dqs_bitslip_arrangement <= 16'b0011_1100_0011_1100 >> dq_target_index[lane][2:0]; @@ -2608,73 +2779,75 @@ module ddr3_controller #( pause_counter <= 0; end - WAIT_FOR_FEEDBACK: if(delay_before_write_level_feedback == 0) begin - /* verilator lint_off WIDTH */ //_verilator warning: Bit extraction of var[511:0] requires 9 bit index, not 3 bits (but [lane<<3] is much simpler and cleaner) - sample_clk_repeat <= (i_phy_iserdes_data[lane_times_8] == stored_write_level_feedback)? sample_clk_repeat + 1 : 0; //sample_clk_repeat should get the same response - stored_write_level_feedback <= i_phy_iserdes_data[lane_times_8]; - write_calib_dqs <= 0; - if(sample_clk_repeat == REPEAT_CLK_SAMPLING) begin - sample_clk_repeat <= 0; - prev_write_level_feedback <= stored_write_level_feedback; - if(({prev_write_level_feedback, stored_write_level_feedback} == 2'b01) /*|| write_level_fail[lane]*/) begin - /* verilator lint_on WIDTH */ - /* verilator lint_off WIDTH */ - if(lane == LANES - 1) begin - /* verilator lint_on WIDTH */ - write_calib_odt <= 0; - pause_counter <= 0; //write calibration now complete so continue the reset instruction sequence - lane <= 0; - o_phy_write_leveling_calib <= 0; - state_calibrate <= ISSUE_WRITE_1; + WAIT_FOR_FEEDBACK: if(ODELAY_SUPPORTED) begin + if(delay_before_write_level_feedback == 0) begin + /* verilator lint_off WIDTH */ //_verilator warning: Bit extraction of var[511:0] requires 9 bit index, not 3 bits (but [lane<<3] is much simpler and cleaner) + sample_clk_repeat <= (i_phy_iserdes_data[lane_times_8] == stored_write_level_feedback)? 
sample_clk_repeat + 1 : 0; //sample_clk_repeat should get the same response + stored_write_level_feedback <= i_phy_iserdes_data[lane_times_8]; + write_calib_dqs <= 0; + if(sample_clk_repeat == REPEAT_CLK_SAMPLING) begin + sample_clk_repeat <= 0; + prev_write_level_feedback <= stored_write_level_feedback; + if(({prev_write_level_feedback, stored_write_level_feedback} == 2'b01) /*|| write_level_fail[lane]*/) begin + /* verilator lint_on WIDTH */ + /* verilator lint_off WIDTH */ + if(lane == LANES - 1) begin + /* verilator lint_on WIDTH */ + write_calib_odt <= 0; + pause_counter <= 0; //write calibration now complete so continue the reset instruction sequence + lane <= 0; + o_phy_write_leveling_calib <= 0; + state_calibrate <= ISSUE_WRITE_1; + `ifdef UART_DEBUG_WRITE_LEVEL + uart_start_send <= 1'b1; + uart_text <= {"state=WAIT_FOR_FEEDBACK, All Lanes Done",8'h0a,"----------------------",8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= ISSUE_WRITE_1; + `endif + end + else begin + lane <= lane + 1; + odelay_cntvalue_halfway <= 0; + prev_write_level_feedback <= 1'b1; + sample_clk_repeat <= 0; + state_calibrate <= START_WRITE_LEVEL; `ifdef UART_DEBUG_WRITE_LEVEL uart_start_send <= 1'b1; - uart_text <= {"state=WAIT_FOR_FEEDBACK, All Lanes Done",8'h0a,"----------------------",8'h0a}; + uart_text <= {"state=WAIT_FOR_FEEDBACK, Done lane=",hex_to_ascii(lane),8'h0a,"----------------------",8'h0a}; state_calibrate <= WAIT_UART; - state_calibrate_next <= ISSUE_WRITE_1; + state_calibrate_next <= START_WRITE_LEVEL; `endif + end end else begin - lane <= lane + 1; - odelay_cntvalue_halfway <= 0; - prev_write_level_feedback <= 1'b1; - sample_clk_repeat <= 0; + o_phy_odelay_data_ld[lane] <= 1; + o_phy_odelay_dqs_ld[lane] <= 1; + write_level_fail[lane] <= odelay_cntvalue_halfway; + // if(odelay_cntvalue_halfway) begin // if halfway cntvalue is reached which is illegal (or impossible to happen), then we load the original cntvalues + // odelay_data_cntvaluein[lane] <= 
DATA_INITIAL_ODELAY_TAP[4:0]; + // odelay_dqs_cntvaluein[lane] <= DQS_INITIAL_ODELAY_TAP[4:0]; + // end state_calibrate <= START_WRITE_LEVEL; `ifdef UART_DEBUG_WRITE_LEVEL uart_start_send <= 1'b1; - uart_text <= {"state=WAIT_FOR_FEEDBACK, Done lane=",hex_to_ascii(lane),8'h0a,"----------------------",8'h0a}; + uart_text <= {"state=WAIT_FOR_FEEDBACK, lane=",hex_to_ascii(lane), ", {prev,stored}=", hex_to_ascii(prev_write_level_feedback), + hex_to_ascii(stored_write_level_feedback), ", o_phy_odelay_data_cntvaluein=0x", hex_to_ascii(o_phy_odelay_data_cntvaluein[4]), + hex_to_ascii(o_phy_odelay_data_cntvaluein[3:0]), 8'h0a,8'h0a}; state_calibrate <= WAIT_UART; state_calibrate_next <= START_WRITE_LEVEL; `endif end - end - else begin - o_phy_odelay_data_ld[lane] <= 1; - o_phy_odelay_dqs_ld[lane] <= 1; - write_level_fail[lane] <= odelay_cntvalue_halfway; - // if(odelay_cntvalue_halfway) begin // if halfway cntvalue is reached which is illegal (or impossible to happen), then we load the original cntvalues - // odelay_data_cntvaluein[lane] <= DATA_INITIAL_ODELAY_TAP[4:0]; - // odelay_dqs_cntvaluein[lane] <= DQS_INITIAL_ODELAY_TAP[4:0]; - // end - state_calibrate <= START_WRITE_LEVEL; - `ifdef UART_DEBUG_WRITE_LEVEL - uart_start_send <= 1'b1; - uart_text <= {"state=WAIT_FOR_FEEDBACK, lane=",hex_to_ascii(lane), ", {prev,stored}=", hex_to_ascii(prev_write_level_feedback), - hex_to_ascii(stored_write_level_feedback), ", o_phy_odelay_data_cntvaluein=0x", hex_to_ascii(o_phy_odelay_data_cntvaluein[4]), - hex_to_ascii(o_phy_odelay_data_cntvaluein[3:0]), 8'h0a,8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= START_WRITE_LEVEL; - `endif - end - end - `ifdef UART_DEBUG_WRITE_LEVEL - else begin - uart_start_send <= 1'b1; - uart_text <= {"state=WAIT_FOR_FEEDBACK, sample_clk_repeat=",hex_to_ascii(sample_clk_repeat),8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= START_WRITE_LEVEL; - end - `endif - end + end + `ifdef UART_DEBUG_WRITE_LEVEL + else begin + 
uart_start_send <= 1'b1; + uart_text <= {"state=WAIT_FOR_FEEDBACK, sample_clk_repeat=",hex_to_ascii(sample_clk_repeat),8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= START_WRITE_LEVEL; + end + `endif + end + end ISSUE_WRITE_1: if(instruction_address == 22 && !o_wb_stall_calib) begin calib_stb <= 1;//actual request flag @@ -2765,74 +2938,75 @@ module ddr3_controller #( // end end - ANALYZE_DATA_LOW_FREQ: begin // read_data_store should have the expected 9177298cd0ad51c1, if not then issue bitslip - if(write_pattern[0 +: 64] == {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }) begin - /* verilator lint_off WIDTH */ - if(lane == LANES - 1) begin - /* verilator lint_on WIDTH */ - state_calibrate <= BIST_MODE == 0? 
FINISH_READ : BURST_WRITE; // go straight to FINISH_READ if BIST_MODE == 0 - initial_calibration_done <= 1'b1; - `ifdef UART_DEBUG_ALIGN - uart_start_send <= 1'b1; - //uart_text <= {"state=ANALYZE_DATA_LOW_FREQ, Done All Lanes",8'h0a,"-----------------",8'h0a,8'h0a}; - uart_text <= {8'h0a,8'h0a, "Done All Lanes, bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe), - ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a, - {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, - 8'h0a,8'h0a,8'h0a,8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= BIST_MODE == 0? 
FINISH_READ : BURST_WRITE; - `endif - end - else begin - lane <= lane + 1; - bitslip_counter <= 0; - `ifdef UART_DEBUG_ALIGN - uart_start_send <= 1'b1; - // uart_text <= {"state=ANALYZE_DATA_LOW_FREQ, Done lane=",hex_to_ascii(lane),8'h0a,"-----------------",8'h0a}; - uart_text <= {8'h0a,8'h0a, "Done lane=", hex_to_ascii(lane), ", bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe), - ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a, - {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, - 8'h0a,8'h0a,8'h0a,8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= ANALYZE_DATA_LOW_FREQ; - `endif - end - end - else begin // issue bitslip then repeat write-read - o_phy_bitslip[lane] <= 1'b1; - bitslip_counter <= bitslip_counter + 1; // increment counter every bitslip - if(bitslip_counter == 7) begin // there are only 8 bitslip, once past this then we shift read pipe backwards (assumption is that we read too early) - shift_read_pipe <= shift_read_pipe + 1; - bitslip_counter <= 0; - if(shift_read_pipe == 1) begin // if shift_read_pipe at end then we increase data_start_index since problem might be write DQ too early thus we shift it later using data_start_index - shift_read_pipe <= 0; - data_start_index[lane] <= lane_write_dq_late[lane]? 
data_start_index[lane] - 8: data_start_index[lane] + 8; - if((data_start_index[lane] == 64) && !lane_write_dq_late[lane]) begin // if data_start_index at end then we assert data_start_index, last assumption is that we are writing DQ too late thus we move stage2_data forward to be sent out earlier - data_start_index[lane] <= 64; - lane_write_dq_late[lane] <= 1'b1; - end - end - end - state_calibrate <= ISSUE_WRITE_1; - `ifdef UART_DEBUG_ALIGN - uart_start_send <= 1'b1; - uart_text <= {8'h0a,8'h0a, "lane=", hex_to_ascii(lane), ", bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe), - ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a, - {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, - 8'h0a,8'h0a,8'h0a,8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= ISSUE_WRITE_1; - `endif - end - end +ANALYZE_DATA_LOW_FREQ: if(DLL_OFF) begin // read_data_store should have the expected 9177298cd0ad51c1, if not then issue bitslip + if(write_pattern[0 +: 64] == {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }) begin + /* verilator lint_off WIDTH */ + if(lane == LANES - 1) begin + /* verilator lint_on WIDTH */ + 
state_calibrate <= BIST_MODE == 0? FINISH_READ : BURST_WRITE; // go straight to FINISH_READ if BIST_MODE == 0 + initial_calibration_done <= 1'b1; + `ifdef UART_DEBUG_ALIGN + uart_start_send <= 1'b1; + //uart_text <= {"state=ANALYZE_DATA_LOW_FREQ, Done All Lanes",8'h0a,"-----------------",8'h0a,8'h0a}; + uart_text <= {8'h0a,8'h0a, "Done All Lanes, bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe), + ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a, + {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, + 8'h0a,8'h0a,8'h0a,8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= BIST_MODE == 0? 
FINISH_READ : BURST_WRITE; + `endif + end + else begin + lane <= lane + 1; + bitslip_counter <= 0; + `ifdef UART_DEBUG_ALIGN + uart_start_send <= 1'b1; + // uart_text <= {"state=ANALYZE_DATA_LOW_FREQ, Done lane=",hex_to_ascii(lane),8'h0a,"-----------------",8'h0a}; + uart_text <= {8'h0a,8'h0a, "Done lane=", hex_to_ascii(lane), ", bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe), + ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a, + {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, + 8'h0a,8'h0a,8'h0a,8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= ANALYZE_DATA_LOW_FREQ; + `endif + end + end + else begin // issue bitslip then repeat write-read + o_phy_bitslip[lane] <= 1'b1; + bitslip_counter <= bitslip_counter + 1; // increment counter every bitslip + if(bitslip_counter == 7) begin // there are only 8 bitslip, once past this then we shift read pipe backwards (assumption is that we read too early) + shift_read_pipe <= shift_read_pipe + 1; + bitslip_counter <= 0; + if(shift_read_pipe == 1) begin // if shift_read_pipe at end then we increase data_start_index since problem might be write DQ too early thus we shift it later using data_start_index + shift_read_pipe <= 0; + data_start_index[lane] <= lane_write_dq_late[lane]? 
data_start_index[lane] - 8: data_start_index[lane] + 8; + if((data_start_index[lane] == 64) && !lane_write_dq_late[lane]) begin // if data_start_index at end then we assert data_start_index, last assumption is that we are writing DQ too late thus we move stage2_data forward to be sent out earlier + data_start_index[lane] <= 64; + lane_write_dq_late[lane] <= 1'b1; + end + end + end + state_calibrate <= ISSUE_WRITE_1; + `ifdef UART_DEBUG_ALIGN + uart_start_send <= 1'b1; + uart_text <= {8'h0a,8'h0a, "lane=", hex_to_ascii(lane), ", bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe), + ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a, + {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, + 8'h0a,8'h0a,8'h0a,8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= ISSUE_WRITE_1; + `endif + end + end + // extract burst_0-to-burst_7 data for a specified lane then determine which byte in write_pattern does it starts (ASSUMPTION: the DQ is too early [3d_9177298cd0ad51]c1 is written) // NOTE TO SELF: all "8" here assume DQ_BITS are 8? parameterize this properly // data_start_index for a specified lane determine how many bits are off the data from the write command @@ -2840,149 +3014,157 @@ module ddr3_controller #( // e.g. 
LANE={burst7, burst6, burst5, burst4, burst3, burst2, burst1, burst0} then with 1 ddr3 cycle delay between DQ and command // burst0 will not be written but only starting on burst1 // if lane_write_dq_late is already set to 1 for this lane, then current lane should already be fixed without changing the data_start_index - ANALYZE_DATA: if(write_pattern[ (lane_write_dq_late[lane]? 0 : data_start_index[lane]) +: 64] == {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }) begin - /* verilator lint_off WIDTH */ - if(lane == LANES - 1) begin - /* verilator lint_on WIDTH */ - state_calibrate <= BIST_MODE == 0? FINISH_READ : BURST_WRITE; // go straight to FINISH_READ if BIST_MODE == 0 - initial_calibration_done <= 1'b1; - `ifdef UART_DEBUG_ALIGN - uart_start_send <= 1'b1; - uart_text <= {"state=ANALYZE_DATA, Done All Lanes",8'h0a,"-----------------",8'h0a,8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= BIST_MODE == 0? FINISH_READ : BURST_WRITE; - `endif - end + ANALYZE_DATA: if(prep_done[1]) begin + if(write_pattern_matches) begin + /* verilator lint_off WIDTH */ + if(lane == LANES - 1) begin + /* verilator lint_on WIDTH */ + state_calibrate <= BIST_MODE == 0? FINISH_READ : BURST_WRITE; // go straight to FINISH_READ if BIST_MODE == 0 + initial_calibration_done <= 1'b1; + `ifdef UART_DEBUG_ALIGN + uart_start_send <= 1'b1; + uart_text <= {"state=ANALYZE_DATA, Done All Lanes",8'h0a,"-----------------",8'h0a,8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= BIST_MODE == 0? 
FINISH_READ : BURST_WRITE; + `endif + end + else begin + lane <= lane + 1; + data_start_index[lane+1] <= 0; + state_calibrate <= ANALYZE_DATA; + `ifdef UART_DEBUG_ALIGN + uart_start_send <= 1'b1; + uart_text <= {"state=ANALYZE_DATA, Done lane=",hex_to_ascii(lane),8'h0a,"-----------------",8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= ANALYZE_DATA; + `endif + end + end else begin - lane <= lane + 1; - data_start_index[lane+1] <= 0; - `ifdef UART_DEBUG_ALIGN - uart_start_send <= 1'b1; - uart_text <= {"state=ANALYZE_DATA, Done lane=",hex_to_ascii(lane),8'h0a,"-----------------",8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= ANALYZE_DATA; - `endif - end - end - else begin - data_start_index[lane] <= data_start_index[lane] + 8; //skip by 8 (basically we want to delay DQ since it was too early) - if(lane_write_dq_late[lane] && lane_read_dq_early[lane]) begin // both assumption is wrong so we reset the controller - reset_from_calibrate <= 1; - end - // first assumption (write DQ is late) is wrong so we repeat write-read with data_start_index back to 0 - else if(lane_write_dq_late[lane]) begin - data_start_index[lane] <= 0; // set delay to outgoing stage2_data back to zero - if(data_start_index[lane] == 0) begin // if already set to zero then we already did write-read with default zero data_start_index, so we go to CHECK_STARTING_DATA to try second assumtpion + data_start_index[lane] <= data_start_index[lane] + 8; //skip by 8 (basically we want to delay DQ since it was too early) + if(lane_write_dq_late[lane] && lane_read_dq_early[lane]) begin // both assumption is wrong so we reset the controller + reset_from_calibrate <= 1; + end + // first assumption (write DQ is late) is wrong so we repeat write-read with data_start_index back to 0 + else if(lane_write_dq_late[lane]) begin + data_start_index[lane] <= 0; // set delay to outgoing stage2_data back to zero + if(data_start_index[lane] == 0) begin // if already set to zero then we 
already did write-read with default zero data_start_index, so we go to CHECK_STARTING_DATA to try second assumtpion + state_calibrate <= CHECK_STARTING_DATA; + `ifdef UART_DEBUG_ALIGN + uart_start_send <= 1'b1; + uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", First Assumption wrong, Start second assumption: Read too early",8'h0a,8'h0a, + 8'h0a,8'h0a, + {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, + 8'h0a,8'h0a,8'h0a,8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= CHECK_STARTING_DATA; + `endif + end + else begin // if not yet zero then we have to write-read again + state_calibrate <= ISSUE_WRITE_1; + end + end + //reached the end but STILL has error, issue might be WRITING TOO LATE (298cd0ad51c1XXXX is written) OR READING TOO EARLY ([9177]_298cd0ad51c1XXXX is read) + else if(data_start_index[lane] == 56) begin + data_start_index[lane] <= 0; + start_index_check <= 0; state_calibrate <= CHECK_STARTING_DATA; `ifdef UART_DEBUG_ALIGN uart_start_send <= 1'b1; - uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", First Assumption wrong, Start second assumption: Read too early",8'h0a,8'h0a, - 8'h0a,8'h0a, - {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, - 8'h0a,8'h0a,8'h0a,8'h0a}; + 
uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", Reached end",8'h0a,8'h0a}; state_calibrate <= WAIT_UART; state_calibrate_next <= CHECK_STARTING_DATA; `endif - end - else begin // if not yet zero then we have to write-read again - state_calibrate <= ISSUE_WRITE_1; - end - end - //reached the end but STILL has error, issue might be WRITING TOO LATE (298cd0ad51c1XXXX is written) OR READING TOO EARLY ([9177]_298cd0ad51c1XXXX is read) - else if(data_start_index[lane] == 56) begin - data_start_index[lane] <= 0; - start_index_check <= 0; - state_calibrate <= CHECK_STARTING_DATA; - `ifdef UART_DEBUG_ALIGN + end + `ifdef UART_DEBUG_ALIGN + else begin uart_start_send <= 1'b1; - uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", Reached end",8'h0a,8'h0a}; + state_calibrate <= ANALYZE_DATA; + uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", data_start_index[lane]=0x", + hex_to_ascii(data_start_index[lane][6:4]),hex_to_ascii(data_start_index[lane][3:0]),8'h0a,8'h0a,8'h0a,8'h0a, + {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], + read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, + 8'h0a,8'h0a,8'h0a,8'h0a + }; state_calibrate <= WAIT_UART; - state_calibrate_next <= CHECK_STARTING_DATA; - `endif - end - `ifdef UART_DEBUG_ALIGN - else begin - uart_start_send <= 1'b1; - uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", data_start_index[lane]=0x", - hex_to_ascii(data_start_index[lane][6:4]),hex_to_ascii(data_start_index[lane][3:0]),8'h0a,8'h0a,8'h0a,8'h0a, - {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*5 + 8*lane) 
+: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }, - 8'h0a,8'h0a,8'h0a,8'h0a - }; - state_calibrate <= WAIT_UART; - state_calibrate_next <= ANALYZE_DATA; - end - `endif - end + state_calibrate_next <= ANALYZE_DATA; + end + `endif + end + end + else begin + prep_done <= {prep_done[0],1'b1}; + end // check when the 4 MSB of write_pattern {d0ad51c1} starts on read_lane_data (read_lane_data is just the concatenation of read_data_store of a specific lane) // assumption here read_lane_data ~= 298cd0ad51c1XXXX is written: either because we write too late (thus we need to delay outgoing stage2_data) OR we read too early (thus we need to calibrate incoming iserdes_dq) - CHECK_STARTING_DATA: begin - /* verilator lint_off WIDTHTRUNC */ - if(read_lane_data[start_index_check +: 32] == write_pattern[0 +: 32]) begin - /* verilator lint_on WIDTHTRUNC */ - // first assumption: controller DQ is late WHEN WRITING(THUS WE NEED TO CALIBRATE data_start_index of outgoing stage2_data) - if(!lane_write_dq_late[lane]) begin // lane_write_dq_late is not yet set so we know this first assunmption is not yet tested - state_calibrate <= ISSUE_WRITE_1; // start writing again (the next write should fix the late DQ for this current lane) - data_start_index[lane] <= 64 - start_index_check; // stage2_data_unaligned is forwarded to stage[1] so we are now 8-bursts early, so we subtract from 64 so the burst we will be forwarded to the tip of stage2_data - lane_write_dq_late[lane] <= 1'b1; - `ifdef UART_DEBUG_ALIGN - uart_start_send <= 1'b1; - uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=0x",hex8_to_ascii(start_index_check), ", Ongoing First Assumption",8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= ISSUE_WRITE_1; - `endif - end - // if first assumption is 
not the fix then second assmption: controller reads the DQ too early (THUS WE NEED TO CALIBRATE INCOMING DQ SIGNAL starting from bitslip training) - else begin - lane_read_dq_early[lane] <= 1'b1; // set to 1 to see later what lanes has this problem - state_calibrate <= BITSLIP_DQS_TRAIN_3; - added_read_pipe[lane] <= { {( 4 - ($clog2(STORED_DQS_SIZE*8) - (3+1)) ){1'b0}} , dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1:(3+1)] } - + { 3'b0 , (dq_target_index[lane][3:0] >= (5+8)) }; - dqs_bitslip_arrangement <= 16'b0011_1100_0011_1100 >> dq_target_index[lane][2:0]; - `ifdef UART_DEBUG_ALIGN - uart_start_send <= 1'b1; - uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=0x",hex8_to_ascii(start_index_check), ", Ongoing Second Assumption",8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= BITSLIP_DQS_TRAIN_3; - `endif - end - end - else begin - start_index_check <= start_index_check + 16; // plus 16, we assume here that DQ will be late BY 1 DDR3 CLK CYCLE (if only +8, then it will be late by half DDR3 cycle, that should NOT happen) - dq_target_index[lane] <= dq_target_index[lane] + 2; - if(start_index_check == 48)begin // start_index_check is now outside the possible values - // first assumption: controller DQ is 1 CONTROLLER CYCLE late WHEN WRITING (data is written to address 1 and not address 0) - if(!lane_write_dq_late[lane]) begin // lane_write_dq_late is not yet set so we know this first assunmption is not yet tested + CHECK_STARTING_DATA: if(prep_done[1]) begin + /* verilator lint_off WIDTHTRUNC */ + if(read_lane_data_shifted == write_pattern[0 +: 32]) begin + /* verilator lint_on WIDTHTRUNC */ + // first assumption: controller DQ is late WHEN WRITING(THUS WE NEED TO CALIBRATE data_start_index of outgoing stage2_data) + if(!lane_write_dq_late[lane]) begin // lane_write_dq_late is not yet set so we know this first assunmption is not yet tested state_calibrate <= ISSUE_WRITE_1; // start writing again (the next write should fix the late DQ 
for this current lane) - data_start_index[lane] <= 1; // stage2_data_unaligned is forwarded to stage[1] so we are now 8-bursts early, since assumption is we are 1 controller cycle early then data_start_index is 64 + data_start_index[lane] <= 64 - start_index_check; // stage2_data_unaligned is forwarded to stage[1] so we are now 8-bursts early, so we subtract from 64 so the burst we will be forwarded to the tip of stage2_data lane_write_dq_late[lane] <= 1'b1; `ifdef UART_DEBUG_ALIGN uart_start_send <= 1'b1; - uart_text <= {"state=CHECK_STARTING_DATA, Reached end, First Assumption: Write is 1 Controller cycle early",8'h0a}; + uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=0x",hex8_to_ascii(start_index_check), ", Ongoing First Assumption",8'h0a}; state_calibrate <= WAIT_UART; state_calibrate_next <= ISSUE_WRITE_1; `endif end - else begin // if first assumption is wrong and start_index_check is still outside of possible values then reset - reset_from_calibrate <= 1; + // if first assumption is not the fix then second assmption: controller reads the DQ too early (THUS WE NEED TO CALIBRATE INCOMING DQ SIGNAL starting from bitslip training) + else begin + lane_read_dq_early[lane] <= 1'b1; // set to 1 to see later what lanes has this problem + state_calibrate <= BITSLIP_DQS_TRAIN_3; + added_read_pipe[lane] <= |({ {( 4 - ($clog2(STORED_DQS_SIZE*8) - (3+1)) ){1'b0}} , dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1:(3+1)] } + + { 3'b0 , (dq_target_index[lane][3:0] >= (5+8)) })? 
'd1 : 'd0; // added_read_pipe can just be 1 or 0 + dqs_bitslip_arrangement <= 16'b0011_1100_0011_1100 >> dq_target_index[lane][2:0]; + `ifdef UART_DEBUG_ALIGN + uart_start_send <= 1'b1; + uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=0x",hex8_to_ascii(start_index_check), ", Ongoing Second Assumption",8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= BITSLIP_DQS_TRAIN_3; + `endif end end - `ifdef UART_DEBUG_ALIGN else begin - uart_start_send <= 1'b1; - uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=", hex_to_ascii(start_index_check[5:4]), hex_to_ascii(start_index_check[3:0]),8'h0a}; - state_calibrate <= WAIT_UART; - state_calibrate_next <= CHECK_STARTING_DATA; + start_index_check <= start_index_check + 16; // plus 16, we assume here that DQ will be late BY 1 DDR3 CLK CYCLE (if only +8, then it will be late by half DDR3 cycle, that should NOT happen) + dq_target_index[lane] <= dq_target_index[lane] + 2; + if(start_index_check == 48)begin // start_index_check is now outside the possible values + // first assumption: controller DQ is 1 CONTROLLER CYCLE late WHEN WRITING (data is written to address 1 and not address 0) + if(!lane_write_dq_late[lane]) begin // lane_write_dq_late is not yet set so we know this first assunmption is not yet tested + state_calibrate <= ISSUE_WRITE_1; // start writing again (the next write should fix the late DQ for this current lane) + data_start_index[lane] <= 1; // stage2_data_unaligned is forwarded to stage[1] so we are now 8-bursts early, since assumption is we are 1 controller cycle early then data_start_index is 64 + lane_write_dq_late[lane] <= 1'b1; + `ifdef UART_DEBUG_ALIGN + uart_start_send <= 1'b1; + uart_text <= {"state=CHECK_STARTING_DATA, Reached end, First Assumption: Write is 1 Controller cycle early",8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= ISSUE_WRITE_1; + `endif + end + else begin // if first assumption is wrong and start_index_check is still outside of 
possible values then reset + reset_from_calibrate <= 1; + end + end + `ifdef UART_DEBUG_ALIGN + else begin + uart_start_send <= 1'b1; + uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=", hex_to_ascii(start_index_check[5:4]), hex_to_ascii(start_index_check[3:0]),8'h0a}; + state_calibrate <= WAIT_UART; + state_calibrate_next <= CHECK_STARTING_DATA; + end + `endif end - `endif end - end + else begin + prep_done <= {prep_done[0],1'b1}; + end BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to capture the DQ correctly if(i_phy_iserdes_bitslip_reference[lane*serdes_ratio*2 +: 8] == dqs_bitslip_arrangement[7:0]) begin @@ -3013,7 +3195,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap write_test_address_counter <= 0; end state_calibrate <= BURST_READ; - `ifdef UART_DEBUG_ALIGN + `ifdef UART_DEBUG_BIST uart_start_send <= 1'b1; uart_text <= {"DONE BURST WRITE (PER BYTE): BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a}; state_calibrate <= WAIT_UART; @@ -3036,7 +3218,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap write_test_address_counter <= 0; end state_calibrate <= BURST_READ; - `ifdef UART_DEBUG_ALIGN + `ifdef UART_DEBUG_BIST uart_start_send <= 1'b1; uart_text <= {"DONE BURST WRITE (ALL BYTES): BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a}; state_calibrate <= WAIT_UART; @@ -3059,7 +3241,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap read_test_address_counter <= 0; end state_calibrate <= RANDOM_WRITE; - `ifdef UART_DEBUG_ALIGN + `ifdef UART_DEBUG_BIST uart_start_send <= 1'b1; uart_text <= {"DONE BURST READ: BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a}; state_calibrate <= WAIT_UART; @@ -3087,7 +3269,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap write_test_address_counter <= 0; end state_calibrate <= RANDOM_READ; - `ifdef UART_DEBUG_ALIGN + `ifdef UART_DEBUG_BIST uart_start_send <= 1'b1; uart_text 
<= {"DONE RANDOM WRITE: BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a}; state_calibrate <= WAIT_UART; @@ -3113,7 +3295,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap read_test_address_counter <= 0; end state_calibrate <= ALTERNATE_WRITE_READ; - `ifdef UART_DEBUG_ALIGN + `ifdef UART_DEBUG_BIST uart_start_send <= 1'b1; uart_text <= {"DONE RANDOM READ: BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a}; state_calibrate <= WAIT_UART; @@ -3137,7 +3319,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin /* verilator lint_on WIDTHEXPAND */ train_delay <= 15; state_calibrate <= FINISH_READ; - `ifdef UART_DEBUG_ALIGN + `ifdef UART_DEBUG_BIST uart_start_send <= 1'b1; uart_text <= {"DONE ALTERNATING WRITE-READ",8'h0a}; state_calibrate <= WAIT_UART; @@ -3161,7 +3343,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin state_calibrate <= DONE_CALIBRATE; final_calibration_done <= 1'b1; end - `ifdef UART_DEBUG_ALIGN + `ifdef UART_DEBUG_BIST uart_start_send <= 1'b1; uart_text <= {"DONE BIST_MODE=",hex_to_ascii(BIST_MODE),", correct_read_data=", 8'h0a, 8'h0a, correct_read_data, 8'h0a, 8'h0a, 8'h0a, 8'h0a @@ -3199,10 +3381,13 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin `ifdef FORMAL_COVER state_calibrate <= DONE_CALIBRATE; `endif - - read_lane_data <= {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8], - read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }; + read_lane_data <= {read_data_store[((DQ_BITS*LANES)*7 + ({29'd0, lane}<<3)) +: 8], read_data_store[((DQ_BITS*LANES)*6 + ({29'd0, lane}<<3)) +: 8], + read_data_store[((DQ_BITS*LANES)*5 + ({29'd0, lane}<<3)) +: 8], read_data_store[((DQ_BITS*LANES)*4 + ({29'd0, lane}<<3)) +: 8], 
read_data_store[((DQ_BITS*LANES)*3 + ({29'd0, lane}<<3)) +: 8], + read_data_store[((DQ_BITS*LANES)*2 + ({29'd0, lane}<<3)) +: 8],read_data_store[((DQ_BITS*LANES)*1 + ({29'd0, lane}<<3)) +: 8],read_data_store[((DQ_BITS*LANES)*0 + ({29'd0, lane}<<3)) +: 8] }; + write_pattern_lane <= write_pattern[ (lane_write_dq_late[lane]? 0 : data_start_index[lane]) +: 64]; + read_lane_data_shifted <= read_lane_data[start_index_check +: 32]; + write_pattern_matches <= write_pattern_lane == read_lane_data; + //halfway value has been reached (illegal) and will go back to zero at next load if(odelay_data_cntvaluein[lane] == 15) begin odelay_cntvalue_halfway <= 1; @@ -3211,7 +3396,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin pause_counter <= 1; // pause instruction address until pre-stall delay before refresh sequence finishes //skip to instruction address 20 (precharge all before refresh) when no pending requests anymore //toggle it for 1 clk cycle only - if( !stage1_pending && !stage2_pending && ( (o_wb_stall && final_calibration_done) || (o_wb_stall_calib && state_calibrate != DONE_CALIBRATE) ) ) begin + if( !stage1_pending && !stage2_pending && ( (o_wb_stall && final_calibration_done) || (o_wb_stall_calib && !final_calibration_done) ) ) begin pause_counter <= 0; // pre-stall delay done since all remaining requests are completed end end @@ -3455,7 +3640,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin end else begin reset_from_test <= 0; - if(state_calibrate != DONE_CALIBRATE) begin + if(!final_calibration_done) begin if ( o_aux[2:0] == 3'd3 && o_wb_ack_uncalibrated ) begin //o_aux = 3 is for read from calibration if(o_wb_data == correct_data) begin correct_read_data <= correct_read_data + 1; @@ -4936,7 +5121,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin // stage0_pending will rise to high if ecc_stage1_stall is high the previous cycle and stall is low if(stage0_pending && !$past(stage0_pending)) begin - assert($past(ecc_stage1_stall) && 
!$past(o_wb_stall_q)); + assert($past(ecc_stage1_stall) && !$past(o_wb_stall_int_q)); end // stage0_pending currently high means stage2 and stage1 is pending, and there is ECC request on stage2 @@ -5035,7 +5220,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin assert(!stage1_pending); assert(!stage2_pending); end - if($past(o_wb_stall_q) && stage1_pending && !$past(stage1_update)) begin //if pipe did not move forward + if($past(o_wb_stall_int_q) && stage1_pending && !$past(stage1_update)) begin //if pipe did not move forward assert(stage1_we == $past(stage1_we)); assert(stage1_aux == $past(stage1_aux)); assert(stage1_bank == $past(stage1_bank));