diff --git a/.gitignore b/.gitignore
index 1d106e5..2d86c08 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,8 @@ example_demo/build_logs*
testbench/ddr3_dimm_micron_sim_behav.wcfg
testbench/icarus_sim/*.log
testbench/icarus_sim/uberddr3_sim
+*jobid
+.caas*
# But do not ignore testbench/xsim/test_*.log
!testbench/xsim/test_*.log
diff --git a/example_demo/alinx_ax7103b/caas.conf b/example_demo/alinx_ax7103b/caas.conf
new file mode 100644
index 0000000..940110f
--- /dev/null
+++ b/example_demo/alinx_ax7103b/caas.conf
@@ -0,0 +1,11 @@
+[project]
+Backend = openxc7
+Part = xc7a100tfgg484-2
+Top = ax7103_ddr3
+Constraint = ax7103_ddr3.xdc
+Sources = *.v
+
+[caas]
+Server = https://caas.symbioticeda.com:18888/
+
+
diff --git a/example_demo/alinx_ax7103b/ddr3_controller.v b/example_demo/alinx_ax7103b/ddr3_controller.v
new file mode 120000
index 0000000..abadf6a
--- /dev/null
+++ b/example_demo/alinx_ax7103b/ddr3_controller.v
@@ -0,0 +1 @@
+../../rtl/ddr3_controller.v
\ No newline at end of file
diff --git a/example_demo/alinx_ax7103b/ddr3_phy.v b/example_demo/alinx_ax7103b/ddr3_phy.v
new file mode 120000
index 0000000..6fd8666
--- /dev/null
+++ b/example_demo/alinx_ax7103b/ddr3_phy.v
@@ -0,0 +1 @@
+../../rtl/ddr3_phy.v
\ No newline at end of file
diff --git a/example_demo/alinx_ax7103b/ddr3_top.v b/example_demo/alinx_ax7103b/ddr3_top.v
new file mode 120000
index 0000000..5595b49
--- /dev/null
+++ b/example_demo/alinx_ax7103b/ddr3_top.v
@@ -0,0 +1 @@
+../../rtl/ddr3_top.v
\ No newline at end of file
diff --git a/example_demo/alinx_ax7325b/ax7325b_ddr3.v b/example_demo/alinx_ax7325b/ax7325b_ddr3.v
index aee1839..e53de04 100644
--- a/example_demo/alinx_ax7325b/ax7325b_ddr3.v
+++ b/example_demo/alinx_ax7325b/ax7325b_ddr3.v
@@ -164,20 +164,22 @@
// DDR3 Controller
ddr3_top #(
- .CONTROLLER_CLK_PERIOD(12_000), //ps, clock period of the controller interface
- .DDR3_CLK_PERIOD(3_000), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD)
+ .CONTROLLER_CLK_PERIOD(10_000), //ps, clock period of the controller interface
+ .DDR3_CLK_PERIOD(2_500), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD)
.ROW_BITS(15), //width of row address
.COL_BITS(10), //width of column address
.BA_BITS(3), //width of bank address
- .BYTE_LANES(8), //number of DDR3 modules to be controlled
- .AUX_WIDTH(16), //width of aux line (must be >= 4)
+ .BYTE_LANES(2), //number of DDR3 modules to be controlled
+ .AUX_WIDTH(4), //width of aux line (must be >= 4)
.WB2_ADDR_BITS(32), //width of 2nd wishbone address bus
.WB2_DATA_BITS(32), //width of 2nd wishbone data bus
.MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW)
.ODELAY_SUPPORTED(1), //set to 1 when ODELAYE2 is supported
.SECOND_WISHBONE(0), //set to 1 if 2nd wishbone is needed
.ECC_ENABLE(0), // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC )
- .WB_ERROR(0) // set to 1 to support Wishbone error (asserts at ECC double bit error)
+ .WB_ERROR(0), // set to 1 to support Wishbone error (asserts at ECC double bit error)
+ .BIST_MODE(1), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w)
+ .SPEED_BIN(1) // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11)
) ddr3_top
(
//clock and reset
@@ -216,7 +218,7 @@
.o_ddr3_clk_n(ddr3_ck_n),
.o_ddr3_reset_n(ddr3_reset_n),
.o_ddr3_cke(ddr3_cke), // CKE
- .o_ddr3_cs_n(ddr3_cs_n[0]), // chip select signal (controls rank 1 only)
+ .o_ddr3_cs_n(ddr3_cs_n), // chip select signal (controls rank 1 only)
.o_ddr3_ras_n(ddr3_ras_n), // RAS#
.o_ddr3_cas_n(ddr3_cas_n), // CAS#
.o_ddr3_we_n(ddr3_we_n), // WE#
diff --git a/example_demo/alinx_ax7325b/ax7325b_ddr3.xdc b/example_demo/alinx_ax7325b/ax7325b_ddr3.xdc
index af4fa0a..e7f67aa 100644
--- a/example_demo/alinx_ax7325b/ax7325b_ddr3.xdc
+++ b/example_demo/alinx_ax7325b/ax7325b_ddr3.xdc
@@ -632,25 +632,25 @@ set_property PACKAGE_PIN AD4 [get_ports {ddr3_dm[7]}]
# PadFunction: IO_L15P_T2_DQS_32
set_property VCCAUX_IO HIGH [get_ports {ddr3_dqs_p[0]}]
set_property SLEW FAST [get_ports {ddr3_dqs_p[0]}]
-set_property IOSTANDARD SSTL15 [get_ports {ddr3_dqs_p[0]}]
+set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddr3_dqs_p[0]}]
set_property PACKAGE_PIN Y19 [get_ports {ddr3_dqs_p[0]}]
# PadFunction: IO_L15N_T2_DQS_32
set_property VCCAUX_IO HIGH [get_ports {ddr3_dqs_n[0]}]
set_property SLEW FAST [get_ports {ddr3_dqs_n[0]}]
-set_property IOSTANDARD SSTL15 [get_ports {ddr3_dqs_n[0]}]
+set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddr3_dqs_n[0]}]
set_property PACKAGE_PIN Y18 [get_ports {ddr3_dqs_n[0]}]
# PadFunction: IO_L9P_T1_DQS_32
set_property VCCAUX_IO HIGH [get_ports {ddr3_dqs_p[1]}]
set_property SLEW FAST [get_ports {ddr3_dqs_p[1]}]
-set_property IOSTANDARD SSTL15 [get_ports {ddr3_dqs_p[1]}]
+set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddr3_dqs_p[1]}]
set_property PACKAGE_PIN AJ18 [get_ports {ddr3_dqs_p[1]}]
# PadFunction: IO_L9N_T1_DQS_32
set_property VCCAUX_IO HIGH [get_ports {ddr3_dqs_n[1]}]
set_property SLEW FAST [get_ports {ddr3_dqs_n[1]}]
-set_property IOSTANDARD SSTL15 [get_ports {ddr3_dqs_n[1]}]
+set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddr3_dqs_n[1]}]
set_property PACKAGE_PIN AK18 [get_ports {ddr3_dqs_n[1]}]
# PadFunction: IO_L3P_T0_DQS_32
diff --git a/example_demo/alinx_ax7325b/ax7325b_ddr3_openxc7.bit b/example_demo/alinx_ax7325b/ax7325b_ddr3_openxc7.bit
index 4ecf631..eba2433 100644
Binary files a/example_demo/alinx_ax7325b/ax7325b_ddr3_openxc7.bit and b/example_demo/alinx_ax7325b/ax7325b_ddr3_openxc7.bit differ
diff --git a/example_demo/alinx_ax7325b/caas.conf b/example_demo/alinx_ax7325b/caas.conf
new file mode 100644
index 0000000..1450f8d
--- /dev/null
+++ b/example_demo/alinx_ax7325b/caas.conf
@@ -0,0 +1,11 @@
+[project]
+Backend = openxc7
+Part = xc7k325tffg900-2
+Top = ax7325b_ddr3
+Constraint = ax7325b_ddr3.xdc
+Sources = *.v
+
+[caas]
+Server = https://caas.symbioticeda.com:18888/
+
+
diff --git a/example_demo/alinx_ax7325b/clk_wiz.v b/example_demo/alinx_ax7325b/clk_wiz.v
index d7efd42..ac6519a 100644
--- a/example_demo/alinx_ax7325b/clk_wiz.v
+++ b/example_demo/alinx_ax7325b/clk_wiz.v
@@ -23,18 +23,18 @@ module clk_wiz
.COMPENSATION ("INTERNAL"),
.STARTUP_WAIT ("FALSE"),
.DIVCLK_DIVIDE (1),
- .CLKFBOUT_MULT (5), // 200 MHz * 5 = 1000 MHz
+ .CLKFBOUT_MULT (8), // 200 MHz * 8 = 1600 MHz
.CLKFBOUT_PHASE (0.000),
- .CLKOUT0_DIVIDE (12), // 1000 MHz / 12 = 83.333 MHz
+ .CLKOUT0_DIVIDE (12), // 1600 MHz / 12 = 133.333 MHz
.CLKOUT0_PHASE (0.000),
.CLKOUT0_DUTY_CYCLE (0.500),
- .CLKOUT1_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 0 phase
+ .CLKOUT1_DIVIDE (3), // 1600 MHz / 3 = 533.333 MHz, 0 phase
.CLKOUT1_PHASE (0.000),
.CLKOUT1_DUTY_CYCLE (0.500),
- .CLKOUT2_DIVIDE (5), // 1000 MHz / 5 = 200 MHz
+ .CLKOUT2_DIVIDE (8), // 1600 MHz / 8 = 200 MHz
.CLKOUT2_PHASE (0.000),
.CLKOUT2_DUTY_CYCLE (0.500),
- .CLKOUT3_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 90 phase
+ .CLKOUT3_DIVIDE (3), // 1600 MHz / 3 = 533.333 MHz, 90 phase
.CLKOUT3_PHASE (90.000),
.CLKOUT3_DUTY_CYCLE (0.500),
.CLKIN1_PERIOD (5.000) // 200 MHz input
diff --git a/example_demo/alinx_ax7325b/ddr3_controller.v b/example_demo/alinx_ax7325b/ddr3_controller.v
new file mode 120000
index 0000000..abadf6a
--- /dev/null
+++ b/example_demo/alinx_ax7325b/ddr3_controller.v
@@ -0,0 +1 @@
+../../rtl/ddr3_controller.v
\ No newline at end of file
diff --git a/example_demo/alinx_ax7325b/ddr3_phy.v b/example_demo/alinx_ax7325b/ddr3_phy.v
new file mode 120000
index 0000000..6fd8666
--- /dev/null
+++ b/example_demo/alinx_ax7325b/ddr3_phy.v
@@ -0,0 +1 @@
+../../rtl/ddr3_phy.v
\ No newline at end of file
diff --git a/example_demo/alinx_ax7325b/ddr3_top.v b/example_demo/alinx_ax7325b/ddr3_top.v
new file mode 120000
index 0000000..5595b49
--- /dev/null
+++ b/example_demo/alinx_ax7325b/ddr3_top.v
@@ -0,0 +1 @@
+../../rtl/ddr3_top.v
\ No newline at end of file
diff --git a/example_demo/arty_s7/arty_ddr3.v b/example_demo/arty_s7/arty_ddr3.v
index 3d6e2d3..e6cba3a 100644
--- a/example_demo/arty_s7/arty_ddr3.v
+++ b/example_demo/arty_s7/arty_ddr3.v
@@ -180,14 +180,16 @@
.COL_BITS(10), //width of column address
.BA_BITS(3), //width of bank address
.BYTE_LANES(2), //number of DDR3 modules to be controlled
- .AUX_WIDTH(16), //width of aux line (must be >= 4)
+ .AUX_WIDTH(4), //width of aux line (must be >= 4)
.WB2_ADDR_BITS(32), //width of 2nd wishbone address bus
.WB2_DATA_BITS(32), //width of 2nd wishbone data bus
.MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW)
.ODELAY_SUPPORTED(0), //set to 1 when ODELAYE2 is supported
.SECOND_WISHBONE(0), //set to 1 if 2nd wishbone is needed
.ECC_ENABLE(0), // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC )
- .WB_ERROR(0) // set to 1 to support Wishbone error (asserts at ECC double bit error)
+ .WB_ERROR(0), // set to 1 to support Wishbone error (asserts at ECC double bit error)
+ .BIST_MODE(1), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w)
+ .SPEED_BIN(1) // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11)
) ddr3_top
(
//clock and reset
diff --git a/example_demo/arty_s7/arty_ddr3_openxc7.bit b/example_demo/arty_s7/arty_ddr3_openxc7.bit
index 0934e48..eef956d 100644
Binary files a/example_demo/arty_s7/arty_ddr3_openxc7.bit and b/example_demo/arty_s7/arty_ddr3_openxc7.bit differ
diff --git a/example_demo/arty_s7/caas.conf b/example_demo/arty_s7/caas.conf
new file mode 100644
index 0000000..02fc88e
--- /dev/null
+++ b/example_demo/arty_s7/caas.conf
@@ -0,0 +1,11 @@
+[project]
+Backend = openxc7
+Part = xc7s50csga324-1
+Top = arty_ddr3
+Constraint = arty_ddr3.xdc
+Sources = *.v
+
+[caas]
+Server = https://caas.symbioticeda.com:18888/
+
+
diff --git a/example_demo/arty_s7/clk_wiz.v b/example_demo/arty_s7/clk_wiz.v
index 7ce38cc..2b18760 100644
--- a/example_demo/arty_s7/clk_wiz.v
+++ b/example_demo/arty_s7/clk_wiz.v
@@ -22,18 +22,18 @@ module clk_wiz
.COMPENSATION ("INTERNAL"),
.STARTUP_WAIT ("FALSE"),
.DIVCLK_DIVIDE (1),
- .CLKFBOUT_MULT (10), // 100 MHz * 10 = 1000 MHz
+ .CLKFBOUT_MULT (12), // 100 MHz * 12 = 1200 MHz
.CLKFBOUT_PHASE (0.000),
- .CLKOUT0_DIVIDE (12), // 1000 MHz / 12 = 83.333 MHz
+ .CLKOUT0_DIVIDE (12), // 1200 MHz / 12 = 100 MHz
.CLKOUT0_PHASE (0.000),
.CLKOUT0_DUTY_CYCLE (0.500),
- .CLKOUT1_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz
+ .CLKOUT1_DIVIDE (3), // 1200 MHz / 3 = 400 MHz
.CLKOUT1_PHASE (0.000),
.CLKOUT1_DUTY_CYCLE (0.500),
- .CLKOUT2_DIVIDE (5), // 1000 MHz / 5 = 200 MHz
+ .CLKOUT2_DIVIDE (6), // 1200 MHz / 6 = 200 MHz
.CLKOUT2_PHASE (0.000),
.CLKOUT2_DUTY_CYCLE (0.500),
- .CLKOUT3_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 90 phase
+ .CLKOUT3_DIVIDE (3), // 1200 MHz / 3 = 400 MHz, 90 phase
.CLKOUT3_PHASE (90.000),
.CLKOUT3_DUTY_CYCLE (0.500),
.CLKIN1_PERIOD (10.000) // 100 MHz input
diff --git a/example_demo/arty_s7/ddr3_controller.v b/example_demo/arty_s7/ddr3_controller.v
new file mode 120000
index 0000000..abadf6a
--- /dev/null
+++ b/example_demo/arty_s7/ddr3_controller.v
@@ -0,0 +1 @@
+../../rtl/ddr3_controller.v
\ No newline at end of file
diff --git a/example_demo/arty_s7/ddr3_phy.v b/example_demo/arty_s7/ddr3_phy.v
new file mode 120000
index 0000000..6fd8666
--- /dev/null
+++ b/example_demo/arty_s7/ddr3_phy.v
@@ -0,0 +1 @@
+../../rtl/ddr3_phy.v
\ No newline at end of file
diff --git a/example_demo/arty_s7/ddr3_top.v b/example_demo/arty_s7/ddr3_top.v
new file mode 120000
index 0000000..5595b49
--- /dev/null
+++ b/example_demo/arty_s7/ddr3_top.v
@@ -0,0 +1 @@
+../../rtl/ddr3_top.v
\ No newline at end of file
diff --git a/example_demo/enclustra_kx2_st1/caas.conf b/example_demo/enclustra_kx2_st1/caas.conf
new file mode 100644
index 0000000..8b356f0
--- /dev/null
+++ b/example_demo/enclustra_kx2_st1/caas.conf
@@ -0,0 +1,11 @@
+[project]
+Backend = openxc7
+Part = xc7k160tffg676-2
+Top = enclustra_ddr3
+Constraint = enclustra_ddr3.xdc
+Sources = ./ddr3_top.v,./ddr3_controller.v,./ddr3_phy.v,./enclustra_ddr3.v,./uart_rx.v,./uart_tx.v,./clk_wiz.v
+
+[caas]
+Server = https://caas.symbioticeda.com:18888/
+
+
diff --git a/example_demo/enclustra_kx2_st1/ddr3_controller.v b/example_demo/enclustra_kx2_st1/ddr3_controller.v
new file mode 120000
index 0000000..abadf6a
--- /dev/null
+++ b/example_demo/enclustra_kx2_st1/ddr3_controller.v
@@ -0,0 +1 @@
+../../rtl/ddr3_controller.v
\ No newline at end of file
diff --git a/example_demo/enclustra_kx2_st1/ddr3_phy.v b/example_demo/enclustra_kx2_st1/ddr3_phy.v
new file mode 120000
index 0000000..6fd8666
--- /dev/null
+++ b/example_demo/enclustra_kx2_st1/ddr3_phy.v
@@ -0,0 +1 @@
+../../rtl/ddr3_phy.v
\ No newline at end of file
diff --git a/example_demo/enclustra_kx2_st1/ddr3_top.v b/example_demo/enclustra_kx2_st1/ddr3_top.v
new file mode 120000
index 0000000..5595b49
--- /dev/null
+++ b/example_demo/enclustra_kx2_st1/ddr3_top.v
@@ -0,0 +1 @@
+../../rtl/ddr3_top.v
\ No newline at end of file
diff --git a/example_demo/nexys_video/caas.conf b/example_demo/nexys_video/caas.conf
new file mode 100644
index 0000000..b8a5b68
--- /dev/null
+++ b/example_demo/nexys_video/caas.conf
@@ -0,0 +1,11 @@
+[project]
+Backend = openxc7
+Part = xc7a200tsbg484-1
+Top = nexysvideo_ddr3
+Constraint = nexysvideo_ddr3.xdc
+Sources = *.v
+
+[caas]
+Server = https://caas.symbioticeda.com:18888/
+
+
diff --git a/example_demo/nexys_video/ddr3_controller.v b/example_demo/nexys_video/ddr3_controller.v
new file mode 120000
index 0000000..abadf6a
--- /dev/null
+++ b/example_demo/nexys_video/ddr3_controller.v
@@ -0,0 +1 @@
+../../rtl/ddr3_controller.v
\ No newline at end of file
diff --git a/example_demo/nexys_video/ddr3_phy.v b/example_demo/nexys_video/ddr3_phy.v
new file mode 120000
index 0000000..6fd8666
--- /dev/null
+++ b/example_demo/nexys_video/ddr3_phy.v
@@ -0,0 +1 @@
+../../rtl/ddr3_phy.v
\ No newline at end of file
diff --git a/example_demo/nexys_video/ddr3_top.v b/example_demo/nexys_video/ddr3_top.v
new file mode 120000
index 0000000..5595b49
--- /dev/null
+++ b/example_demo/nexys_video/ddr3_top.v
@@ -0,0 +1 @@
+../../rtl/ddr3_top.v
\ No newline at end of file
diff --git a/example_demo/orangecrab_ecp5/caas.conf b/example_demo/orangecrab_ecp5/caas.conf
new file mode 100644
index 0000000..acd5242
--- /dev/null
+++ b/example_demo/orangecrab_ecp5/caas.conf
@@ -0,0 +1,11 @@
+[project]
+Backend = ecp5
+Part = lfe5u-85f-8mg285c-csfbga285
+Top = orangecrab_ecp5_ddr3
+Constraint = orangecrab_ecp5_ddr3.pcf
+Sources = *.v
+
+[caas]
+Server = https://caas.symbioticeda.com:18888/
+
+
diff --git a/example_demo/orangecrab_ecp5/ddr3_controller.v b/example_demo/orangecrab_ecp5/ddr3_controller.v
new file mode 120000
index 0000000..abadf6a
--- /dev/null
+++ b/example_demo/orangecrab_ecp5/ddr3_controller.v
@@ -0,0 +1 @@
+../../rtl/ddr3_controller.v
\ No newline at end of file
diff --git a/example_demo/orangecrab_ecp5/ddr3_phy_ecp5.v b/example_demo/orangecrab_ecp5/ddr3_phy_ecp5.v
new file mode 120000
index 0000000..9f6600e
--- /dev/null
+++ b/example_demo/orangecrab_ecp5/ddr3_phy_ecp5.v
@@ -0,0 +1 @@
+../../rtl/ecp5_phy/ddr3_phy_ecp5.v
\ No newline at end of file
diff --git a/example_demo/orangecrab_ecp5/ddr3_top.v b/example_demo/orangecrab_ecp5/ddr3_top.v
new file mode 100644
index 0000000..1e117bc
--- /dev/null
+++ b/example_demo/orangecrab_ecp5/ddr3_top.v
@@ -0,0 +1,474 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// Filename: ddr3_top.v
+// Project: UberDDR3 - An Open Source DDR3 Controller
+//
+// Purpose: Top module which instantiates the ddr3_controller and ddr3_phy modules
+// Use this as top module for instantiating UberDDR3 with Wishbone Interface.
+//
+// Engineer: Angelo C. Jacobo
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// Copyright (C) 2023-2025 Angelo Jacobo
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+`default_nettype none
+`timescale 1ps / 1ps
+`define LATTICE_ECP5_PHY
+
+module ddr3_top #(
+ parameter CONTROLLER_CLK_PERIOD = 12_000, //ps, clock period of the controller interface
+ DDR3_CLK_PERIOD = 3_000, //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD)
+ ROW_BITS = 14, //width of row address
+ COL_BITS = 10, //width of column address
+ BA_BITS = 3, //width of bank address
+ BYTE_LANES = 2, //number of byte lanes of DDR3 RAM
+ AUX_WIDTH = 4, //width of aux line (must be >= 4)
+ WB2_ADDR_BITS = 7, //width of 2nd wishbone address bus
+ WB2_DATA_BITS = 32, //width of 2nd wishbone data bus
+ DUAL_RANK_DIMM = 0, // enable dual rank DIMM (1 = enable, 0 = disable)
+ // DDR3 timing parameter values
+ parameter SPEED_BIN = 3, // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11)
+ SDRAM_CAPACITY = 5, // 0 = 256Mb, 1 = 512Mb, 2 = 1Gb, 3 = 2Gb, 4 = 4Gb, 5 = 8Gb, 6 = 16Gb
+ TRCD = 13_750, // ps Active to Read/Write command time (only used if SPEED_BIN = 0)
+ TRP = 13_750, // ps Precharge command period (only used if SPEED_BIN = 0)
+ TRAS = 35_000, // ps ACT to PRE command period (only used if SPEED_BIN = 0)
+ parameter[0:0] MICRON_SIM = 0, //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW)
+ ODELAY_SUPPORTED = 0, //set to 1 when ODELAYE2 is supported
+ SECOND_WISHBONE = 0, //set to 1 if 2nd wishbone for debugging is needed
+ DLL_OFF = 0, // 1 = DLL off for low frequency ddr3 clock (< 125MHz)
+ WB_ERROR = 0, // set to 1 to support Wishbone error (asserts at ECC double bit error)
+ parameter[1:0] BIST_MODE = 1, // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w)
+ parameter[1:0] ECC_ENABLE = 0, // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC )
+ parameter[1:0] DIC = 2'b00, //Output Driver Impedance Control (2'b00 = RZQ/6, 2'b01 = RZQ/7, RZQ = 240ohms) (only change when you know what you are doing)
+ parameter[2:0] RTT_NOM = 3'b011, //RTT Nominal (3'b000 = disabled, 3'b001 = RZQ/4, 3'b010 = RZQ/2 , 3'b011 = RZQ/6, RZQ = 240ohms) (only change when you know what you are doing)
+ parameter[1:0] SELF_REFRESH = 2'b00, // 0 = use i_user_self_refresh input, 1 = Self-refresh mode is enabled after 64 controller clock cycles of no requests, 2 = 128 cycles, 3 = 256 cycles
+ parameter // The next parameters act more like a localparam (since user does not have to set this manually) but was added here to simplify port declaration
+ DQ_BITS = 8, //device width (fixed to 8; if DDR3 is x16 then BYTE_LANES will be 2 while DQ_BITS stays 8)
+ serdes_ratio = 4, // this controller is fixed as a 4:1 memory controller (CONTROLLER_CLK_PERIOD/DDR3_CLK_PERIOD = 4)
+ wb_addr_bits = ROW_BITS + COL_BITS + BA_BITS - $clog2(serdes_ratio*2) + DUAL_RANK_DIMM,
+ wb_data_bits = DQ_BITS*BYTE_LANES*serdes_ratio*2,
+ wb_sel_bits = wb_data_bits / 8,
+ wb2_sel_bits = WB2_DATA_BITS / 8,
+ //4 is the width of a single ddr3 command {cs_n, ras_n, cas_n, we_n} plus 3 (ck_en, odt, reset_n) plus bank bits plus row bits
+ cmd_len = 4 + 3 + BA_BITS + ROW_BITS + 2*DUAL_RANK_DIMM
+ )
+ (
+ input wire i_controller_clk, i_ddr3_clk, i_ref_clk, //i_controller_clk = CONTROLLER_CLK_PERIOD, i_ddr3_clk = DDR3_CLK_PERIOD, i_ref_clk = 200MHz
+ input wire i_ddr3_clk_90, //required only when ODELAY_SUPPORTED is zero
+ input wire i_rst_n,
+ //
+ // Wishbone inputs
+ input wire i_wb_cyc, //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled)
+ input wire i_wb_stb, //request a transfer
+ input wire i_wb_we, //write-enable (1 = write, 0 = read)
+ input wire[wb_addr_bits - 1:0] i_wb_addr, //burst-addressable {row,bank,col}
+ input wire[wb_data_bits - 1:0] i_wb_data, //write data, for a 4:1 controller data width is 8 times the number of pins on the device
+ input wire[wb_sel_bits - 1:0] i_wb_sel, //byte strobe for write (1 = write the byte)
+ input wire[AUX_WIDTH - 1:0] i_aux, //for AXI-interface compatibility (given upon strobe)
+ // Wishbone outputs
+ output wire o_wb_stall, //1 = busy, cannot accept requests
+ output wire o_wb_ack, //1 = read/write request has completed
+ output wire o_wb_err, //1 = Error due to ECC double bit error (fixed to 0 if WB_ERROR = 0)
+ output wire[wb_data_bits - 1:0] o_wb_data, //read data, for a 4:1 controller data width is 8 times the number of pins on the device
+ output wire[AUX_WIDTH - 1:0] o_aux, //for AXI-interface compatibility (given upon strobe)
+ //
+ // Wishbone 2 (PHY) inputs
+ input wire i_wb2_cyc, //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled)
+ input wire i_wb2_stb, //request a transfer
+ input wire i_wb2_we, //write-enable (1 = write, 0 = read)
+ input wire[WB2_ADDR_BITS - 1:0] i_wb2_addr, // memory-mapped register to be accessed
+ input wire[WB2_DATA_BITS - 1:0] i_wb2_data, //write data
+ input wire[wb2_sel_bits - 1:0] i_wb2_sel, //byte strobe for write (1 = write the byte)
+ // Wishbone 2 (Controller) outputs
+ output wire o_wb2_stall, //1 = busy, cannot accept requests
+ output wire o_wb2_ack, //1 = read/write request has completed
+ output wire[WB2_DATA_BITS - 1:0] o_wb2_data, //read data
+ //
+ // DDR3 I/O Interface
+ output wire[DUAL_RANK_DIMM:0] o_ddr3_clk_p, o_ddr3_clk_n,
+ output wire o_ddr3_reset_n,
+ output wire[DUAL_RANK_DIMM:0] o_ddr3_cke, // CKE
+ output wire[DUAL_RANK_DIMM:0] o_ddr3_cs_n, // chip select signal
+ output wire o_ddr3_ras_n, // RAS#
+ output wire o_ddr3_cas_n, // CAS#
+ output wire o_ddr3_we_n, // WE#
+ output wire[ROW_BITS-1:0] o_ddr3_addr,
+ output wire[BA_BITS-1:0] o_ddr3_ba_addr,
+ inout wire[(DQ_BITS*BYTE_LANES)-1:0] io_ddr3_dq,
+ inout wire[BYTE_LANES-1:0] io_ddr3_dqs, io_ddr3_dqs_n,
+ output wire[BYTE_LANES-1:0] o_ddr3_dm,
+ output wire[DUAL_RANK_DIMM:0] o_ddr3_odt, // on-die termination
+ //
+ // Done Calibration pin
+ output wire o_calib_complete,
+ // Debug outputs
+ output wire[31:0] o_debug1,
+// output wire[31:0] o_debug2,
+// output wire[31:0] o_debug3,
+// output wire[(DQ_BITS*BYTE_LANES)/8-1:0] o_ddr3_debug_read_dqs_p,
+// output wire[(DQ_BITS*BYTE_LANES)/8-1:0] o_ddr3_debug_read_dqs_n
+ //
+ // User enabled self-refresh
+ input wire i_user_self_refresh,
+ output wire uart_tx
+ );
+
+// Instantiation Template (DEFAULT VALUE IS FOR ARTY S7)
+/*
+// DDR3 Controller
+ddr3_top #(
+ .CONTROLLER_CLK_PERIOD(12_000), //ps, clock period of the controller interface
+ .DDR3_CLK_PERIOD(3_000), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD)
+ .ROW_BITS(14), //width of row address
+ .COL_BITS(10), //width of column address
+ .BA_BITS(3), //width of bank address
+ .BYTE_LANES(2), //number of byte lanes of DDR3 RAM
+ .AUX_WIDTH(4), //width of aux line (must be >= 4)
+ .MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW)
+ .ODELAY_SUPPORTED(0), //set to 1 if ODELAYE2 is supported
+ .SECOND_WISHBONE(0), //set to 1 if 2nd wishbone for debugging is needed
+ .ECC_ENABLE(0), // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC )
+ .WB_ERROR(0) // set to 1 to support Wishbone error (asserts at ECC double bit error)
+ ) ddr3_top
+ (
+ //clock and reset
+ .i_controller_clk(i_controller_clk),
+ .i_ddr3_clk(i_ddr3_clk), //i_controller_clk has period of CONTROLLER_CLK_PERIOD, i_ddr3_clk has period of DDR3_CLK_PERIOD
+ .i_ref_clk(i_ref_clk), // usually set to 200 MHz
+ .i_ddr3_clk_90(i_ddr3_clk_90), //90 degree phase shifted version i_ddr3_clk (required only when ODELAY_SUPPORTED is zero)
+ .i_rst_n(!i_rst && clk_locked),
+ //
+ // Wishbone inputs
+ .i_wb_cyc(1), //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled)
+ .i_wb_stb(i_wb_stb), //request a transfer
+ .i_wb_we(i_wb_we), //write-enable (1 = write, 0 = read)
+ .i_wb_addr(i_wb_addr), //burst-addressable {row,bank,col}
+ .i_wb_data(i_wb_data), //write data, for a 4:1 controller data width is 8 times the number of pins on the device
+ .i_wb_sel(16'hffff), //byte strobe for write (1 = write the byte)
+ .i_aux(i_wb_we), //for AXI-interface compatibility (given upon strobe)
+ // Wishbone outputs
+ .o_wb_stall(o_wb_stall), //1 = busy, cannot accept requests
+ .o_wb_ack(o_wb_ack), //1 = read/write request has completed
+ .o_wb_err(o_wb_err), //1 = Error due to ECC double bit error (fixed to 0 if WB_ERROR = 0)
+ .o_wb_data(o_wb_data), //read data, for a 4:1 controller data width is 8 times the number of pins on the device
+ .o_aux(o_aux),
+ //
+ // Wishbone 2 (PHY) inputs
+ .i_wb2_cyc(0), //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled)
+ .i_wb2_stb(0), //request a transfer
+ .i_wb2_we(0), //write-enable (1 = write, 0 = read)
+ .i_wb2_addr(0), //burst-addressable {row,bank,col}
+ .i_wb2_data(0), //write data, for a 4:1 controller data width is 8 times the number of pins on the device
+ .i_wb2_sel(0), //byte strobe for write (1 = write the byte)
+ // Wishbone 2 (Controller) outputs
+ .o_wb2_stall(), //1 = busy, cannot accept requests
+ .o_wb2_ack(), //1 = read/write request has completed
+ .o_wb2_data(), //read data, for a 4:1 controller data width is 8 times the number of pins on the device
+ //
+ // DDR3 I/O Interface
+ .o_ddr3_clk_p(ddr3_clk_p),
+ .o_ddr3_clk_n(ddr3_clk_n),
+ .o_ddr3_reset_n(ddr3_reset_n),
+ .o_ddr3_cke(ddr3_cke),
+ .o_ddr3_cs_n(ddr3_cs_n), // width = number of DDR3 ranks
+ .o_ddr3_ras_n(ddr3_ras_n),
+ .o_ddr3_cas_n(ddr3_cas_n),
+ .o_ddr3_we_n(ddr3_we_n),
+ .o_ddr3_addr(ddr3_addr), // width = ROW_BITS
+ .o_ddr3_ba_addr(ddr3_ba), // width = BA_BITS
+ .io_ddr3_dq(ddr3_dq), // width = BYTE_LANES*8
+ .io_ddr3_dqs(ddr3_dqs_p), // width = BYTE_LANES
+ .io_ddr3_dqs_n(ddr3_dqs_n), // width = BYTE_LANES
+ .o_ddr3_dm(ddr3_dm), // width = BYTE_LANES
+ .o_ddr3_odt(ddr3_odt),
+ // Debug outputs
+ .o_debug1()
+ ////////////////////////////////////
+ );
+*/
+
+ // Wire connections between controller and phy
+ wire[cmd_len*serdes_ratio-1:0] cmd;
+ wire dqs_tri_control, dq_tri_control;
+ wire toggle_dqs;
+ wire[wb_data_bits-1:0] data;
+ wire[wb_sel_bits-1:0] dm;
+ wire[BYTE_LANES-1:0] bitslip;
+ wire[DQ_BITS*BYTE_LANES*8-1:0] iserdes_data;
+ wire[BYTE_LANES*8-1:0] iserdes_dqs;
+ wire[BYTE_LANES*8-1:0] iserdes_bitslip_reference;
+ wire idelayctrl_rdy;
+ wire[4:0] odelay_data_cntvaluein, odelay_dqs_cntvaluein;
+ wire[4:0] idelay_data_cntvaluein, idelay_dqs_cntvaluein;
+ wire[BYTE_LANES-1:0] odelay_data_ld, odelay_dqs_ld;
+ wire[BYTE_LANES-1:0] idelay_data_ld, idelay_dqs_ld;
+ wire write_leveling_calib;
+ wire reset;
+
+ // logic for self-refresh
+ reg[8:0] refresh_counter = 0;
+ reg user_self_refresh;
+ // refresh counter
+ always @(posedge i_controller_clk) begin
+ if(i_wb_stb && i_wb_cyc) begin // if there is Wishbone request, then reset counter
+ refresh_counter <= 0;
+ end
+ else if(!o_wb_stall || user_self_refresh) begin // if no request (but not stalled) OR already on self-refresh, then increment counter
+ refresh_counter <= refresh_counter + 1;
+ end
+ end
+ // choose self-refresh options
+ always @* begin
+ case(SELF_REFRESH)
+ 2'b00: user_self_refresh = i_user_self_refresh; // use input i_user_self_refresh (high = enter self-refresh, low = exit self-refresh)
+ 2'b01: user_self_refresh = refresh_counter[6]; // Self-refresh mode is enabled after 64 controller clock cycles of no requests, then exit Self-refresh after another 64 controller clk cycles
+ 2'b10: user_self_refresh = refresh_counter[7]; // Self-refresh mode is enabled after 128 controller clock cycles of no requests, then exit Self-refresh after another 128 controller clk cycles
+ 2'b11: user_self_refresh = refresh_counter[8]; // Self-refresh mode is enabled after 256 controller clock cycles of no requests, then exit Self-refresh after another 256 controller clk cycles
+ endcase
+ end
+
+
+
+ //module instantiations
+ ddr3_controller #(
+ .CONTROLLER_CLK_PERIOD(CONTROLLER_CLK_PERIOD), //ps, clock period of the controller interface
+ .DDR3_CLK_PERIOD(DDR3_CLK_PERIOD), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD)
+ .ROW_BITS(ROW_BITS), //width of row address
+ .COL_BITS(COL_BITS), //width of column address
+ .BA_BITS(BA_BITS), //width of bank address
+ .DQ_BITS(DQ_BITS), //width of DQ
+ .LANES(BYTE_LANES), // byte lanes
+ .AUX_WIDTH(AUX_WIDTH), //width of aux line (must be >= 4)
+ .WB2_ADDR_BITS(WB2_ADDR_BITS), //width of 2nd wishbone address bus
+ .WB2_DATA_BITS(WB2_DATA_BITS), //width of 2nd wishbone data bus
+ .MICRON_SIM(MICRON_SIM), //simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW)
+ .ODELAY_SUPPORTED(ODELAY_SUPPORTED), //set to 1 when ODELAYE2 is supported
+ .SECOND_WISHBONE(SECOND_WISHBONE), //set to 1 if 2nd wishbone is needed
+ .ECC_ENABLE(ECC_ENABLE), // set to 1 or 2 to add ECC (1 = Side-band ECC per burst, 2 = Side-band ECC per 8 bursts , 3 = Inline ECC )
+ .DLL_OFF(DLL_OFF), // 1 = DLL off for low frequency ddr3 clock (< 125MHz)
+ .WB_ERROR(WB_ERROR), // set to 1 to support Wishbone error (asserts at ECC double bit error)
+ .BIST_MODE(BIST_MODE), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w)
+ .DIC(DIC), //Output Driver Impedance Control (2'b00 = RZQ/6, 2'b01 = RZQ/7, RZQ = 240ohms)
+ .RTT_NOM(RTT_NOM), //RTT Nominal (3'b000 = disabled, 3'b001 = RZQ/4, 3'b010 = RZQ/2 , 3'b011 = RZQ/6, RZQ = 240ohms)
+ .DUAL_RANK_DIMM(DUAL_RANK_DIMM), // enable dual rank DIMM (1 = enable, 0 = disable)
+ .SPEED_BIN(SPEED_BIN), // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11)
+ .SDRAM_CAPACITY(SDRAM_CAPACITY), // 0 = 256Mb, 1 = 512Mb, 2 = 1Gb, 3 = 2Gb, 4 = 4Gb, 5 = 8Gb, 6 = 16Gb
+ .TRCD(TRCD), // ps Active to Read/Write command time (only used if SPEED_BIN = 0)
+ .TRP(TRP), // ps Precharge command period (only used if SPEED_BIN = 0)
+ .TRAS(TRAS) // ps ACT to PRE command period (only used if SPEED_BIN = 0)
+ ) ddr3_controller_inst (
+ .i_controller_clk(i_controller_clk), //i_controller_clk has period of CONTROLLER_CLK_PERIOD
+ .i_rst_n(i_rst_n), //active-low reset (not a clock; the 200MHz reference is i_ref_clk)
+ // Wishbone inputs
+ .i_wb_cyc(i_wb_cyc), //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled)
+ .i_wb_stb(i_wb_stb), //request a transfer
+ .i_wb_we(i_wb_we), //write-enable (1 = write, 0 = read)
+ .i_wb_addr(i_wb_addr), //burst-addressable {row,bank,col}
+ .i_wb_data(i_wb_data), //write data, for a 4:1 controller data width is 8 times the number of pins on the device
+ .i_wb_sel(i_wb_sel), //byte strobe for write (1 = write the byte)
+ .i_aux(i_aux), //for AXI-interface compatibility (given upon strobe)
+ // Wishbone outputs
+ .o_wb_stall(o_wb_stall), //1 = busy, cannot accept requests
+ .o_wb_ack(o_wb_ack), //1 = read/write request has completed
+ .o_wb_err(o_wb_err), //1 = Error due to ECC double bit error (fixed to 0 if WB_ERROR = 0)
+ .o_wb_data(o_wb_data), //read data, for a 4:1 controller data width is 8 times the number of pins on the device
+ .o_aux(o_aux), //for AXI-interface compatibility (returned upon ack)
+ // Wishbone 2 (PHY) inputs
+ .i_wb2_cyc(i_wb2_cyc), //bus cycle active (1 = normal operation, 0 = all ongoing transaction are to be cancelled)
+ .i_wb2_stb(i_wb2_stb), //request a transfer
+ .i_wb2_we(i_wb2_we), //write-enable (1 = write, 0 = read)
+ .i_wb2_addr(i_wb2_addr), // memory-mapped register to be accessed
+ .i_wb2_data(i_wb2_data), //write data
+ .i_wb2_sel(i_wb2_sel), //byte strobe for write (1 = write the byte)
+ // Wishbone 2 (Controller) outputs
+ .o_wb2_stall(o_wb2_stall), //1 = busy, cannot accept requests
+ .o_wb2_ack(o_wb2_ack), //1 = read/write request has completed
+ .o_wb2_data(o_wb2_data), //read data
+ //
+ // PHY interface
+ .i_phy_iserdes_data(iserdes_data),
+ .i_phy_iserdes_dqs(iserdes_dqs),
+ .i_phy_iserdes_bitslip_reference(iserdes_bitslip_reference),
+ .i_phy_idelayctrl_rdy(idelayctrl_rdy),
+ .o_phy_cmd(cmd),
+ .o_phy_dqs_tri_control(dqs_tri_control),
+ .o_phy_dq_tri_control(dq_tri_control),
+ .o_phy_toggle_dqs(toggle_dqs),
+ .o_phy_data(data),
+ .o_phy_dm(dm),
+ .o_phy_odelay_data_cntvaluein(odelay_data_cntvaluein),
+ .o_phy_odelay_dqs_cntvaluein(odelay_dqs_cntvaluein),
+ .o_phy_idelay_data_cntvaluein(idelay_data_cntvaluein),
+ .o_phy_idelay_dqs_cntvaluein(idelay_dqs_cntvaluein),
+ .o_phy_odelay_data_ld(odelay_data_ld),
+ .o_phy_odelay_dqs_ld(odelay_dqs_ld),
+ .o_phy_idelay_data_ld(idelay_data_ld),
+ .o_phy_idelay_dqs_ld(idelay_dqs_ld),
+ .o_phy_bitslip(bitslip),
+ .o_phy_write_leveling_calib(write_leveling_calib),
+ .o_phy_reset(reset),
+ // Done Calibration pin
+ .o_calib_complete(o_calib_complete),
+ // Debug outputs
+ .o_debug1(o_debug1),
+// .o_debug2(o_debug2),
+// .o_debug3(o_debug3)
+ // User enabled self-refresh
+ .i_user_self_refresh(user_self_refresh),
+ .uart_tx(uart_tx)
+ );
+ `ifndef LATTICE_ECP5_PHY // XILINX PHY (ddr3_phy), used when LATTICE_ECP5_PHY is not defined
+ ddr3_phy #(
+ .ROW_BITS(ROW_BITS), //width of row address
+ .BA_BITS(BA_BITS), //width of bank address
+ .DQ_BITS(DQ_BITS), //width of DQ
+ .LANES(BYTE_LANES), //number of DQ lanes, taken from BYTE_LANES
+ .CONTROLLER_CLK_PERIOD(CONTROLLER_CLK_PERIOD), //ps, period of clock input to this DDR3 controller module
+ .DDR3_CLK_PERIOD(DDR3_CLK_PERIOD), //ps, period of clock input to DDR3 RAM device
+ .ODELAY_SUPPORTED(ODELAY_SUPPORTED), //set to 1 when ODELAYE2 is supported
+ .DUAL_RANK_DIMM(DUAL_RANK_DIMM) // enable dual rank DIMM (1 = enable, 0 = disable)
+ ) ddr3_phy_inst (
+ .i_controller_clk(i_controller_clk), //same clock net that drives ddr3_controller_inst
+ .i_ddr3_clk(i_ddr3_clk),
+ .i_ref_clk(i_ref_clk),
+ .i_ddr3_clk_90(i_ddr3_clk_90),
+ .i_rst_n(i_rst_n), //same reset net as ddr3_controller_inst
+ // Controller Interface: i_controller_* nets come from ddr3_controller_inst o_phy_* ports, o_controller_* nets feed its i_phy_* ports
+ .i_controller_reset(reset),
+ .i_controller_cmd(cmd),
+ .i_controller_dqs_tri_control(dqs_tri_control),
+ .i_controller_dq_tri_control(dq_tri_control),
+ .i_controller_toggle_dqs(toggle_dqs),
+ .i_controller_data(data),
+ .i_controller_dm(dm),
+ .i_controller_odelay_data_cntvaluein(odelay_data_cntvaluein),
+ .i_controller_odelay_dqs_cntvaluein(odelay_dqs_cntvaluein),
+ .i_controller_idelay_data_cntvaluein(idelay_data_cntvaluein),
+ .i_controller_idelay_dqs_cntvaluein(idelay_dqs_cntvaluein),
+ .i_controller_odelay_data_ld(odelay_data_ld),
+ .i_controller_odelay_dqs_ld(odelay_dqs_ld),
+ .i_controller_idelay_data_ld(idelay_data_ld),
+ .i_controller_idelay_dqs_ld(idelay_dqs_ld),
+ .i_controller_bitslip(bitslip),
+ .i_controller_write_leveling_calib(write_leveling_calib),
+ .o_controller_iserdes_data(iserdes_data),
+ .o_controller_iserdes_dqs(iserdes_dqs),
+ .o_controller_iserdes_bitslip_reference(iserdes_bitslip_reference),
+ .o_controller_idelayctrl_rdy(idelayctrl_rdy),
+ // DDR3 I/O Interface
+ .o_ddr3_clk_p(o_ddr3_clk_p),
+ .o_ddr3_clk_n(o_ddr3_clk_n),
+ .o_ddr3_reset_n(o_ddr3_reset_n),
+ .o_ddr3_cke(o_ddr3_cke), // CKE
+ .o_ddr3_cs_n(o_ddr3_cs_n), // chip select signal
+ .o_ddr3_ras_n(o_ddr3_ras_n), // RAS#
+ .o_ddr3_cas_n(o_ddr3_cas_n), // CAS#
+ .o_ddr3_we_n(o_ddr3_we_n), // WE#
+ .o_ddr3_addr(o_ddr3_addr),
+ .o_ddr3_ba_addr(o_ddr3_ba_addr),
+ .io_ddr3_dq(io_ddr3_dq),
+ .io_ddr3_dqs(io_ddr3_dqs),
+ .io_ddr3_dqs_n(io_ddr3_dqs_n),
+ .o_ddr3_dm(o_ddr3_dm),
+ .o_ddr3_odt(o_ddr3_odt), // on-die termination
+ .o_ddr3_debug_read_dqs_p(/*o_ddr3_debug_read_dqs_p*/), // debug output left unconnected
+ .o_ddr3_debug_read_dqs_n(/*o_ddr3_debug_read_dqs_n*/) // debug output left unconnected
+ );
+ `else // LATTICE ECP5 PHY (ddr3_phy_ecp5): same port hookup as the Xilinx branch, fewer parameter overrides
+ ddr3_phy_ecp5 #(
+ .ROW_BITS(ROW_BITS), //width of row address
+ .BA_BITS(BA_BITS), //width of bank address
+ .DQ_BITS(DQ_BITS), //width of DQ
+ .LANES(BYTE_LANES), //number of DQ lanes, taken from BYTE_LANES
+ .CONTROLLER_CLK_PERIOD(CONTROLLER_CLK_PERIOD) //ps, period of clock input to this DDR3 controller module
+ ) ddr3_phy_inst (
+ .i_controller_clk(i_controller_clk), //same clock net that drives ddr3_controller_inst
+ .i_ddr3_clk(i_ddr3_clk),
+ .i_ref_clk(i_ref_clk),
+ .i_ddr3_clk_90(i_ddr3_clk_90),
+ .i_rst_n(i_rst_n), //same reset net as ddr3_controller_inst
+ // Controller Interface: i_controller_* nets come from ddr3_controller_inst o_phy_* ports, o_controller_* nets feed its i_phy_* ports
+ .i_controller_reset(reset),
+ .i_controller_cmd(cmd),
+ .i_controller_dqs_tri_control(dqs_tri_control),
+ .i_controller_dq_tri_control(dq_tri_control),
+ .i_controller_toggle_dqs(toggle_dqs),
+ .i_controller_data(data),
+ .i_controller_dm(dm),
+ .i_controller_odelay_data_cntvaluein(odelay_data_cntvaluein),
+ .i_controller_odelay_dqs_cntvaluein(odelay_dqs_cntvaluein),
+ .i_controller_idelay_data_cntvaluein(idelay_data_cntvaluein),
+ .i_controller_idelay_dqs_cntvaluein(idelay_dqs_cntvaluein),
+ .i_controller_odelay_data_ld(odelay_data_ld),
+ .i_controller_odelay_dqs_ld(odelay_dqs_ld),
+ .i_controller_idelay_data_ld(idelay_data_ld),
+ .i_controller_idelay_dqs_ld(idelay_dqs_ld),
+ .i_controller_bitslip(bitslip),
+ .i_controller_write_leveling_calib(write_leveling_calib),
+ .o_controller_iserdes_data(iserdes_data),
+ .o_controller_iserdes_dqs(iserdes_dqs),
+ .o_controller_iserdes_bitslip_reference(iserdes_bitslip_reference),
+ .o_controller_idelayctrl_rdy(idelayctrl_rdy),
+ // DDR3 I/O Interface
+ .o_ddr3_clk_p(o_ddr3_clk_p),
+ .o_ddr3_clk_n(o_ddr3_clk_n),
+ .o_ddr3_reset_n(o_ddr3_reset_n),
+ .o_ddr3_cke(o_ddr3_cke), // CKE
+ .o_ddr3_cs_n(o_ddr3_cs_n), // chip select signal
+ .o_ddr3_ras_n(o_ddr3_ras_n), // RAS#
+ .o_ddr3_cas_n(o_ddr3_cas_n), // CAS#
+ .o_ddr3_we_n(o_ddr3_we_n), // WE#
+ .o_ddr3_addr(o_ddr3_addr),
+ .o_ddr3_ba_addr(o_ddr3_ba_addr),
+ .io_ddr3_dq(io_ddr3_dq),
+ .io_ddr3_dqs(io_ddr3_dqs),
+ .io_ddr3_dqs_n(io_ddr3_dqs_n),
+ .o_ddr3_dm(o_ddr3_dm),
+ .o_ddr3_odt(o_ddr3_odt), // on-die termination
+ .o_ddr3_debug_read_dqs_p(/*o_ddr3_debug_read_dqs_p*/), // debug output left unconnected
+ .o_ddr3_debug_read_dqs_n(/*o_ddr3_debug_read_dqs_n*/) // debug output left unconnected
+ );
+ `endif // LATTICE_ECP5_PHY
+
+ // // display value of parameters for easy debugging
+ // initial begin
+ // $display("\nDDR3 TOP PARAMETERS:\n-----------------------------");
+ // $display("CONTROLLER_CLK_PERIOD = %0d", CONTROLLER_CLK_PERIOD);
+ // $display("DDR3_CLK_PERIOD = %0d", DDR3_CLK_PERIOD);
+ // $display("ROW_BITS = %0d", ROW_BITS);
+ // $display("COL_BITS = %0d", COL_BITS);
+ // $display("BA_BITS = %0d", BA_BITS);
+ // $display("BYTE_LANES = %0d", BYTE_LANES);
+ // $display("AUX_WIDTH = %0d", AUX_WIDTH);
+ // $display("WB2_ADDR_BITS = %0d", WB2_ADDR_BITS);
+ // $display("WB2_DATA_BITS = %0d", WB2_DATA_BITS);
+ // $display("MICRON_SIM = %0d", MICRON_SIM);
+ // $display("ODELAY_SUPPORTED = %0d", ODELAY_SUPPORTED);
+ // $display("SECOND_WISHBONE = %0d", SECOND_WISHBONE);
+ // $display("WB_ERROR = %0d", WB_ERROR);
+ // $display("BIST_MODE = %0d", BIST_MODE);
+ // $display("ECC_ENABLE = %0d", ECC_ENABLE);
+ // $display("DIC = %0d", DIC);
+ // $display("RTT_NOM = %0d", RTT_NOM);
+ // $display("SELF_REFRESH = %0d", SELF_REFRESH);
+ // $display("DUAL_RANK_DIMM = %0d", DUAL_RANK_DIMM);
+ // $display("End of DDR3 TOP PARAMETERS\n-----------------------------");
+ // end
+
+endmodule
diff --git a/example_demo/orangecrab_ecp5/iserdes_soft.v b/example_demo/orangecrab_ecp5/iserdes_soft.v
new file mode 120000
index 0000000..3b5c05f
--- /dev/null
+++ b/example_demo/orangecrab_ecp5/iserdes_soft.v
@@ -0,0 +1 @@
+../../rtl/ecp5_phy/iserdes_soft.v
\ No newline at end of file
diff --git a/example_demo/orangecrab_ecp5/orangecrab_ecp5_ddr3.dfu b/example_demo/orangecrab_ecp5/orangecrab_ecp5_ddr3.dfu
index 2b160a4..17d0c60 100644
Binary files a/example_demo/orangecrab_ecp5/orangecrab_ecp5_ddr3.dfu and b/example_demo/orangecrab_ecp5/orangecrab_ecp5_ddr3.dfu differ
diff --git a/example_demo/orangecrab_ecp5/oserdes_soft.v b/example_demo/orangecrab_ecp5/oserdes_soft.v
new file mode 120000
index 0000000..745696d
--- /dev/null
+++ b/example_demo/orangecrab_ecp5/oserdes_soft.v
@@ -0,0 +1 @@
+../../rtl/ecp5_phy/oserdes_soft.v
\ No newline at end of file
diff --git a/example_demo/qmtech_kintex_7/caas.conf b/example_demo/qmtech_kintex_7/caas.conf
new file mode 100644
index 0000000..f449be9
--- /dev/null
+++ b/example_demo/qmtech_kintex_7/caas.conf
@@ -0,0 +1,11 @@
+[project]
+Backend = openxc7
+Part = xc7k325tffg676-1
+Top = qmtech_kintex7_ddr3
+Constraint = qmtech_kintex7_ddr3.xdc
+Sources = *.v
+
+[caas]
+Server = https://caas.symbioticeda.com:18888/
+
+
diff --git a/example_demo/qmtech_kintex_7/clk_wiz.v b/example_demo/qmtech_kintex_7/clk_wiz.v
index 72bc093..e9132b1 100644
--- a/example_demo/qmtech_kintex_7/clk_wiz.v
+++ b/example_demo/qmtech_kintex_7/clk_wiz.v
@@ -24,16 +24,16 @@ module clk_wiz
.DIVCLK_DIVIDE (1),
.CLKFBOUT_MULT (20), // 50 MHz * 20 = 1000 MHz
.CLKFBOUT_PHASE (0.000),
- .CLKOUT0_DIVIDE (12), // 1000 MHz / 12 = 83.333 MHz
+ .CLKOUT0_DIVIDE (8), // 1000 MHz / 8 = 125 MHz
.CLKOUT0_PHASE (0.000),
.CLKOUT0_DUTY_CYCLE (0.500),
- .CLKOUT1_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz
+ .CLKOUT1_DIVIDE (2), // 1000 MHz / 2 = 500 MHz
.CLKOUT1_PHASE (0.000),
.CLKOUT1_DUTY_CYCLE (0.500),
.CLKOUT2_DIVIDE (5), // 1000 MHz / 5 = 200 MHz
.CLKOUT2_PHASE (0.000),
.CLKOUT2_DUTY_CYCLE (0.500),
- .CLKOUT3_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 90 phase
+ .CLKOUT3_DIVIDE (2), // 1000 MHz / 2 = 500 MHz, 90 phase
.CLKOUT3_PHASE (90.000),
.CLKOUT3_DUTY_CYCLE (0.500),
.CLKIN1_PERIOD (20.000) // 50 MHz input
diff --git a/example_demo/qmtech_kintex_7/ddr3_controller.v b/example_demo/qmtech_kintex_7/ddr3_controller.v
new file mode 120000
index 0000000..abadf6a
--- /dev/null
+++ b/example_demo/qmtech_kintex_7/ddr3_controller.v
@@ -0,0 +1 @@
+../../rtl/ddr3_controller.v
\ No newline at end of file
diff --git a/example_demo/qmtech_kintex_7/ddr3_phy.v b/example_demo/qmtech_kintex_7/ddr3_phy.v
new file mode 120000
index 0000000..6fd8666
--- /dev/null
+++ b/example_demo/qmtech_kintex_7/ddr3_phy.v
@@ -0,0 +1 @@
+../../rtl/ddr3_phy.v
\ No newline at end of file
diff --git a/example_demo/qmtech_kintex_7/ddr3_top.v b/example_demo/qmtech_kintex_7/ddr3_top.v
new file mode 120000
index 0000000..5595b49
--- /dev/null
+++ b/example_demo/qmtech_kintex_7/ddr3_top.v
@@ -0,0 +1 @@
+../../rtl/ddr3_top.v
\ No newline at end of file
diff --git a/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3.v b/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3.v
index 2e416ac..6df6113 100644
--- a/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3.v
+++ b/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3.v
@@ -141,8 +141,8 @@
// DDR3 Controller
ddr3_top #(
- .CONTROLLER_CLK_PERIOD(12_000), //ps, clock period of the controller interface
- .DDR3_CLK_PERIOD(3_000), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD)
+ .CONTROLLER_CLK_PERIOD(10_000), //ps, clock period of the controller interface. NOTE(review): 10_000 ps = 100 MHz, but this directory's clk_wiz.v comments CLKOUT0 as 125 MHz (8_000 ps) -- confirm which clock feeds the controller
+ .DDR3_CLK_PERIOD(2_500), //ps, clock period of the DDR3 RAM device (must be 1/4 of the CONTROLLER_CLK_PERIOD). NOTE(review): 2_500 ps = 400 MHz, but clk_wiz.v comments CLKOUT1/CLKOUT3 as 500 MHz (2_000 ps) -- confirm
.ROW_BITS(14), //width of row address
.COL_BITS(10), //width of column address
.BA_BITS(3), //width of bank address
@@ -152,7 +152,9 @@
.WB2_DATA_BITS(32), //width of 2nd wishbone data bus
.MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW)
.ODELAY_SUPPORTED(1), //set to 1 when ODELAYE2 is supported
- .SECOND_WISHBONE(0) //set to 1 if 2nd wishbone is needed
+ .SECOND_WISHBONE(0), //set to 1 if 2nd wishbone is needed
+ .BIST_MODE(1), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w)
+ .SPEED_BIN(1) // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11)
) ddr3_top
(
//clock and reset
diff --git a/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3_openxc7.bit b/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3_openxc7.bit
index 5d50a5c..c918970 100644
Binary files a/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3_openxc7.bit and b/example_demo/qmtech_kintex_7/qmtech_kintex7_ddr3_openxc7.bit differ
diff --git a/example_demo/qmtech_wukong/caas.conf b/example_demo/qmtech_wukong/caas.conf
new file mode 100644
index 0000000..e422074
--- /dev/null
+++ b/example_demo/qmtech_wukong/caas.conf
@@ -0,0 +1,11 @@
+[project]
+Backend = openxc7
+Part = xc7a100tfgg676-2
+Top = wukong_ddr3
+Constraint = wukong_ddr3.xdc
+Sources = *.v
+
+[caas]
+Server = https://caas.symbioticeda.com:18888/
+
+
diff --git a/example_demo/qmtech_wukong/clk_wiz.v b/example_demo/qmtech_wukong/clk_wiz.v
index 72bc093..bc59612 100644
--- a/example_demo/qmtech_wukong/clk_wiz.v
+++ b/example_demo/qmtech_wukong/clk_wiz.v
@@ -22,18 +22,18 @@ module clk_wiz
.COMPENSATION ("INTERNAL"),
.STARTUP_WAIT ("FALSE"),
.DIVCLK_DIVIDE (1),
- .CLKFBOUT_MULT (20), // 50 MHz * 20 = 1000 MHz
+ .CLKFBOUT_MULT (24), // 50 MHz * 24 = 1200 MHz
.CLKFBOUT_PHASE (0.000),
- .CLKOUT0_DIVIDE (12), // 1000 MHz / 12 = 83.333 MHz
+ .CLKOUT0_DIVIDE (12), // 1200 MHz / 12 = 100 MHz
.CLKOUT0_PHASE (0.000),
.CLKOUT0_DUTY_CYCLE (0.500),
- .CLKOUT1_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz
+ .CLKOUT1_DIVIDE (3), // 1200 MHz / 3 = 400 MHz
.CLKOUT1_PHASE (0.000),
.CLKOUT1_DUTY_CYCLE (0.500),
- .CLKOUT2_DIVIDE (5), // 1000 MHz / 5 = 200 MHz
+ .CLKOUT2_DIVIDE (6), // 1200 MHz / 6 = 200 MHz
.CLKOUT2_PHASE (0.000),
.CLKOUT2_DUTY_CYCLE (0.500),
- .CLKOUT3_DIVIDE (3), // 1000 MHz / 3 = 333.333 MHz, 90 phase
+ .CLKOUT3_DIVIDE (3), // 1200 MHz / 3 = 400 MHz, 90 phase
.CLKOUT3_PHASE (90.000),
.CLKOUT3_DUTY_CYCLE (0.500),
.CLKIN1_PERIOD (20.000) // 50 MHz input
diff --git a/example_demo/qmtech_wukong/ddr3_controller.v b/example_demo/qmtech_wukong/ddr3_controller.v
new file mode 120000
index 0000000..abadf6a
--- /dev/null
+++ b/example_demo/qmtech_wukong/ddr3_controller.v
@@ -0,0 +1 @@
+../../rtl/ddr3_controller.v
\ No newline at end of file
diff --git a/example_demo/qmtech_wukong/ddr3_phy.v b/example_demo/qmtech_wukong/ddr3_phy.v
new file mode 120000
index 0000000..6fd8666
--- /dev/null
+++ b/example_demo/qmtech_wukong/ddr3_phy.v
@@ -0,0 +1 @@
+../../rtl/ddr3_phy.v
\ No newline at end of file
diff --git a/example_demo/qmtech_wukong/ddr3_top.v b/example_demo/qmtech_wukong/ddr3_top.v
new file mode 120000
index 0000000..5595b49
--- /dev/null
+++ b/example_demo/qmtech_wukong/ddr3_top.v
@@ -0,0 +1 @@
+../../rtl/ddr3_top.v
\ No newline at end of file
diff --git a/example_demo/qmtech_wukong/wukong_ddr3.v b/example_demo/qmtech_wukong/wukong_ddr3.v
index 221f159..862f85a 100644
--- a/example_demo/qmtech_wukong/wukong_ddr3.v
+++ b/example_demo/qmtech_wukong/wukong_ddr3.v
@@ -156,7 +156,9 @@
.WB2_DATA_BITS(32), //width of 2nd wishbone data bus
.MICRON_SIM(0), //enable faster simulation for micron ddr3 model (shorten POWER_ON_RESET_HIGH and INITIAL_CKE_LOW)
.ODELAY_SUPPORTED(0), //set to 1 when ODELAYE2 is supported
- .SECOND_WISHBONE(0) //set to 1 if 2nd wishbone is needed
+ .SECOND_WISHBONE(0), //set to 1 if 2nd wishbone is needed
+ .BIST_MODE(1), // 0 = No BIST, 1 = run through all address space ONCE , 2 = run through all address space for every test (burst w/r, random w/r, alternating r/w)
+ .SPEED_BIN(1) // 0 = Use top-level parameters , 1 = DDR3-1066 (7-7-7) , 2 = DDR3-1333 (9-9-9) , 3 = DDR3-1600 (11-11-11)
) ddr3_top
(
//clock and reset
diff --git a/example_demo/qmtech_wukong/wukong_ddr3_openxc7.bit b/example_demo/qmtech_wukong/wukong_ddr3_openxc7.bit
index 1ddad45..38fd93e 100644
Binary files a/example_demo/qmtech_wukong/wukong_ddr3_openxc7.bit and b/example_demo/qmtech_wukong/wukong_ddr3_openxc7.bit differ
diff --git a/example_demo/sechzig_mx2/caas.conf b/example_demo/sechzig_mx2/caas.conf
new file mode 100644
index 0000000..c2b2766
--- /dev/null
+++ b/example_demo/sechzig_mx2/caas.conf
@@ -0,0 +1,11 @@
+[project]
+Backend = openxc7
+Part = xc7a35tftg256-2
+Top = sechzig_mx2_ddr3
+Constraint = sechzig_mx2_ddr3.xdc
+Sources = *.v
+
+[caas]
+Server = https://caas.symbioticeda.com:18888/
+
+
diff --git a/example_demo/sechzig_mx2/ddr3_controller.v b/example_demo/sechzig_mx2/ddr3_controller.v
new file mode 120000
index 0000000..abadf6a
--- /dev/null
+++ b/example_demo/sechzig_mx2/ddr3_controller.v
@@ -0,0 +1 @@
+../../rtl/ddr3_controller.v
\ No newline at end of file
diff --git a/example_demo/sechzig_mx2/ddr3_phy.v b/example_demo/sechzig_mx2/ddr3_phy.v
new file mode 120000
index 0000000..6fd8666
--- /dev/null
+++ b/example_demo/sechzig_mx2/ddr3_phy.v
@@ -0,0 +1 @@
+../../rtl/ddr3_phy.v
\ No newline at end of file
diff --git a/example_demo/sechzig_mx2/ddr3_top.v b/example_demo/sechzig_mx2/ddr3_top.v
new file mode 120000
index 0000000..5595b49
--- /dev/null
+++ b/example_demo/sechzig_mx2/ddr3_top.v
@@ -0,0 +1 @@
+../../rtl/ddr3_top.v
\ No newline at end of file
diff --git a/rtl/ddr3_controller.v b/rtl/ddr3_controller.v
index 1caab4d..780b216 100644
--- a/rtl/ddr3_controller.v
+++ b/rtl/ddr3_controller.v
@@ -50,6 +50,7 @@
// `define UART_DEBUG_READ_LEVEL
// `define UART_DEBUG_WRITE_LEVEL
// `define UART_DEBUG_ALIGN
+// `define UART_DEBUG_BIST
`ifdef UART_DEBUG_READ_LEVEL
@@ -58,6 +59,8 @@
`define UART_DEBUG
`elsif UART_DEBUG_ALIGN
`define UART_DEBUG
+`elsif UART_DEBUG_BIST
+ `define UART_DEBUG
`endif
module ddr3_controller #(
@@ -295,6 +298,12 @@ module ddr3_controller #(
localparam[3:0] WRITE_TO_WRITE_DELAY = 0;
localparam[3:0] WRITE_TO_READ_DELAY = find_delay((CWL_nCK + 4 + ps_to_nCK(tWTR)), WRITE_SLOT, READ_SLOT); //4
localparam[3:0] WRITE_TO_PRECHARGE_DELAY = find_delay((CWL_nCK + 4 + ps_to_nCK(tWR)), WRITE_SLOT, PRECHARGE_SLOT); //5
+ // determines bitwidth of delay counters
+ localparam MAX_DELAY_BEFORE_PRECHARGE = max(ACTIVATE_TO_PRECHARGE_DELAY, max(WRITE_TO_PRECHARGE_DELAY, READ_TO_PRECHARGE_DELAY));
+ localparam MAX_DELAY_BEFORE_ACTIVATE = max(PRECHARGE_TO_ACTIVATE_DELAY, ACTIVATE_TO_ACTIVATE_DELAY);
+ localparam MAX_DELAY_BEFORE_WRITE = max(ACTIVATE_TO_WRITE_DELAY, max(READ_TO_WRITE_DELAY + 'd1, WRITE_TO_WRITE_DELAY));
+ localparam MAX_DELAY_BEFORE_READ = max(ACTIVATE_TO_READ_DELAY, max(WRITE_TO_READ_DELAY + 'd1, READ_TO_READ_DELAY));
+
/* verilator lint_on WIDTHEXPAND */
localparam PRE_REFRESH_DELAY = WRITE_TO_PRECHARGE_DELAY + 1;
`ifdef FORMAL
@@ -338,7 +347,7 @@ module ddr3_controller #(
//the delays included the ODELAY and OSERDES when issuing the read command
//and the IDELAY and ISERDES when receiving the data (NOTE TO SELF: ELABORATE ON WHY THOSE MAGIC NUMBERS)
localparam READ_ACK_PIPE_WIDTH = READ_DELAY + 1 + 2 + 1 + 1 + (DLL_OFF? 2 : 0); // FOr DLL_OFF, phy has no delay thus add delay here
- localparam MAX_ADDED_READ_ACK_DELAY = 16;
+ localparam MAX_ADDED_READ_ACK_DELAY = 2;
localparam DELAY_BEFORE_WRITE_LEVEL_FEEDBACK = STAGE2_DATA_DEPTH + ps_to_cycles(tWLO+tWLOE) + 10;
//plus 10 controller clocks for possible bus latency and the delay for receiving feedback DQ from IOBUF -> IDELAY -> ISERDES
localparam ECC_INFORMATION_BITS = (ECC_ENABLE == 2)? max_information_bits(wb_data_bits) : max_information_bits(wb_data_bits/8);
@@ -428,11 +437,12 @@ module ddr3_controller #(
/************************************************************* Registers and Wires *************************************************************/
integer index;
- (* mark_debug ="true" *) reg[4:0] instruction_address = 0; //address for accessing rom instruction
- reg[27:0] instruction = INITIAL_RESET_INSTRUCTION; //instruction retrieved from reset instruction rom
- reg[ DELAY_COUNTER_WIDTH - 1:0] delay_counter = INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0]; //counter used for delays
- reg delay_counter_is_zero = (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0] == 0); //counter is now zero so retrieve next delay
- reg reset_done = 0; //high if reset has already finished
+ (* mark_debug ="true" *) reg[4:0] instruction_address = 0, instruction_address_d; //address for accessing rom instruction
+ reg[27:0] instruction = INITIAL_RESET_INSTRUCTION, instruction_d; //instruction retrieved from reset instruction rom
+ reg[ DELAY_COUNTER_WIDTH - 1:0] delay_counter = INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0], delay_counter_d; //counter used for delays
+ reg delay_counter_is_zero = (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0] == 0), delay_counter_is_zero_d; //counter is now zero so retrieve next delay
+ reg reset_done = 0, reset_done_d; //high if reset has already finished
+ reg precharge_all_instruction, precharge_all_instruction_d;
reg pause_counter = 0;
wire issue_read_command;
reg stage2_update = 1;
@@ -444,9 +454,9 @@ module ddr3_controller #(
// ECC_ENABLE = 3 regs
/* verilator lint_off UNUSEDSIGNAL */
- reg[BA_BITS-1:0] ecc_bank_addr = 0, ecc_bank_addr_prev = 0;
- reg[ROW_BITS-1:0] ecc_row_addr = 0, ecc_row_addr_prev = 0;
- reg[COL_BITS-1:0] ecc_col_addr = 0, ecc_col_addr_prev = 0;
+ reg[BA_BITS-1:0] ecc_bank_addr = 0, ecc_bank_addr_prev = 0, ecc_bank_addr_d, ecc_bank_addr_prev_d;
+ reg[ROW_BITS-1:0] ecc_row_addr = 0, ecc_row_addr_prev = 0, ecc_row_addr_d, ecc_row_addr_prev_d;
+ reg[COL_BITS-1:0] ecc_col_addr = 0, ecc_col_addr_prev = 0, ecc_col_addr_d, ecc_col_addr_prev_d;
reg we_prev;
reg stage0_pending = 0;
reg[wb_addr_bits - 1:0] stage0_addr = 0;
@@ -472,43 +482,43 @@ module ddr3_controller #(
reg[wb_sel_bits - 1 : 0] stage2_ecc_write_data_mask_q = 0, stage2_ecc_write_data_mask_d;
wire[wb_data_bits/8 - 1 : 0] decoded_parity;
wire[wb_data_bits/8 - 1 : 0] encoded_parity;
- reg[wb_data_bits/8 - 1 : 0] stage2_encoded_parity = 0;
+ reg[wb_data_bits/8 - 1 : 0] stage2_encoded_parity = 0, stage2_encoded_parity_d;
reg ecc_req_stage2 = 0;
/* verilator lint_on UNUSEDSIGNAL */
//pipeline stage 1 regs
- reg stage1_pending = 0;
- reg[AUX_WIDTH-1:0] stage1_aux = 0;
- reg stage1_we = 0;
- reg[wb_data_bits - 1:0] stage1_data = 0;
+ reg stage1_pending = 0, stage1_pending_d;
+ reg[AUX_WIDTH-1:0] stage1_aux = 0, stage1_aux_d;
+ reg stage1_we = 0, stage1_we_d;
+ reg[wb_data_bits - 1:0] stage1_data = 0, stage1_data_d;
wire[wb_data_bits - 1:0] stage1_data_mux, stage1_data_encoded;
- reg[wb_sel_bits - 1:0] stage1_dm = 0;
- reg[COL_BITS-1:0] stage1_col = 0;
- reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage1_bank = 0;
- reg[ROW_BITS-1:0] stage1_row = 0;
- reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage1_next_bank = 0;
- reg[ROW_BITS-1:0] stage1_next_row = 0;
+ reg[wb_sel_bits - 1:0] stage1_dm = 0, stage1_dm_d;
+ reg[COL_BITS-1:0] stage1_col = 0, stage1_col_d;
+ reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage1_bank = 0, stage1_bank_d;
+ reg[ROW_BITS-1:0] stage1_row = 0, stage1_row_d;
+ reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage1_next_bank = 0, stage1_next_bank_d;
+ reg[ROW_BITS-1:0] stage1_next_row = 0, stage1_next_row_d;
wire[wb_addr_bits-1:0] wb_addr_plus_anticipate, calib_addr_plus_anticipate;
//pipeline stage 2 regs
- reg stage2_pending = 0;
- reg[AUX_WIDTH-1:0] stage2_aux = 0;
- reg stage2_we = 0;
- reg[wb_sel_bits - 1:0] stage2_dm_unaligned = 0, stage2_dm_unaligned_temp = 0;
+ reg stage2_pending = 0, stage2_pending_d;
+ reg[AUX_WIDTH-1:0] stage2_aux = 0, stage2_aux_d;
+ reg stage2_we = 0, stage2_we_d;
+ reg[wb_sel_bits - 1:0] stage2_dm_unaligned = 0, stage2_dm_unaligned_temp = 0, stage2_dm_unaligned_d, stage2_dm_unaligned_temp_d;
reg[wb_sel_bits - 1:0] stage2_dm[STAGE2_DATA_DEPTH-1:0];
- reg[wb_data_bits - 1:0] stage2_data_unaligned = 0, stage2_data_unaligned_temp = 0;
+ reg[wb_data_bits - 1:0] stage2_data_unaligned = 0, stage2_data_unaligned_temp = 0, stage2_data_unaligned_d, stage2_data_unaligned_temp_d;
reg[wb_data_bits - 1:0] stage2_data[STAGE2_DATA_DEPTH-1:0];
reg [DQ_BITS*8 - 1:0] unaligned_data[LANES-1:0];
reg [8 - 1:0] unaligned_dm[LANES-1:0];
- reg[COL_BITS-1:0] stage2_col = 0;
- reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage2_bank = 0;
- reg[ROW_BITS-1:0] stage2_row = 0;
-
+ reg[COL_BITS-1:0] stage2_col = 0, stage2_col_d;
+ reg[BA_BITS-1+DUAL_RANK_DIMM:0] stage2_bank = 0, stage2_bank_d;
+ reg[ROW_BITS-1:0] stage2_row = 0, stage2_row_d;
+
//delay counter for every banks
- reg[3:0] delay_before_precharge_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_precharge_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0]; //delay counters
- reg[3:0] delay_before_activate_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_activate_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ;
- reg[3:0] delay_before_write_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_write_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ;
- reg[3:0] delay_before_read_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] , delay_before_read_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ;
+ reg[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0] delay_before_precharge_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_precharge_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0]; //delay counters
+ reg[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0] delay_before_activate_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_activate_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ;
+ reg[$clog2(MAX_DELAY_BEFORE_WRITE):0] delay_before_write_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0], delay_before_write_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ;
+ reg[$clog2(MAX_DELAY_BEFORE_READ):0] delay_before_read_counter_q[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] , delay_before_read_counter_d[(1<<(BA_BITS+DUAL_RANK_DIMM))-1:0] ;
//commands to be sent to PHY (4 slots per controller clk cycle)
reg[cmd_len-1:0] cmd_d[3:0];
@@ -517,7 +527,7 @@ module ddr3_controller #(
end
reg cmd_odt_q = 0, cmd_odt, cmd_reset_n;
reg[DUAL_RANK_DIMM:0] cmd_ck_en, prev_cmd_ck_en;
- reg o_wb_stall_q = 1, o_wb_stall_d, o_wb_stall_calib = 1;
+ reg o_wb_stall_int_q = 1, o_wb_stall_int_d, o_wb_stall_calib;
reg precharge_slot_busy;
reg activate_slot_busy;
reg[1:0] write_dqs_q;
@@ -547,8 +557,8 @@ module ddr3_controller #(
/* verilator lint_off UNUSEDSIGNAL */
reg[15:0] dqs_bitslip_arrangement = 0;
/* verilator lint_off UNUSEDSIGNAL */
- reg[3:0] added_read_pipe_max = 0;
- reg[3:0] added_read_pipe[LANES - 1:0];
+ reg added_read_pipe_max = 0;
+ reg added_read_pipe[LANES - 1:0];
//each lane will have added delay relative to when ISERDES should actually return the data
//this make sure that we will wait until the lane with longest delay (added_read_pipe_max) is received before
//all lanes are sent to wishbone data
@@ -559,7 +569,7 @@ module ddr3_controller #(
reg[$clog2(READ_ACK_PIPE_WIDTH-1):0] write_ack_index_q = 1, write_ack_index_d = 1;
reg index_read_pipe; //tells which delay_read_pipe will be updated (there are two delay_read_pipe)
reg index_wb_data; //tells which o_wb_data_q will be sent to o_wb_data
- reg[15:0] delay_read_pipe[1:0]; //delay when each lane will retrieve i_phy_iserdes_data (since different lanes might not be aligned with each other and needs to be retrieved at a different time)
+ reg[1:0] delay_read_pipe[1:0]; //delay when each lane will retrieve i_phy_iserdes_data (since different lanes might not be aligned with each other and needs to be retrieved at a different time)
reg[wb_data_bits - 1:0] o_wb_data_q[1:0]; //store data retrieved from i_phy_iserdes_data to be sent to o_wb_data
wire[wb_data_bits - 1:0] o_wb_data_q_current;
reg[wb_data_bits - 1:0] o_wb_data_q_q;
@@ -581,6 +591,7 @@ module ddr3_controller #(
reg prev_write_level_feedback = 1;
reg[wb_data_bits-1:0] read_data_store = 0;
reg[127:0] write_pattern = 0;
+ reg[63:0] write_pattern_lane = 0;
reg[$clog2(64):0] data_start_index[LANES-1:0];
reg[LANES-1:0] lane_write_dq_late = 0;
reg[LANES-1:0] lane_read_dq_early = 0;
@@ -593,6 +604,7 @@ module ddr3_controller #(
reg stored_write_level_feedback = 0;
reg[5:0] start_index_check = 0;
reg[63:0] read_lane_data = 0;
+ reg[31:0] read_lane_data_shifted = 0;
reg odelay_cntvalue_halfway = 0;
reg initial_calibration_done = 0;
reg final_calibration_done = 0;
@@ -650,9 +662,24 @@ module ddr3_controller #(
reg[wb_data_bits-1:0] wrong_data = 0, expected_data=0;
wire[wb_data_bits-1:0] correct_data;
reg[LANES-1:0] late_dq;
+ reg stage2_do_wr_or_rd, stage2_do_wr_or_rd_d;
+ reg stage2_do_wr, stage2_do_wr_d;
+ reg stage2_do_update_delay_before_precharge_after_wr, stage2_do_update_delay_before_precharge_after_wr_d;
+ reg stage2_do_rd, stage2_do_rd_d;
+ reg stage2_do_update_delay_before_precharge_after_rd, stage2_do_update_delay_before_precharge_after_rd_d;
+ reg stage2_do_act, stage2_do_act_d;
+ reg stage2_do_update_delay_before_read_after_act, stage2_do_update_delay_before_read_after_act_d;
+ reg stage2_do_update_delay_before_write_after_act, stage2_do_update_delay_before_write_after_act_d;
+ reg stage2_do_pre, stage2_do_pre_d;
+ reg stage1_do_pre, stage1_do_pre_d;
+ reg stage1_do_act, stage1_do_act_d;
+ reg force_o_wb_stall_high_q, force_o_wb_stall_high_d;
+ reg force_o_wb_stall_calib_high_q, force_o_wb_stall_calib_high_d;
+ reg[1:0] prep_done;
+ reg write_pattern_matches;
+
// initial block for all regs
initial begin
- o_wb_stall = 1;
for(index = 0; index < MAX_ADDED_READ_ACK_DELAY; index = index + 1) begin
o_wb_ack_read_q[index] = 0;
end
@@ -838,60 +865,88 @@ module ddr3_controller #(
 always @(posedge i_controller_clk) begin
 if(sync_rst_controller) begin
- instruction_address <= 0;
 `ifdef FORMAL_COVER
 instruction_address <= 21;
+ `else
+ instruction_address <= 0; // normal build: start at address 0 (now mutually exclusive with the FORMAL_COVER value instead of being overridden by it)
 `endif
 instruction <= INITIAL_RESET_INSTRUCTION;
 delay_counter <= INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0];
 delay_counter_is_zero <= (INITIAL_RESET_INSTRUCTION[DELAY_COUNTER_WIDTH - 1:0] == 0);
 reset_done <= 1'b0;
+ precharge_all_instruction <= 1'b0; // flag is cleared while in reset
 end
 else begin
- //update counter after reaching zero
- if(delay_counter_is_zero) begin
- delay_counter <= instruction[DELAY_COUNTER_WIDTH - 1:0]; //retrieve delay value of current instruction, we count to zero thus minus 1
- end
-
- //else: decrement delay counter when current instruction needs delay
- //don't decrement (has infinite time) when last bit of
- //delay_counter is 1 (for r/w calibration and prestall delay)
- //address will only move forward for these kinds of delay only
- //when skip_reset_seq_delay is toggled
- else if(instruction[USE_TIMER] /*&& delay_counter != {(DELAY_COUNTER_WIDTH){1'b1}}*/ && !pause_counter && delay_counter != 0) delay_counter <= delay_counter - 1;
-
- //delay_counter of 1 means we will need to update the delay_counter next clock cycle (delay_counter of zero) so we need to retrieve
- //now the next instruction. The same thing needs to be done when current instruction does not need the timer delay.
- if( ((delay_counter == 1) && !pause_counter) || !instruction[USE_TIMER]/* || skip_reset_seq_delay*/) begin
- delay_counter_is_zero <= 1;
- instruction <= read_rom_instruction(instruction_address);
- if(instruction_address == 5'd22) begin // if user_self_refresh is disabled, wrap back to 19 (Precharge All before Refresh)
- instruction_address <= 5'd19;
- end
- else if(instruction_address == 5'd26) begin // self-refresh exit always wraps back to 20 (Refresh)
- instruction_address <= 5'd20;
- end
- else begin
- instruction_address <= instruction_address + 5'd1; // just increment address
- end
- end
- //we are now on the middle of a delay
- else begin
- delay_counter_is_zero <=0;
- end
-
- if(instruction_address == 5'd22 && user_self_refresh_q) begin // if user_self_refresh is enabled, go straight to 23
- instruction_address <= 23; // go to Precharge All for Self-refresh
- delay_counter_is_zero <= 1;
- delay_counter <= 0;
- instruction <= read_rom_instruction(instruction_address);
- end
-
- //instruction[RST_DONE] is non-persistent thus we need to register it once it goes high
- reset_done <= instruction[RST_DONE]? 1'b1:reset_done;
+ instruction_address <= instruction_address_d; // out of reset: only register the *_d values computed by the combinational always @* block that follows
+ instruction <= instruction_d;
+ delay_counter <= delay_counter_d;
+ delay_counter_is_zero <= delay_counter_is_zero_d;
+ reset_done <= reset_done_d;
+ precharge_all_instruction <= precharge_all_instruction_d; // registered copy of the "address is 20 or 24" comparison
 end
 end
+    // Next-state logic of the reset/refresh instruction sequencer. Every *_d value
+    // computed here is registered by the clocked block above when not in reset.
+    always @* begin
+        // defaults: hold the current state
+        instruction_address_d = instruction_address;
+        instruction_d = instruction;
+        delay_counter_d = delay_counter;
+        delay_counter_is_zero_d = delay_counter_is_zero;
+        reset_done_d = reset_done;
+
+        //update counter after reaching zero
+        if(delay_counter_is_zero) begin
+            //reload with the delay field of the current instruction (loaded as-is, nothing is subtracted here)
+            delay_counter_d = instruction[DELAY_COUNTER_WIDTH - 1:0];
+        end
+        //else: decrement delay counter when current instruction uses the timer.
+        //The counter holds its value while pause_counter is high or once it
+        //has reached 0. (The "all ones = infinite delay" check is currently
+        //commented out of the condition below.)
+        else if(instruction[USE_TIMER] /*&& delay_counter != {(DELAY_COUNTER_WIDTH){1'b1}}*/ && !pause_counter && delay_counter != 0) begin
+            delay_counter_d = delay_counter - 1;
+        end
+
+        //delay_counter of 1 means we will need to update the delay_counter next clock cycle (delay_counter of zero) so we need to retrieve
+        //now the next instruction. The same thing needs to be done when current instruction does not need the timer delay.
+        if( ((delay_counter == 1) && !pause_counter) || !instruction[USE_TIMER]/* || skip_reset_seq_delay*/) begin
+            delay_counter_is_zero_d = 1'b1;
+            instruction_d = read_rom_instruction(instruction_address);
+            if(instruction_address == 5'd22) begin
+                // if user_self_refresh is disabled, wrap back to 19 (Precharge All before Refresh)
+                instruction_address_d = 5'd19;
+            end
+            else if(instruction_address == 5'd26) begin
+                // self-refresh exit always wraps back to 20 (Refresh)
+                instruction_address_d = 5'd20;
+            end
+            else begin
+                instruction_address_d = instruction_address + 5'd1; // just increment address
+            end
+        end
+        //we are now on the middle of a delay
+        else begin
+            delay_counter_is_zero_d = 1'b0;
+        end
+
+        // if user_self_refresh is enabled, go straight to 23 (placed after the block above so it overrides the address chosen there)
+        if(instruction_address == 5'd22 && user_self_refresh_q) begin
+            instruction_address_d = 5'd23; // go to Precharge All for Self-refresh (23)
+            delay_counter_is_zero_d = 1'b1;
+            delay_counter_d = 0;
+            instruction_d = read_rom_instruction(instruction_address);
+        end
+
+        //instruction[RST_DONE] is non-persistent thus we need to register it once it goes high
+        reset_done_d = instruction[RST_DONE]? 1'b1 : reset_done;
+
+        // precharge-all flag: next address 20/24 means the instruction being fetched comes from ROM entry 19/23 (Precharge All). NOTE(review): the address is also 20 right after the 26->20 wrap - confirm consumers expect that
+        precharge_all_instruction_d = (instruction_address_d == 5'd20) || (instruction_address_d == 5'd24);
+    end
+
+
// register user-enabled self-refresh
always @(posedge i_controller_clk) begin
user_self_refresh_q <= i_user_self_refresh && (user_self_refresh_q || (instruction_address != 5'd26)) && final_calibration_done; //will not go high again if already at instruction_address 26 (self-refresh exit), only go high when calibration is done
@@ -900,7 +955,6 @@ module ddr3_controller #(
user_self_refresh_q <= 1'b1;
end
end
-
end
/*********************************************************************************************************************************************/
@@ -909,9 +963,7 @@ module ddr3_controller #(
//process request transaction
always @(posedge i_controller_clk) begin
if(sync_rst_controller) begin
- o_wb_stall <= 1'b1;
- o_wb_stall_q <= 1'b1;
- o_wb_stall_calib <= 1'b1;
+ o_wb_stall_int_q <= 1'b1;
//set stage 1 to 0
stage1_pending <= 0;
stage1_aux <= 0;
@@ -957,283 +1009,68 @@ module ddr3_controller #(
end
//reset bank status and active row
for( index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin
- bank_status_q[index] <= 0;
- bank_active_row_q[index] <= 0;
+ bank_status_q[index] <= 0;
+ bank_active_row_q[index] <= 0;
end
//reset data
for(index = 0; index < STAGE2_DATA_DEPTH; index = index+1) begin
stage2_data[index] <= 0;
stage2_dm[index] <= 0;
end
+ for(index=0; index shiftreg(CWL) -> OSERDES(DDR) -> ODELAY -> RAM
- end
- if(!ODELAY_SUPPORTED && !DLL_OFF) begin
- stage2_data_unaligned <= stage2_data_unaligned_temp; //_temp is for added delay of 1 clock cycle (no ODELAY so no added delay)
- stage2_dm_unaligned <= stage2_dm_unaligned_temp; //_temp is for added delay of 1 clock cycle (no ODELAY so no added delay)
- end
-
- if(stage1_update) begin
- //stage1 will not do the request (pending low) when the
- //request is on the same bank as the current request. This
- //will ensure stage1 bank will be different from stage2 bank
-
- // if ECC_ENABLE != 3, then stage1 will always receive wishbone interface
- if(ECC_ENABLE != 3) begin
- stage1_pending <= i_wb_stb;//actual request flag
- stage1_aux <= i_aux; //aux ID for AXI compatibility
- stage1_we <= i_wb_we; //write-enable
- stage1_dm <= (ECC_ENABLE == 0)? i_wb_sel : {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled
- end
- // ECC_ENABLE == 3
- else begin // if ECC_ENABLE = 3 (inline ECC), then stage1 will either receive stage0 or wishbone
- stage1_pending <= wb_stb_mux;//actual request flag
- stage1_aux <= aux_mux; //aux ID for AXI compatibility
- stage1_we <= wb_we_mux; //write-enable
- stage1_dm <= {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled
- end
-
- if(row_bank_col == 1) begin // memory address mapping: {row, bank, col}
- if(DUAL_RANK_DIMM[0]) begin
- stage1_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= i_wb_addr[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank
- stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= wb_addr_plus_anticipate[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank
- end
- stage1_row <= i_wb_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address
- stage1_bank[BA_BITS-1:0] <= i_wb_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address
- stage1_col <= { i_wb_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned)
- //stage1_next_bank will not increment unless stage1_next_col
- //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated
- //precharge and activate will happen only at the end of the
- //current column with a margin dictated by
- //MARGIN_BEFORE_ANTICIPATE
- /* verilator lint_off WIDTH */
- {stage1_next_row , stage1_next_bank[BA_BITS-1:0]} <= wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2));
- //anticipated next row and bank to be accessed
- /* verilator lint_on WIDTH */
- stage1_data <= i_wb_data;
- end
-
- else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col}
- stage1_bank[BA_BITS-1:0] <= i_wb_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address
- stage1_row <= i_wb_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address
- stage1_col <= { i_wb_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned)
- //stage1_next_row will not increment unless stage1_next_col
- //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated
- //precharge and activate will happen only at the end of the
- //current column with a margin dictated by
- //MARGIN_BEFORE_ANTICIPATE
- /* verilator lint_off WIDTH */
- {stage1_next_bank, stage1_next_row} <= wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2));
- //anticipated next row and bank to be accessed
- /* verilator lint_on WIDTH */
- stage1_data <= i_wb_data;
- end
-
- else if(row_bank_col == 2) begin // memory address mapping: {bank[2:1], row, bank[0], col} , used for ECC_ENABLE = 3 (Inline ECC)
- stage1_bank[2:1] <= wb_addr_mux[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)]; //bank_address
- stage1_row <= wb_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1) ]; //row_address
- stage1_bank[0] <= wb_addr_mux[COL_BITS - $clog2(serdes_ratio*2)];
- stage1_col <= { wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned)
- //stage1_next_bank will not increment unless stage1_next_col
- //overwraps due to MARGIN_BEFORE_ANTICIPATE. This will overwrap every two banks
- //MARGIN_BEFORE_ANTICIPATE
- /* verilator lint_off WIDTH */
- {stage1_next_bank[2:1], stage1_next_row, stage1_next_bank[0]} <= wb_addr_plus_anticipate >> (COL_BITS - $clog2(serdes_ratio*2));
- //anticipated next row and bank to be accessed
- /* verilator lint_on WIDTH */
- // ECC Mapping (Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=0#gid=0)
- ecc_bank_addr <= {2'b11,!wb_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]};
- ecc_row_addr <= {1'b1, wb_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1 + 1) ]};
- ecc_col_addr <= { wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) + 1)] ,
- wb_addr_mux[(BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)] ,
- wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) - 1) : 3], 3'b000 };
- stage1_data <= wb_data_mux;
- end
- end
-
- // request from calibrate FSM will be accepted here
- else if(stage1_update_calib) begin
- // if ECC_ENABLE != 3, then stage1 will always receive wishbone interface
- if(ECC_ENABLE != 3) begin
- stage1_pending <= calib_stb;//actual request flag
- stage1_aux <= calib_aux; //aux ID for AXI compatibility
- stage1_we <= calib_we; //write-enable
- stage1_dm <= (ECC_ENABLE == 0)? calib_sel : {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled
- end
- // ECC_ENABLE == 3
- else begin // if ECC_ENABLE = 3 (inline ECC), then stage1 will either receive stage0 or wishbone
- stage1_pending <= calib_stb_mux;//actual request flag
- stage1_we <= calib_we_mux; //write-enable
- stage1_dm <= {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled
- stage1_aux <= calib_aux_mux; //aux ID for AXI compatibility
- end
-
- if(row_bank_col == 1) begin // memory address mapping: {row, bank, col}
- if(DUAL_RANK_DIMM[0]) begin
- stage1_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= current_rank; // rank depends on current_rank
- stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] <= current_rank; // rank depends on current_rank
- end
- stage1_row <= calib_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address
- stage1_bank[BA_BITS-1:0] <= calib_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address
- stage1_col <= { calib_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned)
- //stage1_next_bank will not increment unless stage1_next_col
- //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated
- //precharge and activate will happen only at the end of the
- //current column with a margin dictated by
- //MARGIN_BEFORE_ANTICIPATE
- /* verilator lint_off WIDTH */
- {stage1_next_row , stage1_next_bank[BA_BITS-1:0] } <= calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2));
- //anticipated next row and bank to be accessed
- /* verilator lint_on WIDTH */
- stage1_data <= calib_data;
- end
- else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col}
- stage1_bank[BA_BITS-1:0] <= calib_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address
- stage1_row <= calib_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address
- stage1_col <= { calib_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned)
- //stage1_next_row will not increment unless stage1_next_col
- //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated
- //precharge and activate will happen only at the end of the
- //current column with a margin dictated by
- //MARGIN_BEFORE_ANTICIPATE
- /* verilator lint_off WIDTH */
- {stage1_next_bank, stage1_next_row} <= calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2));
- //anticipated next row and bank to be accessed
- /* verilator lint_on WIDTH */
- stage1_data <= calib_data;
- end
- else if(row_bank_col == 2) begin // memory address mapping: {bank[2:1], row, bank[0], col}
- stage1_bank[2:1] <= calib_addr_mux[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)]; //bank_address
- stage1_row <= calib_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1) ]; //row_address
- stage1_bank[0] <= calib_addr_mux[COL_BITS - $clog2(serdes_ratio*2)];
- stage1_col <= { calib_addr_mux[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned)
- //stage1_next_row will not increment unless stage1_next_col
- //overwraps due to MARGIN_BEFORE_ANTICIPATE. This will overwrap every two banks
- //MARGIN_BEFORE_ANTICIPATE
- /* verilator lint_off WIDTH */
- {stage1_next_bank[2:1], stage1_next_row, stage1_next_bank[0]} <= calib_addr_plus_anticipate >> (COL_BITS - $clog2(serdes_ratio*2));
- //anticipated next row and bank to be accessed
- /* verilator lint_on WIDTH */
- // ECC Mapping (Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=0#gid=0)
- // ECC_BANK = {11,!bank[0]}
- // ECC_ROW = {1,row>>1}
- // ECC_COL = {row[0],bank[2:1],col>>3}"
- ecc_bank_addr <= {2'b11,!calib_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]};
- ecc_row_addr <= {1'b1, calib_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1 + 1) ]};
- ecc_col_addr <= { calib_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) + 1)] ,
- calib_addr_mux[(BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)] ,
- calib_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) - 1) : 3], 3'b000 };
- stage1_data <= calib_data_mux;
- end
- end
-
// stage2 can have multiple pipelined stages inside it which acts as delay before issuing the write data (after issuing write command)
for(index = 0; index < STAGE2_DATA_DEPTH-1; index = index+1) begin
- stage2_data[index+1] <= stage2_data[index]; // 0->1, 1->2
+ stage2_data[index+1] <= stage2_data[index]; // 0->1, 1->2
stage2_dm[index+1] <= stage2_dm[index];
end
@@ -1328,20 +1165,341 @@ module ddr3_controller #(
/* verilator lint_on WIDTH */
end // end for else statement (dq is not late for this lane)
end // end of for loop to forward stage2_unaligned to stage2 by lane
-
- //abort any outgoing ack when cyc is low
- if(!i_wb_cyc && final_calibration_done) begin
- stage2_pending <= 0;
- stage1_pending <= 0;
+ end
+ end
+
+ always @* begin
+ // stage 1
+ stage1_pending_d = stage1_pending;
+ stage1_aux_d = stage1_aux;
+ stage1_we_d = stage1_we;
+ stage1_dm_d = stage1_dm;
+ stage1_col_d = stage1_col;
+ stage1_bank_d = stage1_bank;
+ stage1_row_d = stage1_row;
+ stage1_next_bank_d = stage1_next_bank;
+ stage1_next_row_d = stage1_next_row;
+ stage1_data_d = stage1_data;
+ // stage 2
+ stage2_pending_d = stage2_pending;
+ stage2_aux_d = stage2_aux;
+ stage2_we_d = stage2_we;
+ stage2_col_d = stage2_col;
+ stage2_bank_d = stage2_bank;
+ stage2_row_d = stage2_row;
+ stage2_data_unaligned_d = stage2_data_unaligned;
+ stage2_data_unaligned_temp_d = stage2_data_unaligned_temp;
+ stage2_dm_unaligned_d = stage2_dm_unaligned;
+ stage2_dm_unaligned_temp_d = stage2_dm_unaligned_temp;
+ if(ECC_ENABLE == 3) begin
+ ecc_col_addr_prev_d = ecc_col_addr_prev;
+ ecc_bank_addr_prev_d = ecc_bank_addr_prev;
+ ecc_row_addr_prev_d = ecc_row_addr_prev;
+ ecc_bank_addr_d = ecc_bank_addr;
+ ecc_row_addr_d = ecc_row_addr;
+ ecc_col_addr_d = ecc_col_addr;
+ stage2_encoded_parity_d = stage2_encoded_parity;
+ end
+
+ /////////////////////////////////////////
+ // Stage 2
+ /////////////////////////////////////////
+ //if pipeline is not stalled (or a request is left on the prestall
+ //delay address 19 or if in calib), move pipeline to stage 2
+ if(stage2_update) begin //ITS POSSIBLE ONLY NEXT CLK WILL STALL SUPPOSE TO GO LOW
+ stage2_pending_d = stage1_pending;
+ if(ECC_ENABLE != 3) begin
+ stage1_pending_d = 1'b0; //no request initially unless overridden by the actual stb request
+ stage2_pending_d = stage1_pending;
+ stage2_aux_d = stage1_aux;
+ stage2_we_d = stage1_we;
+ stage2_col_d = stage1_col;
+ stage2_bank_d = stage1_bank;
+ stage2_row_d = stage1_row;
+ if(ODELAY_SUPPORTED || DLL_OFF) begin
+ stage2_data_unaligned_d = stage1_data_mux;
+ stage2_dm_unaligned_d = ~stage1_dm; //inverse each bit (1 must mean "masked" or not written)
+ end
+ else begin
+ stage2_data_unaligned_temp_d = stage1_data_mux;
+ stage2_dm_unaligned_temp_d = ~stage1_dm; //inverse each bit (1 must mean "masked" or not written)
+ end
+ end
+ // ECC_ENABLE == 3
+ else begin
+ stage1_pending_d = ecc_stage1_stall? stage1_pending : 1'b0; //stage1 remains the same for ECC op (no request initially unless overridden by the actual stb request)
+ // if switching from write to read and ECC is not yet written then do a write first to store those ECC bits
+ if(!stage1_we && stage2_we && stage1_pending && !write_ecc_stored_to_mem_d && initial_calibration_done) begin
+ stage2_we_d = 1'b1;
+ // if ecc_stage1_stall, stage2 will start ECC write/read operation
+ // if ECC write, then we are writing ECC for previous address
+ // if ECC read, then we are reading ECC for current address
+ stage2_col_d = ecc_col_addr_prev;
+ stage2_bank_d[BA_BITS-1:0] = ecc_bank_addr_prev;
+ stage2_row_d = ecc_row_addr_prev;
+ ecc_col_addr_prev_d = ecc_col_addr;
+ ecc_bank_addr_prev_d = ecc_bank_addr;
+ ecc_row_addr_prev_d = ecc_row_addr;
+ // For ECC requests, 2MSB of aux determines type of ECC request (read = 2'10, write = 2'b11)
+ stage2_aux_d = { 1'b1, 1'b1, 3'b000, {(AUX_WIDTH-5){1'b1}} };
+ end
+ // else pass stage 1 to stage 2
+ else begin
+ stage2_we_d = stage1_we;
+ // if ecc_stage1_stall, stage2 will start ECC write/read operation
+ // if ECC write, then we are writing ECC for previous address
+ // if ECC read, then we are reading ECC for current address
+ stage2_col_d = ecc_stage1_stall? (stage1_we? ecc_col_addr_prev : ecc_col_addr) : stage1_col;
+ stage2_bank_d[BA_BITS-1:0] = ecc_stage1_stall? (stage1_we? ecc_bank_addr_prev : ecc_bank_addr) : stage1_bank[BA_BITS-1:0];
+ stage2_row_d = ecc_stage1_stall? (stage1_we? ecc_row_addr_prev : ecc_row_addr) : stage1_row;
+ ecc_col_addr_prev_d = ecc_col_addr;
+ ecc_bank_addr_prev_d = ecc_bank_addr;
+ ecc_row_addr_prev_d = ecc_row_addr;
+ // For ECC requests, 2MSB of aux determines type of ECC request (read = 2'10, write = 2'b11)
+ // For non-ECC request (MSB is 0), next 3MSB is allotted for the column (burst position to know position of encoded parity ECC bits)
+ stage2_aux_d = ecc_stage1_stall? { 1'b1, !stage1_we, 3'b000, {(AUX_WIDTH-5){1'b1}} } : {1'b0, !stage1_we, stage1_col[5:3], stage1_aux[AUX_WIDTH-6:0]};
+ end
+ // store parity code for stage1_data
+ stage2_encoded_parity_d = encoded_parity;
+ if(ODELAY_SUPPORTED || DLL_OFF) begin
+ stage2_data_unaligned_d = stage1_data_mux;
+ stage2_dm_unaligned_d = ecc_stage1_stall? ~stage2_ecc_write_data_mask_d : ~stage1_dm; //inverse each bit (1 must mean "masked" or not written)
+ end
+ else begin
+ stage2_data_unaligned_temp_d = stage1_data_mux;
+ stage2_dm_unaligned_temp_d = ecc_stage1_stall? ~stage2_ecc_write_data_mask_d : ~stage1_dm; //inverse each bit (1 must mean "masked" or not written)
+ end
+ end
+ // pipeline: stage2_data -> shiftreg(CWL) -> OSERDES(DDR) -> ODELAY -> RAM
+ end
+
+ if(!ODELAY_SUPPORTED && !DLL_OFF) begin
+ //_temp is for added delay of 1 clock cycle (no ODELAY so no added delay)
+ stage2_data_unaligned_d = stage2_data_unaligned_temp;
+ stage2_dm_unaligned_d = stage2_dm_unaligned_temp;
+ end
+
+ /////////////////////////////////////////
+ // Stage 1
+ /////////////////////////////////////////
+ if(stage1_update && reset_done) begin
+ //stage1 will not do the request (pending low) when the
+ //request is on the same bank as the current request. This
+ //will ensure stage1 bank will be different from stage2 bank
+
+ // if ECC_ENABLE != 3, then stage1 will always receive wishbone interface
+ if(ECC_ENABLE != 3) begin
+ stage1_pending_d = i_wb_stb;//actual request flag
+ stage1_aux_d = i_aux; //aux ID for AXI compatibility
+ stage1_we_d = i_wb_we; //write-enable
+ stage1_dm_d = (ECC_ENABLE == 0)? i_wb_sel : {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled
+ end
+ // ECC_ENABLE == 3
+ else begin // if ECC_ENABLE = 3 (inline ECC), then stage1 will either receive stage0 or wishbone
+ stage1_pending_d = wb_stb_mux;//actual request flag
+ stage1_aux_d = aux_mux; //aux ID for AXI compatibility
+ stage1_we_d = wb_we_mux; //write-enable
+ stage1_dm_d = {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled
+ end
+
+ if(row_bank_col == 1) begin // memory address mapping: {row, bank, col}
+ if(DUAL_RANK_DIMM[0]) begin
+ stage1_bank_d[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] = i_wb_addr[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank
+ stage1_next_bank_d[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] = wb_addr_plus_anticipate[DUAL_RANK_DIMM[0]? (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2)) : 0]; // msb determines rank
+ end
+ stage1_row_d = i_wb_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address
+ stage1_bank_d[BA_BITS-1:0] = i_wb_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address
+ stage1_col_d = { i_wb_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned)
+ //stage1_next_bank will not increment unless stage1_next_col
+ //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated
+ //precharge and activate will happen only at the end of the
+ //current column with a margin dictated by
+ //MARGIN_BEFORE_ANTICIPATE
+ /* verilator lint_off WIDTH */
+ {stage1_next_row_d , stage1_next_bank_d[BA_BITS-1:0]} = wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2));
+ //anticipated next row and bank to be accessed
+ /* verilator lint_on WIDTH */
+ stage1_data_d = i_wb_data;
+ end
+
+ else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col}
+ stage1_bank_d[BA_BITS-1:0] = i_wb_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address
+ stage1_row_d = i_wb_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address
+ stage1_col_d = { i_wb_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned)
+ //stage1_next_row will not increment unless stage1_next_col
+ //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated
+ //precharge and activate will happen only at the end of the
+ //current column with a margin dictated by
+ //MARGIN_BEFORE_ANTICIPATE
+ /* verilator lint_off WIDTH */
+ {stage1_next_bank_d, stage1_next_row_d} = wb_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2));
+ //anticipated next row and bank to be accessed
+ /* verilator lint_on WIDTH */
+ stage1_data_d = i_wb_data;
+ end
+
+ else if(row_bank_col == 2) begin // memory address mapping: {bank[2:1], row, bank[0], col} , used for ECC_ENABLE = 3 (Inline ECC)
+ stage1_bank_d[2:1] = wb_addr_mux[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)]; //bank_address
+ stage1_row_d = wb_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1) ]; //row_address
+ stage1_bank_d[0] = wb_addr_mux[COL_BITS - $clog2(serdes_ratio*2)];
+ stage1_col_d = { wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned)
+ //stage1_next_bank will not increment unless stage1_next_col
+ //overwraps due to MARGIN_BEFORE_ANTICIPATE. This will overwrap every two banks
+ //MARGIN_BEFORE_ANTICIPATE
+ /* verilator lint_off WIDTH */
+ {stage1_next_bank_d[2:1], stage1_next_row_d, stage1_next_bank_d[0]} = wb_addr_plus_anticipate >> (COL_BITS - $clog2(serdes_ratio*2));
+ //anticipated next row and bank to be accessed
+ /* verilator lint_on WIDTH */
+ // ECC Mapping (Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=0#gid=0)
+ ecc_bank_addr_d = {2'b11,!wb_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]};
+ ecc_row_addr_d = {1'b1, wb_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1 + 1) ]};
+ ecc_col_addr_d = { wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) + 1)] ,
+ wb_addr_mux[(BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)] ,
+ wb_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) - 1) : 3], 3'b000 };
+ stage1_data_d = wb_data_mux;
end
end
- end
- always @* begin
- for(index = 0; index < LANES; index = index + 1) begin
- late_dq[index] = (lane_write_dq_late[index] && (data_start_index[index] != 0)) && (STAGE2_DATA_DEPTH > 1);
+
+ // request from calibrate FSM will be accepted here
+ else if(stage1_update_calib && reset_done) begin
+ // if ECC_ENABLE != 3, then stage1 will always receive wishbone interface
+ if(ECC_ENABLE != 3) begin
+ stage1_pending_d = calib_stb;//actual request flag
+ stage1_aux_d = calib_aux; //aux ID for AXI compatibility
+ stage1_we_d = calib_we; //write-enable
+ stage1_dm_d = (ECC_ENABLE == 0)? calib_sel : {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled
+ end
+ // ECC_ENABLE == 3
+ else begin // if ECC_ENABLE = 3 (inline ECC), then stage1 will either receive stage0 or wishbone
+ stage1_pending_d = calib_stb_mux;//actual request flag
+ stage1_we_d = calib_we_mux; //write-enable
+ stage1_dm_d = {wb_sel_bits{1'b1}}; // no data masking when ECC is enabled
+ stage1_aux_d = calib_aux_mux; //aux ID for AXI compatibility
+ end
+
+ if(row_bank_col == 1) begin // memory address mapping: {row, bank, col}
+ if(DUAL_RANK_DIMM[0]) begin
+ stage1_bank_d[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] = current_rank; // rank depends on current_rank
+ stage1_next_bank_d[(DUAL_RANK_DIMM[0]? BA_BITS : 0)] = current_rank; // rank depends on current_rank
+ end
+ stage1_row_d = calib_addr[ (ROW_BITS + BA_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (BA_BITS + COL_BITS - $clog2(serdes_ratio*2)) ]; //row_address
+ stage1_bank_d[BA_BITS-1:0] = calib_addr[ (BA_BITS + COL_BITS - $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //bank_address
+ stage1_col_d = { calib_addr[ (COL_BITS- $clog2(serdes_ratio*2)-1) : 0 ], {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned)
+ //stage1_next_bank will not increment unless stage1_next_col
+ //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated
+ //precharge and activate will happen only at the end of the
+ //current column with a margin dictated by
+ //MARGIN_BEFORE_ANTICIPATE
+ /* verilator lint_off WIDTH */
+ {stage1_next_row_d , stage1_next_bank_d[BA_BITS-1:0] } = calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2));
+ //anticipated next row and bank to be accessed
+ /* verilator lint_on WIDTH */
+ stage1_data_d = calib_data;
+ end
+ else if(row_bank_col == 0) begin // memory address mapping: {bank, row, col}
+ stage1_bank_d[BA_BITS-1:0] = calib_addr[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2))]; //bank_address
+ stage1_row_d = calib_addr[ (ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (COL_BITS- $clog2(serdes_ratio*2)) ]; //row_address
+ stage1_col_d = { calib_addr[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (8-burst word-aligned)
+ //stage1_next_row will not increment unless stage1_next_col
+ //overwraps due to MARGIN_BEFORE_ANTICIPATE. Thus, anticipated
+ //precharge and activate will happen only at the end of the
+ //current column with a margin dictated by
+ //MARGIN_BEFORE_ANTICIPATE
+ /* verilator lint_off WIDTH */
+ {stage1_next_bank_d, stage1_next_row_d} = calib_addr_plus_anticipate >> (COL_BITS- $clog2(serdes_ratio*2));
+ //anticipated next row and bank to be accessed
+ /* verilator lint_on WIDTH */
+ stage1_data_d = calib_data;
+ end
+ else if(row_bank_col == 2) begin // memory address mapping: {bank[2:1], row, bank[0], col}
+ stage1_bank_d[2:1] = calib_addr_mux[ (BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)]; //bank_address
+ stage1_row_d = calib_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1) ]; //row_address
+ stage1_bank_d[0] = calib_addr_mux[COL_BITS - $clog2(serdes_ratio*2)];
+ stage1_col_d = { calib_addr_mux[(COL_BITS- $clog2(serdes_ratio*2)-1) : 0] , {{$clog2(serdes_ratio*2)}{1'b0}} }; //column address (n-burst word-aligned)
+ //stage1_next_row will not increment unless stage1_next_col
+ //wraps around due to MARGIN_BEFORE_ANTICIPATE. Since bank[0] sits between row and col
+ //in this mapping, bank[0] toggles first and the row only increments every two banks
+ /* verilator lint_off WIDTH */
+ {stage1_next_bank_d[2:1], stage1_next_row_d, stage1_next_bank_d[0]} = calib_addr_plus_anticipate >> (COL_BITS - $clog2(serdes_ratio*2));
+ //anticipated next row and bank to be accessed
+ /* verilator lint_on WIDTH */
+ // ECC Mapping (Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=0#gid=0)
+ // ECC_BANK = {11,!bank[0]}
+ // ECC_ROW = {1,row>>1}
+ // ECC_COL = {row[0],bank[2:1],col>>3}"
+ ecc_bank_addr_d = {2'b11,!calib_addr_mux[COL_BITS - $clog2(serdes_ratio*2)]};
+ ecc_row_addr_d = {1'b1, calib_addr_mux[ (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2)) : (COL_BITS - $clog2(serdes_ratio*2) + 1 + 1) ]};
+ ecc_col_addr_d = { calib_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) + 1)] ,
+ calib_addr_mux[(BA_BITS + ROW_BITS + COL_BITS- $clog2(serdes_ratio*2) - 1) : (ROW_BITS + COL_BITS - $clog2(serdes_ratio*2) + 1)] ,
+ calib_addr_mux[(COL_BITS - $clog2(serdes_ratio*2) - 1) : 3], 3'b000 };
+ stage1_data_d = calib_data_mux;
+ end
+ end
+
+ //abort any outgoing ack when cyc is low
+ if(!i_wb_cyc && final_calibration_done) begin
+ stage2_pending_d = 0;
+ stage1_pending_d = 0;
end
end
+ always @(posedge i_controller_clk) begin // registers the per-lane late_dq flags on every controller clock (no reset term in this block)
+ for(index = 0; index < LANES; index = index + 1) begin
+ late_dq[index] <= (lane_write_dq_late[index] && (data_start_index[index] != 0)) && (STAGE2_DATA_DEPTH > 1); // high only if the lane is flagged write-late, its data_start_index is nonzero, and STAGE2_DATA_DEPTH > 1
+ end
+ end
+
+ // Logic for registering the conditions used for the 2-stage pipeline logic
+ // to cut the timing path and achieve higher max frequency
+ always @(posedge i_controller_clk) begin
+ if(sync_rst_controller) begin // synchronous reset: clear every registered condition flag
+ stage2_do_wr_or_rd <= 0;
+ stage2_do_wr <= 0;
+ stage2_do_update_delay_before_precharge_after_wr <= 0;
+ stage2_do_rd <= 0;
+ stage2_do_update_delay_before_precharge_after_rd <= 0;
+ stage2_do_act <= 0;
+ stage2_do_update_delay_before_read_after_act <= 0;
+ stage2_do_update_delay_before_write_after_act <= 0;
+ stage2_do_pre <= 0;
+
+ stage1_do_pre <= 0;
+ stage1_do_act <= 0;
+ end
+ else begin // otherwise latch the combinationally computed *_d value of each condition
+ // stage 2 conditions
+ stage2_do_wr_or_rd <= stage2_do_wr_or_rd_d;
+ stage2_do_wr <= stage2_do_wr_d;
+ stage2_do_update_delay_before_precharge_after_wr <= stage2_do_update_delay_before_precharge_after_wr_d;
+ stage2_do_rd <= stage2_do_rd_d;
+ stage2_do_update_delay_before_precharge_after_rd <= stage2_do_update_delay_before_precharge_after_rd_d;
+ stage2_do_act <= stage2_do_act_d;
+ stage2_do_update_delay_before_read_after_act <= stage2_do_update_delay_before_read_after_act_d;
+ stage2_do_update_delay_before_write_after_act <= stage2_do_update_delay_before_write_after_act_d;
+ stage2_do_pre <= stage2_do_pre_d;
+ // stage 1 conditions
+ stage1_do_pre <= stage1_do_pre_d;
+ stage1_do_act <= stage1_do_act_d;
+ end
+ end
+
+ always @* begin // combinational evaluation of the pipeline conditions from the *_d (next-state) bank status, active row and delay counters
+ // stage 2 conditions
+ stage2_do_wr_or_rd_d = bank_status_d[stage2_bank_d] && bank_active_row_d[stage2_bank_d] == stage2_row_d; // bank is active with the requested row
+ stage2_do_wr_d = stage2_we_d && delay_before_write_counter_d[stage2_bank_d] == 0; // write request and write delay has elapsed
+ stage2_do_update_delay_before_precharge_after_wr_d = delay_before_precharge_counter_d[stage2_bank_d] <= WRITE_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0];
+ stage2_do_rd_d = !stage2_we_d && delay_before_read_counter_d[stage2_bank_d] == 0; // read request and read delay has elapsed
+ stage2_do_update_delay_before_precharge_after_rd_d = delay_before_precharge_counter_d[stage2_bank_d] <= READ_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0];
+ stage2_do_act_d = !bank_status_d[stage2_bank_d] && delay_before_activate_counter_d[stage2_bank_d] == 0; // bank is idle and activate delay has elapsed
+ stage2_do_update_delay_before_read_after_act_d = delay_before_read_counter_d[stage2_bank_d] <= ACTIVATE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0];
+ stage2_do_update_delay_before_write_after_act_d = delay_before_write_counter_d[stage2_bank_d] <= ACTIVATE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0];
+ stage2_do_pre_d = bank_status_d[stage2_bank_d] && bank_active_row_d[stage2_bank_d] != stage2_row_d && delay_before_precharge_counter_d[stage2_bank_d] == 0 ; // bank is active with a different row and precharge delay has elapsed
+ // stage 1 conditions (evaluated on the anticipated next bank and row)
+ stage1_do_pre_d = bank_status_d[stage1_next_bank_d] && bank_active_row_d[stage1_next_bank_d] != stage1_next_row_d && delay_before_precharge_counter_d[stage1_next_bank_d] == 0;
+ stage1_do_act_d = !bank_status_d[stage1_next_bank_d] && delay_before_activate_counter_d[stage1_next_bank_d] == 0;
+ end
+
+
// generate signals to be received by stage1
generate
if(ECC_ENABLE == 3) begin : ecc_3_pipeline_control
@@ -1351,7 +1509,7 @@ module ddr3_controller #(
// AND ecc_stage1_stall low (if high then stage2 will have ECC operation while stage1 remains)
assign stage0_update = ((i_wb_cyc && !o_wb_stall) || (!final_calibration_done && !o_wb_stall_calib)) && ecc_stage1_stall; // stage0 is only used when ECC will be inserted next cycle (stage1 must remain)
assign stage1_update = ( (i_wb_cyc && !o_wb_stall) || (stage0_pending && !ecc_stage2_stall) ) && !ecc_stage1_stall;
- assign stage1_update_calib = ( ((state_calibrate != DONE_CALIBRATE) && !o_wb_stall_calib) || (stage0_pending && !ecc_stage2_stall) ) && !ecc_stage1_stall;
+ assign stage1_update_calib = ( ((!final_calibration_done) && !o_wb_stall_calib) || (stage0_pending && !ecc_stage2_stall) ) && !ecc_stage1_stall;
/* verilator lint_off WIDTH */
assign wb_addr_plus_anticipate = wb_addr_mux + MARGIN_BEFORE_ANTICIPATE; // wb_addr_plus_anticipate determines if it is near the end of column by checking if it jumps to next row
assign calib_addr_plus_anticipate = calib_addr_mux + MARGIN_BEFORE_ANTICIPATE; // just same as wb_addr_plus_anticipate but while doing calibration
@@ -1541,7 +1699,6 @@ module ddr3_controller #(
stage2_stall = 1'b0;
ecc_stage2_stall = 1'b0;
stage2_update = 1'b1; //always update stage 2 UNLESS it has a pending request (stage2_pending high)
- // o_wb_stall_d = 1'b0; //wb_stall going high is determined on stage 1 (higher priority), wb_stall going low is determined at stage2 (lower priority)
precharge_slot_busy = 0; //flag that determines if stage 2 is issuing precharge (thus stage 1 cannot issue precharge)
activate_slot_busy = 0; //flag that determines if stage 2 is issuing activate (thus stage 1 cannot issue activate)
write_dqs_d = write_calib_dqs;
@@ -1614,15 +1771,60 @@ module ddr3_controller #(
//USE _d in ALL
//if there is a pending request, issue the appropriate commands
if(stage2_pending) begin
- stage2_stall = 1; //initially high when stage 2 is pending
ecc_stage2_stall = 1;
stage2_update = 0;
+ //bank is not idle but wrong row is activated so do precharge
+ if(stage2_do_pre) begin
+ precharge_slot_busy = 1'b1;
+ //set-up delay before activate
+ delay_before_activate_counter_d[stage2_bank] = PRECHARGE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0];
+ //issue precharge command
+ if(DUAL_RANK_DIMM[0]) begin
+ cmd_d[PRECHARGE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[DUAL_RANK_DIMM[0]? 9 : 8:0] } };
+ end
+ else begin
+ cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[9:0] } };
+ end
+ //update bank status and active row
+ bank_status_d[stage2_bank] = 1'b0;
+ end
+
+ //bank is idle so activate it
+ else if(stage2_do_act) begin
+ activate_slot_busy = 1'b1;
+ // must meet TRRD (activate to activate delay)
+ for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the activate to activate delay applies to all banks
+ if(delay_before_activate_counter_q[index] <= ACTIVATE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0]) begin // if delay is > ACTIVATE_TO_ACTIVATE_DELAY, then updating it to the lower delay will cause the previous delay to be violated
+ delay_before_activate_counter_d[index] = ACTIVATE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0];
+ end
+ end
+
+ delay_before_precharge_counter_d[stage2_bank] = ACTIVATE_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0];
+
+ //set-up delay before read and write
+ if(stage2_do_update_delay_before_read_after_act) begin // if current delay is > ACTIVATE_TO_READ_DELAY, then updating it to the lower delay will cause the previous delay to be violated
+ delay_before_read_counter_d[stage2_bank] = ACTIVATE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0];
+ end
+ if(stage2_do_update_delay_before_write_after_act) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated
+ delay_before_write_counter_d[stage2_bank] = ACTIVATE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0];
+ end
+ //issue activate command
+ if(DUAL_RANK_DIMM[0]) begin
+ cmd_d[ACTIVATE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], stage2_row[(DUAL_RANK_DIMM[0]? ROW_BITS-1 : ROW_BITS-2):0]};
+ end
+ else begin
+ cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank , stage2_row};
+ end
+ //update bank status and active row
+ bank_status_d[stage2_bank] = 1'b1;
+ bank_active_row_d[stage2_bank] = stage2_row;
+ end
+
//right row is already active so go straight to read/write
- if(bank_status_q[stage2_bank] && bank_active_row_q[stage2_bank] == stage2_row) begin //read/write operation
+ else if(stage2_do_wr_or_rd) begin //read/write operation
//write request
- if(stage2_we && delay_before_write_counter_q[stage2_bank] == 0) begin
- stage2_stall = 0;
+ if(stage2_do_wr) begin
ecc_stage2_stall = 0;
stage2_update = 1;
cmd_odt = 1'b1;
@@ -1645,7 +1847,7 @@ module ddr3_controller #(
// where the transaction can continue regardless when ack returns
//set-up delay before precharge, read, and write
- if(delay_before_precharge_counter_q[stage2_bank] <= WRITE_TO_PRECHARGE_DELAY) begin
+ if(stage2_do_update_delay_before_precharge_after_wr) begin
//it is possible that the delay_before_precharge is
//set to tRAS (activate to precharge delay). And if we
//overwrite delay_before_precharge, we might overwrite
@@ -1653,12 +1855,12 @@ module ddr3_controller #(
//tRAS requirement. Thus, we must first check if the
//delay_before_precharge is set to a value not more
//than the WRITE_TO_PRECHARGE_DELAY
- delay_before_precharge_counter_d[stage2_bank] = WRITE_TO_PRECHARGE_DELAY;
+ delay_before_precharge_counter_d[stage2_bank] = WRITE_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0];
end
for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the write to read delay applies to all banks (odt must be turned off properly before reading)
- delay_before_read_counter_d[index] = WRITE_TO_READ_DELAY + 1; //NOTE TO SELF: why plus 1?
+ delay_before_read_counter_d[index] = WRITE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0] + 'd1; //NOTE TO SELF: why plus 1?
end
- delay_before_write_counter_d[stage2_bank] = WRITE_TO_WRITE_DELAY;
+ delay_before_write_counter_d[stage2_bank] = WRITE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0];
//issue read command
if(DUAL_RANK_DIMM[0]) begin
if(COL_BITS <= 10) begin
@@ -1711,19 +1913,17 @@ module ddr3_controller #(
end
//read request
- else if(!stage2_we && delay_before_read_counter_q[stage2_bank]==0) begin
- stage2_stall = 0;
+ else if(stage2_do_rd) begin
ecc_stage2_stall = 0;
stage2_update = 1;
cmd_odt = 1'b0;
//set-up delay before precharge, read, and write
- if(delay_before_precharge_counter_q[stage2_bank] <= READ_TO_PRECHARGE_DELAY) begin
- delay_before_precharge_counter_d[stage2_bank] = READ_TO_PRECHARGE_DELAY;
+ if(stage2_do_update_delay_before_precharge_after_rd) begin
+ delay_before_precharge_counter_d[stage2_bank] = READ_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0];
end
- delay_before_read_counter_d[stage2_bank] = READ_TO_READ_DELAY;
- delay_before_write_counter_d[stage2_bank] = READ_TO_WRITE_DELAY + 1; //temporary solution since its possible odt to go high already while reading previously
+ delay_before_read_counter_d[stage2_bank] = READ_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0];
for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the read to write delay applies to all banks (odt must be turned on properly before writing and this delay is for ODT to settle)
- delay_before_write_counter_d[index] = READ_TO_WRITE_DELAY + 1; // NOTE TO SELF: why plus 1?
+ delay_before_write_counter_d[index] = READ_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0] + 'd1; // NOTE TO SELF: why plus 1? temporary solution since its possible odt to go high already while reading previously
end
// don't acknowledge if ECC request
// higher shift_read_pipe means the earlier it will check data received from i_phy_iserdes_data
@@ -1755,52 +1955,6 @@ module ddr3_controller #(
cmd_d[3][CMD_ODT] = cmd_odt;
end
end
-
- //bank is idle so activate it
- else if(!bank_status_q[stage2_bank] && delay_before_activate_counter_q[stage2_bank] == 0) begin
- activate_slot_busy = 1'b1;
- // must meet TRRD (activate to activate delay)
- for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the activate to activate delay applies to all banks
- if(delay_before_activate_counter_q[index] <= ACTIVATE_TO_ACTIVATE_DELAY) begin // if delay is > ACTIVATE_TO_ACTIVATE_DELAY, then updating it to the lower delay will cause the previous delay to be violated
- delay_before_activate_counter_d[index] = ACTIVATE_TO_ACTIVATE_DELAY;
- end
- end
-
- delay_before_precharge_counter_d[stage2_bank] = ACTIVATE_TO_PRECHARGE_DELAY;
-
- //set-up delay before read and write
- if(delay_before_read_counter_q[stage2_bank] <= ACTIVATE_TO_READ_DELAY) begin // if current delay is > ACTIVATE_TO_READ_DELAY, then updating it to the lower delay will cause the previous delay to be violated
- delay_before_read_counter_d[stage2_bank] = ACTIVATE_TO_READ_DELAY;
- end
- if(delay_before_write_counter_q[stage2_bank] <= ACTIVATE_TO_WRITE_DELAY) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated
- delay_before_write_counter_d[stage2_bank] = ACTIVATE_TO_WRITE_DELAY;
- end
- //issue activate command
- if(DUAL_RANK_DIMM[0]) begin
- cmd_d[ACTIVATE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], stage2_row[(DUAL_RANK_DIMM[0]? ROW_BITS-1 : ROW_BITS-2):0]};
- end
- else begin
- cmd_d[ACTIVATE_SLOT] = {1'b0, CMD_ACT[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank , stage2_row};
- end
- //update bank status and active row
- bank_status_d[stage2_bank] = 1'b1;
- bank_active_row_d[stage2_bank] = stage2_row;
- end
- //bank is not idle but wrong row is activated so do precharge
- else if(bank_status_q[stage2_bank] && bank_active_row_q[stage2_bank] != stage2_row && delay_before_precharge_counter_q[stage2_bank] ==0) begin
- precharge_slot_busy = 1'b1;
- //set-up delay before activate
- delay_before_activate_counter_d[stage2_bank] = PRECHARGE_TO_ACTIVATE_DELAY;
- //issue precharge command
- if(DUAL_RANK_DIMM[0]) begin
- cmd_d[PRECHARGE_SLOT] = {!stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage2_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[DUAL_RANK_DIMM[0]? 9 : 8:0] } };
- end
- else begin
- cmd_d[PRECHARGE_SLOT] = {1'b0, CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage2_bank, { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage2_row[9:0] } };
- end
- //update bank status and active row
- bank_status_d[stage2_bank] = 1'b0;
- end
end //end of stage 2 pending
// pending request on stage 1
@@ -1819,9 +1973,9 @@ module ddr3_controller #(
//issue Activate and Precharge on the CURRENT bank. Else, stage
//1 will issue Activate and Precharge for the NEXT bank
// Thus stage 1 anticipate makes sure smooth burst operation that jumps banks
- if(bank_status_q[stage1_next_bank] && bank_active_row_q[stage1_next_bank] != stage1_next_row && delay_before_precharge_counter_q[stage1_next_bank] ==0 && !precharge_slot_busy) begin
+ if(stage1_do_pre && !precharge_slot_busy) begin
//set-up delay before read and write
- delay_before_activate_counter_d[stage1_next_bank] = PRECHARGE_TO_ACTIVATE_DELAY;
+ delay_before_activate_counter_d[stage1_next_bank] = PRECHARGE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0];
if(DUAL_RANK_DIMM[0]) begin
cmd_d[PRECHARGE_SLOT] = {!stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_PRE[2:0], cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank[BA_BITS-1:0], { {{ROW_BITS-32'd11}{1'b0}} , 1'b0 , stage1_next_row[(DUAL_RANK_DIMM[0]? 9 : 8):0] } };
end
@@ -1832,22 +1986,22 @@ module ddr3_controller #(
end //end of anticipate precharge
//anticipated bank is idle so do activate
- else if(!bank_status_q[stage1_next_bank] && delay_before_activate_counter_q[stage1_next_bank] == 0 && !activate_slot_busy) begin
+ else if(stage1_do_act && !activate_slot_busy) begin
// must meet TRRD (activate to activate delay)
for(index=0; index < (1<<(BA_BITS+DUAL_RANK_DIMM)); index=index+1) begin //the activate to activate delay applies to all banks
- if(delay_before_activate_counter_d[index] <= ACTIVATE_TO_ACTIVATE_DELAY) begin // if delay is > ACTIVATE_TO_ACTIVATE_DELAY, then updating it to the lower delay will cause the previous delay to be violated
- delay_before_activate_counter_d[index] = ACTIVATE_TO_ACTIVATE_DELAY;
+ if(delay_before_activate_counter_d[index] <= ACTIVATE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0]) begin // if delay is > ACTIVATE_TO_ACTIVATE_DELAY, then updating it to the lower delay will cause the previous delay to be violated
+ delay_before_activate_counter_d[index] = ACTIVATE_TO_ACTIVATE_DELAY[$clog2(MAX_DELAY_BEFORE_ACTIVATE):0];
end
end
- delay_before_precharge_counter_d[stage1_next_bank] = ACTIVATE_TO_PRECHARGE_DELAY;
+ delay_before_precharge_counter_d[stage1_next_bank] = ACTIVATE_TO_PRECHARGE_DELAY[$clog2(MAX_DELAY_BEFORE_PRECHARGE):0];
//set-up delay before read and write
- if(delay_before_read_counter_d[stage1_next_bank] <= ACTIVATE_TO_READ_DELAY) begin // if current delay is > ACTIVATE_TO_READ_DELAY, then updating it to the lower delay will cause the previous delay to be violated
- delay_before_read_counter_d[stage1_next_bank] = ACTIVATE_TO_READ_DELAY;
+ if(delay_before_read_counter_d[stage1_next_bank] <= ACTIVATE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0]) begin // if current delay is > ACTIVATE_TO_READ_DELAY, then updating it to the lower delay will cause the previous delay to be violated
+ delay_before_read_counter_d[stage1_next_bank] = ACTIVATE_TO_READ_DELAY[$clog2(MAX_DELAY_BEFORE_READ):0];
end
- if(delay_before_write_counter_d[stage1_next_bank] <= ACTIVATE_TO_WRITE_DELAY) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated
- delay_before_write_counter_d[stage1_next_bank] = ACTIVATE_TO_WRITE_DELAY;
+ if(delay_before_write_counter_d[stage1_next_bank] <= ACTIVATE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0]) begin // if current delay is > ACTIVATE_TO_WRITE_DELAY, then updating it to the lower delay will cause the previous delay to be violated
+ delay_before_write_counter_d[stage1_next_bank] = ACTIVATE_TO_WRITE_DELAY[$clog2(MAX_DELAY_BEFORE_WRITE):0];
end
if(DUAL_RANK_DIMM[0]) begin
cmd_d[ACTIVATE_SLOT] = {!stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], stage1_next_bank[(DUAL_RANK_DIMM[0]? BA_BITS : 0)], CMD_ACT[2:0] , cmd_odt, cmd_ck_en, cmd_reset_n, stage1_next_bank[BA_BITS-1:0] , stage1_next_row[(DUAL_RANK_DIMM[0]? ROW_BITS-1 : ROW_BITS-2):0]};
@@ -1862,35 +2016,36 @@ module ddr3_controller #(
end //end of stage1 anticipate
end
- // control stage 1 stall
- if(stage1_pending) begin //raise stall only if stage2 will still be busy next clock
- // Stage1 bank and row will determine if transaction will be
- // stalled (bank is idle OR wrong row is active).
- if(!bank_status_d[stage1_bank] || (bank_status_d[stage1_bank] && bank_active_row_d[stage1_bank] != stage1_row)) begin
- stage1_stall = 1;
+ // control stage 1 stall in advance
+ if(stage1_pending) begin // raise stall only if stage2 will still be busy next clock
+ // stall stage 1 by default if there is pending request on stage 1
+ stage1_stall = 1;
+
+ if(bank_status_d[stage1_bank] && bank_active_row_d[stage1_bank] == stage1_row) begin
+ // if write request and delay before write is already met then deassert stall
+ if(stage1_we && delay_before_write_counter_d[stage1_bank] == 0) begin
+ stage1_stall = 0;
+ end
+ // if read request and delay before read is already met then deassert stall
+ else if(!stage1_we && delay_before_read_counter_d[stage1_bank] == 0) begin
+ stage1_stall = 0;
+ end
end
- else if(!stage1_we && delay_before_read_counter_d[stage1_bank] != 0) begin // if read request but delay before read is not yet met then stall
- stage1_stall = 1;
- end
- else if(stage1_we && delay_before_write_counter_d[stage1_bank] != 0) begin // if write request but delay before write is not yet met then stall
- stage1_stall = 1;
- end
- //different request type will need a delay of more than 1 clk cycle so stall the pipeline
- //if(stage1_we != stage2_we) begin
- // stage1_stall = 1;
- //end
end
- //control stage 2 stall
+ //control stage 2 stall in advance
if(stage2_pending) begin
- //control stage2 stall in advance
- if(bank_status_d[stage2_bank] && bank_active_row_d[stage2_bank] == stage2_row) begin //read/write operation
- //write request
- if(stage2_we && delay_before_write_counter_d[stage2_bank] == 0) begin // if write request and delay before write is already met then deassert stall
+ // by default, stage 2 stall deasserts once conditions for write/read command is met
+ stage2_stall = !(stage2_do_wr_or_rd && (stage2_do_wr || stage2_do_rd));
+ // equivalent to: if(bank_status_d[stage2_bank] && bank_active_row_d[stage2_bank] == stage2_row)
+ // can start read/write operation if right row is active on the bank
+ if(stage2_do_act || stage2_do_wr_or_rd) begin
+ // if write request and delay before write is already met then deassert stall
+ if(stage2_we && delay_before_write_counter_d[stage2_bank] == 0) begin
stage2_stall = 0; //to low stall next stage, but not yet at this stage
end
- //read request
- else if(!stage2_we && delay_before_read_counter_d[stage2_bank]==0) begin // if read request and delay before read is already met then deassert stall
+ // if read request and delay before read is already met then deassert stall
+ else if(!stage2_we && delay_before_read_counter_d[stage2_bank]==0) begin
stage2_stall = 0;
end
end
@@ -1904,66 +2059,77 @@ module ddr3_controller #(
// a way that it will only stall next clock cycle if the pipeline will be full on the next clock cycle.
// Excel sheet design planning: https://docs.google.com/spreadsheets/d/1_8vrLmVSFpvRD13Mk8aNAMYlh62SfpPXOCYIQFEtcs4/edit?gid=668378527#gid=668378527
// Old: https://1drv.ms/x/s!AhWdq9CipeVagSqQXPwRmXhDgttL?e=vVYIxE&nav=MTVfezAwMDAwMDAwLTAwMDEtMDAwMC0wMDAwLTAwMDAwMDAwMDAwMH0
- // if(o_wb_stall_q) o_wb_stall_d = stage2_stall;
- // else if( (!i_wb_stb && final_calibration_done) || (!calib_stb && state_calibrate != DONE_CALIBRATE) ) o_wb_stall_d = 0;
- // else if(!stage1_pending) o_wb_stall_d = stage2_stall;
- // else o_wb_stall_d = stage1_stall;
+ // if(o_wb_stall_int_q) o_wb_stall_int_d = stage2_stall;
+ // else if( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) ) o_wb_stall_int_d = 0;
+ // else if(!stage1_pending) o_wb_stall_int_d = stage2_stall;
+ // else o_wb_stall_int_d = stage1_stall;
- // if( !o_wb_stall_q && !i_wb_stb ) o_wb_stall_d = 1'b0;
- // else if(ecc_stage1_stall) o_wb_stall_d = 1'b1;
- // else if(stage0_pending) o_wb_stall_d = ecc_stage2_stall || stage1_stall;
+ // if( !o_wb_stall_int_q && !i_wb_stb ) o_wb_stall_int_d = 1'b0;
+ // else if(ecc_stage1_stall) o_wb_stall_int_d = 1'b1;
+ // else if(stage0_pending) o_wb_stall_int_d = ecc_stage2_stall || stage1_stall;
// else begin
- // if(o_wb_stall_q) o_wb_stall_d = stage2_stall;
- // else o_wb_stall_d = stage1_stall;
+ // if(o_wb_stall_int_q) o_wb_stall_int_d = stage2_stall;
+ // else o_wb_stall_int_d = stage1_stall;
// end
// pipeline control for ECC_ENABLE != 3
+
if(ECC_ENABLE != 3) begin
if(!i_wb_cyc && final_calibration_done) begin
- o_wb_stall_d = 0;
+ o_wb_stall_int_d = 0;
end
- else if(!o_wb_stall_q && ( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) )) begin
- o_wb_stall_d = 0;
+ else if(!o_wb_stall_int_q && ( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) )) begin
+ o_wb_stall_int_d = 0;
end
- else if(o_wb_stall_q || !stage1_pending) begin
- o_wb_stall_d = stage2_stall;
+ else if(o_wb_stall_int_q || !stage1_pending) begin
+ o_wb_stall_int_d = stage2_stall;
end
else begin
- o_wb_stall_d = stage1_stall;
+ o_wb_stall_int_d = stage1_stall;
end
end
// pipeline control for ECC_ENABLE = 3
else begin
if(!i_wb_cyc && final_calibration_done) begin
- o_wb_stall_d = 1'b0;
+ o_wb_stall_int_d = 1'b0;
end
else if(ecc_stage1_stall) begin
- o_wb_stall_d = 1'b1;
+ o_wb_stall_int_d = 1'b1;
end
- else if(!o_wb_stall_q && ( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) )) begin
- o_wb_stall_d = 1'b0;
+ else if(!o_wb_stall_int_q && ( (!i_wb_stb && final_calibration_done) || (!calib_stb && !final_calibration_done) )) begin
+ o_wb_stall_int_d = 1'b0;
end
else if(stage0_pending) begin
- o_wb_stall_d = !stage2_update || stage1_stall;
+ o_wb_stall_int_d = !stage2_update || stage1_stall;
end
else begin
- if(o_wb_stall_q || !stage1_pending) begin
- o_wb_stall_d = stage2_stall;
+ if(o_wb_stall_int_q || !stage1_pending) begin
+ o_wb_stall_int_d = stage2_stall;
end
else begin
- o_wb_stall_d = stage1_stall;
+ o_wb_stall_int_d = stage1_stall;
end
end
end
end //end of always block
-
+
+ always @* begin // builds the stall outputs from the internal stall register and the registered force-high flags
+ force_o_wb_stall_high_d = !final_calibration_done || !instruction[REF_IDLE]; // next value: force stall while final calibration is not done or instruction[REF_IDLE] is low
+ force_o_wb_stall_calib_high_d = !instruction[REF_IDLE]; // next value: calibration-side stall is forced only while instruction[REF_IDLE] is low
+ o_wb_stall = o_wb_stall_int_q || force_o_wb_stall_high_q; // internal stall OR registered force flag
+ o_wb_stall_calib = o_wb_stall_int_q || force_o_wb_stall_calib_high_q; // internal stall OR registered calibration force flag
+ end
// register previous value of cmd_ck_en
always @(posedge i_controller_clk) begin
if(sync_rst_controller) begin
prev_cmd_ck_en <= 0;
+ force_o_wb_stall_high_q <= 0; // force-stall flags are cleared on synchronous reset
+ force_o_wb_stall_calib_high_q <= 0;
end
else begin
prev_cmd_ck_en <= cmd_ck_en;
+ force_o_wb_stall_high_q <= force_o_wb_stall_high_d; // register the force-stall flags alongside prev_cmd_ck_en
+ force_o_wb_stall_calib_high_q <= force_o_wb_stall_calib_high_d;
end
end
@@ -2251,6 +2417,9 @@ module ddr3_controller #(
pause_counter <= 0;
read_data_store <= 0;
write_pattern <= 0;
+ write_pattern_lane <= 0;
+ read_lane_data_shifted <= 0;
+ write_pattern_matches <= 0;
added_read_pipe_max <= 0;
dqs_start_index_stored <= 0;
dqs_start_index_repeat <= 0;
@@ -2271,6 +2440,7 @@ module ddr3_controller #(
lane_read_dq_early <= 0;
shift_read_pipe <= 0;
bitslip_counter <= 0;
+ prep_done <= 0;
`ifdef UART_DEBUG
uart_start_send <= 0;
uart_text <= 0;
@@ -2303,6 +2473,7 @@ module ddr3_controller #(
idelay_data_cntvaluein_prev <= idelay_data_cntvaluein[lane];
reset_from_calibrate <= 0;
reset_after_rank_1 <= 0; // reset for dual rank
+ prep_done <= 0;
if(wb2_update) begin
odelay_data_cntvaluein[wb2_write_lane] <= wb2_phy_odelay_data_ld[wb2_write_lane]? wb2_phy_odelay_data_cntvaluein : odelay_data_cntvaluein[wb2_write_lane];
@@ -2315,7 +2486,7 @@ module ddr3_controller #(
o_phy_idelay_dqs_ld <= wb2_phy_idelay_dqs_ld;
lane <= wb2_write_lane;
end
- else if(state_calibrate != DONE_CALIBRATE) begin
+ else if(!final_calibration_done) begin
// increase cntvalue every load to prepare for possible next load
odelay_data_cntvaluein[lane] <= o_phy_odelay_data_ld[lane]? odelay_data_cntvaluein[lane] + 1: odelay_data_cntvaluein[lane];
odelay_dqs_cntvaluein[lane] <= o_phy_odelay_dqs_ld[lane]? odelay_dqs_cntvaluein[lane] + 1: odelay_dqs_cntvaluein[lane];
@@ -2507,8 +2678,8 @@ module ddr3_controller #(
CALIBRATE_DQS: if(dqs_start_index_stored == dqs_target_index) begin
// dq_target_index still stores the original dqs_target_index_value. The bit size of dq_target_index is just enough
// to count the bits in dqs_store (the received 8 DQS stored STORED_DQS_SIZE times)
- added_read_pipe[lane] <= { {( 4 - ($clog2(STORED_DQS_SIZE*8) - (3+1)) ){1'b0}} , dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1:(3+1)] }
- + { 3'b0 , (dq_target_index[lane][3:0] >= (5+8)) };
+ added_read_pipe[lane] <= |({ {( 4 - ($clog2(STORED_DQS_SIZE*8) - (3+1)) ){1'b0}} , dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1:(3+1)] }
+ + { 3'b0 , (dq_target_index[lane][3:0] >= (5+8)) })? 'd1 : 'd0; // added_read_pipe can just be 1 or 0
// if target_index is > 13, then a 1 CONTROLLLER_CLK cycle delay (4 ddr3_clk cycles) is added on that particular lane (due to trace delay)
// added_read_pipe[lane] <= dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1 : (4)] + ( dq_target_index[lane][3:0] >= 13 ) ;
dqs_bitslip_arrangement <= 16'b0011_1100_0011_1100 >> dq_target_index[lane][2:0];
@@ -2608,73 +2779,75 @@ module ddr3_controller #(
pause_counter <= 0;
end
- WAIT_FOR_FEEDBACK: if(delay_before_write_level_feedback == 0) begin
- /* verilator lint_off WIDTH */ //_verilator warning: Bit extraction of var[511:0] requires 9 bit index, not 3 bits (but [lane<<3] is much simpler and cleaner)
- sample_clk_repeat <= (i_phy_iserdes_data[lane_times_8] == stored_write_level_feedback)? sample_clk_repeat + 1 : 0; //sample_clk_repeat should get the same response
- stored_write_level_feedback <= i_phy_iserdes_data[lane_times_8];
- write_calib_dqs <= 0;
- if(sample_clk_repeat == REPEAT_CLK_SAMPLING) begin
- sample_clk_repeat <= 0;
- prev_write_level_feedback <= stored_write_level_feedback;
- if(({prev_write_level_feedback, stored_write_level_feedback} == 2'b01) /*|| write_level_fail[lane]*/) begin
- /* verilator lint_on WIDTH */
- /* verilator lint_off WIDTH */
- if(lane == LANES - 1) begin
- /* verilator lint_on WIDTH */
- write_calib_odt <= 0;
- pause_counter <= 0; //write calibration now complete so continue the reset instruction sequence
- lane <= 0;
- o_phy_write_leveling_calib <= 0;
- state_calibrate <= ISSUE_WRITE_1;
+ WAIT_FOR_FEEDBACK: if(ODELAY_SUPPORTED) begin
+ if(delay_before_write_level_feedback == 0) begin
+ /* verilator lint_off WIDTH */ //_verilator warning: Bit extraction of var[511:0] requires 9 bit index, not 3 bits (but [lane<<3] is much simpler and cleaner)
+ sample_clk_repeat <= (i_phy_iserdes_data[lane_times_8] == stored_write_level_feedback)? sample_clk_repeat + 1 : 0; //sample_clk_repeat should get the same response
+ stored_write_level_feedback <= i_phy_iserdes_data[lane_times_8];
+ write_calib_dqs <= 0;
+ if(sample_clk_repeat == REPEAT_CLK_SAMPLING) begin
+ sample_clk_repeat <= 0;
+ prev_write_level_feedback <= stored_write_level_feedback;
+ if(({prev_write_level_feedback, stored_write_level_feedback} == 2'b01) /*|| write_level_fail[lane]*/) begin
+ /* verilator lint_on WIDTH */
+ /* verilator lint_off WIDTH */
+ if(lane == LANES - 1) begin
+ /* verilator lint_on WIDTH */
+ write_calib_odt <= 0;
+ pause_counter <= 0; //write calibration now complete so continue the reset instruction sequence
+ lane <= 0;
+ o_phy_write_leveling_calib <= 0;
+ state_calibrate <= ISSUE_WRITE_1;
+ `ifdef UART_DEBUG_WRITE_LEVEL
+ uart_start_send <= 1'b1;
+ uart_text <= {"state=WAIT_FOR_FEEDBACK, All Lanes Done",8'h0a,"----------------------",8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= ISSUE_WRITE_1;
+ `endif
+ end
+ else begin
+ lane <= lane + 1;
+ odelay_cntvalue_halfway <= 0;
+ prev_write_level_feedback <= 1'b1;
+ sample_clk_repeat <= 0;
+ state_calibrate <= START_WRITE_LEVEL;
`ifdef UART_DEBUG_WRITE_LEVEL
uart_start_send <= 1'b1;
- uart_text <= {"state=WAIT_FOR_FEEDBACK, All Lanes Done",8'h0a,"----------------------",8'h0a};
+ uart_text <= {"state=WAIT_FOR_FEEDBACK, Done lane=",hex_to_ascii(lane),8'h0a,"----------------------",8'h0a};
state_calibrate <= WAIT_UART;
- state_calibrate_next <= ISSUE_WRITE_1;
+ state_calibrate_next <= START_WRITE_LEVEL;
`endif
+ end
end
else begin
- lane <= lane + 1;
- odelay_cntvalue_halfway <= 0;
- prev_write_level_feedback <= 1'b1;
- sample_clk_repeat <= 0;
+ o_phy_odelay_data_ld[lane] <= 1;
+ o_phy_odelay_dqs_ld[lane] <= 1;
+ write_level_fail[lane] <= odelay_cntvalue_halfway;
+ // if(odelay_cntvalue_halfway) begin // if halfway cntvalue is reached which is illegal (or impossible to happen), then we load the original cntvalues
+ // odelay_data_cntvaluein[lane] <= DATA_INITIAL_ODELAY_TAP[4:0];
+ // odelay_dqs_cntvaluein[lane] <= DQS_INITIAL_ODELAY_TAP[4:0];
+ // end
state_calibrate <= START_WRITE_LEVEL;
`ifdef UART_DEBUG_WRITE_LEVEL
uart_start_send <= 1'b1;
- uart_text <= {"state=WAIT_FOR_FEEDBACK, Done lane=",hex_to_ascii(lane),8'h0a,"----------------------",8'h0a};
+ uart_text <= {"state=WAIT_FOR_FEEDBACK, lane=",hex_to_ascii(lane), ", {prev,stored}=", hex_to_ascii(prev_write_level_feedback),
+ hex_to_ascii(stored_write_level_feedback), ", o_phy_odelay_data_cntvaluein=0x", hex_to_ascii(o_phy_odelay_data_cntvaluein[4]),
+ hex_to_ascii(o_phy_odelay_data_cntvaluein[3:0]), 8'h0a,8'h0a};
state_calibrate <= WAIT_UART;
state_calibrate_next <= START_WRITE_LEVEL;
`endif
end
- end
- else begin
- o_phy_odelay_data_ld[lane] <= 1;
- o_phy_odelay_dqs_ld[lane] <= 1;
- write_level_fail[lane] <= odelay_cntvalue_halfway;
- // if(odelay_cntvalue_halfway) begin // if halfway cntvalue is reached which is illegal (or impossible to happen), then we load the original cntvalues
- // odelay_data_cntvaluein[lane] <= DATA_INITIAL_ODELAY_TAP[4:0];
- // odelay_dqs_cntvaluein[lane] <= DQS_INITIAL_ODELAY_TAP[4:0];
- // end
- state_calibrate <= START_WRITE_LEVEL;
- `ifdef UART_DEBUG_WRITE_LEVEL
- uart_start_send <= 1'b1;
- uart_text <= {"state=WAIT_FOR_FEEDBACK, lane=",hex_to_ascii(lane), ", {prev,stored}=", hex_to_ascii(prev_write_level_feedback),
- hex_to_ascii(stored_write_level_feedback), ", o_phy_odelay_data_cntvaluein=0x", hex_to_ascii(o_phy_odelay_data_cntvaluein[4]),
- hex_to_ascii(o_phy_odelay_data_cntvaluein[3:0]), 8'h0a,8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= START_WRITE_LEVEL;
- `endif
- end
- end
- `ifdef UART_DEBUG_WRITE_LEVEL
- else begin
- uart_start_send <= 1'b1;
- uart_text <= {"state=WAIT_FOR_FEEDBACK, sample_clk_repeat=",hex_to_ascii(sample_clk_repeat),8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= START_WRITE_LEVEL;
- end
- `endif
- end
+ end
+ `ifdef UART_DEBUG_WRITE_LEVEL
+ else begin
+ uart_start_send <= 1'b1;
+ uart_text <= {"state=WAIT_FOR_FEEDBACK, sample_clk_repeat=",hex_to_ascii(sample_clk_repeat),8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= START_WRITE_LEVEL;
+ end
+ `endif
+ end
+ end
ISSUE_WRITE_1: if(instruction_address == 22 && !o_wb_stall_calib) begin
calib_stb <= 1;//actual request flag
@@ -2765,74 +2938,75 @@ module ddr3_controller #(
// end
end
- ANALYZE_DATA_LOW_FREQ: begin // read_data_store should have the expected 9177298cd0ad51c1, if not then issue bitslip
- if(write_pattern[0 +: 64] == {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }) begin
- /* verilator lint_off WIDTH */
- if(lane == LANES - 1) begin
- /* verilator lint_on WIDTH */
- state_calibrate <= BIST_MODE == 0? FINISH_READ : BURST_WRITE; // go straight to FINISH_READ if BIST_MODE == 0
- initial_calibration_done <= 1'b1;
- `ifdef UART_DEBUG_ALIGN
- uart_start_send <= 1'b1;
- //uart_text <= {"state=ANALYZE_DATA_LOW_FREQ, Done All Lanes",8'h0a,"-----------------",8'h0a,8'h0a};
- uart_text <= {8'h0a,8'h0a, "Done All Lanes, bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe),
- ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a,
- {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
- 8'h0a,8'h0a,8'h0a,8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= BIST_MODE == 0? FINISH_READ : BURST_WRITE;
- `endif
- end
- else begin
- lane <= lane + 1;
- bitslip_counter <= 0;
- `ifdef UART_DEBUG_ALIGN
- uart_start_send <= 1'b1;
- // uart_text <= {"state=ANALYZE_DATA_LOW_FREQ, Done lane=",hex_to_ascii(lane),8'h0a,"-----------------",8'h0a};
- uart_text <= {8'h0a,8'h0a, "Done lane=", hex_to_ascii(lane), ", bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe),
- ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a,
- {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
- 8'h0a,8'h0a,8'h0a,8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= ANALYZE_DATA_LOW_FREQ;
- `endif
- end
- end
- else begin // issue bitslip then repeat write-read
- o_phy_bitslip[lane] <= 1'b1;
- bitslip_counter <= bitslip_counter + 1; // increment counter every bitslip
- if(bitslip_counter == 7) begin // there are only 8 bitslip, once past this then we shift read pipe backwards (assumption is that we read too early)
- shift_read_pipe <= shift_read_pipe + 1;
- bitslip_counter <= 0;
- if(shift_read_pipe == 1) begin // if shift_read_pipe at end then we increase data_start_index since problem might be write DQ too early thus we shift it later using data_start_index
- shift_read_pipe <= 0;
- data_start_index[lane] <= lane_write_dq_late[lane]? data_start_index[lane] - 8: data_start_index[lane] + 8;
- if((data_start_index[lane] == 64) && !lane_write_dq_late[lane]) begin // if data_start_index at end then we assert data_start_index, last assumption is that we are writing DQ too late thus we move stage2_data forward to be sent out earlier
- data_start_index[lane] <= 64;
- lane_write_dq_late[lane] <= 1'b1;
- end
- end
- end
- state_calibrate <= ISSUE_WRITE_1;
- `ifdef UART_DEBUG_ALIGN
- uart_start_send <= 1'b1;
- uart_text <= {8'h0a,8'h0a, "lane=", hex_to_ascii(lane), ", bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe),
- ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a,
- {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
- 8'h0a,8'h0a,8'h0a,8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= ISSUE_WRITE_1;
- `endif
- end
- end
+ANALYZE_DATA_LOW_FREQ: if(DLL_OFF) begin // read_data_store should have the expected 9177298cd0ad51c1, if not then issue bitslip
+ if(write_pattern[0 +: 64] == {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }) begin
+ /* verilator lint_off WIDTH */
+ if(lane == LANES - 1) begin
+ /* verilator lint_on WIDTH */
+ state_calibrate <= BIST_MODE == 0? FINISH_READ : BURST_WRITE; // go straight to FINISH_READ if BIST_MODE == 0
+ initial_calibration_done <= 1'b1;
+ `ifdef UART_DEBUG_ALIGN
+ uart_start_send <= 1'b1;
+ //uart_text <= {"state=ANALYZE_DATA_LOW_FREQ, Done All Lanes",8'h0a,"-----------------",8'h0a,8'h0a};
+ uart_text <= {8'h0a,8'h0a, "Done All Lanes, bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe),
+ ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a,
+ {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
+ 8'h0a,8'h0a,8'h0a,8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= BIST_MODE == 0? FINISH_READ : BURST_WRITE;
+ `endif
+ end
+ else begin
+ lane <= lane + 1;
+ bitslip_counter <= 0;
+ `ifdef UART_DEBUG_ALIGN
+ uart_start_send <= 1'b1;
+ // uart_text <= {"state=ANALYZE_DATA_LOW_FREQ, Done lane=",hex_to_ascii(lane),8'h0a,"-----------------",8'h0a};
+ uart_text <= {8'h0a,8'h0a, "Done lane=", hex_to_ascii(lane), ", bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe),
+ ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a,
+ {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
+ 8'h0a,8'h0a,8'h0a,8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= ANALYZE_DATA_LOW_FREQ;
+ `endif
+ end
+ end
+ else begin // issue bitslip then repeat write-read
+ o_phy_bitslip[lane] <= 1'b1;
+ bitslip_counter <= bitslip_counter + 1; // increment counter every bitslip
+ if(bitslip_counter == 7) begin // there are only 8 bitslip, once past this then we shift read pipe backwards (assumption is that we read too early)
+ shift_read_pipe <= shift_read_pipe + 1;
+ bitslip_counter <= 0;
+ if(shift_read_pipe == 1) begin // if shift_read_pipe at end then we increase data_start_index since problem might be write DQ too early thus we shift it later using data_start_index
+ shift_read_pipe <= 0;
+ data_start_index[lane] <= lane_write_dq_late[lane]? data_start_index[lane] - 8: data_start_index[lane] + 8;
+ if((data_start_index[lane] == 64) && !lane_write_dq_late[lane]) begin // if data_start_index is already at the end (64) then hold it at 64 and assert lane_write_dq_late: the last assumption is that we are writing DQ too late, thus we move stage2_data forward to be sent out earlier
+ data_start_index[lane] <= 64;
+ lane_write_dq_late[lane] <= 1'b1;
+ end
+ end
+ end
+ state_calibrate <= ISSUE_WRITE_1;
+ `ifdef UART_DEBUG_ALIGN
+ uart_start_send <= 1'b1;
+ uart_text <= {8'h0a,8'h0a, "lane=", hex_to_ascii(lane), ", bitslip_counter=", hex_to_ascii(bitslip_counter), ", shift_read_pipe=", hex_to_ascii(shift_read_pipe),
+ ", data_start_index=", hex8_to_ascii(data_start_index[lane]), ", lane_late=", hex_to_ascii(lane_write_dq_late[lane]), 8'h0a,8'h0a,
+ {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
+ 8'h0a,8'h0a,8'h0a,8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= ISSUE_WRITE_1;
+ `endif
+ end
+ end
+
// extract burst_0-to-burst_7 data for a specified lane then determine which byte in write_pattern does it starts (ASSUMPTION: the DQ is too early [3d_9177298cd0ad51]c1 is written)
// NOTE TO SELF: all "8" here assume DQ_BITS are 8? parameterize this properly
// data_start_index for a specified lane determine how many bits are off the data from the write command
@@ -2840,149 +3014,157 @@ module ddr3_controller #(
// e.g. LANE={burst7, burst6, burst5, burst4, burst3, burst2, burst1, burst0} then with 1 ddr3 cycle delay between DQ and command
// burst0 will not be written but only starting on burst1
// if lane_write_dq_late is already set to 1 for this lane, then current lane should already be fixed without changing the data_start_index
- ANALYZE_DATA: if(write_pattern[ (lane_write_dq_late[lane]? 0 : data_start_index[lane]) +: 64] == {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }) begin
- /* verilator lint_off WIDTH */
- if(lane == LANES - 1) begin
- /* verilator lint_on WIDTH */
- state_calibrate <= BIST_MODE == 0? FINISH_READ : BURST_WRITE; // go straight to FINISH_READ if BIST_MODE == 0
- initial_calibration_done <= 1'b1;
- `ifdef UART_DEBUG_ALIGN
- uart_start_send <= 1'b1;
- uart_text <= {"state=ANALYZE_DATA, Done All Lanes",8'h0a,"-----------------",8'h0a,8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= BIST_MODE == 0? FINISH_READ : BURST_WRITE;
- `endif
- end
+ ANALYZE_DATA: if(prep_done[1]) begin
+ if(write_pattern_matches) begin
+ /* verilator lint_off WIDTH */
+ if(lane == LANES - 1) begin
+ /* verilator lint_on WIDTH */
+ state_calibrate <= BIST_MODE == 0? FINISH_READ : BURST_WRITE; // go straight to FINISH_READ if BIST_MODE == 0
+ initial_calibration_done <= 1'b1;
+ `ifdef UART_DEBUG_ALIGN
+ uart_start_send <= 1'b1;
+ uart_text <= {"state=ANALYZE_DATA, Done All Lanes",8'h0a,"-----------------",8'h0a,8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= BIST_MODE == 0? FINISH_READ : BURST_WRITE;
+ `endif
+ end
+ else begin
+ lane <= lane + 1;
+ data_start_index[lane+1] <= 0;
+ state_calibrate <= ANALYZE_DATA;
+ `ifdef UART_DEBUG_ALIGN
+ uart_start_send <= 1'b1;
+ uart_text <= {"state=ANALYZE_DATA, Done lane=",hex_to_ascii(lane),8'h0a,"-----------------",8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= ANALYZE_DATA;
+ `endif
+ end
+ end
else begin
- lane <= lane + 1;
- data_start_index[lane+1] <= 0;
- `ifdef UART_DEBUG_ALIGN
- uart_start_send <= 1'b1;
- uart_text <= {"state=ANALYZE_DATA, Done lane=",hex_to_ascii(lane),8'h0a,"-----------------",8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= ANALYZE_DATA;
- `endif
- end
- end
- else begin
- data_start_index[lane] <= data_start_index[lane] + 8; //skip by 8 (basically we want to delay DQ since it was too early)
- if(lane_write_dq_late[lane] && lane_read_dq_early[lane]) begin // both assumption is wrong so we reset the controller
- reset_from_calibrate <= 1;
- end
- // first assumption (write DQ is late) is wrong so we repeat write-read with data_start_index back to 0
- else if(lane_write_dq_late[lane]) begin
- data_start_index[lane] <= 0; // set delay to outgoing stage2_data back to zero
- if(data_start_index[lane] == 0) begin // if already set to zero then we already did write-read with default zero data_start_index, so we go to CHECK_STARTING_DATA to try second assumtpion
+ data_start_index[lane] <= data_start_index[lane] + 8; //skip by 8 (basically we want to delay DQ since it was too early)
+ if(lane_write_dq_late[lane] && lane_read_dq_early[lane]) begin // both assumptions are wrong so we reset the controller
+ reset_from_calibrate <= 1;
+ end
+ // first assumption (write DQ is late) is wrong so we repeat write-read with data_start_index back to 0
+ else if(lane_write_dq_late[lane]) begin
+ data_start_index[lane] <= 0; // set delay to outgoing stage2_data back to zero
+ if(data_start_index[lane] == 0) begin // if already set to zero then we already did write-read with default zero data_start_index, so we go to CHECK_STARTING_DATA to try the second assumption
+ state_calibrate <= CHECK_STARTING_DATA;
+ `ifdef UART_DEBUG_ALIGN
+ uart_start_send <= 1'b1;
+ uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", First Assumption wrong, Start second assumption: Read too early",8'h0a,8'h0a,
+ 8'h0a,8'h0a,
+ {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
+ 8'h0a,8'h0a,8'h0a,8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= CHECK_STARTING_DATA;
+ `endif
+ end
+ else begin // if not yet zero then we have to write-read again
+ state_calibrate <= ISSUE_WRITE_1;
+ end
+ end
+ //reached the end but STILL has error, issue might be WRITING TOO LATE (298cd0ad51c1XXXX is written) OR READING TOO EARLY ([9177]_298cd0ad51c1XXXX is read)
+ else if(data_start_index[lane] == 56) begin
+ data_start_index[lane] <= 0;
+ start_index_check <= 0;
state_calibrate <= CHECK_STARTING_DATA;
`ifdef UART_DEBUG_ALIGN
uart_start_send <= 1'b1;
- uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", First Assumption wrong, Start second assumption: Read too early",8'h0a,8'h0a,
- 8'h0a,8'h0a,
- {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
- 8'h0a,8'h0a,8'h0a,8'h0a};
+ uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", Reached end",8'h0a,8'h0a};
state_calibrate <= WAIT_UART;
state_calibrate_next <= CHECK_STARTING_DATA;
`endif
- end
- else begin // if not yet zero then we have to write-read again
- state_calibrate <= ISSUE_WRITE_1;
- end
- end
- //reached the end but STILL has error, issue might be WRITING TOO LATE (298cd0ad51c1XXXX is written) OR READING TOO EARLY ([9177]_298cd0ad51c1XXXX is read)
- else if(data_start_index[lane] == 56) begin
- data_start_index[lane] <= 0;
- start_index_check <= 0;
- state_calibrate <= CHECK_STARTING_DATA;
- `ifdef UART_DEBUG_ALIGN
+ end
+ `ifdef UART_DEBUG_ALIGN
+ else begin
uart_start_send <= 1'b1;
- uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", Reached end",8'h0a,8'h0a};
+ state_calibrate <= ANALYZE_DATA;
+ uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", data_start_index[lane]=0x",
+ hex_to_ascii(data_start_index[lane][6:4]),hex_to_ascii(data_start_index[lane][3:0]),8'h0a,8'h0a,8'h0a,8'h0a,
+ {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
+ read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
+ 8'h0a,8'h0a,8'h0a,8'h0a
+ };
state_calibrate <= WAIT_UART;
- state_calibrate_next <= CHECK_STARTING_DATA;
- `endif
- end
- `ifdef UART_DEBUG_ALIGN
- else begin
- uart_start_send <= 1'b1;
- uart_text <= {"state=ANALYZE_DATA, lane=",hex_to_ascii(lane), ", data_start_index[lane]=0x",
- hex_to_ascii(data_start_index[lane][6:4]),hex_to_ascii(data_start_index[lane][3:0]),8'h0a,8'h0a,8'h0a,8'h0a,
- {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] },
- 8'h0a,8'h0a,8'h0a,8'h0a
- };
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= ANALYZE_DATA;
- end
- `endif
- end
+ state_calibrate_next <= ANALYZE_DATA;
+ end
+ `endif
+ end
+ end
+ else begin
+ prep_done <= {prep_done[0],1'b1};
+ end
// check when the 4 MSB of write_pattern {d0ad51c1} starts on read_lane_data (read_lane_data is just the concatenation of read_data_store of a specific lane)
// assumption here read_lane_data ~= 298cd0ad51c1XXXX is written: either because we write too late (thus we need to delay outgoing stage2_data) OR we read too early (thus we need to calibrate incoming iserdes_dq)
- CHECK_STARTING_DATA: begin
- /* verilator lint_off WIDTHTRUNC */
- if(read_lane_data[start_index_check +: 32] == write_pattern[0 +: 32]) begin
- /* verilator lint_on WIDTHTRUNC */
- // first assumption: controller DQ is late WHEN WRITING(THUS WE NEED TO CALIBRATE data_start_index of outgoing stage2_data)
- if(!lane_write_dq_late[lane]) begin // lane_write_dq_late is not yet set so we know this first assunmption is not yet tested
- state_calibrate <= ISSUE_WRITE_1; // start writing again (the next write should fix the late DQ for this current lane)
- data_start_index[lane] <= 64 - start_index_check; // stage2_data_unaligned is forwarded to stage[1] so we are now 8-bursts early, so we subtract from 64 so the burst we will be forwarded to the tip of stage2_data
- lane_write_dq_late[lane] <= 1'b1;
- `ifdef UART_DEBUG_ALIGN
- uart_start_send <= 1'b1;
- uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=0x",hex8_to_ascii(start_index_check), ", Ongoing First Assumption",8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= ISSUE_WRITE_1;
- `endif
- end
- // if first assumption is not the fix then second assmption: controller reads the DQ too early (THUS WE NEED TO CALIBRATE INCOMING DQ SIGNAL starting from bitslip training)
- else begin
- lane_read_dq_early[lane] <= 1'b1; // set to 1 to see later what lanes has this problem
- state_calibrate <= BITSLIP_DQS_TRAIN_3;
- added_read_pipe[lane] <= { {( 4 - ($clog2(STORED_DQS_SIZE*8) - (3+1)) ){1'b0}} , dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1:(3+1)] }
- + { 3'b0 , (dq_target_index[lane][3:0] >= (5+8)) };
- dqs_bitslip_arrangement <= 16'b0011_1100_0011_1100 >> dq_target_index[lane][2:0];
- `ifdef UART_DEBUG_ALIGN
- uart_start_send <= 1'b1;
- uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=0x",hex8_to_ascii(start_index_check), ", Ongoing Second Assumption",8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= BITSLIP_DQS_TRAIN_3;
- `endif
- end
- end
- else begin
- start_index_check <= start_index_check + 16; // plus 16, we assume here that DQ will be late BY 1 DDR3 CLK CYCLE (if only +8, then it will be late by half DDR3 cycle, that should NOT happen)
- dq_target_index[lane] <= dq_target_index[lane] + 2;
- if(start_index_check == 48)begin // start_index_check is now outside the possible values
- // first assumption: controller DQ is 1 CONTROLLER CYCLE late WHEN WRITING (data is written to address 1 and not address 0)
- if(!lane_write_dq_late[lane]) begin // lane_write_dq_late is not yet set so we know this first assunmption is not yet tested
+ CHECK_STARTING_DATA: if(prep_done[1]) begin
+ /* verilator lint_off WIDTHTRUNC */
+ if(read_lane_data_shifted == write_pattern[0 +: 32]) begin
+ /* verilator lint_on WIDTHTRUNC */
+ // first assumption: controller DQ is late WHEN WRITING(THUS WE NEED TO CALIBRATE data_start_index of outgoing stage2_data)
+ if(!lane_write_dq_late[lane]) begin // lane_write_dq_late is not yet set so we know this first assumption is not yet tested
state_calibrate <= ISSUE_WRITE_1; // start writing again (the next write should fix the late DQ for this current lane)
- data_start_index[lane] <= 1; // stage2_data_unaligned is forwarded to stage[1] so we are now 8-bursts early, since assumption is we are 1 controller cycle early then data_start_index is 64
+ data_start_index[lane] <= 64 - start_index_check; // stage2_data_unaligned is forwarded to stage[1] so we are now 8-bursts early, so we subtract from 64 so the burst we will be forwarded to the tip of stage2_data
lane_write_dq_late[lane] <= 1'b1;
`ifdef UART_DEBUG_ALIGN
uart_start_send <= 1'b1;
- uart_text <= {"state=CHECK_STARTING_DATA, Reached end, First Assumption: Write is 1 Controller cycle early",8'h0a};
+ uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=0x",hex8_to_ascii(start_index_check), ", Ongoing First Assumption",8'h0a};
state_calibrate <= WAIT_UART;
state_calibrate_next <= ISSUE_WRITE_1;
`endif
end
- else begin // if first assumption is wrong and start_index_check is still outside of possible values then reset
- reset_from_calibrate <= 1;
+ // if the first assumption is not the fix then try the second assumption: controller reads the DQ too early (THUS WE NEED TO CALIBRATE INCOMING DQ SIGNAL starting from bitslip training)
+ else begin
+ lane_read_dq_early[lane] <= 1'b1; // set to 1 to see later what lanes has this problem
+ state_calibrate <= BITSLIP_DQS_TRAIN_3;
+ added_read_pipe[lane] <= |({ {( 4 - ($clog2(STORED_DQS_SIZE*8) - (3+1)) ){1'b0}} , dq_target_index[lane][$clog2(STORED_DQS_SIZE*8)-1:(3+1)] }
+ + { 3'b0 , (dq_target_index[lane][3:0] >= (5+8)) })? 'd1 : 'd0; // added_read_pipe can just be 1 or 0
+ dqs_bitslip_arrangement <= 16'b0011_1100_0011_1100 >> dq_target_index[lane][2:0];
+ `ifdef UART_DEBUG_ALIGN
+ uart_start_send <= 1'b1;
+ uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=0x",hex8_to_ascii(start_index_check), ", Ongoing Second Assumption",8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= BITSLIP_DQS_TRAIN_3;
+ `endif
end
end
- `ifdef UART_DEBUG_ALIGN
else begin
- uart_start_send <= 1'b1;
- uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=", hex_to_ascii(start_index_check[5:4]), hex_to_ascii(start_index_check[3:0]),8'h0a};
- state_calibrate <= WAIT_UART;
- state_calibrate_next <= CHECK_STARTING_DATA;
+ start_index_check <= start_index_check + 16; // plus 16, we assume here that DQ will be late BY 1 DDR3 CLK CYCLE (if only +8, then it will be late by half DDR3 cycle, that should NOT happen)
+ dq_target_index[lane] <= dq_target_index[lane] + 2;
+ if(start_index_check == 48)begin // start_index_check is now outside the possible values
+ // first assumption: controller DQ is 1 CONTROLLER CYCLE late WHEN WRITING (data is written to address 1 and not address 0)
+ if(!lane_write_dq_late[lane]) begin // lane_write_dq_late is not yet set so we know this first assumption is not yet tested
+ state_calibrate <= ISSUE_WRITE_1; // start writing again (the next write should fix the late DQ for this current lane)
+ data_start_index[lane] <= 1; // stage2_data_unaligned is forwarded to stage[1] so we are now 8-bursts early, since assumption is we are 1 controller cycle early then data_start_index is 64 -- NOTE(review): this comment says 64 but the value assigned here is 1; confirm which is intended
+ lane_write_dq_late[lane] <= 1'b1;
+ `ifdef UART_DEBUG_ALIGN
+ uart_start_send <= 1'b1;
+ uart_text <= {"state=CHECK_STARTING_DATA, Reached end, First Assumption: Write is 1 Controller cycle early",8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= ISSUE_WRITE_1;
+ `endif
+ end
+ else begin // if first assumption is wrong and start_index_check is still outside of possible values then reset
+ reset_from_calibrate <= 1;
+ end
+ end
+ `ifdef UART_DEBUG_ALIGN
+ else begin
+ uart_start_send <= 1'b1;
+ uart_text <= {"state=CHECK_STARTING_DATA, start_index_check=", hex_to_ascii(start_index_check[5:4]), hex_to_ascii(start_index_check[3:0]),8'h0a};
+ state_calibrate <= WAIT_UART;
+ state_calibrate_next <= CHECK_STARTING_DATA;
+ end
+ `endif
end
- `endif
end
- end
+ else begin
+ prep_done <= {prep_done[0],1'b1};
+ end
BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to capture the DQ correctly
if(i_phy_iserdes_bitslip_reference[lane*serdes_ratio*2 +: 8] == dqs_bitslip_arrangement[7:0]) begin
@@ -3013,7 +3195,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap
write_test_address_counter <= 0;
end
state_calibrate <= BURST_READ;
- `ifdef UART_DEBUG_ALIGN
+ `ifdef UART_DEBUG_BIST
uart_start_send <= 1'b1;
uart_text <= {"DONE BURST WRITE (PER BYTE): BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a};
state_calibrate <= WAIT_UART;
@@ -3036,7 +3218,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap
write_test_address_counter <= 0;
end
state_calibrate <= BURST_READ;
- `ifdef UART_DEBUG_ALIGN
+ `ifdef UART_DEBUG_BIST
uart_start_send <= 1'b1;
uart_text <= {"DONE BURST WRITE (ALL BYTES): BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a};
state_calibrate <= WAIT_UART;
@@ -3059,7 +3241,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap
read_test_address_counter <= 0;
end
state_calibrate <= RANDOM_WRITE;
- `ifdef UART_DEBUG_ALIGN
+ `ifdef UART_DEBUG_BIST
uart_start_send <= 1'b1;
uart_text <= {"DONE BURST READ: BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a};
state_calibrate <= WAIT_UART;
@@ -3087,7 +3269,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap
write_test_address_counter <= 0;
end
state_calibrate <= RANDOM_READ;
- `ifdef UART_DEBUG_ALIGN
+ `ifdef UART_DEBUG_BIST
uart_start_send <= 1'b1;
uart_text <= {"DONE RANDOM WRITE: BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a};
state_calibrate <= WAIT_UART;
@@ -3113,7 +3295,7 @@ BITSLIP_DQS_TRAIN_3: if(train_delay == 0) begin //train again the ISERDES to cap
read_test_address_counter <= 0;
end
state_calibrate <= ALTERNATE_WRITE_READ;
- `ifdef UART_DEBUG_ALIGN
+ `ifdef UART_DEBUG_BIST
uart_start_send <= 1'b1;
uart_text <= {"DONE RANDOM READ: BIST_MODE=",hex_to_ascii(BIST_MODE),8'h0a};
state_calibrate <= WAIT_UART;
@@ -3137,7 +3319,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin
/* verilator lint_on WIDTHEXPAND */
train_delay <= 15;
state_calibrate <= FINISH_READ;
- `ifdef UART_DEBUG_ALIGN
+ `ifdef UART_DEBUG_BIST
uart_start_send <= 1'b1;
uart_text <= {"DONE ALTERNATING WRITE-READ",8'h0a};
state_calibrate <= WAIT_UART;
@@ -3161,7 +3343,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin
state_calibrate <= DONE_CALIBRATE;
final_calibration_done <= 1'b1;
end
- `ifdef UART_DEBUG_ALIGN
+ `ifdef UART_DEBUG_BIST
uart_start_send <= 1'b1;
uart_text <= {"DONE BIST_MODE=",hex_to_ascii(BIST_MODE),", correct_read_data=",
8'h0a, 8'h0a, correct_read_data, 8'h0a, 8'h0a, 8'h0a, 8'h0a
@@ -3199,10 +3381,13 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin
`ifdef FORMAL_COVER
state_calibrate <= DONE_CALIBRATE;
`endif
-
- read_lane_data <= {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
- read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] };
+ read_lane_data <= {read_data_store[((DQ_BITS*LANES)*7 + ({29'd0, lane}<<3)) +: 8], read_data_store[((DQ_BITS*LANES)*6 + ({29'd0, lane}<<3)) +: 8],
+ read_data_store[((DQ_BITS*LANES)*5 + ({29'd0, lane}<<3)) +: 8], read_data_store[((DQ_BITS*LANES)*4 + ({29'd0, lane}<<3)) +: 8], read_data_store[((DQ_BITS*LANES)*3 + ({29'd0, lane}<<3)) +: 8],
+ read_data_store[((DQ_BITS*LANES)*2 + ({29'd0, lane}<<3)) +: 8],read_data_store[((DQ_BITS*LANES)*1 + ({29'd0, lane}<<3)) +: 8],read_data_store[((DQ_BITS*LANES)*0 + ({29'd0, lane}<<3)) +: 8] };
+ write_pattern_lane <= write_pattern[ (lane_write_dq_late[lane]? 0 : data_start_index[lane]) +: 64];
+ read_lane_data_shifted <= read_lane_data[start_index_check +: 32];
+ write_pattern_matches <= write_pattern_lane == read_lane_data;
+
//halfway value has been reached (illegal) and will go back to zero at next load
if(odelay_data_cntvaluein[lane] == 15) begin
odelay_cntvalue_halfway <= 1;
@@ -3211,7 +3396,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin
pause_counter <= 1; // pause instruction address until pre-stall delay before refresh sequence finishes
//skip to instruction address 20 (precharge all before refresh) when no pending requests anymore
//toggle it for 1 clk cycle only
- if( !stage1_pending && !stage2_pending && ( (o_wb_stall && final_calibration_done) || (o_wb_stall_calib && state_calibrate != DONE_CALIBRATE) ) ) begin
+ if( !stage1_pending && !stage2_pending && ( (o_wb_stall && final_calibration_done) || (o_wb_stall_calib && !final_calibration_done) ) ) begin
pause_counter <= 0; // pre-stall delay done since all remaining requests are completed
end
end
@@ -3455,7 +3640,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin
end
else begin
reset_from_test <= 0;
- if(state_calibrate != DONE_CALIBRATE) begin
+ if(!final_calibration_done) begin
if ( o_aux[2:0] == 3'd3 && o_wb_ack_uncalibrated ) begin //o_aux = 3 is for read from calibration
if(o_wb_data == correct_data) begin
correct_read_data <= correct_read_data + 1;
@@ -4936,7 +5121,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin
// stage0_pending will rise to high if ecc_stage1_stall is high the previous cycle and stall is low
if(stage0_pending && !$past(stage0_pending)) begin
- assert($past(ecc_stage1_stall) && !$past(o_wb_stall_q));
+ assert($past(ecc_stage1_stall) && !$past(o_wb_stall_int_q));
end
// stage0_pending currently high means stage2 and stage1 is pending, and there is ECC request on stage2
@@ -5035,7 +5220,7 @@ ALTERNATE_WRITE_READ: if(!o_wb_stall_calib) begin
assert(!stage1_pending);
assert(!stage2_pending);
end
- if($past(o_wb_stall_q) && stage1_pending && !$past(stage1_update)) begin //if pipe did not move forward
+ if($past(o_wb_stall_int_q) && stage1_pending && !$past(stage1_update)) begin //if pipe did not move forward
assert(stage1_we == $past(stage1_we));
assert(stage1_aux == $past(stage1_aux));
assert(stage1_bank == $past(stage1_bank));