added more comments

This commit is contained in:
Angelo Jacobo 2024-03-22 18:30:51 +08:00 committed by GitHub
parent 666f98d6c1
commit cf3bc8c629
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 63 additions and 10 deletions

View File

@ -807,8 +807,8 @@ module ddr3_controller #(
//stage2_data -> shiftreg(CWL) -> OSERDES(DDR) -> ODELAY -> RAM
end
if(!ODELAY_SUPPORTED) begin
stage2_data_unaligned <= stage2_data_unaligned_temp; //delayed by 1 clock cycle
stage2_dm_unaligned <= stage2_dm_unaligned_temp; //delayed by 1 clock cycle
stage2_data_unaligned <= stage2_data_unaligned_temp; //_temp is for added delay of 1 clock cycle (no ODELAY so no added delay)
stage2_dm_unaligned <= stage2_dm_unaligned_temp; //_temp is for added delay of 1 clock cycle (no ODELAY so no added delay)
end
// when not in refresh, transaction can only be processed when i_wb_cyc is high and not stall
@ -834,6 +834,7 @@ module ddr3_controller #(
/* verilator lint_on WIDTH */
stage1_data <= i_wb_data;
end
// request from calibrate FSM will be accepted here
else if(state_calibrate != DONE_CALIBRATE && !o_wb_stall_calib) begin
stage1_pending <= calib_stb;//actual request flag
stage1_we <= calib_we; //write-enable
@ -850,6 +851,8 @@ module ddr3_controller #(
for(index = 0; index < LANES; index = index + 1) begin
/* verilator lint_off WIDTH */
// stage2_data_unaligned is the DQ_BITS*LANES*8 raw data from stage 1 so not yet aligned
// unaligned_data is 64 bits
{unaligned_data[index], {
stage2_data[0][((DQ_BITS*LANES)*7 + 8*index) +: 8], stage2_data[0][((DQ_BITS*LANES)*6 + 8*index) +: 8],
stage2_data[0][((DQ_BITS*LANES)*5 + 8*index) +: 8], stage2_data[0][((DQ_BITS*LANES)*4 + 8*index) +: 8],
@ -860,7 +863,37 @@ module ddr3_controller #(
stage2_data_unaligned[((DQ_BITS*LANES)*3 + 8*index) +: 8], stage2_data_unaligned[((DQ_BITS*LANES)*2 + 8*index) +: 8],
stage2_data_unaligned[((DQ_BITS*LANES)*1 + 8*index) +: 8], stage2_data_unaligned[((DQ_BITS*LANES)*0 + 8*index) +: 8] }
<< data_start_index[index]) | unaligned_data[index];
/*
// Example with LANE 0:
// Burst_0 to burst_7 of unaligned LANE 0 will be extracted which will be shifted by data_start_index.
// Each 8 bits of shift means a burst will be moved to next ddr3_clk cycle, this is needed if for example
// the DQ trace is longer than the command trace where the DQ bits must be delayed by 1 ddr3_clk cycle
// to align the DQ data to the write command.
//
// Since 1 controller clk cycle will have 4 ddr3_clk cycle, and each ddr3_clk cycle is DDR:
// CONTROLLER CLK CYCLE 0: [burst0,burst1] [burst2,burst3] [burst4,burst5] [burst6,burst7]
// CONTROLLER CLK CYCLE 1: [burst0,burst1] [burst2,burst3] [burst4,burst5] [burst6,burst7]
// CONTROLLER CLK CYCLE 2: [burst0,burst1] [burst2,burst3] [burst4,burst5] [burst6,burst7]
//
// shifting by 1 burst means burst 7 will be sent on next controller clk cycle and EVERY BURST WILL SHIFT:
// CONTROLLER CLK CYCLE 0: [xxxxxx,xxxxxx] [burst0,burst1] [burst2,burst3] [burst4,burst5]
// CONTROLLER CLK CYCLE 1: [burst6,burst7] [burst0,burst1] [burst2,burst3] [burst4,burst5]
// CONTROLLER CLK CYCLE 2: [burst6,burst7] [burst0,burst1] [burst2,burst3] [burst4,burst5]
//
// the [burst6,burst7] which has to be stored and delayed until next clk cycle will be handled by unaligned_data
{unaligned_data[0], {
stage2_data[0][((64)*7 + 8*0) +: 8], stage2_data[0][((64)*6 + 8*0) +: 8],
stage2_data[0][((64)*5 + 8*0) +: 8], stage2_data[0][((64)*4 + 8*0) +: 8],
stage2_data[0][((64)*3 + 8*0) +: 8], stage2_data[0][((64)*2 + 8*0) +: 8],
stage2_data[0][((64)*1 + 8*0) +: 8], stage2_data[0][((64)*0 + 8*0) +: 8] }}
<= ( { stage2_data_unaligned[((64)*7 + 8*0) +: 8], stage2_data_unaligned[((64)*6 + 8*0) +: 8],
stage2_data_unaligned[((64)*5 + 8*0) +: 8], stage2_data_unaligned[((64)*4 + 8*0) +: 8],
stage2_data_unaligned[((64)*3 + 8*0) +: 8], stage2_data_unaligned[((64)*2 + 8*0) +: 8],
stage2_data_unaligned[((64)*1 + 8*0) +: 8], stage2_data_unaligned[((64)*0 + 8*0) +: 8] }
<< data_start_index[0]) | unaligned_data[0];
*/
// The same alignment logic is done with data mask
{unaligned_dm[index], {
stage2_dm[0][LANES*7 + index], stage2_dm[0][LANES*6 + index],
stage2_dm[0][LANES*5 + index], stage2_dm[0][LANES*4 + index],
@ -873,6 +906,7 @@ module ddr3_controller #(
<< (data_start_index[index]>>3)) | unaligned_dm[index];
/* verilator lint_on WIDTH */
end
// stage2 can have multiple pipelined stages inside it
for(index = 0; index < STAGE2_DATA_DEPTH-1; index = index+1) begin
stage2_data[index+1] <= stage2_data[index];
stage2_dm[index+1] <= stage2_dm[index];
@ -885,11 +919,11 @@ module ddr3_controller #(
end
end
end
assign o_phy_data = stage2_data[STAGE2_DATA_DEPTH-1];
assign o_phy_data = stage2_data[STAGE2_DATA_DEPTH-1]; // the data sent to PHY is the last stage of of stage 2 (since stage 2 can have multiple pipelined stages inside it_
assign o_phy_dm = stage2_dm[STAGE2_DATA_DEPTH-1];
/* verilator lint_off WIDTH */
assign wb_addr_plus_anticipate = i_wb_addr + MARGIN_BEFORE_ANTICIPATE;
assign calib_addr_plus_anticipate = calib_addr + MARGIN_BEFORE_ANTICIPATE;
assign wb_addr_plus_anticipate = i_wb_addr + MARGIN_BEFORE_ANTICIPATE; // wb_addr_plus_anticipate determines if it is near the end of column by checking if it jumps to next row
assign calib_addr_plus_anticipate = calib_addr + MARGIN_BEFORE_ANTICIPATE; // just same as wb_addr_plus_anticipate but while doing calibration
/* verilator lint_on WIDTH */
// DIAGRAM FOR ALL RELEVANT TIMING PARAMETERS:
//
@ -1548,7 +1582,13 @@ module ddr3_controller #(
calib_sel <= {wb_sel_bits{1'b1}};
calib_we <= 1; //write-enable
calib_addr <= 0;
// burst_7 burst_6 burst_5 burst_4 burst_3 burst_2 burst_1 burst_0
calib_data <= { {LANES{8'h91}}, {LANES{8'h77}}, {LANES{8'h29}}, {LANES{8'h8c}}, {LANES{8'hd0}}, {LANES{8'had}}, {LANES{8'h51}}, {LANES{8'hc1}} };
// write to address 0 is a burst of 8 writes, where all lanes has same data written: 64'h9177298cd0ad51c1
// Example for LANES of 2, DQ of 8: the 128 bits (8 bursts * 2 lanes/burst * 8bits/lane) are 8 bursts with 8 bytes each:
// { { {burst7_lane1} , {burst7_lane0} } , { {burst6_lane1} , {burst6_lane0} } , { {burst5_lane1} , {burst5_lane0} }
// , { {burst4_lane1} , {burst4_lane0} } , { {burst3_lane1} , {burst3_lane0} } , { {burst2_lane1} , {burst2_lane0} }
// , { {burst1_lane1} , {burst1_lane0} } , { {burst0_lane1} , {burst0_lane0} } }
state_calibrate <= ISSUE_WRITE_2;
end
ISSUE_WRITE_2: begin
@ -1557,9 +1597,17 @@ module ddr3_controller #(
calib_sel <= {wb_sel_bits{1'b1}};
calib_we <= 1; //write-enable
calib_addr <= 1;
calib_data <= { {LANES{8'h80}}, {LANES{8'hdb}}, {LANES{8'hcf}}, {LANES{8'hd2}}, {LANES{8'h75}}, {LANES{8'hf1}}, {LANES{8'h2c}}, {LANES{8'h3d}} };
// burst_7 burst_6 burst_5 burst_4 burst_3 burst_2 burst_1 burst_0
calib_data <= { {LANES{8'h80}}, {LANES{8'hdb}}, {LANES{8'hcf}}, {LANES{8'hd2}}, {LANES{8'h75}}, {LANES{8'hf1}}, {LANES{8'h2c}}, {LANES{8'h3d}} };
// write to address 1 is also a burst of 8 writes, where all lanes has same data written: 128'h80dbcfd275f12c3d
state_calibrate <= ISSUE_READ;
end
end
// NOTE WHY THERE ARE TWO ISSUE_WRITE
// address 0 and 1 is written with a deterministic data, if the DQ trace has long delay then the data will be delayed
// compared to the write command so the data aligned to the write command for address 0 MIGHT START AT MIDDLE OF EXPECTED OUTPUT
// DATA (64'h9177298cd0ad51c1) e.g. the data written might be 64'h[2c3d][9177298cd0ad] where the data written starts
// at burst 2 (burst 0 and burst 1 are cut-off since each burst uses 1 ddr3_clk cycle)
ISSUE_READ: begin
calib_stb <= 1;//actual request flag
calib_aux <= 1; //AUX ID to determine later if ACK is for read or write
@ -1569,7 +1617,7 @@ module ddr3_controller #(
end
READ_DATA: if(o_wb_ack_read_q[0] == {{(AUX_WIDTH-2){1'b0}}, 2'd1, 1'b1}) begin //wait for the read ack (which has AUX ID of 1}
read_data_store <= o_wb_data;
read_data_store <= o_wb_data; // read data on address 0
calib_stb <= 0;
state_calibrate <= ANALYZE_DATA;
data_start_index[lane] <= 0;
@ -1583,7 +1631,12 @@ module ddr3_controller #(
else if(!o_wb_stall_calib) begin
calib_stb <= 0;
end
// extract burst_0-to-burst_7 data for a specified lane then determine which byte in write_pattern does it starts
// NOTE TO ME: all "8" here assume DQ_BITS are 8? parameterize this properly
// data_start_index for a specified lane determine how many bits are off the data from the write command
// so for every 1 ddr3 clk cycle delay of DQ from write command, each lane will be 1 burst off:
// e.g. LANE={burst7, burst6, burst5, burst4, burst3, burst2, burst1, burst0} then with 1 ddr3 cycle delay between DQ and command
// burst0 will not be written but only starting on burst1
ANALYZE_DATA: if(write_pattern[data_start_index[lane] +: 64] == {read_data_store[((DQ_BITS*LANES)*7 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*6 + 8*lane) +: 8],
read_data_store[((DQ_BITS*LANES)*5 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*4 + 8*lane) +: 8], read_data_store[((DQ_BITS*LANES)*3 + 8*lane) +: 8],
read_data_store[((DQ_BITS*LANES)*2 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*1 + 8*lane) +: 8],read_data_store[((DQ_BITS*LANES)*0 + 8*lane) +: 8] }) begin
@ -1598,7 +1651,7 @@ module ddr3_controller #(
end
end
else begin
data_start_index[lane] <= data_start_index[lane] + 8;
data_start_index[lane] <= data_start_index[lane] + 8; //skip by 8
if(data_start_index[lane] == 56) begin //reached the end but no byte in write-pattern matches the data read, issue might be reading at wrong DQS toggle
data_start_index[lane] <= 0; //so we need to recalibrate the bitslip
start_index_check <= 0;