fix pipelining in video_sprite example

This commit is contained in:
Fischer Moseley 2023-04-13 18:00:22 -04:00
parent 153ae7e3df
commit d8eeb65b8f
6 changed files with 37 additions and 173 deletions

View File

@ -81,7 +81,13 @@ lut_ram_tb:
examples: icestick nexys_a7
nexys_a7: nexys_a7_io_core nexys_a7_logic_analyzer nexys_a7_lut_ram
nexys_a7: nexys_a7_video_sprite nexys_a7_io_core nexys_a7_logic_analyzer nexys_a7_lut_ram
# Build the Nexys A7 video_sprite example. From the example's directory this
# runs `manta gen manta.yaml src/manta.v` (presumably generating src/manta.v
# from the YAML configuration - confirm), makes sure obj/ exists, and then runs
# lab-bc.py (presumably the build script - confirm).
# The steps are chained with `; \` so they all execute in one shell, which is
# what keeps the `cd` in effect for the later commands.
nexys_a7_video_sprite:
cd examples/nexys_a7/video_sprite; \
manta gen manta.yaml src/manta.v; \
mkdir -p obj/; \
python3 lab-bc.py
nexys_a7_io_core:
cd examples/nexys_a7/io_core/; \

View File

@ -1,60 +0,0 @@
// True dual-port RAM, read-first, with an independent clock per port.
// Both ports can read and write. When a port writes, its output presents the
// contents that were stored at the write address before the write (read-first).
// Each port has a read latency of two clock cycles: one for the RAM read
// register and one for the output register that improves clock-to-out timing.
// This module has no reset or enable inputs; memory and registers start at zero.
// Modified from the xilinx_true_dual_port_read_first_2_clock_ram Verilog language template.
//
// Parameters:
//   RAM_WIDTH - width of each entry in bits (must be overridden; default 0 is a placeholder)
//   RAM_DEPTH - number of entries (must be overridden; default 0 is a placeholder)
module dual_port_bram #(
    parameter RAM_WIDTH = 0,
    parameter RAM_DEPTH = 0
    ) (
    // Address width is $clog2(RAM_DEPTH): the number of bits needed to index
    // entries 0..RAM_DEPTH-1. ($clog2(RAM_DEPTH-1) is one bit short whenever
    // RAM_DEPTH is a power of two plus one, and is zero for RAM_DEPTH = 2.)
    input wire [$clog2(RAM_DEPTH)-1:0] addra,   // port A address
    input wire [$clog2(RAM_DEPTH)-1:0] addrb,   // port B address
    input wire [RAM_WIDTH-1:0] dina,            // port A write data
    input wire [RAM_WIDTH-1:0] dinb,            // port B write data
    input wire clka,                            // port A clock
    input wire clkb,                            // port B clock
    input wire wea,                             // port A write enable
    input wire web,                             // port B write enable
    output wire [RAM_WIDTH-1:0] douta,          // port A read data (2-cycle latency)
    output wire [RAM_WIDTH-1:0] doutb           // port B read data (2-cycle latency)
    );

    // Memory array. Declared before the initial block that references it.
    reg [RAM_WIDTH-1:0] BRAM [RAM_DEPTH-1:0];

    // Initialize every entry to zero to match hardware power-up contents.
    integer i;
    initial begin
        for (i = 0; i < RAM_DEPTH; i = i + 1)
            BRAM[i] = {RAM_WIDTH{1'b0}};
    end

    // First read stage: one register per port, loaded straight from the array.
    reg [RAM_WIDTH-1:0] ram_data_a = {RAM_WIDTH{1'b0}};
    reg [RAM_WIDTH-1:0] ram_data_b = {RAM_WIDTH{1'b0}};

    // Port A. The non-blocking write and read in the same clock edge mean the
    // read captures the old contents of the addressed entry (read-first).
    always @(posedge clka) begin
        if (wea) BRAM[addra] <= dina;
        ram_data_a <= BRAM[addra];
    end

    // Port B, same behavior on its own clock.
    always @(posedge clkb) begin
        if (web) BRAM[addrb] <= dinb;
        ram_data_b <= BRAM[addrb];
    end

    // Second read stage: output registers, giving the 2 clock cycle read
    // latency that improves clock-to-out timing.
    reg [RAM_WIDTH-1:0] douta_reg = {RAM_WIDTH{1'b0}};
    reg [RAM_WIDTH-1:0] doutb_reg = {RAM_WIDTH{1'b0}};

    always @(posedge clka) douta_reg <= ram_data_a;
    always @(posedge clkb) doutb_reg <= ram_data_b;

    assign douta = douta_reg;
    assign doutb = doutb_reg;
endmodule

View File

@ -34,6 +34,26 @@
.vsync_out(vsync),
.blank_out(blank));
// VGA Pipelining
// Two-stage delay line for the VGA timing signals. Index [0] holds each signal
// delayed by one clk_65mhz cycle and index [1] holds it delayed by two cycles.
// NOTE(review): the two-cycle depth presumably matches the read latency of the
// sprite image memory - confirm against the memory actually instantiated.
reg[1:0][10:0] hcount_pipe;
reg[1:0][10:0] vcount_pipe;
reg[1:0] hsync_pipe;
reg[1:0] vsync_pipe;
reg[1:0] blank_pipe;

always_ff @(posedge clk_65mhz)begin
  // Stage 0: capture the undelayed signals.
  hcount_pipe[0] <= hcount;
  vcount_pipe[0] <= vcount;
  hsync_pipe[0] <= hsync;
  vsync_pipe[0] <= vsync;
  blank_pipe[0] <= blank;

  // Remaining stages: shift each signal one stage further down the pipe.
  // The bound is 2 because every pipe register above is declared with
  // exactly two stages ([1:0]); a larger bound indexes out of range.
  for (int i=1; i<2; i = i+1)begin
    hcount_pipe[i] <= hcount_pipe[i-1];
    vcount_pipe[i] <= vcount_pipe[i-1];
    hsync_pipe[i] <= hsync_pipe[i-1];
    vsync_pipe[i] <= vsync_pipe[i-1];
    blank_pipe[i] <= blank_pipe[i-1];
  end
end
localparam WIDTH = 128;
localparam HEIGHT = 128;
@ -45,8 +65,8 @@
assign image_addr = (hcount - X) + ((vcount - Y) * WIDTH);
logic in_sprite;
assign in_sprite = ((hcount >= X && hcount < (X + WIDTH)) &&
(vcount >= Y && vcount < (Y + HEIGHT)));
assign in_sprite = ((hcount_pipe[1] >= X && hcount_pipe[1] < (X + WIDTH)) &&
(vcount_pipe[1] >= Y && vcount_pipe[1] < (Y + HEIGHT)));
manta manta_inst (
.clk(clk_65mhz),
@ -65,13 +85,12 @@
assign color = in_sprite ? sprite_color : 12'h0;
// the following lines are required for the Nexys4 VGA circuit - do not change
assign vga_r = ~blank ? color[11:8]: 0;
assign vga_g = ~blank ? color[7:4] : 0;
assign vga_b = ~blank ? color[3:0] : 0;
assign vga_hs = ~hsync;
assign vga_vs = ~vsync;
assign vga_r = ~blank_pipe[1] ? color[11:8]: 0;
assign vga_g = ~blank_pipe[1] ? color[7:4] : 0;
assign vga_b = ~blank_pipe[1] ? color[3:0] : 0;
assign vga_hs = ~hsync_pipe[1];
assign vga_vs = ~vsync_pipe[1];
// debug
assign led = manta_inst.brx_image_mem_addr;

View File

@ -1,102 +0,0 @@
// Xilinx Single Port Read First RAM
// This code implements a parameterizable single-port read-first memory where when data
// is written to the memory, the output reflects the prior contents of the memory location.
// If the output data is not needed during writes or the last read value is desired to be
// retained, it is suggested to set WRITE_MODE to NO_CHANGE as it is more power efficient.
// If a reset or enable is not necessary, it may be tied off or removed from the code.
// Modify the parameters for the desired RAM characteristics.
module xilinx_single_port_ram_read_first #(
parameter RAM_WIDTH = 18, // Specify RAM data width
parameter RAM_DEPTH = 1024, // Specify RAM depth (number of entries)
parameter RAM_PERFORMANCE = "HIGH_PERFORMANCE", // Select "HIGH_PERFORMANCE" or "LOW_LATENCY"
parameter INIT_FILE = "" // Specify name/location of RAM initialization file if using one (leave blank if not)
) (
input [clogb2(RAM_DEPTH-1)-1:0] addra, // Address bus, width determined from RAM_DEPTH
input [RAM_WIDTH-1:0] dina, // RAM input data
input clka, // Clock
input wea, // Write enable
input ena, // RAM Enable, for additional power savings, disable port when not in use
input rsta, // Output reset (does not affect memory contents)
input regcea, // Output register enable
output [RAM_WIDTH-1:0] douta // RAM output data
);
// Memory array and the first-stage read register (initialized to zero)
reg [RAM_WIDTH-1:0] BRAM [RAM_DEPTH-1:0];
reg [RAM_WIDTH-1:0] ram_data = {RAM_WIDTH{1'b0}};
// The following code either initializes the memory values to a specified file or to all zeros to match hardware
generate
if (INIT_FILE != "") begin: use_init_file
// Load entries 0..RAM_DEPTH-1 from INIT_FILE, read as hexadecimal by $readmemh
initial
$readmemh(INIT_FILE, BRAM, 0, RAM_DEPTH-1);
end else begin: init_bram_to_zero
integer ram_index;
initial
for (ram_index = 0; ram_index < RAM_DEPTH; ram_index = ram_index + 1)
BRAM[ram_index] = {RAM_WIDTH{1'b0}};
end
endgenerate
// Memory access, gated by ena: when ena is low neither the write nor the read register update occurs.
// The write and the read use non-blocking assignments on the same clock edge, so on a write
// ram_data captures the previous contents of the addressed entry (read-first behavior).
always @(posedge clka)
if (ena) begin
if (wea)
BRAM[addra] <= dina;
ram_data <= BRAM[addra];
end
// The following code generates HIGH_PERFORMANCE (use output register) or LOW_LATENCY (no output register)
// Any RAM_PERFORMANCE value other than "LOW_LATENCY" selects the output-register branch.
generate
if (RAM_PERFORMANCE == "LOW_LATENCY") begin: no_output_register
// The following is a 1 clock cycle read latency at the cost of a longer clock-to-out timing
// Note: rsta and regcea are not used in this branch.
assign douta = ram_data;
end else begin: output_register
// The following is a 2 clock cycle read latency with improved clock-to-out timing
reg [RAM_WIDTH-1:0] douta_reg = {RAM_WIDTH{1'b0}};
// Synchronous output register: rsta clears it and takes priority over regcea;
// otherwise it loads ram_data only while regcea is high.
always @(posedge clka)
if (rsta)
douta_reg <= {RAM_WIDTH{1'b0}};
else if (regcea)
douta_reg <= ram_data;
assign douta = douta_reg;
end
endgenerate
// The following function calculates the address width based on specified RAM depth
// It counts how many right shifts it takes for the argument to reach zero, i.e. the number
// of bits needed to represent the argument. The address port passes RAM_DEPTH-1 (the highest address).
function integer clogb2;
input integer depth;
for (clogb2=0; depth>0; clogb2=clogb2+1)
depth = depth >> 1;
endfunction
endmodule
// The following is an instantiation template for xilinx_single_port_ram_read_first
/*
// Xilinx Single Port Read First RAM
xilinx_single_port_ram_read_first #(
.RAM_WIDTH(18), // Specify RAM data width
.RAM_DEPTH(1024), // Specify RAM depth (number of entries)
.RAM_PERFORMANCE("HIGH_PERFORMANCE"), // Select "HIGH_PERFORMANCE" or "LOW_LATENCY"
.INIT_FILE(`FPATH(data.mem)) // Specify name/location of RAM initialization file if using one (leave blank if not)
) your_instance_name (
.addra(addra), // Address bus, width determined from RAM_DEPTH
.dina(dina), // RAM input data, width determined from RAM_WIDTH
.clka(clka), // Clock
.wea(wea), // Write enable
.ena(ena), // RAM Enable, for additional power savings, disable port when not in use
.rsta(rsta), // Output reset (does not affect memory contents)
.regcea(regcea), // Output register enable
.douta(douta) // RAM output data, width determined from RAM_WIDTH
);
*/

View File

@ -683,8 +683,9 @@ class BlockMemoryCore:
return inst.get_hdl()
def hdl_def(self):
return VerilogManipulator("block_memory_tmpl.v").get_hdl()
block_memory = VerilogManipulator("block_memory.v").get_hdl()
dual_port_bram = VerilogManipulator("dual_port_bram.v").get_hdl()
return block_memory + "\n" + dual_port_bram
def hdl_top_level_ports(self):
if not self.expose_port: