fix pipelining in video_sprite example
This commit is contained in:
parent
153ae7e3df
commit
d8eeb65b8f
8
Makefile
8
Makefile
|
|
@ -81,7 +81,13 @@ lut_ram_tb:
|
|||
|
||||
examples: icestick nexys_a7
|
||||
|
||||
nexys_a7: nexys_a7_io_core nexys_a7_logic_analyzer nexys_a7_lut_ram
|
||||
nexys_a7: nexys_a7_video_sprite nexys_a7_io_core nexys_a7_logic_analyzer nexys_a7_lut_ram
|
||||
|
||||
nexys_a7_video_sprite:
|
||||
cd examples/nexys_a7/video_sprite; \
|
||||
manta gen manta.yaml src/manta.v; \
|
||||
mkdir -p obj/; \
|
||||
python3 lab-bc.py
|
||||
|
||||
nexys_a7_io_core:
|
||||
cd examples/nexys_a7/io_core/; \
|
||||
|
|
|
|||
|
|
@ -1,60 +0,0 @@
|
|||
|
||||
// Xilinx True Dual Port RAM, Read First, Dual Clock
|
||||
// This code implements a parameterizable true dual port memory (both ports can read and write).
|
||||
// The behavior of this RAM is when data is written, the prior memory contents at the write
|
||||
// address are presented on the output port. If the output data is
|
||||
// not needed during writes or the last read value is desired to be retained,
|
||||
// it is suggested to use a no change RAM as it is more power efficient.
|
||||
// If a reset or enable is not necessary, it may be tied off or removed from the code.
|
||||
|
||||
// Modified from the xilinx_true_dual_port_read_first_2_clock_ram verilog language template.
|
||||
|
||||
module dual_port_bram #(
|
||||
parameter RAM_WIDTH = 0,
|
||||
parameter RAM_DEPTH = 0
|
||||
) (
|
||||
input wire [$clog2(RAM_DEPTH-1)-1:0] addra,
|
||||
input wire [$clog2(RAM_DEPTH-1)-1:0] addrb,
|
||||
input wire [RAM_WIDTH-1:0] dina,
|
||||
input wire [RAM_WIDTH-1:0] dinb,
|
||||
input wire clka,
|
||||
input wire clkb,
|
||||
input wire wea,
|
||||
input wire web,
|
||||
output wire [RAM_WIDTH-1:0] douta,
|
||||
output wire [RAM_WIDTH-1:0] doutb
|
||||
);
|
||||
|
||||
// The following code either initializes the memory values to a specified file or to all zeros to match hardware
|
||||
generate
|
||||
integer i;
|
||||
initial begin
|
||||
for (i = 0; i < RAM_DEPTH; i = i + 1)
|
||||
BRAM[i] = {RAM_WIDTH{1'b0}};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
reg [RAM_WIDTH-1:0] BRAM [RAM_DEPTH-1:0];
|
||||
reg [RAM_WIDTH-1:0] ram_data_a = {RAM_WIDTH{1'b0}};
|
||||
reg [RAM_WIDTH-1:0] ram_data_b = {RAM_WIDTH{1'b0}};
|
||||
|
||||
always @(posedge clka) begin
|
||||
if (wea) BRAM[addra] <= dina;
|
||||
ram_data_a <= BRAM[addra];
|
||||
end
|
||||
|
||||
always @(posedge clkb) begin
|
||||
if (web) BRAM[addrb] <= dinb;
|
||||
ram_data_b <= BRAM[addrb];
|
||||
end
|
||||
|
||||
// Add a 2 clock cycle read latency to improve clock-to-out timing
|
||||
reg [RAM_WIDTH-1:0] douta_reg = {RAM_WIDTH{1'b0}};
|
||||
reg [RAM_WIDTH-1:0] doutb_reg = {RAM_WIDTH{1'b0}};
|
||||
|
||||
always @(posedge clka) douta_reg <= ram_data_a;
|
||||
always @(posedge clkb) doutb_reg <= ram_data_b;
|
||||
|
||||
assign douta = douta_reg;
|
||||
assign doutb = doutb_reg;
|
||||
endmodule
|
||||
|
|
@ -34,6 +34,26 @@
|
|||
.vsync_out(vsync),
|
||||
.blank_out(blank));
|
||||
|
||||
// VGA Pipelining
|
||||
reg[1:0][10:0] hcount_pipe;
|
||||
reg[1:0][10:0] vcount_pipe;
|
||||
reg[1:0] hsync_pipe;
|
||||
reg[1:0] vsync_pipe;
|
||||
reg[1:0] blank_pipe;
|
||||
|
||||
always_ff @(posedge clk_65mhz)begin
|
||||
hcount_pipe[0] <= hcount;
|
||||
vcount_pipe[0] <= vcount;
|
||||
hsync_pipe[0] <= hsync;
|
||||
vsync_pipe[0] <= vsync;
|
||||
for (int i=1; i<4; i = i+1)begin
|
||||
hcount_pipe[i] <= hcount_pipe[i-1];
|
||||
vcount_pipe[i] <= vcount_pipe[i-1];
|
||||
hsync_pipe[i] <= hsync_pipe[i-1];
|
||||
vsync_pipe[i] <= vsync_pipe[i-1];
|
||||
end
|
||||
end
|
||||
|
||||
localparam WIDTH = 128;
|
||||
localparam HEIGHT = 128;
|
||||
|
||||
|
|
@ -45,8 +65,8 @@
|
|||
assign image_addr = (hcount - X) + ((vcount - Y) * WIDTH);
|
||||
|
||||
logic in_sprite;
|
||||
assign in_sprite = ((hcount >= X && hcount < (X + WIDTH)) &&
|
||||
(vcount >= Y && vcount < (Y + HEIGHT)));
|
||||
assign in_sprite = ((hcount_pipe[1] >= X && hcount_pipe[1] < (X + WIDTH)) &&
|
||||
(vcount_pipe[1] >= Y && vcount_pipe[1] < (Y + HEIGHT)));
|
||||
|
||||
manta manta_inst (
|
||||
.clk(clk_65mhz),
|
||||
|
|
@ -65,13 +85,12 @@
|
|||
assign color = in_sprite ? sprite_color : 12'h0;
|
||||
|
||||
// the following lines are required for the Nexys4 VGA circuit - do not change
|
||||
assign vga_r = ~blank ? color[11:8]: 0;
|
||||
assign vga_g = ~blank ? color[7:4] : 0;
|
||||
assign vga_b = ~blank ? color[3:0] : 0;
|
||||
|
||||
assign vga_hs = ~hsync;
|
||||
assign vga_vs = ~vsync;
|
||||
assign vga_r = ~blank_pipe[1] ? color[11:8]: 0;
|
||||
assign vga_g = ~blank_pipe[1] ? color[7:4] : 0;
|
||||
assign vga_b = ~blank_pipe[1] ? color[3:0] : 0;
|
||||
|
||||
assign vga_hs = ~hsync_pipe[1];
|
||||
assign vga_vs = ~vsync_pipe[1];
|
||||
|
||||
// debug
|
||||
assign led = manta_inst.brx_image_mem_addr;
|
||||
|
|
|
|||
|
|
@ -1,102 +0,0 @@
|
|||
|
||||
// Xilinx Single Port Read First RAM
|
||||
// This code implements a parameterizable single-port read-first memory where when data
|
||||
// is written to the memory, the output reflects the prior contents of the memory location.
|
||||
// If the output data is not needed during writes or the last read value is desired to be
|
||||
// retained, it is suggested to set WRITE_MODE to NO_CHANGE as it is more power efficient.
|
||||
// If a reset or enable is not necessary, it may be tied off or removed from the code.
|
||||
// Modify the parameters for the desired RAM characteristics.
|
||||
|
||||
module xilinx_single_port_ram_read_first #(
|
||||
parameter RAM_WIDTH = 18, // Specify RAM data width
|
||||
parameter RAM_DEPTH = 1024, // Specify RAM depth (number of entries)
|
||||
parameter RAM_PERFORMANCE = "HIGH_PERFORMANCE", // Select "HIGH_PERFORMANCE" or "LOW_LATENCY"
|
||||
parameter INIT_FILE = "" // Specify name/location of RAM initialization file if using one (leave blank if not)
|
||||
) (
|
||||
input [clogb2(RAM_DEPTH-1)-1:0] addra, // Address bus, width determined from RAM_DEPTH
|
||||
input [RAM_WIDTH-1:0] dina, // RAM input data
|
||||
input clka, // Clock
|
||||
input wea, // Write enable
|
||||
input ena, // RAM Enable, for additional power savings, disable port when not in use
|
||||
input rsta, // Output reset (does not affect memory contents)
|
||||
input regcea, // Output register enable
|
||||
output [RAM_WIDTH-1:0] douta // RAM output data
|
||||
);
|
||||
|
||||
reg [RAM_WIDTH-1:0] BRAM [RAM_DEPTH-1:0];
|
||||
reg [RAM_WIDTH-1:0] ram_data = {RAM_WIDTH{1'b0}};
|
||||
|
||||
// The following code either initializes the memory values to a specified file or to all zeros to match hardware
|
||||
generate
|
||||
if (INIT_FILE != "") begin: use_init_file
|
||||
initial
|
||||
$readmemh(INIT_FILE, BRAM, 0, RAM_DEPTH-1);
|
||||
end else begin: init_bram_to_zero
|
||||
integer ram_index;
|
||||
initial
|
||||
for (ram_index = 0; ram_index < RAM_DEPTH; ram_index = ram_index + 1)
|
||||
BRAM[ram_index] = {RAM_WIDTH{1'b0}};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
always @(posedge clka)
|
||||
if (ena) begin
|
||||
if (wea)
|
||||
BRAM[addra] <= dina;
|
||||
ram_data <= BRAM[addra];
|
||||
end
|
||||
|
||||
// The following code generates HIGH_PERFORMANCE (use output register) or LOW_LATENCY (no output register)
|
||||
generate
|
||||
if (RAM_PERFORMANCE == "LOW_LATENCY") begin: no_output_register
|
||||
|
||||
// The following is a 1 clock cycle read latency at the cost of a longer clock-to-out timing
|
||||
assign douta = ram_data;
|
||||
|
||||
end else begin: output_register
|
||||
|
||||
// The following is a 2 clock cycle read latency with improved clock-to-out timing
|
||||
|
||||
reg [RAM_WIDTH-1:0] douta_reg = {RAM_WIDTH{1'b0}};
|
||||
|
||||
always @(posedge clka)
|
||||
if (rsta)
|
||||
douta_reg <= {RAM_WIDTH{1'b0}};
|
||||
else if (regcea)
|
||||
douta_reg <= ram_data;
|
||||
|
||||
assign douta = douta_reg;
|
||||
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// The following function calculates the address width based on specified RAM depth
|
||||
function integer clogb2;
|
||||
input integer depth;
|
||||
for (clogb2=0; depth>0; clogb2=clogb2+1)
|
||||
depth = depth >> 1;
|
||||
endfunction
|
||||
|
||||
endmodule
|
||||
|
||||
// The following is an instantiation template for xilinx_single_port_ram_read_first
|
||||
/*
|
||||
// Xilinx Single Port Read First RAM
|
||||
xilinx_single_port_ram_read_first #(
|
||||
.RAM_WIDTH(18), // Specify RAM data width
|
||||
.RAM_DEPTH(1024), // Specify RAM depth (number of entries)
|
||||
.RAM_PERFORMANCE("HIGH_PERFORMANCE"), // Select "HIGH_PERFORMANCE" or "LOW_LATENCY"
|
||||
.INIT_FILE(`FPATH(data.mem)) // Specify name/location of RAM initialization file if using one (leave blank if not)
|
||||
) your_instance_name (
|
||||
.addra(addra), // Address bus, width determined from RAM_DEPTH
|
||||
.dina(dina), // RAM input data, width determined from RAM_WIDTH
|
||||
.clka(clka), // Clock
|
||||
.wea(wea), // Write enable
|
||||
.ena(ena), // RAM Enable, for additional power savings, disable port when not in use
|
||||
.rsta(rsta), // Output reset (does not affect memory contents)
|
||||
.regcea(regcea), // Output register enable
|
||||
.douta(douta) // RAM output data, width determined from RAM_WIDTH
|
||||
);
|
||||
*/
|
||||
|
||||
|
||||
|
|
@ -683,8 +683,9 @@ class BlockMemoryCore:
|
|||
return inst.get_hdl()
|
||||
|
||||
def hdl_def(self):
|
||||
return VerilogManipulator("block_memory_tmpl.v").get_hdl()
|
||||
|
||||
block_memory = VerilogManipulator("block_memory.v").get_hdl()
|
||||
dual_port_bram = VerilogManipulator("dual_port_bram.v").get_hdl()
|
||||
return block_memory + "\n" + dual_port_bram
|
||||
|
||||
def hdl_top_level_ports(self):
|
||||
if not self.expose_port:
|
||||
|
|
|
|||
Loading…
Reference in New Issue