diff --git a/techlibs/rapidflex/Makefile.inc b/techlibs/rapidflex/Makefile.inc new file mode 100644 index 000000000..cc090bd33 --- /dev/null +++ b/techlibs/rapidflex/Makefile.inc @@ -0,0 +1,54 @@ +# cell lib generation +techlibs/rapidflex/alkaidC/cell_sim_pcnt.v: techlibs/rapidflex/util/pcnt_cell_sim_gen.py + $(P) mkdir -p $(dir $@) && $(PYTHON_EXECUTABLE) $^ --file $@ + +CXXFLAGS += -Itechlibs/rapidflex/src/ +OBJS += techlibs/rapidflex/src/synth_rapidflex.o +OBJS += techlibs/rapidflex/src/clock_buffer_cmd.o + +# -------------------------------------- + +OBJS += techlibs/rapidflex/src/rf_new_dsp.o +OBJS += techlibs/rapidflex/src/rf_dsp_mad.o +GENFILES += techlibs/rapidflex/src/rf_new_dsp_pm.h techlibs/rapidflex/src/rf_dsp_mad_pm.h techlibs/rapidflex/alkaidC/cell_sim_pcnt.v +techlibs/rapidflex/src/rf_new_dsp.o: techlibs/rapidflex/src/rf_new_dsp_pm.h +techlibs/rapidflex/src/rf_dsp_mad.o: techlibs/rapidflex/src/rf_dsp_mad_pm.h +# add_extra_objs only expands its first argument: pass both generated headers as one space-separated list +$(eval $(call add_extra_objs,techlibs/rapidflex/src/rf_new_dsp_pm.h techlibs/rapidflex/src/rf_dsp_mad_pm.h)) + +# -------------------------------------- + +$(eval $(call add_share_file,share/rapidflex/common,techlibs/rapidflex/common/cells_sim.v)) + +# --------------AlkaidC cell lib ------------------------ +$(eval $(call add_share_file,share/rapidflex/alkaidC,techlibs/rapidflex/alkaidC/arith_map.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidC,techlibs/rapidflex/alkaidC/ccb_inst_code.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidC,techlibs/rapidflex/alkaidC/cell_sim_arith.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidC,techlibs/rapidflex/alkaidC/cell_sim_ccb.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidC,techlibs/rapidflex/alkaidC/cell_sim_ff.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidC,techlibs/rapidflex/alkaidC/cell_sim.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidC,techlibs/rapidflex/alkaidC/dff_map.v)) +$(eval $(call 
add_gen_share_file,share/rapidflex/alkaidC,techlibs/rapidflex/alkaidC/cell_sim_pcnt.v)) + +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/arith_map.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/bram_map.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/bram.txt)) +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/cell_sim_arith.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/cell_sim_bram.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/cell_sim_dsp.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/cell_sim_ff.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/cell_sim.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/dff_map.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidL,techlibs/rapidflex/alkaidL/dsp_map.v)) + +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/arith_map.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/bram_map.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/bram.txt)) +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/cell_sim_arith.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/cell_sim_bram.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/cell_sim_dsp.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/cell_sim_new_dsp.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/cell_sim_ff.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/cell_sim.v)) +$(eval $(call 
add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/dff_map.v)) +$(eval $(call add_share_file,share/rapidflex/alkaidT,techlibs/rapidflex/alkaidT/dsp_map.v)) diff --git a/techlibs/rapidflex/alkaidC/.gitignore b/techlibs/rapidflex/alkaidC/.gitignore new file mode 100644 index 000000000..26149bb8e --- /dev/null +++ b/techlibs/rapidflex/alkaidC/.gitignore @@ -0,0 +1 @@ +/cell_sim_pcnt.v diff --git a/techlibs/rapidflex/alkaidC/arith_map.v b/techlibs/rapidflex/alkaidC/arith_map.v new file mode 100644 index 000000000..d54433654 --- /dev/null +++ b/techlibs/rapidflex/alkaidC/arith_map.v @@ -0,0 +1,154 @@ +// Arithmetic units: adder +// Adapt from: https://github.com/chipsalliance/yosys-f4pga-plugins/blob/0ad1af26a29243a9e76379943d735e119dcd0cc6/ql-qlf-plugin/qlf_k6n10/cells_sim.v +// Many thanks to F4PGA for their contribution + +(* techmap_celltype = "$alu" *) +module _80_quicklogic_alu (A, B, CI, BI, X, Y, CO); + + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 1; + parameter B_WIDTH = 1; + parameter Y_WIDTH = 1; + + input [A_WIDTH-1:0] A; + input [B_WIDTH-1:0] B; + output [Y_WIDTH-1:0] X, Y; + + input CI, BI; + output [Y_WIDTH-1:0] CO; + + // The max. 
number of adders we can support in AlkaidS is (12x2-1)x4x16 = 1472 + // Fail when resource limit exceeds + // Also fail when a low utilization rate is detected + // Originally prefer to defer carry mapping when < 2-bit adder is detected + // Due to a bug found in scalable seq detector, the bound is increased to 4-bit adder + wire _TECHMAP_FAIL_ = Y_WIDTH > 1472 || Y_WIDTH < 4; + generate + if ((A_WIDTH == 0 || B_WIDTH == 0) && Y_WIDTH > 0) begin + wire _TECHMAP_FAIL_ = 1; + end + endgenerate + wire [1024:0] _TECHMAP_DO_ = "splitnets CARRY; clean"; + localparam Y_COL_WIDTH = 96 - 3; + localparam Y_MAX_WIDTH = 12 - 3; + + (* force_downto *) + wire [Y_WIDTH-1:0] A_buf, B_buf; + \$pos #(.A_SIGNED(A_SIGNED), .A_WIDTH(A_WIDTH), .Y_WIDTH(Y_WIDTH)) A_conv (.A(A), .Y(A_buf)); + \$pos #(.A_SIGNED(B_SIGNED), .A_WIDTH(B_WIDTH), .Y_WIDTH(Y_WIDTH)) B_conv (.A(B), .Y(B_buf)); + + (* force_downto *) + wire [Y_WIDTH-1:0] AA = A_buf; + (* force_downto *) + wire [Y_WIDTH-1:0] BB = BI ? ~B_buf : B_buf; + wire [Y_WIDTH: 0] CARRY; + + assign CO[Y_WIDTH-1:0] = CARRY[Y_WIDTH:1]; + genvar i; + generate if (Y_WIDTH < Y_COL_WIDTH) begin + wire CARRY_end_buf; + wire [1024:0] _TECHMAP_DO_ = "insbuf CARRY[Y_WIDTH] CARRY_end_buf"; + _fpga_adder intermediate_adder ( + .cin ( ), + .cout (CARRY[0]), + .a (CI ), + .b (CI ), + .sumout ( ) + ); + + _fpga_adder first_adder ( + .cin (CARRY[0]), + .cout (CARRY[1]), + .a (AA[0] ), + .b (BB[0] ), + .sumout (Y[0] ) + ); + + _fpga_adder pretaill_adder ( + .cin (CARRY[Y_WIDTH-1] ), + .cout (CARRY_end_buf), + .a (AA[Y_WIDTH-1] ), + .b (BB[Y_WIDTH-1] ), + .sumout (Y[Y_WIDTH-1] ) + ); + + + _fpga_adder tail_adder ( + .cin (CARRY_end_buf), + .cout (), + .a (1'b0), + .b (1'b0), + .sumout (CARRY[Y_WIDTH]) + ); + + generate for (i = 1; i < Y_WIDTH-1 ; i = i+1) begin:gen3 + _fpga_adder my_adder ( + .cin (CARRY[i] ), + .cout (CARRY[i+1]), + .a (AA[i] ), + .b (BB[i] ), + .sumout (Y[i] ) + ); + end endgenerate + end else begin + generate for (i = 0; i < Y_WIDTH ; i = 
i+1) begin:gen4 + // Due to VPR limitations regarding IO connexion to carry chain, + // we generate the carry chain input signal using an intermediate adder + // since we can connect a & b from io pads, but not cin & cout + if (i == 0) begin + _fpga_adder intermediate_adder ( + .cin ( ), + .cout (CARRY[0]), + .a (CI ), + .b (CI ), + .sumout ( ) + ); + + _fpga_adder first_adder ( + .cin (CARRY[0]), + .cout (CARRY[1]), + .a (AA[0] ), + .b (BB[0] ), + .sumout (Y[0] ) + ); + end else if (i % (Y_MAX_WIDTH + 1) == 0) begin + wire CARRY_end_buf; + wire CARRY_start_buf; + wire [1024:0] _TECHMAP_DO_ = "insbuf CARRY[i+1] CARRY_end_buf; insbuf CARRY_end_buf CARRY_start_buf"; + _fpga_adder tail_adder ( + .cin (CARRY[i]), + .cout (), + .a (1'b0), + .b (1'b0), + .sumout (CARRY_end_buf) + ); + + _fpga_adder intermediate_adder ( + .cin ( ), + .cout (CARRY_start_buf), + .a (CARRY_end_buf), + .b (1'b1), + .sumout ( ) + ); + + _fpga_adder first_adder ( + .cin (CARRY_start_buf), + .cout (CARRY[i+1]), + .a (AA[i] ), + .b (BB[i] ), + .sumout (Y[i] ) + ); + end else begin + _fpga_adder my_adder ( + .cin (CARRY[i] ), + .cout (CARRY[i+1]), + .a (AA[i] ), + .b (BB[i] ), + .sumout (Y[i] ) + ); + end + end endgenerate + end endgenerate + assign X = AA ^ BB; +endmodule diff --git a/techlibs/rapidflex/alkaidC/ccb_inst_code.v b/techlibs/rapidflex/alkaidC/ccb_inst_code.v new file mode 100644 index 000000000..99c34f8d8 --- /dev/null +++ b/techlibs/rapidflex/alkaidC/ccb_inst_code.v @@ -0,0 +1,29 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File name: define.v +// Descriptions: This file is the opcode for ccb tile instructions +// Author: Yihong +// Date: 2025/8/14 +// Revision: 0.0.1 +// Revision History: +// V0.0.1 - 2025/8/14 initial release +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//Operations +`define ADD 4'b1000 +`define SUB 4'b1001 
+`define PUSH 4'b1010 +`define PULL 4'b1011 +`define MOV 4'b1100 +`define MOV_T1 4'b1101 +`define MOV_T2 4'b1110 +`define INTR 4'b1111 +`define NA 10'h000 + +// SRC/DES +`define R0 3'b000 +`define R1 3'b001 +`define R2 3'b010 +`define R3 3'b011 +`define C0 3'b100 +`define C1 3'b101 +`define C2 3'b110 \ No newline at end of file diff --git a/techlibs/rapidflex/alkaidC/cell_sim.v b/techlibs/rapidflex/alkaidC/cell_sim.v new file mode 100644 index 000000000..8680b4c47 --- /dev/null +++ b/techlibs/rapidflex/alkaidC/cell_sim.v @@ -0,0 +1,8 @@ +//------------------------------------------------- +// Include all the primitives +//------------------------------------------------- +`include "cell_sim_arith.v" +`include "cell_sim_ff.v" +`include "cell_sim_pcnt.v" +`include "ccb_inst_code.v" +`include "cell_sim_ccb.v" diff --git a/techlibs/rapidflex/alkaidC/cell_sim_arith.v b/techlibs/rapidflex/alkaidC/cell_sim_arith.v new file mode 100644 index 000000000..5a325e5b6 --- /dev/null +++ b/techlibs/rapidflex/alkaidC/cell_sim_arith.v @@ -0,0 +1,15 @@ +//--------------------------------------- +// 1-bit adder +//--------------------------------------- +(* abc9_box, lib_whitebox *) +module _fpga_adder( + output sumout, + output cout, + input a, + input b, + input cin +); + assign sumout = a ^ b ^ cin; + assign cout = (a & b) | ((a | b) & cin); + +endmodule diff --git a/techlibs/rapidflex/alkaidC/cell_sim_ccb.v b/techlibs/rapidflex/alkaidC/cell_sim_ccb.v new file mode 100644 index 000000000..c8b454fdf --- /dev/null +++ b/techlibs/rapidflex/alkaidC/cell_sim_ccb.v @@ -0,0 +1,101 @@ +//------------------------------------------------- +// Counter Configuration Block (CCB) Primitives +//------------------------------------------------- +`default_nettype none + +module ccb # ( + // Location constraints + parameter FPGA_LOC_X = 0, + parameter FPGA_LOC_Y = 0, + parameter FPGA_LOC_Z = 0, + // Event0 triggered instructions + parameter [0:9] EVENT0_INST0 = `NA, + parameter [0:9] EVENT0_INST1 = 
`NA, + parameter [0:9] EVENT0_INST2 = `NA, + parameter [0:9] EVENT0_INST3 = `NA, + parameter [0:9] EVENT0_INST4 = `NA, + parameter [0:9] EVENT0_INST5 = `NA, + parameter [0:9] EVENT0_INST6 = `NA, + parameter [0:9] EVENT0_INST7 = `NA, + // Event1 triggered instructions + parameter [0:9] EVENT1_INST0 = `NA, + parameter [0:9] EVENT1_INST1 = `NA, + parameter [0:9] EVENT1_INST2 = `NA, + parameter [0:9] EVENT1_INST3 = `NA, + parameter [0:9] EVENT1_INST4 = `NA, + parameter [0:9] EVENT1_INST5 = `NA, + parameter [0:9] EVENT1_INST6 = `NA, + parameter [0:9] EVENT1_INST7 = `NA, + // Event2 triggered instructions + parameter [0:9] EVENT2_INST0 = `NA, + parameter [0:9] EVENT2_INST1 = `NA, + parameter [0:9] EVENT2_INST2 = `NA, + parameter [0:9] EVENT2_INST3 = `NA, + parameter [0:9] EVENT2_INST4 = `NA, + parameter [0:9] EVENT2_INST5 = `NA, + parameter [0:9] EVENT2_INST6 = `NA, + parameter [0:9] EVENT2_INST7 = `NA, + // Event3 triggered instructions + parameter [0:9] EVENT3_INST0 = `NA, + parameter [0:9] EVENT3_INST1 = `NA, + parameter [0:9] EVENT3_INST2 = `NA, + parameter [0:9] EVENT3_INST3 = `NA, + parameter [0:9] EVENT3_INST4 = `NA, + parameter [0:9] EVENT3_INST5 = `NA, + parameter [0:9] EVENT3_INST6 = `NA, + parameter [0:9] EVENT3_INST7 = `NA, + // Initial register values, R0-R3 + parameter [0:31] R0 = {32{1'b0}}, + parameter [0:31] R1 = {32{1'b0}}, + parameter [0:31] R2 = {32{1'b0}}, + parameter [0:31] R3 = {32{1'b0}}, + // FIFO initial values + parameter [0:31] FIFO_INIT0 = {32{1'b0}}, + parameter [0:31] FIFO_INIT1 = {32{1'b0}}, + parameter [0:31] FIFO_INIT2 = {32{1'b0}}, + parameter [0:31] FIFO_INIT3 = {32{1'b0}}, + // PCNT initial values + parameter [0:31] LOAD_VAL_PCNT0 = {32{1'b0}}, + parameter [0:31] LOAD_VAL_PCNT1 = {32{1'b0}}, + parameter [0:31] LOAD_VAL_PCNT2 = {32{1'b0}}, + parameter [0:31] MATCH0_REF_PCNT0 = {32{1'b0}}, + parameter [0:31] MATCH0_REF_PCNT1 = {32{1'b0}}, + parameter [0:31] MATCH0_REF_PCNT2 = {32{1'b0}}, + parameter [0:31] MATCH1_REF_PCNT0 = {32{1'b0}}, 
+ parameter [0:31] MATCH1_REF_PCNT1 = {32{1'b0}}, + parameter [0:31] MATCH1_REF_PCNT2 = {32{1'b0}} +)( + input ccb_clk_i, + input ccb_rst_ni, + input [0:3] ccb_event_i, + input [0:5] pcnt_event_i, + output [0:31] match0_ref0_o, + output [0:31] match1_ref0_o, + output [0:31] load_val0_o, + output [0:31] match0_ref1_o, + output [0:31] match1_ref1_o, + output [0:31] load_val1_o, + output [0:31] match0_ref2_o, + output [0:31] match1_ref2_o, + output [0:31] load_val2_o + +); + + +// Dummy +assign match0_ref0_o = 0; +assign match1_ref0_o = 0; +assign load_val0_o = 0; + +assign match0_ref1_o = 0; +assign match1_ref1_o = 0; +assign load_val1_o = 0; + +assign match0_ref2_o = 0; +assign match1_ref2_o = 0; +assign load_val2_o = 0; + + +endmodule + +`default_nettype wire diff --git a/techlibs/rapidflex/alkaidC/cell_sim_ff.v b/techlibs/rapidflex/alkaidC/cell_sim_ff.v new file mode 100644 index 000000000..726987e16 --- /dev/null +++ b/techlibs/rapidflex/alkaidC/cell_sim_ff.v @@ -0,0 +1,586 @@ +//----------------------------- +// Rising-edge D-type flip-flop +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dff( + output reg Q, + input D, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + Q <= D; + 1'b1: + always @(negedge C) + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or posedge R) + 
if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffs( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high synchronous 
reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffs( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] 
IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffn( + output reg Q, + input D, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + Q <= D; + 1'b1: + always @(negedge C) + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffnr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffns( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop 
with active-low asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffnrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffnsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffnr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffns( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* 
invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffnrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffnsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Two-stage D-type flip-flop chain with active-high asynchronous reset +// 1st stage (dffnr) is negative-edge triggered +// 2nd stage (dffr) is positive-edge triggered +//----------------------------- +// Do not allow ABC or other optimization to touch the ff! 
+//(* abc9_flop, lib_whitebox *) +module dffnr_dffr( + output Q, + input D, + input R, + input C +); + +wire Q0; + + dffnr FF_0 (.D(D), .C(C), .R(R), .Q(Q0)); + dffr FF_1 (.D(Q0), .C(C), .R(R), .Q(Q)); + +endmodule + +//----------------------------- +// Two-bit D-type flip-flop with active-high asynchronous reset +// 1st stage is positive-edge triggered +// 2nd stage is negative-edge triggered +//----------------------------- +// Do not allow ABC or other optimization to touch the ff! +//(* abc9_flop, lib_whitebox *) +module dffr_dffnr( + output Q, + input D, + input R, + input C +); + +wire Q0; + + dffr FF_0 (.D(D), .C(C), .R(R), .Q(Q0)); + dffnr FF_1 (.D(Q0), .C(C), .R(R), .Q(Q)); + +endmodule + diff --git a/techlibs/rapidflex/alkaidC/dff_map.v b/techlibs/rapidflex/alkaidC/dff_map.v new file mode 100644 index 000000000..f6d01a4ed --- /dev/null +++ b/techlibs/rapidflex/alkaidC/dff_map.v @@ -0,0 +1,177 @@ +// Rising edge DFF +module \$_DFF_P_ (D, C, Q); + input D; + input C; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dff _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C)); +endmodule + +// Rising edge DFF with async active-high reset +module \$_DFF_PP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Rising edge DFF with async active-high set +module \$_DFF_PP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffs _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Rising edge DFF with async active-low reset +module \$_DFF_PN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Rising edge DFF with async active-low set +module \$_DFF_PN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffsn 
_TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule + +// Rising edge DFF with sync active-high reset +module \$_SDFF_PP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Rising edge DFF with sync active-high set +module \$_SDFF_PP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffs _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Rising edge DFF with sync active-low reset +module \$_SDFF_PN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Rising edge DFF with sync active-low set +module \$_SDFF_PN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule + +// Falling edge DFF +module \$_DFF_N_ (D, C, Q); + input D; + input C; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C)); +endmodule + +// Falling edge DFF with async active-high reset +module \$_DFF_NP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffnr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Falling edge DFF with async active-high set +module \$_DFF_NP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffns _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Falling edge DFF with async active-low reset +module \$_DFF_NN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffnrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Falling edge DFF with async active-low set +module \$_DFF_NN1_ (D, C, R, Q); + 
input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffnsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule + +// Falling edge DFF with sync active-high reset +module \$_SDFF_NP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffnr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Falling edge DFF with sync active-high set +module \$_SDFF_NP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffns _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Falling edge DFF with sync active-low reset +module \$_SDFF_NN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffnrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Falling edge DFF with sync active-low set +module \$_SDFF_NN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffnsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule diff --git a/techlibs/rapidflex/alkaidL/arith_map.v b/techlibs/rapidflex/alkaidL/arith_map.v new file mode 100644 index 000000000..d54433654 --- /dev/null +++ b/techlibs/rapidflex/alkaidL/arith_map.v @@ -0,0 +1,154 @@ +// Arithmetic units: adder +// Adapt from: https://github.com/chipsalliance/yosys-f4pga-plugins/blob/0ad1af26a29243a9e76379943d735e119dcd0cc6/ql-qlf-plugin/qlf_k6n10/cells_sim.v +// Many thanks to F4PGA for their contribution + +(* techmap_celltype = "$alu" *) +module _80_quicklogic_alu (A, B, CI, BI, X, Y, CO); + + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 1; + parameter B_WIDTH = 1; + parameter Y_WIDTH = 1; + + input [A_WIDTH-1:0] A; + input [B_WIDTH-1:0] B; + output [Y_WIDTH-1:0] X, Y; + + input CI, BI; + output [Y_WIDTH-1:0] CO; + + // The max. 
number of adders we can support in AlkaidS is (12x2-1)x4x16 = 1472 + // Fail when resource limit exceeds + // Also fail when a low utilization rate is detected + // Originally prefer to defer carry mapping when < 2-bit adder is detected + // Due to a bug found in scalable seq detector, the bound is increased to 4-bit adder + wire _TECHMAP_FAIL_ = Y_WIDTH > 1472 || Y_WIDTH < 4; + generate + if ((A_WIDTH == 0 || B_WIDTH == 0) && Y_WIDTH > 0) begin + wire _TECHMAP_FAIL_ = 1; + end + endgenerate + wire [1024:0] _TECHMAP_DO_ = "splitnets CARRY; clean"; + localparam Y_COL_WIDTH = 96 - 3; + localparam Y_MAX_WIDTH = 12 - 3; + + (* force_downto *) + wire [Y_WIDTH-1:0] A_buf, B_buf; + \$pos #(.A_SIGNED(A_SIGNED), .A_WIDTH(A_WIDTH), .Y_WIDTH(Y_WIDTH)) A_conv (.A(A), .Y(A_buf)); + \$pos #(.A_SIGNED(B_SIGNED), .A_WIDTH(B_WIDTH), .Y_WIDTH(Y_WIDTH)) B_conv (.A(B), .Y(B_buf)); + + (* force_downto *) + wire [Y_WIDTH-1:0] AA = A_buf; + (* force_downto *) + wire [Y_WIDTH-1:0] BB = BI ? ~B_buf : B_buf; + wire [Y_WIDTH: 0] CARRY; + + assign CO[Y_WIDTH-1:0] = CARRY[Y_WIDTH:1]; + genvar i; + generate if (Y_WIDTH < Y_COL_WIDTH) begin + wire CARRY_end_buf; + wire [1024:0] _TECHMAP_DO_ = "insbuf CARRY[Y_WIDTH] CARRY_end_buf"; + _fpga_adder intermediate_adder ( + .cin ( ), + .cout (CARRY[0]), + .a (CI ), + .b (CI ), + .sumout ( ) + ); + + _fpga_adder first_adder ( + .cin (CARRY[0]), + .cout (CARRY[1]), + .a (AA[0] ), + .b (BB[0] ), + .sumout (Y[0] ) + ); + + _fpga_adder pretaill_adder ( + .cin (CARRY[Y_WIDTH-1] ), + .cout (CARRY_end_buf), + .a (AA[Y_WIDTH-1] ), + .b (BB[Y_WIDTH-1] ), + .sumout (Y[Y_WIDTH-1] ) + ); + + + _fpga_adder tail_adder ( + .cin (CARRY_end_buf), + .cout (), + .a (1'b0), + .b (1'b0), + .sumout (CARRY[Y_WIDTH]) + ); + + generate for (i = 1; i < Y_WIDTH-1 ; i = i+1) begin:gen3 + _fpga_adder my_adder ( + .cin (CARRY[i] ), + .cout (CARRY[i+1]), + .a (AA[i] ), + .b (BB[i] ), + .sumout (Y[i] ) + ); + end endgenerate + end else begin + generate for (i = 0; i < Y_WIDTH ; i = 
i+1) begin:gen4 + // Due to VPR limitations regarding IO connexion to carry chain, + // we generate the carry chain input signal using an intermediate adder + // since we can connect a & b from io pads, but not cin & cout + if (i == 0) begin + _fpga_adder intermediate_adder ( + .cin ( ), + .cout (CARRY[0]), + .a (CI ), + .b (CI ), + .sumout ( ) + ); + + _fpga_adder first_adder ( + .cin (CARRY[0]), + .cout (CARRY[1]), + .a (AA[0] ), + .b (BB[0] ), + .sumout (Y[0] ) + ); + end else if (i % (Y_MAX_WIDTH + 1) == 0) begin + wire CARRY_end_buf; + wire CARRY_start_buf; + wire [1024:0] _TECHMAP_DO_ = "insbuf CARRY[i+1] CARRY_end_buf; insbuf CARRY_end_buf CARRY_start_buf"; + _fpga_adder tail_adder ( + .cin (CARRY[i]), + .cout (), + .a (1'b0), + .b (1'b0), + .sumout (CARRY_end_buf) + ); + + _fpga_adder intermediate_adder ( + .cin ( ), + .cout (CARRY_start_buf), + .a (CARRY_end_buf), + .b (1'b1), + .sumout ( ) + ); + + _fpga_adder first_adder ( + .cin (CARRY_start_buf), + .cout (CARRY[i+1]), + .a (AA[i] ), + .b (BB[i] ), + .sumout (Y[i] ) + ); + end else begin + _fpga_adder my_adder ( + .cin (CARRY[i] ), + .cout (CARRY[i+1]), + .a (AA[i] ), + .b (BB[i] ), + .sumout (Y[i] ) + ); + end + end endgenerate + end endgenerate + assign X = AA ^ BB; +endmodule diff --git a/techlibs/rapidflex/alkaidL/bram.txt b/techlibs/rapidflex/alkaidL/bram.txt new file mode 100644 index 000000000..7d0da595a --- /dev/null +++ b/techlibs/rapidflex/alkaidL/bram.txt @@ -0,0 +1,18 @@ +bram $__FLEX_TDPRAM_256x36 # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 8 # Number of address bits + dbits 36 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 1 1 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_256x36 + min efficiency 0 # Only use 
this bram if >=0% of the ram bits are used + make_transp # Add external circuitry to simulate 'transparent read' if necessary +endmatch + diff --git a/techlibs/rapidflex/alkaidL/bram_map.v b/techlibs/rapidflex/alkaidL/bram_map.v new file mode 100644 index 000000000..f559f57b3 --- /dev/null +++ b/techlibs/rapidflex/alkaidL/bram_map.v @@ -0,0 +1,38 @@ +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_256x36 (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 1; + parameter [0:0] CLKPOL3 = 1; + + input CLK2; // read clock, drives rclk_i + input CLK3; // write clock, drives wclk_i + input [0:7] A1ADDR; + input A1EN; + output [0:35] A1DATA; + input [0:7] B1ADDR; + input B1EN; + input [0:35] B1DATA; + + generate + tdpram_core #( + .ADDR_WIDTH(8), + .BYTE_WIDTH(9), + .NUM_BYTES(4) + ) _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), // NOTE(review): |1 is a 1-bit 1'b1 that zero-extends across the 36-bit port - confirm the intended mask + .wen_ni (B1EN), // NOTE(review): B1EN is wired without inversion to a pin named active-low - confirm polarity + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule diff --git a/techlibs/rapidflex/alkaidL/cell_sim.v b/techlibs/rapidflex/alkaidL/cell_sim.v new file mode 100644 index 000000000..b97638d7a --- /dev/null +++ b/techlibs/rapidflex/alkaidL/cell_sim.v @@ -0,0 +1,7 @@ +//------------------------------------------------- +// Include all the primitives +//------------------------------------------------- +`include "cell_sim_arith.v" +`include "cell_sim_dsp.v" +`include "cell_sim_bram.v" +`include "cell_sim_ff.v" diff --git a/techlibs/rapidflex/alkaidL/cell_sim_arith.v b/techlibs/rapidflex/alkaidL/cell_sim_arith.v new file mode 100644 index 000000000..5a325e5b6 --- /dev/null +++ b/techlibs/rapidflex/alkaidL/cell_sim_arith.v @@ -0,0 +1,15 @@ +//--------------------------------------- +// 1-bit adder +//--------------------------------------- +(* abc9_box, lib_whitebox *) +module _fpga_adder( + output sumout, +
output cout, + input a, + input b, + input cin +); + assign sumout = a ^ b ^ cin; + assign cout = (a & b) | ((a | b) & cin); + +endmodule diff --git a/techlibs/rapidflex/alkaidL/cell_sim_bram.v b/techlibs/rapidflex/alkaidL/cell_sim_bram.v new file mode 100644 index 000000000..57a320821 --- /dev/null +++ b/techlibs/rapidflex/alkaidL/cell_sim_bram.v @@ -0,0 +1,876 @@ +//------------------------------------------------- +// Block RAM Primitives +//------------------------------------------------- + +//------------------------------------------------- +// True Dual-port RAM Core logic +// This module is written in a scalable way +// By default it is configured as 256x36 = 9k-bits +// +// IMPORTANT: Please do not use this module as a hard ip!!! +module tdpram_core (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); +// Parameters +parameter ADDR_WIDTH = 8; +parameter DEPTH = 2**ADDR_WIDTH; +parameter BYTE_WIDTH = 9; +parameter NUM_BYTES = 4; +parameter [0:0] IS_WCLK_N = 1'b0; // Indicate if the write clock is triggered at negative edge: 1 = Yes; 0 = No +parameter [0:0] IS_RCLK_N = 1'b0; // Indicate if the read clock is triggered at negative edge: 1 = Yes; 0 = No + +input ren_ni; +input wen_ni; // write enable, active low: a write happens only when this is 0 +input [0:ADDR_WIDTH-1] raddr_i; +input [0:ADDR_WIDTH-1] waddr_i; +input [0:BYTE_WIDTH*NUM_BYTES-1] bwen_ni; // per-bit write mask, active low: bit i is written only when bwen_ni[i] is 0 +input [0:BYTE_WIDTH*NUM_BYTES-1] data_i; +input wclk_i; +input rclk_i; +output [0:BYTE_WIDTH*NUM_BYTES-1] q_o; + +reg [0:NUM_BYTES*BYTE_WIDTH-1] ram[0:DEPTH-1]; +reg [0:NUM_BYTES*BYTE_WIDTH-1] q_reg; + +integer i; + +assign q_o = q_reg; + +// Initial values are all random, to mimic the actual behavior of a RAM +initial begin + for (i = 0; i < DEPTH; i = i + 1) begin + ram[i] = $random; + end + q_reg <= $random; +end + +case(|IS_WCLK_N) + 1'b0: + always @(posedge wclk_i) begin + if (~wen_ni) begin + for (i = 0; i < NUM_BYTES * BYTE_WIDTH; i = i + 1) begin + if (~bwen_ni[i]) begin + ram[waddr_i][i] <= data_i[i]; + end + end + end +
end + 1'b1: + always @(negedge wclk_i) begin + if (~wen_ni) begin + for (i = 0; i < NUM_BYTES * BYTE_WIDTH; i = i + 1) begin + if (~bwen_ni[i]) begin + ram[waddr_i][i] <= data_i[i]; + end + end + end + end +endcase + +case(|IS_RCLK_N) + 1'b0: + always @(posedge rclk_i) begin + if (~ren_ni) begin + q_reg <= ram[raddr_i]; + end + end + 1'b1: + always @(negedge rclk_i) begin + if (~ren_ni) begin + q_reg <= ram[raddr_i]; + end + end +endcase + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 256x36 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram256x36 (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:7] raddr_i; +input [0:7] waddr_i; +input [0:35] bwen_ni; +input [0:35] data_i; +input wclk_i; +input rclk_i; +output [0:35] q_o; + + tdpram_core #( + .ADDR_WIDTH(8), + .BYTE_WIDTH(9), + .NUM_BYTES(4), + .IS_WCLK_N(0), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 256x36 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram256x36_wclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:7] raddr_i; +input [0:7] waddr_i; +input [0:35] bwen_ni; +input [0:35] data_i; +input wclk_i; +input rclk_i; +output [0:35] q_o; + + tdpram_core #( + .ADDR_WIDTH(8), + .BYTE_WIDTH(9), + .NUM_BYTES(4), + .IS_WCLK_N(1), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i 
(rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 256x36 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram256x36_rclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:7] raddr_i; +input [0:7] waddr_i; +input [0:35] bwen_ni; +input [0:35] data_i; +input wclk_i; +input rclk_i; +output [0:35] q_o; + + tdpram_core #( + .ADDR_WIDTH(8), + .BYTE_WIDTH(9), + .NUM_BYTES(4), + .IS_WCLK_N(0), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 256x36 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram256x36_rwclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:7] raddr_i; +input [0:7] waddr_i; +input [0:35] bwen_ni; +input [0:35] data_i; +input wclk_i; +input rclk_i; +output [0:35] q_o; + + tdpram_core #( + .ADDR_WIDTH(8), + .BYTE_WIDTH(9), + .NUM_BYTES(4), + .IS_WCLK_N(1), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port 
RAM Core logic 512x18 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram512x18 (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:8] raddr_i; +input [0:8] waddr_i; +input [0:17] bwen_ni; +input [0:17] data_i; +input wclk_i; +input rclk_i; +output [0:17] q_o; + + tdpram_core #( + .ADDR_WIDTH(9), + .BYTE_WIDTH(9), + .NUM_BYTES(2), + .IS_WCLK_N(0), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 512x18 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram512x18_wclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:8] raddr_i; +input [0:8] waddr_i; +input [0:17] bwen_ni; +input [0:17] data_i; +input wclk_i; +input rclk_i; +output [0:17] q_o; + + tdpram_core #( + .ADDR_WIDTH(9), + .BYTE_WIDTH(9), + .NUM_BYTES(2), + .IS_WCLK_N(1), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 512x18 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram512x18_rclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + 
rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:8] raddr_i; +input [0:8] waddr_i; +input [0:17] bwen_ni; +input [0:17] data_i; +input wclk_i; +input rclk_i; +output [0:17] q_o; + + tdpram_core #( + .ADDR_WIDTH(9), + .BYTE_WIDTH(9), + .NUM_BYTES(2), + .IS_WCLK_N(0), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 512x18 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram512x18_rwclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:8] raddr_i; +input [0:8] waddr_i; +input [0:17] bwen_ni; +input [0:17] data_i; +input wclk_i; +input rclk_i; +output [0:17] q_o; + + tdpram_core #( + .ADDR_WIDTH(9), + .BYTE_WIDTH(9), + .NUM_BYTES(2), + .IS_WCLK_N(1), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 1024x9 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram1024x9 (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:9] raddr_i; +input [0:9] waddr_i; +input [0:8] bwen_ni; +input [0:8] data_i; +input wclk_i; +input rclk_i; +output [0:8] q_o; + + tdpram_core #( + .ADDR_WIDTH(10), + .BYTE_WIDTH(9), + 
.NUM_BYTES(1), + .IS_WCLK_N(0), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 1024x9 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram1024x9_wclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:9] raddr_i; +input [0:9] waddr_i; +input [0:8] bwen_ni; +input [0:8] data_i; +input wclk_i; +input rclk_i; +output [0:8] q_o; + + tdpram_core #( + .ADDR_WIDTH(10), + .BYTE_WIDTH(9), + .NUM_BYTES(1), + .IS_WCLK_N(1), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 1024x9 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram1024x9_rclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:9] raddr_i; +input [0:9] waddr_i; +input [0:8] bwen_ni; +input [0:8] data_i; +input wclk_i; +input rclk_i; +output [0:8] q_o; + + tdpram_core #( + .ADDR_WIDTH(10), + .BYTE_WIDTH(9), + .NUM_BYTES(1), + .IS_WCLK_N(0), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + 
+//------------------------------------------------- +// True Dual-port RAM Core logic 1024x9 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram1024x9_rwclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:9] raddr_i; +input [0:9] waddr_i; +input [0:8] bwen_ni; +input [0:8] data_i; +input wclk_i; +input rclk_i; +output [0:8] q_o; + + tdpram_core #( + .ADDR_WIDTH(10), + .BYTE_WIDTH(9), + .NUM_BYTES(1), + .IS_WCLK_N(1), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 2048x4 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram2048x4 (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:10] raddr_i; +input [0:10] waddr_i; +input [0:3] bwen_ni; +input [0:3] data_i; +input wclk_i; +input rclk_i; +output [0:3] q_o; + + tdpram_core #( + .ADDR_WIDTH(11), + .BYTE_WIDTH(4), + .NUM_BYTES(1), + .IS_WCLK_N(0), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 2048x4 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module 
dpram2048x4_wclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:10] raddr_i; +input [0:10] waddr_i; +input [0:3] bwen_ni; +input [0:3] data_i; +input wclk_i; +input rclk_i; +output [0:3] q_o; + + tdpram_core #( + .ADDR_WIDTH(11), + .BYTE_WIDTH(4), + .NUM_BYTES(1), + .IS_WCLK_N(1), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 2048x4 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram2048x4_rclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:10] raddr_i; +input [0:10] waddr_i; +input [0:3] bwen_ni; +input [0:3] data_i; +input wclk_i; +input rclk_i; +output [0:3] q_o; + + tdpram_core #( + .ADDR_WIDTH(11), + .BYTE_WIDTH(4), + .NUM_BYTES(1), + .IS_WCLK_N(0), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 2048x4 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram2048x4_rwclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:10] raddr_i; +input [0:10] waddr_i; +input [0:3] bwen_ni; +input [0:3] data_i; +input wclk_i; +input 
rclk_i; +output [0:3] q_o; + + tdpram_core #( + .ADDR_WIDTH(11), + .BYTE_WIDTH(4), + .NUM_BYTES(1), + .IS_WCLK_N(1), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule diff --git a/techlibs/rapidflex/alkaidL/cell_sim_dsp.v b/techlibs/rapidflex/alkaidL/cell_sim_dsp.v new file mode 100644 index 000000000..8bbac6d14 --- /dev/null +++ b/techlibs/rapidflex/alkaidL/cell_sim_dsp.v @@ -0,0 +1,545 @@ +//------------------------------------------------- +// DSP Primitives +//------------------------------------------------- + +//------------------------------------------------- +// Multiply accumulators +module quad_mac12x10 (A0, B0, A1, B1, A2, B2, A3, B3, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y; + + assign Y = A0 * B0 + A1 * B1 + A2 * B2 + A3 * B3; + +endmodule + +//------------------------------------------------- +// Multiply accumulators with input registering +module quad_mac12x10_regi (CLK, RSTB, A0, B0, A1, B1, A2, B2, A3, B3, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A0_reg; +reg [0:B_WIDTH-1] B0_reg; +reg [0:A_WIDTH-1] A1_reg; +reg [0:B_WIDTH-1] B1_reg; +reg [0:A_WIDTH-1] A2_reg; +reg [0:B_WIDTH-1] B2_reg; +reg [0:A_WIDTH-1] A3_reg; +reg [0:B_WIDTH-1] B3_reg; + 
+always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A0_reg <= 0; + B0_reg <= 0; + A1_reg <= 0; + B1_reg <= 0; + A2_reg <= 0; + B2_reg <= 0; + A3_reg <= 0; + B3_reg <= 0; + end else begin + A0_reg <= A0; + B0_reg <= B0; + A1_reg <= A1; + B1_reg <= B1; + A2_reg <= A2; + B2_reg <= B2; + A3_reg <= A3; + B3_reg <= B3; + end +end + +assign Y = A0_reg * B0_reg + A1_reg * B1_reg + A2_reg * B2_reg + A3_reg * B3_reg; + +endmodule + +//------------------------------------------------- +// Multiply accumulators with output registering +module quad_mac12x10_rego (CLK, RSTB, A0, B0, A1, B1, A2, B2, A3, B3, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y; + +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + Y_reg <= 0; + end else begin + Y_reg <= A0 * B0 + A1 * B1 + A2 * B2 + A3 * B3; + end +end + +assign Y = Y_reg; + +endmodule + +//------------------------------------------------- +// Multiply accumulators with input and output registering +module quad_mac12x10_regio (CLK, RSTB, A0, B0, A1, B1, A2, B2, A3, B3, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A0_reg; +reg [0:B_WIDTH-1] B0_reg; +reg [0:A_WIDTH-1] A1_reg; +reg [0:B_WIDTH-1] B1_reg; +reg [0:A_WIDTH-1] A2_reg; +reg [0:B_WIDTH-1] B2_reg; +reg [0:A_WIDTH-1] A3_reg; +reg [0:B_WIDTH-1] B3_reg; +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin 
+ if (RSTB == 1'b0) begin + A0_reg <= 0; + B0_reg <= 0; + A1_reg <= 0; + B1_reg <= 0; + A2_reg <= 0; + B2_reg <= 0; + A3_reg <= 0; + B3_reg <= 0; + Y_reg <= 0; + end else begin + A0_reg <= A0; + B0_reg <= B0; + A1_reg <= A1; + B1_reg <= B1; + A2_reg <= A2; + B2_reg <= B2; + A3_reg <= A3; + B3_reg <= B3; + Y_reg <= A0_reg * B0_reg + A1_reg * B1_reg + A2_reg * B2_reg + A3_reg * B3_reg; + end +end + +assign Y = Y_reg; + +endmodule + + +module quad_mac12x10_dual_output (A0, B0, A1, B1, A2, B2, A3, B3, Y0, Y1); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y0; +output [0:Y_WIDTH-1] Y1; + + assign Y0 = A0 * B0 + A1 * B1 + A2 * B2 + A3 * B3; + assign Y1 = A2 * B2 + A3 * B3; + +endmodule + +module mac12x10 (A0, B0, A1, B1, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +output [0:Y_WIDTH-1] Y; + + assign Y = A0 * B0 + A1 * B1; + +endmodule + +module mac12x10_regi (CLK, RSTB, A0, B0, A1, B1, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A0_reg; +reg [0:B_WIDTH-1] B0_reg; +reg [0:A_WIDTH-1] A1_reg; +reg [0:B_WIDTH-1] B1_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A0_reg <= 0; + B0_reg <= 0; + A1_reg <= 0; + B1_reg <= 0; + end else begin + A0_reg <= A0; + B0_reg <= B0; + A1_reg <= A1; + B1_reg <= B1; + end +end + + assign Y = A0_reg * B0_reg + A1_reg * B1_reg; + +endmodule + +module 
mac12x10_rego (CLK, RSTB, A0, B0, A1, B1, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +output [0:Y_WIDTH-1] Y; + + +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + Y_reg <= 0; + end else begin + Y_reg <= A0 * B0 + A1 * B1; + end +end + + assign Y = Y_reg; + +endmodule +// Two-term multiply-accumulate (A0*B0 + A1*B1) with input and output registering +module mac12x10_regio (CLK, RSTB, A0, B0, A1, B1, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A0_reg; +reg [0:B_WIDTH-1] B0_reg; +reg [0:A_WIDTH-1] A1_reg; +reg [0:B_WIDTH-1] B1_reg; +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin // synchronous logic; RSTB low clears every register + if (RSTB == 1'b0) begin + A0_reg <= 0; + B0_reg <= 0; + A1_reg <= 0; + B1_reg <= 0; + Y_reg <= 0; + end else begin + A0_reg <= A0; + B0_reg <= B0; + A1_reg <= A1; + B1_reg <= B1; + Y_reg <= A0_reg * B0_reg + A1_reg * B1_reg; // non-blocking like the other registers; uses the previous cycle's input registers + end +end + + assign Y = Y_reg; + +endmodule + + +//------------------------------------------------- +// Multipliers +module mult12x10 (A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + + assign Y = A * B; + +endmodule + +module mult12x10_regi (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A_reg; +reg [0:B_WIDTH-1] B_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A_reg <= 0; + B_reg <= 0; + end else begin + A_reg <= A; + B_reg <=
B; + end +end + + assign Y = A_reg * B_reg; + +endmodule + +module mult12x10_rego (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + Y_reg <= 0; + end else begin + Y_reg <= A * B; + end +end + + assign Y = Y_reg; + +endmodule + +module mult12x10_regio (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A_reg; +reg [0:B_WIDTH-1] B_reg; +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A_reg <= 0; + B_reg <= 0; + Y_reg <= 0; + end else begin + A_reg <= A; + B_reg <= B; + Y_reg <= A_reg * B_reg; + end +end + + assign Y = Y_reg; + +endmodule + + +module mult24x20 (A, B, Y); +// Parameters +parameter A_WIDTH = 24; +parameter B_WIDTH = 20; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + + assign Y = A * B; + +endmodule + +module mult24x20_regi (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 24; +parameter B_WIDTH = 20; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A_reg; +reg [0:B_WIDTH-1] B_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A_reg <= 0; + B_reg <= 0; + end else begin + A_reg <= A; + B_reg <= B; + end +end + + assign Y = A_reg * B_reg; + +endmodule + +module mult24x20_rego (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 24; +parameter B_WIDTH = 20; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] 
B; +output [0:Y_WIDTH-1] Y; + +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + Y_reg <= 0; + end else begin + Y_reg <= A * B; + end +end + + assign Y = Y_reg; + +endmodule + +module mult24x20_regio (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 24; +parameter B_WIDTH = 20; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A_reg; +reg [0:B_WIDTH-1] B_reg; +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A_reg <= 0; + B_reg <= 0; + Y_reg <= 0; + end else begin + A_reg <= A; + B_reg <= B; + Y_reg <= A_reg * B_reg; + end +end + + assign Y = Y_reg; + +endmodule + + +// A half multiplier which only output the most significant 11 bit +module half_mult12x10 (A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH/2-1] Y; + +wire [0:Y_WIDTH-1] mult_out; + + mult12x10 FULL_MULT (.A(A), + .B(B), + .Y(mult_out) + ); + assign Y = mult_out[0:Y_WIDTH/2-1]; + +endmodule diff --git a/techlibs/rapidflex/alkaidL/cell_sim_ff.v b/techlibs/rapidflex/alkaidL/cell_sim_ff.v new file mode 100644 index 000000000..726987e16 --- /dev/null +++ b/techlibs/rapidflex/alkaidL/cell_sim_ff.v @@ -0,0 +1,586 @@ +//----------------------------- +// Rising-edge D-type flip-flop +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dff( + output reg Q, + input D, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + Q <= D; + 1'b1: + always @(negedge C) + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high asynchronous reset 
+//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffs( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = 
"IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffs( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q 
<= D; + 1'b1: + always @(negedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffn( + output reg Q, + input D, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + Q <= D; + 1'b1: + always @(negedge C) + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffnr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffns( + output reg Q, + input D, + input S, + (* 
clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffnrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffnsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffnr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = 
INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffns( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffnrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffnsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- 
+// Two-stage D-type flip-flop chain with active-high asynchronous reset +// 1st stage (dffnr) is negative-edge triggered +// 2nd stage (dffr) is positive-edge triggered +//----------------------------- +// Do not allow ABC or other optimization to touch the ff! +//(* abc9_flop, lib_whitebox *) +module dffnr_dffr( + output Q, + input D, + input R, + input C +); + +wire Q0; + + dffnr FF_0 (.D(D), .C(C), .R(R), .Q(Q0)); + dffr FF_1 (.D(Q0), .C(C), .R(R), .Q(Q)); + +endmodule + +//----------------------------- +// Two-stage D-type flip-flop chain with active-high asynchronous reset +// 1st stage (dffr) is positive-edge triggered +// 2nd stage (dffnr) is negative-edge triggered +//----------------------------- +// Do not allow ABC or other optimization to touch the ff! +//(* abc9_flop, lib_whitebox *) +module dffr_dffnr( + output Q, + input D, + input R, + input C +); + +wire Q0; + + dffr FF_0 (.D(D), .C(C), .R(R), .Q(Q0)); + dffnr FF_1 (.D(Q0), .C(C), .R(R), .Q(Q)); + +endmodule + diff --git a/techlibs/rapidflex/alkaidL/dff_map.v b/techlibs/rapidflex/alkaidL/dff_map.v new file mode 100644 index 000000000..f6d01a4ed --- /dev/null +++ b/techlibs/rapidflex/alkaidL/dff_map.v @@ -0,0 +1,177 @@ +// Rising edge DFF +// NOTE(review): _TECHMAP_WIREINIT_Q_ is declared but not forwarded to the instantiated cell (no INIT override on the instance) - confirm that leaving the cell's INIT at its default is intended +module \$_DFF_P_ (D, C, Q); + input D; + input C; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dff _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C)); +endmodule + +// Rising edge DFF with async active-high reset +module \$_DFF_PP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Rising edge DFF with async active-high set +module \$_DFF_PP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffs _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Rising edge DFF with async active-low reset +module \$_DFF_PN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffrn 
_TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Rising edge DFF with async active-low set +module \$_DFF_PN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule + +// Rising edge DFF with sync active-high reset +module \$_SDFF_PP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Rising edge DFF with sync active-high set +module \$_SDFF_PP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffs _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Rising edge DFF with sync active-low reset +module \$_SDFF_PN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Rising edge DFF with sync active-low set +module \$_SDFF_PN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule + +// Falling edge DFF +module \$_DFF_N_ (D, C, Q); + input D; + input C; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C)); +endmodule + +// Falling edge DFF with async active-high reset +module \$_DFF_NP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffnr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Falling edge DFF with async active-high set +module \$_DFF_NP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffns _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Falling edge DFF with async active-low reset +module \$_DFF_NN0_ (D, C, R, Q); + 
input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffnrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Falling edge DFF with async active-low set +module \$_DFF_NN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffnsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule + +// Falling edge DFF with sync active-high reset +module \$_SDFF_NP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffnr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Falling edge DFF with sync active-high set +module \$_SDFF_NP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffns _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Falling edge DFF with sync active-low reset +module \$_SDFF_NN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffnrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Falling edge DFF with sync active-low set +module \$_SDFF_NN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffnsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule diff --git a/techlibs/rapidflex/alkaidL/dsp_map.v b/techlibs/rapidflex/alkaidL/dsp_map.v new file mode 100644 index 000000000..adc8aff29 --- /dev/null +++ b/techlibs/rapidflex/alkaidL/dsp_map.v @@ -0,0 +1,17 @@ +module mult_14x10_map ( + input [0:13] A, + input [0:9] B, + output [0:23] Y +); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 0; + parameter B_WIDTH = 0; + parameter Y_WIDTH = 0; + + mult_14x10 #() _TECHMAP_REPLACE_ ( + .A (A), + .B (B), + .Y (Y) ); + +endmodule diff --git a/techlibs/rapidflex/alkaidT/arith_map.v b/techlibs/rapidflex/alkaidT/arith_map.v new file mode 100644 index 000000000..d54433654 --- 
/dev/null +++ b/techlibs/rapidflex/alkaidT/arith_map.v @@ -0,0 +1,154 @@ +// Arithmetic units: adder +// Adapted from: https://github.com/chipsalliance/yosys-f4pga-plugins/blob/0ad1af26a29243a9e76379943d735e119dcd0cc6/ql-qlf-plugin/qlf_k6n10/cells_sim.v +// Many thanks to F4PGA for their contribution + +(* techmap_celltype = "$alu" *) +module _80_quicklogic_alu (A, B, CI, BI, X, Y, CO); + + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 1; + parameter B_WIDTH = 1; + parameter Y_WIDTH = 1; + + input [A_WIDTH-1:0] A; + input [B_WIDTH-1:0] B; + output [Y_WIDTH-1:0] X, Y; + + input CI, BI; + output [Y_WIDTH-1:0] CO; + + // The max. number of adders we can support in AlkaidS is (12x2-1)x4x16 = 1472 + // Fail when the resource limit is exceeded + // Also fail when a low utilization rate is detected + // Originally we preferred to defer carry mapping when an adder narrower than 2 bits was detected + // Due to a bug found in the scalable seq detector, the bound was raised to 4 bits (Y_WIDTH < 4 fails) + wire _TECHMAP_FAIL_ = Y_WIDTH > 1472 || Y_WIDTH < 4; + generate + if ((A_WIDTH == 0 || B_WIDTH == 0) && Y_WIDTH > 0) begin + wire _TECHMAP_FAIL_ = 1; + end + endgenerate + wire [1024:0] _TECHMAP_DO_ = "splitnets CARRY; clean"; + localparam Y_COL_WIDTH = 96 - 3; + localparam Y_MAX_WIDTH = 12 - 3; + + (* force_downto *) + wire [Y_WIDTH-1:0] A_buf, B_buf; + \$pos #(.A_SIGNED(A_SIGNED), .A_WIDTH(A_WIDTH), .Y_WIDTH(Y_WIDTH)) A_conv (.A(A), .Y(A_buf)); + \$pos #(.A_SIGNED(B_SIGNED), .A_WIDTH(B_WIDTH), .Y_WIDTH(Y_WIDTH)) B_conv (.A(B), .Y(B_buf)); + + (* force_downto *) + wire [Y_WIDTH-1:0] AA = A_buf; + (* force_downto *) + wire [Y_WIDTH-1:0] BB = BI ? 
~B_buf : B_buf; + wire [Y_WIDTH: 0] CARRY; + + assign CO[Y_WIDTH-1:0] = CARRY[Y_WIDTH:1]; + genvar i; + generate if (Y_WIDTH < Y_COL_WIDTH) begin + wire CARRY_end_buf; + wire [1024:0] _TECHMAP_DO_ = "insbuf CARRY[Y_WIDTH] CARRY_end_buf"; + _fpga_adder intermediate_adder ( + .cin ( ), + .cout (CARRY[0]), + .a (CI ), + .b (CI ), + .sumout ( ) + ); + + _fpga_adder first_adder ( + .cin (CARRY[0]), + .cout (CARRY[1]), + .a (AA[0] ), + .b (BB[0] ), + .sumout (Y[0] ) + ); + + _fpga_adder pretaill_adder ( + .cin (CARRY[Y_WIDTH-1] ), + .cout (CARRY_end_buf), + .a (AA[Y_WIDTH-1] ), + .b (BB[Y_WIDTH-1] ), + .sumout (Y[Y_WIDTH-1] ) + ); + + + _fpga_adder tail_adder ( + .cin (CARRY_end_buf), + .cout (), + .a (1'b0), + .b (1'b0), + .sumout (CARRY[Y_WIDTH]) + ); + + generate for (i = 1; i < Y_WIDTH-1 ; i = i+1) begin:gen3 + _fpga_adder my_adder ( + .cin (CARRY[i] ), + .cout (CARRY[i+1]), + .a (AA[i] ), + .b (BB[i] ), + .sumout (Y[i] ) + ); + end endgenerate + end else begin + generate for (i = 0; i < Y_WIDTH ; i = i+1) begin:gen4 + // Due to VPR limitations regarding IO connexion to carry chain, + // we generate the carry chain input signal using an intermediate adder + // since we can connect a & b from io pads, but not cin & cout + if (i == 0) begin + _fpga_adder intermediate_adder ( + .cin ( ), + .cout (CARRY[0]), + .a (CI ), + .b (CI ), + .sumout ( ) + ); + + _fpga_adder first_adder ( + .cin (CARRY[0]), + .cout (CARRY[1]), + .a (AA[0] ), + .b (BB[0] ), + .sumout (Y[0] ) + ); + end else if (i % (Y_MAX_WIDTH + 1) == 0) begin + wire CARRY_end_buf; + wire CARRY_start_buf; + wire [1024:0] _TECHMAP_DO_ = "insbuf CARRY[i+1] CARRY_end_buf; insbuf CARRY_end_buf CARRY_start_buf"; + _fpga_adder tail_adder ( + .cin (CARRY[i]), + .cout (), + .a (1'b0), + .b (1'b0), + .sumout (CARRY_end_buf) + ); + + _fpga_adder intermediate_adder ( + .cin ( ), + .cout (CARRY_start_buf), + .a (CARRY_end_buf), + .b (1'b1), + .sumout ( ) + ); + + _fpga_adder first_adder ( + .cin (CARRY_start_buf), + .cout 
(CARRY[i+1]), + .a (AA[i] ), + .b (BB[i] ), + .sumout (Y[i] ) + ); + end else begin + _fpga_adder my_adder ( + .cin (CARRY[i] ), + .cout (CARRY[i+1]), + .a (AA[i] ), + .b (BB[i] ), + .sumout (Y[i] ) + ); + end + end endgenerate + end endgenerate + assign X = AA ^ BB; +endmodule diff --git a/techlibs/rapidflex/alkaidT/bram.txt b/techlibs/rapidflex/alkaidT/bram.txt new file mode 100644 index 000000000..4ccf4ba0a --- /dev/null +++ b/techlibs/rapidflex/alkaidT/bram.txt @@ -0,0 +1,314 @@ +bram $__FLEX_TDPRAM_256x36 # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 8 # Number of address bits + dbits 36 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 1 1 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_256x36 + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_256x36_wclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 8 # Number of address bits + dbits 36 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 1 0 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_256x36_wclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 19 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_256x36_rclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 8 # Number 
of address bits + dbits 36 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 0 1 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_256x36_rclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 19 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_256x36_rwclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 8 # Number of address bits + dbits 36 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 0 0 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_256x36_rwclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 19 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_512x18 # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 9 # Number of address bits + dbits 18 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 1 1 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_512x18 + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 10 + or_next_if_better +endmatch + +bram 
$__FLEX_TDPRAM_512x18_wclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 9 # Number of address bits + dbits 18 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 1 0 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_512x18_wclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 10 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_512x18_rclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 9 # Number of address bits + dbits 18 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 0 1 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_512x18_rclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 10 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_512x18_rwclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 9 # Number of address bits + dbits 18 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 0 0 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_512x18_rwclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # 
Add external circuitry to simulate 'transparent read' if necessary + min dbits 10 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_1024x9 # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 10 # Number of address bits + dbits 9 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 1 1 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_1024x9 + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 5 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_1024x9_wclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 10 # Number of address bits + dbits 9 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 1 0 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_1024x9_wclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 5 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_1024x9_rclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 10 # Number of address bits + dbits 9 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 0 1 # clock polarity configuration +endbram + +match 
$__FLEX_TDPRAM_1024x9_rclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 5 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_1024x9_rwclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 10 # Number of address bits + dbits 9 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 0 0 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_1024x9_rwclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + min dbits 5 + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_2048x4 # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 11 # Number of address bits + dbits 4 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 1 1 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_2048x4 + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_2048x4_wclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 11 # Number of address bits + dbits 4 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + 
clocks 2 3 # clock configuration + clkpol 1 0 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_2048x4_wclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_2048x4_rclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 11 # Number of address bits + dbits 4 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 0 1 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_2048x4_rclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary + or_next_if_better +endmatch + +bram $__FLEX_TDPRAM_2048x4_rwclkn # Name of the BRAM cell + init 0 # Set to '1' if BRAM can be initialized + abits 11 # Number of address bits + dbits 4 # Number of data bits + groups 2 # Number of port groups + ports 1 1 # Number of ports in each group + wrmode 0 1 # Set to '1' if this group is write ports + enable 1 1 # Number of enable bits + transp 0 0 # transparent (read ports) + clocks 2 3 # clock configuration + clkpol 0 0 # clock polarity configuration +endbram + +match $__FLEX_TDPRAM_2048x4_rwclkn + min efficiency 0 # Only use this bram if efficiency is at least 0% (always true) + make_transp # Add external circuitry to simulate 'transparent read' if necessary +endmatch + diff --git a/techlibs/rapidflex/alkaidT/bram_map.v b/techlibs/rapidflex/alkaidT/bram_map.v new file mode 100644 index 000000000..fa9892fc7 --- /dev/null +++ b/techlibs/rapidflex/alkaidT/bram_map.v @@ -0,0 +1,575 @@ +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// 
Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_256x36 (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 1; + parameter [0:0] CLKPOL3 = 1; + + input CLK2; + input CLK3; + input [0:7] A1ADDR; + input A1EN; + output [0:35] A1DATA; + input [0:7] B1ADDR; + input B1EN; + input [0:35] B1DATA; + + generate + dpram256x36 + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +// NOTE(review): bram.txt declares this cell as $__FLEX_TDPRAM_256x36_wclkn (lower-case suffix) while this module is named ..._WCLKN; Verilog identifiers are case-sensitive, so confirm techmap can actually match it (same question for the _RCLKN/_RWCLKN variants) +//----------------------------- +module \$__FLEX_TDPRAM_256x36_WCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 1; + parameter [0:0] CLKPOL3 = 0; + + input CLK2; + input CLK3; + input [0:7] A1ADDR; + input A1EN; + output [0:35] A1DATA; + input [0:7] B1ADDR; + input B1EN; + input [0:35] B1DATA; + + generate + dpram256x36_wclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_256x36_RCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 0; + parameter [0:0] CLKPOL3 = 1; + + input CLK2; + input CLK3; + input [0:7] A1ADDR; + input A1EN; + output [0:35] A1DATA; + input [0:7] B1ADDR; + input B1EN; + input [0:35] B1DATA; + + generate + dpram256x36_rclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i 
(B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_256x36_RWCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 0; + parameter [0:0] CLKPOL3 = 0; + + input CLK2; + input CLK3; + input [0:7] A1ADDR; + input A1EN; + output [0:35] A1DATA; + input [0:7] B1ADDR; + input B1EN; + input [0:35] B1DATA; + + generate + dpram256x36_rwclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_512x18 (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 1; + parameter [0:0] CLKPOL3 = 1; + + input CLK2; + input CLK3; + input [0:8] A1ADDR; + input A1EN; + output [0:17] A1DATA; + input [0:8] B1ADDR; + input B1EN; + input [0:17] B1DATA; + + generate + dpram512x18 + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_512x18_WCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 1; + parameter [0:0] CLKPOL3 = 0; + + input CLK2; + input CLK3; + input [0:8] A1ADDR; + 
input A1EN; + output [0:17] A1DATA; + input [0:8] B1ADDR; + input B1EN; + input [0:17] B1DATA; + + generate + dpram512x18_wclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_512x18_RCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 0; + parameter [0:0] CLKPOL3 = 1; + + input CLK2; + input CLK3; + input [0:8] A1ADDR; + input A1EN; + output [0:17] A1DATA; + input [0:8] B1ADDR; + input B1EN; + input [0:17] B1DATA; + + generate + dpram512x18_rclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_512x18_RWCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 0; + parameter [0:0] CLKPOL3 = 0; + + input CLK2; + input CLK3; + input [0:8] A1ADDR; + input A1EN; + output [0:17] A1DATA; + input [0:8] B1ADDR; + input B1EN; + input [0:17] B1DATA; + + generate + dpram512x18_rwclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys 
+//----------------------------- +module \$__FLEX_TDPRAM_1024x9 (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 1; + parameter [0:0] CLKPOL3 = 1; + + input CLK2; + input CLK3; + input [0:9] A1ADDR; + input A1EN; + output [0:8] A1DATA; + input [0:9] B1ADDR; + input B1EN; + input [0:8] B1DATA; + + generate + dpram1024x9 + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), // NOTE(review): |1 reduces to a single 1'b1, not a full-width mask; the dpram* simulation model stores bit i only when bwen_ni[i] is 0 - confirm every data bit is write-enabled (same tie-off in all $__FLEX_TDPRAM_* wrappers) + .wen_ni (B1EN), // NOTE(review): the dpram* simulation model writes while wen_ni is 0 and reads while ren_ni is 0 - confirm the B1EN/A1EN polarity delivered by the memory mapper matches + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_1024x9_WCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 1; + parameter [0:0] CLKPOL3 = 0; + + input CLK2; + input CLK3; + input [0:9] A1ADDR; + input A1EN; + output [0:8] A1DATA; + input [0:9] B1ADDR; + input B1EN; + input [0:8] B1DATA; + + generate + dpram1024x9_wclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_1024x9_RCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 0; + parameter [0:0] CLKPOL3 = 1; + + input CLK2; + input CLK3; + input [0:9] A1ADDR; + input A1EN; + output [0:8] A1DATA; + input [0:9] B1ADDR; + input B1EN; + input [0:8] B1DATA; + + generate + dpram1024x9_rclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN),
+ .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_1024x9_RWCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 0; + parameter [0:0] CLKPOL3 = 0; + + input CLK2; + input CLK3; + input [0:9] A1ADDR; + input A1EN; + output [0:8] A1DATA; + input [0:9] B1ADDR; + input B1EN; + input [0:8] B1DATA; + + generate + dpram1024x9_rwclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_2048x4 (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 1; + parameter [0:0] CLKPOL3 = 1; + + input CLK2; + input CLK3; + input [0:10] A1ADDR; + input A1EN; + output [0:3] A1DATA; + input [0:10] B1ADDR; + input B1EN; + input [0:3] B1DATA; + + generate + dpram2048x4 + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_2048x4_WCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 1; + parameter [0:0] CLKPOL3 = 0; + + input CLK2; + input CLK3; + input [0:10] A1ADDR; + input A1EN; + output [0:3] A1DATA; + input [0:10]
B1ADDR; + input B1EN; + input [0:3] B1DATA; + + generate + dpram2048x4_wclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_2048x4_RCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 0; + parameter [0:0] CLKPOL3 = 1; + + input CLK2; + input CLK3; + input [0:10] A1ADDR; + input A1EN; + output [0:3] A1DATA; + input [0:10] B1ADDR; + input B1EN; + input [0:3] B1DATA; + + generate + dpram2048x4_rclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule + +//----------------------------- +// This is a true dual-port RAM +// BUT without support on Byte-Write-Enable +// Due to limited support from Yosys +//----------------------------- +module \$__FLEX_TDPRAM_2048x4_RWCLKN (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN); + + parameter [0:0] CLKPOL2 = 0; + parameter [0:0] CLKPOL3 = 0; + + input CLK2; + input CLK3; + input [0:10] A1ADDR; + input A1EN; + output [0:3] A1DATA; + input [0:10] B1ADDR; + input B1EN; + input [0:3] B1DATA; + + generate + dpram2048x4_rwclkn + _TECHMAP_REPLACE_ ( + .rclk_i (CLK2), + .wclk_i (CLK3), + .bwen_ni (|1), + .wen_ni (B1EN), + .waddr_i (B1ADDR), + .data_i (B1DATA), + .ren_ni (A1EN), + .raddr_i (A1ADDR), + .q_o (A1DATA) + ); + endgenerate + +endmodule diff --git a/techlibs/rapidflex/alkaidT/cell_sim.v b/techlibs/rapidflex/alkaidT/cell_sim.v new file mode 100644 index 000000000..fe5126e06 --- /dev/null +++ b/techlibs/rapidflex/alkaidT/cell_sim.v @@ -0,0
+1,8 @@ +//------------------------------------------------- +// Include all the primitives +//------------------------------------------------- +`include "cell_sim_arith.v" +`include "cell_sim_dsp.v" +`include "cell_sim_new_dsp.v" +`include "cell_sim_bram.v" +`include "cell_sim_ff.v" diff --git a/techlibs/rapidflex/alkaidT/cell_sim_arith.v b/techlibs/rapidflex/alkaidT/cell_sim_arith.v new file mode 100644 index 000000000..5a325e5b6 --- /dev/null +++ b/techlibs/rapidflex/alkaidT/cell_sim_arith.v @@ -0,0 +1,15 @@ +//--------------------------------------- +// 1-bit adder +//--------------------------------------- +(* abc9_box, lib_whitebox *) +module _fpga_adder( + output sumout, + output cout, + input a, + input b, + input cin +); + assign sumout = a ^ b ^ cin; + assign cout = (a & b) | ((a | b) & cin); + +endmodule diff --git a/techlibs/rapidflex/alkaidT/cell_sim_bram.v b/techlibs/rapidflex/alkaidT/cell_sim_bram.v new file mode 100644 index 000000000..57a320821 --- /dev/null +++ b/techlibs/rapidflex/alkaidT/cell_sim_bram.v @@ -0,0 +1,876 @@ +//------------------------------------------------- +// Block RAM Primitives +//------------------------------------------------- + +//------------------------------------------------- +// True Dual-port RAM Core logic +// This module is written in a scalable way +// By default it is configured as 256x36 = 9k-bits +// +// IMPORTANT: Please do not use this module as a hard ip!!! 
+module tdpram_core (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); +// Parameters: the array holds DEPTH words, each NUM_BYTES * BYTE_WIDTH bits wide +parameter ADDR_WIDTH = 8; +parameter DEPTH = 2**ADDR_WIDTH; +parameter BYTE_WIDTH = 9; +parameter NUM_BYTES = 4; +parameter [0:0] IS_WCLK_N = 1'b0; // Indicate if the write clock is triggered at negative edge: 1 = Yes; 0 = No +parameter [0:0] IS_RCLK_N = 1'b0; // Indicate if the read clock is triggered at negative edge: 1 = Yes; 0 = No + +input ren_ni; // read enable, active low +input wen_ni; // write enable, active low +input [0:ADDR_WIDTH-1] raddr_i; +input [0:ADDR_WIDTH-1] waddr_i; +input [0:BYTE_WIDTH*NUM_BYTES-1] bwen_ni; // per-bit write mask, active low +input [0:BYTE_WIDTH*NUM_BYTES-1] data_i; +input wclk_i; +input rclk_i; +output [0:BYTE_WIDTH*NUM_BYTES-1] q_o; // registered read data (driven from q_reg) + +reg [0:NUM_BYTES*BYTE_WIDTH-1] ram[0:DEPTH-1]; +reg [0:NUM_BYTES*BYTE_WIDTH-1] q_reg; + +integer i; + +assign q_o = q_reg; + +// Initial values are all random, to mimic the actual behavior of a RAM +initial begin + for (i = 0; i < DEPTH; i = i + 1) begin + ram[i] = $random; + end + q_reg <= $random; +end + +case(|IS_WCLK_N) // elaboration-time choice: only the always block for the selected write edge is instantiated + 1'b0: + always @(posedge wclk_i) begin + if (~wen_ni) begin // write only while the active-low enable is 0 + for (i = 0; i < NUM_BYTES * BYTE_WIDTH; i = i + 1) begin + if (~bwen_ni[i]) begin // bit i is stored only when its mask bit is 0 + ram[waddr_i][i] <= data_i[i]; + end + end + end + end + 1'b1: + always @(negedge wclk_i) begin + if (~wen_ni) begin // write only while the active-low enable is 0 + for (i = 0; i < NUM_BYTES * BYTE_WIDTH; i = i + 1) begin + if (~bwen_ni[i]) begin // bit i is stored only when its mask bit is 0 + ram[waddr_i][i] <= data_i[i]; + end + end + end + end +endcase + +case(|IS_RCLK_N) // elaboration-time choice of the read clock edge + 1'b0: + always @(posedge rclk_i) begin + if (~ren_ni) begin // q_reg keeps its previous value while ren_ni is 1 + q_reg <= ram[raddr_i]; + end + end + 1'b1: + always @(negedge rclk_i) begin + if (~ren_ni) begin // q_reg keeps its previous value while ren_ni is 1 + q_reg <= ram[raddr_i]; + end + end +endcase + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 256x36 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram256x36 (wclk_i, +
bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:7] raddr_i; +input [0:7] waddr_i; +input [0:35] bwen_ni; +input [0:35] data_i; +input wclk_i; +input rclk_i; +output [0:35] q_o; + + tdpram_core #( + .ADDR_WIDTH(8), + .BYTE_WIDTH(9), + .NUM_BYTES(4), + .IS_WCLK_N(0), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 256x36 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram256x36_wclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:7] raddr_i; +input [0:7] waddr_i; +input [0:35] bwen_ni; +input [0:35] data_i; +input wclk_i; +input rclk_i; +output [0:35] q_o; + + tdpram_core #( + .ADDR_WIDTH(8), + .BYTE_WIDTH(9), + .NUM_BYTES(4), + .IS_WCLK_N(1), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 256x36 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram256x36_rclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:7] raddr_i; +input [0:7] waddr_i; +input [0:35] bwen_ni; +input [0:35] data_i; +input wclk_i; +input rclk_i; +output [0:35] q_o; + + 
tdpram_core #( + .ADDR_WIDTH(8), + .BYTE_WIDTH(9), + .NUM_BYTES(4), + .IS_WCLK_N(0), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 256x36 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram256x36_rwclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:7] raddr_i; +input [0:7] waddr_i; +input [0:35] bwen_ni; +input [0:35] data_i; +input wclk_i; +input rclk_i; +output [0:35] q_o; + + tdpram_core #( + .ADDR_WIDTH(8), + .BYTE_WIDTH(9), + .NUM_BYTES(4), + .IS_WCLK_N(1), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 512x18 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram512x18 (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:8] raddr_i; +input [0:8] waddr_i; +input [0:17] bwen_ni; +input [0:17] data_i; +input wclk_i; +input rclk_i; +output [0:17] q_o; + + tdpram_core #( + .ADDR_WIDTH(9), + .BYTE_WIDTH(9), + .NUM_BYTES(2), + .IS_WCLK_N(0), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + 
.raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 512x18 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram512x18_wclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:8] raddr_i; +input [0:8] waddr_i; +input [0:17] bwen_ni; +input [0:17] data_i; +input wclk_i; +input rclk_i; +output [0:17] q_o; + + tdpram_core #( + .ADDR_WIDTH(9), + .BYTE_WIDTH(9), + .NUM_BYTES(2), + .IS_WCLK_N(1), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 512x18 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram512x18_rclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:8] raddr_i; +input [0:8] waddr_i; +input [0:17] bwen_ni; +input [0:17] data_i; +input wclk_i; +input rclk_i; +output [0:17] q_o; + + tdpram_core #( + .ADDR_WIDTH(9), + .BYTE_WIDTH(9), + .NUM_BYTES(2), + .IS_WCLK_N(0), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 512x18 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at 
+// - [ ] positive edge +// - [x] negative edge +module dpram512x18_rwclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:8] raddr_i; +input [0:8] waddr_i; +input [0:17] bwen_ni; +input [0:17] data_i; +input wclk_i; +input rclk_i; +output [0:17] q_o; + + tdpram_core #( + .ADDR_WIDTH(9), + .BYTE_WIDTH(9), + .NUM_BYTES(2), + .IS_WCLK_N(1), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 1024x9 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram1024x9 (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:9] raddr_i; +input [0:9] waddr_i; +input [0:8] bwen_ni; +input [0:8] data_i; +input wclk_i; +input rclk_i; +output [0:8] q_o; + + tdpram_core #( + .ADDR_WIDTH(10), + .BYTE_WIDTH(9), + .NUM_BYTES(1), + .IS_WCLK_N(0), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 1024x9 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram1024x9_wclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:9] raddr_i; +input [0:9] waddr_i; +input [0:8] bwen_ni; 
+input [0:8] data_i; +input wclk_i; +input rclk_i; +output [0:8] q_o; + + tdpram_core #( + .ADDR_WIDTH(10), + .BYTE_WIDTH(9), + .NUM_BYTES(1), + .IS_WCLK_N(1), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 1024x9 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram1024x9_rclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:9] raddr_i; +input [0:9] waddr_i; +input [0:8] bwen_ni; +input [0:8] data_i; +input wclk_i; +input rclk_i; +output [0:8] q_o; + + tdpram_core #( + .ADDR_WIDTH(10), + .BYTE_WIDTH(9), + .NUM_BYTES(1), + .IS_WCLK_N(0), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 1024x9 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram1024x9_rwclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:9] raddr_i; +input [0:9] waddr_i; +input [0:8] bwen_ni; +input [0:8] data_i; +input wclk_i; +input rclk_i; +output [0:8] q_o; + + tdpram_core #( + .ADDR_WIDTH(10), + .BYTE_WIDTH(9), + .NUM_BYTES(1), + .IS_WCLK_N(1), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + 
.wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 2048x4 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram2048x4 (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:10] raddr_i; +input [0:10] waddr_i; +input [0:3] bwen_ni; +input [0:3] data_i; +input wclk_i; +input rclk_i; +output [0:3] q_o; + + tdpram_core #( + .ADDR_WIDTH(11), + .BYTE_WIDTH(4), + .NUM_BYTES(1), + .IS_WCLK_N(0), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 2048x4 +// - read clock is triggered at +// - [x] positive edge +// - [ ] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram2048x4_wclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:10] raddr_i; +input [0:10] waddr_i; +input [0:3] bwen_ni; +input [0:3] data_i; +input wclk_i; +input rclk_i; +output [0:3] q_o; + + tdpram_core #( + .ADDR_WIDTH(11), + .BYTE_WIDTH(4), + .NUM_BYTES(1), + .IS_WCLK_N(1), + .IS_RCLK_N(0) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 2048x4 +// - read clock is triggered at +// - 
[ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [x] positive edge +// - [ ] negative edge +module dpram2048x4_rclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:10] raddr_i; +input [0:10] waddr_i; +input [0:3] bwen_ni; +input [0:3] data_i; +input wclk_i; +input rclk_i; +output [0:3] q_o; + + tdpram_core #( + .ADDR_WIDTH(11), + .BYTE_WIDTH(4), + .NUM_BYTES(1), + .IS_WCLK_N(0), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule + +//------------------------------------------------- +// True Dual-port RAM Core logic 2048x4 +// - read clock is triggered at +// - [ ] positive edge +// - [x] negative edge +// - write clock is triggered at +// - [ ] positive edge +// - [x] negative edge +module dpram2048x4_rwclkn (wclk_i, + bwen_ni, + wen_ni, + waddr_i, + data_i, + rclk_i, + ren_ni, + raddr_i, + q_o + ); + +input ren_ni; +input wen_ni; +input [0:10] raddr_i; +input [0:10] waddr_i; +input [0:3] bwen_ni; +input [0:3] data_i; +input wclk_i; +input rclk_i; +output [0:3] q_o; + + tdpram_core #( + .ADDR_WIDTH(11), + .BYTE_WIDTH(4), + .NUM_BYTES(1), + .IS_WCLK_N(1), + .IS_RCLK_N(1) + ) tdpram_core ( + .rclk_i (rclk_i), + .wclk_i (wclk_i), + .bwen_ni (bwen_ni), + .wen_ni (wen_ni), + .waddr_i (waddr_i), + .data_i (data_i), + .ren_ni (ren_ni), + .raddr_i (raddr_i), + .q_o (q_o) + ); + +endmodule diff --git a/techlibs/rapidflex/alkaidT/cell_sim_dsp.v b/techlibs/rapidflex/alkaidT/cell_sim_dsp.v new file mode 100644 index 000000000..244086e10 --- /dev/null +++ b/techlibs/rapidflex/alkaidT/cell_sim_dsp.v @@ -0,0 +1,561 @@ +//------------------------------------------------- +// DSP Primitives +//------------------------------------------------- + 
+//------------------------------------------------- +// Multiply accumulators +module quad_mac12x10 (A0, B0, A1, B1, A2, B2, A3, B3, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y; + + assign Y = A0 * B0 + A1 * B1 + A2 * B2 + A3 * B3; + +endmodule + +//------------------------------------------------- +// Multiply accumulators with input registering +module quad_mac12x10_regi (CLK, RSTB, A0, B0, A1, B1, A2, B2, A3, B3, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A0_reg; +reg [0:B_WIDTH-1] B0_reg; +reg [0:A_WIDTH-1] A1_reg; +reg [0:B_WIDTH-1] B1_reg; +reg [0:A_WIDTH-1] A2_reg; +reg [0:B_WIDTH-1] B2_reg; +reg [0:A_WIDTH-1] A3_reg; +reg [0:B_WIDTH-1] B3_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A0_reg <= 0; + B0_reg <= 0; + A1_reg <= 0; + B1_reg <= 0; + A2_reg <= 0; + B2_reg <= 0; + A3_reg <= 0; + B3_reg <= 0; + end else begin + A0_reg <= A0; + B0_reg <= B0; + A1_reg <= A1; + B1_reg <= B1; + A2_reg <= A2; + B2_reg <= B2; + A3_reg <= A3; + B3_reg <= B3; + end +end + +assign Y = A0_reg * B0_reg + A1_reg * B1_reg + A2_reg * B2_reg + A3_reg * B3_reg; + +endmodule + +//------------------------------------------------- +// Multiply accumulators with output registering +module quad_mac12x10_rego (CLK, RSTB, A0, B0, A1, B1, A2, B2, A3, B3, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + 
B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y; + +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + Y_reg <= 0; + end else begin + Y_reg <= A0 * B0 + A1 * B1 + A2 * B2 + A3 * B3; + end +end + +assign Y = Y_reg; + +endmodule + +//------------------------------------------------- +// Multiply accumulators with input and output registering +module quad_mac12x10_regio (CLK, RSTB, A0, B0, A1, B1, A2, B2, A3, B3, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A0_reg; +reg [0:B_WIDTH-1] B0_reg; +reg [0:A_WIDTH-1] A1_reg; +reg [0:B_WIDTH-1] B1_reg; +reg [0:A_WIDTH-1] A2_reg; +reg [0:B_WIDTH-1] B2_reg; +reg [0:A_WIDTH-1] A3_reg; +reg [0:B_WIDTH-1] B3_reg; +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A0_reg <= 0; + B0_reg <= 0; + A1_reg <= 0; + B1_reg <= 0; + A2_reg <= 0; + B2_reg <= 0; + A3_reg <= 0; + B3_reg <= 0; + Y_reg <= 0; + end else begin + A0_reg <= A0; + B0_reg <= B0; + A1_reg <= A1; + B1_reg <= B1; + A2_reg <= A2; + B2_reg <= B2; + A3_reg <= A3; + B3_reg <= B3; + Y_reg <= A0_reg * B0_reg + A1_reg * B1_reg + A2_reg * B2_reg + A3_reg * B3_reg; + end +end + +assign Y = Y_reg; + +endmodule + + +module quad_mac12x10_dual_output (A0, B0, A1, B1, A2, B2, A3, B3, Y0, Y1); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input 
[0:B_WIDTH-1] B1; +input [0:A_WIDTH-1] A2; +input [0:B_WIDTH-1] B2; +input [0:A_WIDTH-1] A3; +input [0:B_WIDTH-1] B3; +output [0:Y_WIDTH-1] Y0; +output [0:Y_WIDTH-1] Y1; + + assign Y0 = A0 * B0 + A1 * B1 + A2 * B2 + A3 * B3; + assign Y1 = A2 * B2 + A3 * B3; + +endmodule + +module mac12x10 (A0, B0, A1, B1, Y); // combinational: Y = A0*B0 + A1*B1 +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +output [0:Y_WIDTH-1] Y; + + assign Y = A0 * B0 + A1 * B1; + +endmodule + +module mac12x10_regi (CLK, RSTB, A0, B0, A1, B1, Y); // Y = A0*B0 + A1*B1 computed from registered copies of the inputs +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A0_reg; +reg [0:B_WIDTH-1] B0_reg; +reg [0:A_WIDTH-1] A1_reg; +reg [0:B_WIDTH-1] B1_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A0_reg <= 0; + B0_reg <= 0; + A1_reg <= 0; + B1_reg <= 0; + end else begin + A0_reg <= A0; + B0_reg <= B0; + A1_reg <= A1; + B1_reg <= B1; + end +end + + assign Y = A0_reg * B0_reg + A1_reg * B1_reg; + +endmodule + +module mac12x10_rego (CLK, RSTB, A0, B0, A1, B1, Y); // Y = A0*B0 + A1*B1 with a registered output +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +output [0:Y_WIDTH-1] Y; + + +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + Y_reg <= 0; + end else begin + Y_reg <= A0 * B0 + A1 * B1; + end +end + + assign Y = Y_reg; + +endmodule + +module mac12x10_regio (CLK, RSTB, A0, B0, A1, B1, Y); // Y = A0*B0 + A1*B1 with registered inputs and a registered output: Y reflects the inputs sampled two CLK edges earlier +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input
[0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:A_WIDTH-1] A1; +input [0:B_WIDTH-1] B1; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A0_reg; +reg [0:B_WIDTH-1] B0_reg; +reg [0:A_WIDTH-1] A1_reg; +reg [0:B_WIDTH-1] B1_reg; +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin // synchronous, active-low reset clears every stage + A0_reg <= 0; + B0_reg <= 0; + A1_reg <= 0; + B1_reg <= 0; + Y_reg <= 0; + end else begin + A0_reg <= A0; + B0_reg <= B0; + A1_reg <= A1; + B1_reg <= B1; + Y_reg <= A0_reg * B0_reg + A1_reg * B1_reg; // non-blocking like every other register here, so a flop sampling Y on the same edge sees the previous value; the sum is kept to Y_WIDTH bits + end +end + + assign Y = Y_reg; + +endmodule + + +//------------------------------------------------- +// Multipliers +module mult12x10 (A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + + assign Y = A * B; + +endmodule + +module mult12x10_regi (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A_reg; +reg [0:B_WIDTH-1] B_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A_reg <= 0; + B_reg <= 0; + end else begin + A_reg <= A; + B_reg <= B; + end +end + + assign Y = A_reg * B_reg; + +endmodule + +module mult12x10_rego (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + Y_reg <= 0; + end else begin + Y_reg <= A * B; + end +end + + assign Y = Y_reg; + +endmodule + +module mult12x10_regio (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A;
+input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A_reg; +reg [0:B_WIDTH-1] B_reg; +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A_reg <= 0; + B_reg <= 0; + Y_reg <= 0; + end else begin + A_reg <= A; + B_reg <= B; + Y_reg <= A_reg * B_reg; + end +end + + assign Y = Y_reg; + +endmodule + + +module mult24x20 (A, B, Y); +// Parameters +parameter A_WIDTH = 24; +parameter B_WIDTH = 20; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + + assign Y = A * B; + +endmodule + +module mult24x20_regi (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 24; +parameter B_WIDTH = 20; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A_reg; +reg [0:B_WIDTH-1] B_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A_reg <= 0; + B_reg <= 0; + end else begin + A_reg <= A; + B_reg <= B; + end +end + + assign Y = A_reg * B_reg; + +endmodule + +module mult24x20_rego (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 24; +parameter B_WIDTH = 20; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + Y_reg <= 0; + end else begin + Y_reg <= A * B; + end +end + + assign Y = Y_reg; + +endmodule + +module mult24x20_regio (CLK, RSTB, A, B, Y); +// Parameters +parameter A_WIDTH = 24; +parameter B_WIDTH = 20; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input CLK; +input RSTB; +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH-1] Y; + +reg [0:A_WIDTH-1] A_reg; +reg [0:B_WIDTH-1] B_reg; +reg [0:Y_WIDTH-1] Y_reg; + +always @(posedge CLK) begin + if (RSTB == 1'b0) begin + A_reg <= 0; + B_reg <= 0; + Y_reg <= 0; + end else begin + A_reg <= A; + B_reg <= B; + Y_reg 
<= A_reg * B_reg; + end +end + + assign Y = Y_reg; + +endmodule + + +// A half multiplier which only output the most significant 11 bit +module half_mult12x10 (A, B, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A; +input [0:B_WIDTH-1] B; +output [0:Y_WIDTH/2-1] Y; + +wire [0:Y_WIDTH-1] mult_out; + + mult12x10 FULL_MULT (.A(A), + .B(B), + .Y(mult_out) + ); + assign Y = mult_out[0:Y_WIDTH/2-1]; + +endmodule + +module mad12x10x22 (A0, B0, C0, Y); +// Parameters +parameter A_WIDTH = 12; +parameter B_WIDTH = 10; +parameter Y_WIDTH = A_WIDTH + B_WIDTH; + +input [0:A_WIDTH-1] A0; +input [0:B_WIDTH-1] B0; +input [0:Y_WIDTH-1] C0; +output [0:Y_WIDTH-1] Y; + + assign Y = A0 * B0 + C0; + +endmodule + diff --git a/techlibs/rapidflex/alkaidT/cell_sim_ff.v b/techlibs/rapidflex/alkaidT/cell_sim_ff.v new file mode 100644 index 000000000..726987e16 --- /dev/null +++ b/techlibs/rapidflex/alkaidT/cell_sim_ff.v @@ -0,0 +1,586 @@ +//----------------------------- +// Rising-edge D-type flip-flop +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dff( + output reg Q, + input D, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + Q <= D; + 1'b1: + always @(negedge C) + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C 
or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffs( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with 
active-high synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-high synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffs( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Rising-edge D-type flip-flop with active-low synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 
1'b0; + parameter [0:0] IS_C_INVERTED = 1'b0; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffn( + output reg Q, + input D, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + Q <= D; + 1'b1: + always @(negedge C) + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffnr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or posedge R) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffns( + output reg Q, + input D, + input S, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or posedge S) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// 
Falling-edge D-type flip-flop with active-low asynchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffnrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C or negedge RN) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low asynchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module dffnsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C or negedge SN) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffnr( + output reg Q, + input D, + input R, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (R == 1'b1) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-high synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffns( + output reg Q, + input D, + input S, 
+ (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (S == 1'b1) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low synchronous reset +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffnrn( + output reg Q, + input D, + input RN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + 1'b1: + always @(negedge C) + if (RN == 1'b0) + Q <= 1'b0; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Falling-edge D-type flip-flop with active-low synchronous set +//----------------------------- +(* abc9_flop, lib_whitebox *) +module sdffnsn( + output reg Q, + input D, + input SN, + (* clkbuf_sink *) + (* invertible_pin = "IS_C_INVERTED" *) + input C +); + parameter [0:0] INIT = 1'b0; + parameter [0:0] IS_C_INVERTED = 1'b1; + initial Q = INIT; + case(|IS_C_INVERTED) + 1'b0: + always @(posedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + 1'b1: + always @(negedge C) + if (SN == 1'b0) + Q <= 1'b1; + else + Q <= D; + endcase +endmodule + +//----------------------------- +// Two-bit D-type flip-flop with active-high asynchronous reset +// 1st stage is negative-edge triggered (dffnr) +// 2nd stage is positive-edge triggered (dffr) +//----------------------------- +// Do not allow ABC or other optimization to touch the ff! 
+//(* abc9_flop, lib_whitebox *) +module dffnr_dffr( + output Q, + input D, + input R, + input C +); + +wire Q0; + + dffnr FF_0 (.D(D), .C(C), .R(R), .Q(Q0)); + dffr FF_1 (.D(Q0), .C(C), .R(R), .Q(Q)); + +endmodule + +//----------------------------- +// Two-bit D-type flip-flop with active-high asynchronous reset +// 1st stage is positive-edge triggered +// 2nd stage is negative-edge triggered +//----------------------------- +// Do not allow ABC or other optimization to touch the ff! +//(* abc9_flop, lib_whitebox *) +module dffr_dffnr( + output Q, + input D, + input R, + input C +); + +wire Q0; + + dffr FF_0 (.D(D), .C(C), .R(R), .Q(Q0)); + dffnr FF_1 (.D(Q0), .C(C), .R(R), .Q(Q)); + +endmodule + diff --git a/techlibs/rapidflex/alkaidT/cell_sim_new_dsp.v b/techlibs/rapidflex/alkaidT/cell_sim_new_dsp.v new file mode 100644 index 000000000..18faf9cf3 --- /dev/null +++ b/techlibs/rapidflex/alkaidT/cell_sim_new_dsp.v @@ -0,0 +1,385 @@ +module mad12x10x22 (clk_i, rst_ni, a_i, b_i, d_i, out_o, mode_i, rst_acc, accsel, cas_g, overflow); + +input [0:47] a_i; +input [0:39] b_i; +input [0:59] d_i; +input [0:12] mode_i; +output [0:59] out_o; + +input clk_i; +input rst_ni; +input rst_acc; +input accsel; +input cas_g; +output overflow; + + dsp #( + .N_SIZE (12), + .M_SIZE (10) + ) u_dsp( + .a_i (a_i), + .b_i (b_i), + .out_o (out_o), + + .clk_i (clk_i), + .rst_ni (rst_ni), + .d_i (d_i), + .mode_i (mode_i), + .rst_acc (rst_acc), + .accsel (accsel), + .cas_g (cas_g), + .overflow (overflow) + ); + +endmodule + +module mad24x20x44 (clk_i, rst_ni, a_i, b_i, d_i, out_o, mode_i, rst_acc, accsel, cas_g, overflow); + +input [0:95] a_i; +input [0:79] b_i; +input [0:103] d_i; +input [0:12] mode_i; +output [0:103] out_o; + +input clk_i; +input rst_ni; +input rst_acc; +input accsel; +input cas_g; +output overflow; + + dsp #( + .N_SIZE (24), + .M_SIZE (20) + ) u_dsp( + .a_i (a_i), + .b_i (b_i), + .out_o (out_o), + + .clk_i (clk_i), + .rst_ni (rst_ni), + .d_i (d_i), + .mode_i (mode_i), + 
.rst_acc (rst_acc), + .accsel (accsel), + .cas_g (cas_g), + .overflow (overflow) + ); + +endmodule + +//----------------------------------------------------- +// Design Name : Parameterized DSP block +// File Name : dsp.v +// Function : A N*M-bit DSP block which can operate in fracturable modes: +// 1. four [N/4]*[M/4]-bit multiplication with accumulation +// 1.1 (combinational) +// 1.2 (with input registers triggered by rising edge) +// 1.3 (with output registers triggered by rising edge) +// 1.4 (with input and output registers triggered by rising edge) +// 1.5 (with input registers triggered by falling edge) +// 1.6 (with output registers triggered by falling edge) +// 1.7 (with input and output registers triggered by falling edge) +// 2. two [N/2]*[M/2]-bit multipliers +// 2.1 (combinational) +// 2.2 (with input registers triggered by rising edge) +// 2.3 (with output registers triggered by rising edge) +// 2.4 (with input and output registers triggered by rising edge) +// 2.5 (with input registers triggered by falling edge) +// 2.6 (with output registers triggered by falling edge) +// 2.7 (with input and output registers triggered by falling edge) +// 3. Single N*M-bit multipliers +// 3.1 (combinational) +// 3.2 (with input registers triggered by rising edge) +// 3.3 (with output registers triggered by rising edge) +// 3.4 (with input and output registers triggered by rising edge) +// 3.5 (with input registers triggered by falling edge) +// 3.6 (with output registers triggered by falling edge) +// 3.7 (with input and output registers triggered by falling edge) +// 4. 
Two [N/4]*[M/4]-bit multiply-accumulators +// 4.1 (combinational) +// 4.2 (with input registers triggered by rising edge) +// 4.3 (with output registers triggered by rising edge) +// 4.4 (with input and output registers triggered by rising edge) +// 4.5 (with input registers triggered by falling edge) +// 4.6 (with output registers triggered by falling edge) +// 4.7 (with input and output registers triggered by falling edge) +// 5. One [N/4]*[M/4]-bit multiplier + One [N/4]*[M/4]-bit MAC +// 5.1 (combinational) +// 5.2 (with input registers triggered by rising edge) +// 5.3 (with output registers triggered by rising edge) +// 5.4 (with input and output registers triggered by rising edge) +// 5.5 (with input registers triggered by falling edge) +// 5.6 (with output registers triggered by falling edge) +// 5.7 (with input and output registers triggered by falling edge) +// 6. One [N/4]*[M/4]-bit MAC + One [N/4]*[M/4]-bit multiply +// 6.1 (combinational) +// 6.2 (with input registers triggered by rising edge) +// 6.3 (with output registers triggered by rising edge) +// 6.4 (with input and output registers triggered by rising edge) +// 6.5 (with input registers triggered by falling edge) +// 6.6 (with output registers triggered by falling edge) +// 6.7 (with input and output registers triggered by falling edge) +// 7. MSB parts [N/2+M/2] of four multipliers +// 7.1 (combinational) +// 7.2 (with input registers triggered by rising edge) +// 7.3 (with output registers triggered by rising edge) +// 7.4 (with input and output registers triggered by rising edge) +// 7.5 (with input registers triggered by falling edge) +// 7.6 (with output registers triggered by falling edge) +// 7.7 (with input and output registers triggered by falling edge) +// 8. 
One [N/4]*[M/4]-bit multiply + MSB parts [N/2+M/2] of four multipliers +// 8.1 (combinational) +// 8.2 (with input registers triggered by rising edge) +// 8.3 (with output registers triggered by rising edge) +// 8.4 (with input and output registers triggered by rising edge) +// 8.5 (with input registers triggered by falling edge) +// 8.6 (with output registers triggered by falling edge) +// 8.7 (with input and output registers triggered by falling edge) +// 9. One [N/4]*[M/4]-bit MAC + MSB parts [N/2+M/2] of four multipliers +// 9.1 (combinational) +// 9.2 (with input registers triggered by rising edge) +// 9.3 (with output registers triggered by rising edge) +// 9.4 (with input and output registers triggered by rising edge) +// 9.5 (with input registers triggered by falling edge) +// 9.6 (with output registers triggered by falling edge) +// 9.7 (with input and output registers triggered by falling edge) +// - In all the above modes, clock edges can be either positive or negative triggered +// Coder : Xifan Tang +//----------------------------------------------------- +`default_nettype wire + +module dsp (clk_i, rst_ni, a_i, b_i, d_i, out_o, mode_i, rst_acc, accsel, cas_g, overflow); + // Parameters that can pass through + parameter N_SIZE = 12; // Default parameter for N + parameter M_SIZE = 10; // Default parameter for M + // Local parameters + localparam A_WIDTH = 4 * N_SIZE; // Default parameter for a + localparam B_WIDTH = 4 * M_SIZE; // Default parameter for b + localparam C_WIDTH = N_SIZE + M_SIZE; // Default parameter for cin + localparam OUT_WIDTH = A_WIDTH / 2 + B_WIDTH / 2; // Default parameter for data output + + parameter P_SIZE = OUT_WIDTH; // Default parameter for previous d + + // Ensure that all the mode bit constants unique!!! 
+ localparam MODE_BIT_CLK = 0; // Mode bit that controls polarity of the clock signals (1: clk_i as is, 0: clk_i inverted) + localparam MODE_BIT_REGI_UPPER = 1; // Mode bit that controls the registering of upper part of the inputs + localparam MODE_BIT_REGI_LOWER = 2; // Mode bit that controls the registering of lower part of the inputs + localparam MODE_BIT_REGO_UPPER = 3; // Mode bit that controls the registering of upper part of the outputs + localparam MODE_BIT_REGO_LOWER = 4; // Mode bit that controls the registering of lower part of the outputs + localparam MODE_BIT_MAC_LSB = 5; // LSB of the mode bits that control the core computing units + localparam MODE_BIT_MAC_MSB = 8; // MSB of the mode bits that control the core computing units + localparam MODE_BIT_RST = 9; // Mode bit that controls the polarity of the reset signal (1: rst_ni inverted, 0: rst_ni as is) + localparam MODE_BIT_SIGN = 10; //Mode bit that controls valid of the sign bit + // localparam MODE_BIT_CARRY = 11; //Mode bit that controls valid of the carry bit + localparam MODE_MUL_INPUT_REG = 11; // Mode bit that controls the registering of the inputs of the multipliers + localparam MODE_MUL_OUTPUT_REG = 12; // Mode bit that controls the registering of the outputs of the multipliers + + localparam ADDER_REDUNDENT = 8; // Default parameter for adder redundancy + localparam ADD_ACC_WIDTH = OUT_WIDTH/2 + ADDER_REDUNDENT; // Default accumulating parameter for adder width + localparam ACC_OUT_WIDTH = OUT_WIDTH + 2*ADDER_REDUNDENT; + + // Ports + input clk_i; + input rst_ni; + input [0:A_WIDTH-1] a_i; + input [0:B_WIDTH-1] b_i; + input [0:ACC_OUT_WIDTH-1] d_i; + output [0:ACC_OUT_WIDTH-1] out_o; + input [0:12] mode_i; + output overflow; + // input cin; + // output cout; + input rst_acc; //For accumulate resettable + input accsel; // Accumulate or add new data + input cas_g; // Global cascade mode for top level dsp + + + wire clk_core; + wire clr; + assign clk_core = mode_i[MODE_BIT_CLK] ? clk_i : ~clk_i; + assign clr = mode_i[MODE_BIT_RST] ? 
~rst_ni : rst_ni; + + // Control logic for registering inputs and outputs (clr is used as an active-low asynchronous reset) + + wire [0:A_WIDTH-1] in_a; + wire [0:B_WIDTH-1] in_b; + wire [0:ACC_OUT_WIDTH-1] in_d; + reg [0:ACC_OUT_WIDTH-1] cas_out; // assigned in an always @(*) block, so it must be a variable (reg), not a net + + reg [0:A_WIDTH-1] a_i_reg; + reg [0:B_WIDTH-1] b_i_reg; + reg [0:ACC_OUT_WIDTH-1] d_i_reg; + reg [0:ACC_OUT_WIDTH-1] out_o_reg; + + always @(posedge clk_core or negedge clr) begin + if (clr == 1'b0) begin + a_i_reg <= 0; + b_i_reg <= 0; + d_i_reg <= 0; + out_o_reg <= 0; + end else begin + a_i_reg <= a_i; + b_i_reg <= b_i; + d_i_reg <= d_i; + out_o_reg <= cas_out; + end + end + + assign in_a[0:A_WIDTH/2-1] = mode_i[MODE_BIT_REGI_LOWER] ? a_i_reg[0:A_WIDTH/2-1] : a_i[0:A_WIDTH/2-1]; + assign in_a[A_WIDTH/2:A_WIDTH-1] = mode_i[MODE_BIT_REGI_UPPER] ? a_i_reg[A_WIDTH/2:A_WIDTH-1] : a_i[A_WIDTH/2:A_WIDTH-1]; + assign in_b[0:B_WIDTH/2-1] = mode_i[MODE_BIT_REGI_LOWER] ? b_i_reg[0:B_WIDTH/2-1] : b_i[0:B_WIDTH/2-1]; + assign in_b[B_WIDTH/2:B_WIDTH-1] = mode_i[MODE_BIT_REGI_UPPER] ? b_i_reg[B_WIDTH/2:B_WIDTH-1] : b_i[B_WIDTH/2:B_WIDTH-1]; + assign in_d[0:ACC_OUT_WIDTH/2-1] = mode_i[MODE_BIT_REGI_LOWER] ? d_i_reg[0:ACC_OUT_WIDTH/2-1] : d_i[0:ACC_OUT_WIDTH/2-1]; + assign in_d[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1] = mode_i[MODE_BIT_REGI_UPPER] ? d_i_reg[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1] : d_i[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1]; + assign out_o[0:ACC_OUT_WIDTH/2-1] = mode_i[MODE_BIT_REGO_LOWER] ? out_o_reg[0:ACC_OUT_WIDTH/2-1] : cas_out[0:ACC_OUT_WIDTH/2-1]; + assign out_o[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1] = mode_i[MODE_BIT_REGO_UPPER] ? 
out_o_reg[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1] : cas_out[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1]; + + // Control logic for registering inputs and outputs of the multipliers + + wire [0:A_WIDTH-1] mac_a; + wire [0:B_WIDTH-1] mac_b; + wire [0:ACC_OUT_WIDTH-1] mac_d; + reg [0:ACC_OUT_WIDTH-1] mac_out; // assigned in the always @(*) mode case block, so it must be a variable (reg) + + reg [0:A_WIDTH-1] in_a_reg; + reg [0:B_WIDTH-1] in_b_reg; + reg [0:ACC_OUT_WIDTH-1] in_d_reg; + + reg [0:ACC_OUT_WIDTH/2-1] mul_out_0; // mul_out_0..3 are assigned in the always @(*) mode case block, hence reg + reg [0:ACC_OUT_WIDTH/2-1] mul_out_1; + reg [0:ACC_OUT_WIDTH/2-1] mul_out_2; + reg [0:ACC_OUT_WIDTH/2-1] mul_out_3; + + reg [0:ACC_OUT_WIDTH/2-1] mul_out_0_reg; + reg [0:ACC_OUT_WIDTH/2-1] mul_out_1_reg; + reg [0:ACC_OUT_WIDTH/2-1] mul_out_2_reg; + reg [0:ACC_OUT_WIDTH/2-1] mul_out_3_reg; + + wire [0:ACC_OUT_WIDTH/2-1] q_o_0; + wire [0:ACC_OUT_WIDTH/2-1] q_o_1; + wire [0:ACC_OUT_WIDTH/2-1] q_o_2; + wire [0:ACC_OUT_WIDTH/2-1] q_o_3; + + always @(posedge clk_core or negedge clr) begin + if (clr == 1'b0) begin + in_a_reg <= 0; + in_b_reg <= 0; + in_d_reg <= 0; + mul_out_0_reg <= 0; + mul_out_1_reg <= 0; + mul_out_2_reg <= 0; + mul_out_3_reg <= 0; + end else begin + in_a_reg <= in_a; + in_b_reg <= in_b; + in_d_reg <= in_d; + mul_out_0_reg <= mul_out_0; + mul_out_1_reg <= mul_out_1; + mul_out_2_reg <= mul_out_2; + mul_out_3_reg <= mul_out_3; + end + end + + assign mac_a = mode_i[MODE_MUL_INPUT_REG] ? in_a_reg : in_a; + assign mac_b = mode_i[MODE_MUL_INPUT_REG] ? in_b_reg : in_b; + assign mac_d = mode_i[MODE_MUL_INPUT_REG] ? in_d_reg : in_d; + + assign q_o_0 = mode_i[MODE_MUL_OUTPUT_REG] ? mul_out_0_reg : mul_out_0; + assign q_o_1 = mode_i[MODE_MUL_OUTPUT_REG] ? mul_out_1_reg : mul_out_1; + assign q_o_2 = mode_i[MODE_MUL_OUTPUT_REG] ? mul_out_2_reg : mul_out_2; + assign q_o_3 = mode_i[MODE_MUL_OUTPUT_REG] ? 
mul_out_3_reg : mul_out_3; + + // Control logic around the core computing units + always @(*) begin + case (mode_i[MODE_BIT_MAC_LSB:MODE_BIT_MAC_MSB]) + 4'b0000: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1]; + mul_out_1 = mac_a[A_WIDTH/4:A_WIDTH/2-1] * mac_b[B_WIDTH/4:B_WIDTH/2-1] + q_o_0; + mul_out_2 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1] + q_o_1; + mul_out_3 = mac_a[A_WIDTH/4*3:A_WIDTH-1] * mac_b[B_WIDTH/4*3:B_WIDTH-1] + q_o_2; + mac_out = q_o_3; + end + 4'b0001: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1] + mac_d[0:ACC_OUT_WIDTH/2-1]; + mul_out_2 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1] + mac_d[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1]; + mul_out_3 = mac_a[A_WIDTH/4*3:A_WIDTH-1] * mac_b[B_WIDTH/4*3:B_WIDTH-1] + q_o_2; + mac_out = {q_o_0, q_o_3}; + end + 4'b0010: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1]; + mul_out_1 = mac_a[A_WIDTH/4:A_WIDTH/2-1] * mac_b[B_WIDTH/4:B_WIDTH/2-1] + q_o_0; + mul_out_3 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1]; + mac_out = {q_o_1, q_o_2}; + end + 4'b0011: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1] + out_o_reg[0:ACC_OUT_WIDTH/2-1]; + mul_out_1 = mac_a[A_WIDTH/4:A_WIDTH/2-1] * mac_b[B_WIDTH/4:B_WIDTH/2-1] + out_o_reg[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1]; + mac_out = {q_o_0, q_o_1}; + end + 4'b0100: begin + mac_out = mac_a[0:A_WIDTH/2-1] * mac_b[0:B_WIDTH/2-1]; + end + 4'b0101: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1] + mac_d[0:ACC_OUT_WIDTH/2-1]; + mul_out_1 = mac_a[A_WIDTH/4:A_WIDTH/2-1] * mac_b[B_WIDTH/4:B_WIDTH/2-1] + mac_d[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1]; + mac_out = {q_o_0, q_o_1}; + end + 4'b0110: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1]; + mul_out_1 = mac_a[A_WIDTH/4:A_WIDTH/2-1] * mac_b[B_WIDTH/4:B_WIDTH/2-1]; + mul_out_2 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1]; + mul_out_3 = mac_a[A_WIDTH/4*3:A_WIDTH-1] * 
mac_b[B_WIDTH/4*3:B_WIDTH-1]; + mac_out = {q_o_0[0:ACC_OUT_WIDTH/4-1], q_o_1[0:ACC_OUT_WIDTH/4-1], q_o_2[0:ACC_OUT_WIDTH/4-1], q_o_3[0:ACC_OUT_WIDTH/4-1]}; + end + 4'b1000: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1]; + mul_out_1 = mac_a[A_WIDTH/4:A_WIDTH/2-1] * mac_b[B_WIDTH/4:B_WIDTH/2-1]; + mac_out = {q_o_0, q_o_1}; + end + 4'b1001: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1] + mac_d[0:ACC_OUT_WIDTH/2-1]; + mul_out_1 = mac_a[A_WIDTH/4:A_WIDTH/2-1] * mac_b[B_WIDTH/4:B_WIDTH/2-1] + q_o_0; + mul_out_2 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1] + q_o_1; + mul_out_3 = mac_a[A_WIDTH/4*3:A_WIDTH-1] * mac_b[B_WIDTH/4*3:B_WIDTH-1] + q_o_2; + mac_out = {q_o_2, q_o_3}; + end + 4'b1010: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1]; + mul_out_1 = mac_a[A_WIDTH/4:A_WIDTH/2-1] * mac_b[B_WIDTH/4:B_WIDTH/2-1] + q_o_0; + mul_out_2 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1] + q_o_1; + mul_out_3 = mac_a[A_WIDTH/4*3:A_WIDTH-1] * mac_b[B_WIDTH/4*3:B_WIDTH-1] + q_o_2; + mac_out = {q_o_1, q_o_3}; + end + 4'b1011: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1]; + mul_out_1 = mac_a[A_WIDTH/4:A_WIDTH/2-1] * mac_b[B_WIDTH/4:B_WIDTH/2-1] + q_o_0; + mul_out_2 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1]; + mul_out_3 = mac_a[A_WIDTH/4*3:A_WIDTH-1] * mac_b[B_WIDTH/4*3:B_WIDTH-1] + q_o_2; + mac_out = {q_o_1, q_o_3}; + end + 4'b1101: begin + mul_out_2 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1] + mac_d[0:ACC_OUT_WIDTH/2-1]; + mul_out_3 = mac_a[A_WIDTH/4*3:A_WIDTH-1] * mac_b[B_WIDTH/4*3:B_WIDTH-1] + mac_d[ACC_OUT_WIDTH/2:ACC_OUT_WIDTH-1]; + mac_out = {q_o_2, q_o_3}; + end + 4'b1110: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1]; + mul_out_2 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1]; + mul_out_3 = mac_a[A_WIDTH/4*3:A_WIDTH-1] * mac_b[B_WIDTH/4*3:B_WIDTH-1]; + mac_out = {q_o_0, 
q_o_2[0:ACC_OUT_WIDTH/4-1], q_o_3[0:ACC_OUT_WIDTH/4-1]}; + end + default: begin + mul_out_0 = mac_a[0:A_WIDTH/4-1] * mac_b[0:B_WIDTH/4-1]; + mul_out_2 = mac_a[A_WIDTH/2:A_WIDTH/4*3-1] * mac_b[B_WIDTH/2:B_WIDTH/4*3-1]; + mul_out_3 = mac_a[A_WIDTH/4*3:A_WIDTH-1] * mac_b[B_WIDTH/4*3:B_WIDTH-1]; + mac_out = {q_o_0, q_o_2[0:ACC_OUT_WIDTH/4-1], q_o_3[0:ACC_OUT_WIDTH/4-1]}; + end + endcase + end + + always @(*) begin + cas_out = cas_g ? mac_out + mac_d : mac_out; + end + +endmodule \ No newline at end of file diff --git a/techlibs/rapidflex/alkaidT/dff_map.v b/techlibs/rapidflex/alkaidT/dff_map.v new file mode 100644 index 000000000..f6d01a4ed --- /dev/null +++ b/techlibs/rapidflex/alkaidT/dff_map.v @@ -0,0 +1,177 @@ +// Rising edge DFF +module \$_DFF_P_ (D, C, Q); + input D; + input C; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dff _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C)); +endmodule + +// Rising edge DFF with async active-high reset +module \$_DFF_PP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Rising edge DFF with async active-high set +module \$_DFF_PP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffs _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Rising edge DFF with async active-low reset +module \$_DFF_PN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Rising edge DFF with async active-low set +module \$_DFF_PN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule + +// Rising edge DFF with sync active-high reset +module \$_SDFF_PP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter 
_TECHMAP_WIREINIT_Q_ = 1'bx; + sdffr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Rising edge DFF with sync active-high set +module \$_SDFF_PP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffs _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Rising edge DFF with sync active-low reset +module \$_SDFF_PN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Rising edge DFF with sync active-low set +module \$_SDFF_PN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule + +// Falling edge DFF +module \$_DFF_N_ (D, C, Q); + input D; + input C; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C)); +endmodule + +// Falling edge DFF with async active-high reset +module \$_DFF_NP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffnr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Falling edge DFF with async active-high set +module \$_DFF_NP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffns _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Falling edge DFF with async active-low reset +module \$_DFF_NN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffnrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Falling edge DFF with async active-low set +module \$_DFF_NN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + dffnsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule + +// Falling edge DFF with sync active-high reset 
+module \$_SDFF_NP0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffnr _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .R(R)); +endmodule + +// Falling edge DFF with sync active-high set +module \$_SDFF_NP1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffns _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .S(R)); +endmodule + +// Falling edge DFF with sync active-low reset +module \$_SDFF_NN0_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffnrn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .RN(R)); +endmodule + +// Falling edge DFF with sync active-low set +module \$_SDFF_NN1_ (D, C, R, Q); + input D; + input C; + input R; + output Q; + parameter _TECHMAP_WIREINIT_Q_ = 1'bx; + sdffnsn _TECHMAP_REPLACE_ (.Q(Q), .D(D), .C(C), .SN(R)); +endmodule diff --git a/techlibs/rapidflex/alkaidT/dsp_map.v b/techlibs/rapidflex/alkaidT/dsp_map.v new file mode 100644 index 000000000..115649efc --- /dev/null +++ b/techlibs/rapidflex/alkaidT/dsp_map.v @@ -0,0 +1,35 @@ +module mult_24x20_map ( + input [0:23] A, + input [0:19] B, + output [0:43] Y +); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 0; + parameter B_WIDTH = 0; + parameter Y_WIDTH = 0; + + mult24x20 #() _TECHMAP_REPLACE_ ( + .A (A), + .B (B), + .Y (Y) ); + +endmodule + +module mult_12x10_map ( + input [0:11] A, + input [0:9] B, + output [0:21] Y +); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 0; + parameter B_WIDTH = 0; + parameter Y_WIDTH = 0; + + mult12x10 #() _TECHMAP_REPLACE_ ( + .A (A), + .B (B), + .Y (Y) ); + +endmodule diff --git a/techlibs/rapidflex/common/cells_sim.v b/techlibs/rapidflex/common/cells_sim.v new file mode 100644 index 000000000..95bc86be4 --- /dev/null +++ b/techlibs/rapidflex/common/cells_sim.v @@ -0,0 +1,53 @@ +// Copyright 2020-2022 F4PGA Authors +// +// Licensed under the Apache License, 
Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + + +module inv ( + output Q, + input A +); + assign Q = A ? 0 : 1; +endmodule + +module buff ( + output Q, + input A +); + assign Q = A; +endmodule + +module logic_0 ( + output a +); + assign a = 0; +endmodule + +module logic_1 ( + output a +); + assign a = 1; +endmodule + +(* blackbox *) +module gclkbuff ( + input A, + output Z +); + + assign Z = A; + +endmodule + diff --git a/techlibs/rapidflex/src/clock_buffer_cmd.cc b/techlibs/rapidflex/src/clock_buffer_cmd.cc new file mode 100644 index 000000000..5f642fb70 --- /dev/null +++ b/techlibs/rapidflex/src/clock_buffer_cmd.cc @@ -0,0 +1,581 @@ +#include +#include +#include +#include + +#include "backends/rtlil/rtlil_backend.h" +#include "kernel/celltypes.h" +#include "kernel/log.h" +#include "kernel/register.h" +#include "kernel/rtlil.h" +#include "kernel/sigtools.h" +#include "kernel/yosys.h" +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +struct InsertClockBuffer : public Pass { + InsertClockBuffer() + : Pass("rapidflex_insert_clock_buffer", + "This command is to insert clock buffer into the design") {} + + /*utility function used by insert_ckbuff; copied from blif.cc*/ + const std::string str(RTLIL::IdString id) { + std::string str = RTLIL::unescape_id(id); + for (size_t i = 0; i < str.size(); i++) + if (str[i] == '#' || str[i] == '=' || str[i] == '<' || str[i] == '>') + str[i] = '?'; + return str; + } + + /*utility function used by insert_ckbuff; copied 
from blif.cc*/ + const std::string str(RTLIL::SigBit sig) { + if (sig.wire == NULL) { + return "null"; + } + + std::string str = RTLIL::unescape_id(sig.wire->name); + for (size_t i = 0; i < str.size(); i++) + if (str[i] == '#' || str[i] == '=' || str[i] == '<' || str[i] == '>') + str[i] = '?'; + + if (sig.wire->width != 1) + str += + stringf("[%d]", sig.wire->upto ? sig.wire->start_offset + + sig.wire->width - sig.offset - 1 + : sig.wire->start_offset + sig.offset); + + return str; + } + + // eval_lut: Evaluate the output of a single LUT based on given input values + // Parameters: + // lut - Pointer to an RTLIL $lut cell + // inputs - map specifying the values of input signals + // (can provide only a subset of inputs) + // sigmap - SigMap, used to get the actual driving signal for each input + // Returns: + // Boolean output of the LUT (true/false) + bool eval_lut(const RTLIL::Cell *lut, std::map inputs, + const SigMap &sigmap) { + // Get the input vector of the LUT and map each signal to its actual driver + SigSpec lut_inputs = sigmap(lut->getPort(ID::A)); + + // Number of LUT inputs + int width = lut->getParam(ID::WIDTH).as_int(); + + // LUT truth table storing output for each input combination + Const lut_table = lut->getParam(ID::LUT); + + // Index into the LUT truth table + int lut_index = 0; + + // Iterate through each input bit + for (int i = 0; i < width; i++) { + // Get the i-th input signal and map it to its final driver + SigBit bit = sigmap(lut_inputs[i]); + + // Boolean value of the current input + bool value; + + // If the input value is provided by the user, use it + if (inputs.count(bit)) + value = inputs[bit]; + // Otherwise, use the signal's default or constant value + else + value = SigSpec(bit).as_bool(); + + // Accumulate this bit into the LUT index + // '<< i' places the value at the correct bit position + lut_index |= (value << i); + } + + // Lookup the LUT output for the computed index and return as bool + return 
lut_table.extract(lut_index).as_bool(); + } + + /* This function rewires subckt such as flip-flops (FFs) and pcounter that use + internally generated signals as their clock inputs. We can perform this + rewiring before connecting the new clock buffer (ckbuf), because the new wires + are added to the top module—not to the ckbuf itself. Therefore, once an + internally generated clock is detected, a new wire is created in the top + module and directly connected to the affected subckt. This rewiring process is + independent of the addition of the ckbuf. + Parameters: module - module that owns the cell and receives the new wire; cell - cell whose port is redirected; id_name - name of the clock/reset port to redirect; C_input - printable name of the signal currently driving that port. The port ends up connected to the 1-bit wire "<C_input>_ckbuf". */ + void rewire_subckt(RTLIL::Module *module, RTLIL::Cell *cell, + RTLIL::IdString id_name, std::string C_input) { + std::string C_output = C_input + "_ckbuf"; + /* Create the buffered net on first use. module->wire() returns nullptr for an unknown name, and a null wire must never be handed to setPort(). Several cells may share the same internal signal, so an existing wire is reused. */ + RTLIL::Wire *output_wire = module->wire("\\" + C_output); + if (output_wire == nullptr) { + output_wire = module->addWire("\\" + C_output, 1); + } + /* connect new ckbuf to the subckt */ + cell->unsetPort(id_name); + cell->setPort(id_name, output_wire); + cell->fixup_parameters(); + } + + /* This is a general-purpose function that works for all subcircuits (subckt). + It detects clock and reset signals and determines whether they are global. + If any global signal is detected, the function will rewire the subcircuit + accordingly. + */ + void process_cell(RTLIL::Module *module, RTLIL::Cell *cell, + std::map inputs, + std::vector clk_indicator_group, + std::vector reset_indicator_group, + std::set &ckbuf_info, + std::map &ckbuf_type) { + RTLIL::IdString clk_indicator; + for (auto clk : clk_indicator_group) { + if (cell->hasPort(clk)) { + clk_indicator = clk; + break; + } + } + + // dff could have reset signal with keywords R or RN. 
We shall determine + // which is the port name for current cell + RTLIL::IdString reset_indicator; + for (auto rst : reset_indicator_group) { + if (cell->hasPort(rst)) { + reset_indicator = rst; + break; + } + } + + if (reset_indicator.empty() && clk_indicator.empty()) { + return; + } + + bool global_clock = false; + bool global_reset = false; + for (auto &it : inputs) { + RTLIL::Wire *wire = it.second; + for (int i = 0; i < wire->width; i++) { + if (cell->hasPort(clk_indicator)) { + if (cell->getPort(clk_indicator) == RTLIL::SigSpec(wire, i)) { + global_clock = true; + continue; /*if the signal is global clock, then there is no need + to check whether it is global reset or not*/ + } + } + + if (cell->hasPort(reset_indicator)) { + if (cell->getPort(reset_indicator) == RTLIL::SigSpec(wire, i)) { + global_reset = true; + } + } + } + } + /*grab the information of the internally generated clocks*/ + if (!global_clock && cell->hasPort(clk_indicator)) { + std::string C_input = str(cell->getPort(clk_indicator)).c_str(); + ckbuf_info.insert(C_input); + ckbuf_type[C_input] = "clock"; + rewire_subckt(module, cell, clk_indicator, C_input); + } + /*grab the information of the internally generated resets*/ + if (!global_reset && cell->hasPort(reset_indicator)) { + std::string C_input = str(cell->getPort(reset_indicator)).c_str(); + ckbuf_info.insert(C_input); + ckbuf_type[C_input] = "reset"; + rewire_subckt(module, cell, reset_indicator, C_input); + } + } + + /* This function examines sequential logic and returns a set of strings + representing internally generated signals. 
When such a signal is found, it + invokes rewire_subckt.*/ + std::set + find_internal_clk_r_signal(RTLIL::Module *module, + std::map &ckbuf_type) { + std::set ckbuf_info; + /*get input ports of the top module*/ + std::map inputs, outputs; + for (auto wire : module->wires()) { + if (wire->port_input) + inputs[wire->port_id] = wire; + } + for (auto cell : module->cells()) { + /*Bypass yosys internal cells $lut which doesn't have clock signal */ + if ((cell->type) == ID($lut)) { + continue; + } else { + /*check whether the C port is internally generated clock signal or not*/ + std::vector clk_indicator_group = {ID(C), ID(clk_i)}; + std::vector reset_indicator_group = {ID(RN), ID(R), + ID(rst_i)}; + process_cell(module, cell, inputs, clk_indicator_group, + reset_indicator_group, ckbuf_info, ckbuf_type); + } + } + return ckbuf_info; + } + + /* insert .subckt ckbuf for each internally generated clock or reset signal */ + void insert_ckbuf(RTLIL::Module *module, + const std::set &ckbuf_info) { + for (const std::string &ckbuf : ckbuf_info) { + RTLIL::Cell *ckbuf_cell = + module->addCell(stringf("$ckbuf$%s", ckbuf.c_str()), "\\ckbuf"); + ckbuf_cell->setPort("\\in", module->wire("\\" + ckbuf)); + ckbuf_cell->setPort("\\out", module->wire("\\" + ckbuf + "_ckbuf")); + ckbuf_cell->set_src_attribute(""); + } + } + + static std::string xml_escape(const std::string &s) { + std::string out; + out.reserve(s.size()); + for (char c : s) { + switch (c) { + case '&': + out += "&"; + break; + case '"': + out += """; + break; + case '<': + out += "<"; + break; + case '>': + out += ">"; + break; + default: + out += c; + break; + } + } + return out; + } + + /*This function is for generating cell map file*/ + void generate_cell_map(const char *fname, + const std::set &ckbuf_info, + const std::map &ckbuf_type) { + FILE *f = fopen(fname, "w"); + if (f == nullptr) + log_error("Can't open file `%s` for writing.\n", fname); + + fprintf(f, "\n"); + fprintf(f, "\n"); + for (const std::string &ckbuf 
: ckbuf_info) { + std::string in = ckbuf; + std::string out = ckbuf + "_ckbuf"; + std::string type; + + auto it = ckbuf_type.find(ckbuf); + if (it != ckbuf_type.end()) { + type = it->second; + } else { + log_error("No port type defined for ckbuf %s \n", out.c_str()); + } + + fprintf(f, " \n", + xml_escape(in).c_str(), xml_escape(out).c_str(), + xml_escape(type).c_str()); + } + fprintf(f, "\n"); + fclose(f); + log("cell map is stored in file %s \n", fname); + } + void rewire_lut_primitive(RTLIL::Cell *cell, + const std::vector &sig, + const RTLIL::Const &new_lut, int new_width) { + cell->unsetPort(ID::A); + cell->setPort(ID::A, sig); + + // Update LUT parameters + cell->parameters[ID::LUT] = new_lut; // Set new truth table + cell->parameters[ID::WIDTH] = new_width; + cell->fixup_parameters(); + } + + void process_cell_rewire_lut( + RTLIL::Module *module, RTLIL::Cell *cell, + const std::map> + &internal_signal_io_map, + const std::map &internal_signal_lut, + const Yosys::SigMap &sigmap) { + std::set sig; + /* sig and ordered_sig contain the same signals. + * sig is used to store the rewired signals and avoid duplicates, + * while ordered_sig preserves the signal order so it stays aligned + * with the truth table information. 
+ */ + std::vector ordered_sig; + std::map> sig_replace_port_map; + std::map sig_replace_cell_map; + std::set temp_sig; + std::set lut_inputs = cell->getPort(ID::A).to_sigbit_set(); + std::vector lut_inputs_vector = + cell->getPort(ID::A).to_sigbit_vector(); + std::set common_port_all; + std::vector replaced_boolean_value; + std::vector remained_boolean_value; + bool rewire_required = false; + std::vector old_lut = + cell->parameters.at(ID::LUT).bits(); // Original LUT truth table + for (const auto &[internal_signal_output, internal_signal_input] : + internal_signal_io_map) { + std::set common_port; + std::string mapped_ckbuf_name; + if (std::includes(lut_inputs.begin(), lut_inputs.end(), + internal_signal_input.begin(), + internal_signal_input.end())) { + std::set_intersection(lut_inputs.begin(), lut_inputs.end(), + internal_signal_input.begin(), + internal_signal_input.end(), + std::inserter(common_port, common_port.end())); + std::string C_output = internal_signal_output + "_ckbuf"; + mapped_ckbuf_name = internal_signal_output; + if (!module->wire("\\" + C_output)) { + log("wire %s is not defined", C_output.c_str()); + } + auto replaced_sig = module->wire("\\" + C_output); + if (sig.find(replaced_sig) != sig.end()) { + /* element has already been recorded*/ + log("signal %s has been replaced!\n", C_output.c_str()); + continue; + } + sig.insert(replaced_sig); + if (std::find(ordered_sig.begin(), ordered_sig.end(), replaced_sig) == + ordered_sig.end()) { + ordered_sig.push_back(replaced_sig); + } + sig_replace_port_map[replaced_sig] = common_port; + auto src_cell = internal_signal_lut.at(mapped_ckbuf_name); + sig_replace_cell_map[replaced_sig] = src_cell; + rewire_required = true; + common_port_all.insert(common_port.begin(), common_port.end()); + } + } + + if (rewire_required) { + // For example, the following lut will be rewired + // .names a b internal_clock1 + // .names a b c out + // as + // .names internal_clock1_ckbuf c out + // where internal_clock1 is the 
original internal signals and + // internal_clock1_ckbuf is the outputs of ckbuf. (.subckt ckbuf + // internal_clock1 internal_clock1_ckbuf) + // we need to update the truth table of .names internal_clock1_ckbuf + // c out concurrently + /* The first step is to get the original truth table, i.e. the truth + * table of .names a b c d out */ + int bit_width = lut_inputs_vector.size(); + std::vector> original_truth_table; + std::map sig_index_map; + + for (size_t index = 0; index < lut_inputs_vector.size(); index++) { + sig_index_map[lut_inputs_vector[index]] = index; + } + + /* collect every input combination for which the original LUT outputs 1 */ + for (int index = 0; index < (int)old_lut.size(); index++) { + if (old_lut[index] == RTLIL::State::S1) { + std::vector bits(bit_width, false); + for (int b = 0; b < bit_width; b++) { + bits[b] = (index >> b) & 1; + } + original_truth_table.push_back(bits); + } + } + + /* get remained sigs */ + std::set_difference(lut_inputs.begin(), lut_inputs.end(), + common_port_all.begin(), common_port_all.end(), + std::inserter(temp_sig, temp_sig.end())); + + std::vector> modified_bit; + + for (const auto &line : original_truth_table) { + std::vector remained_bit; + /* get sigs that can be replaced by ckbuf and evaluate its truth + * table values and updates the final truth table. + * Iterate ordered_sig, not the set sig: the rewired A port is connected in ordered_sig order, and the set's iteration order is not guaranteed to match it, so walking the set could pair a truth-table column with the wrong input. At this point ordered_sig holds exactly the replaced signals. */ + for (auto replaced_sig : ordered_sig) { + const RTLIL::Cell *cell_temp = + module->cell(sig_replace_cell_map[replaced_sig]); + auto ports = sig_replace_port_map[replaced_sig]; + + std::map common_port_map; + for (auto port : ports) { + common_port_map[port] = line[sig_index_map[port]]; + } + + bool changed_bit = eval_lut(cell_temp, common_port_map, sigmap); + remained_bit.push_back(changed_bit); + } + /* get remained sigs and use previous truth table values*/ + for (auto port : temp_sig) { + remained_bit.push_back(line[sig_index_map[port]]); + } + + modified_bit.push_back(remained_bit); + } + /* get the final truth table of the rewired lut such as .names + * internal_clock1_ckbuf c out */ + std::vector modified_bit_int; + for (const auto &line 
: modified_bit) { + int value = 0; + for (size_t i = 0; i < line.size(); i++) { + if (line[i]) { + value |= (1 << i); + } + } + modified_bit_int.push_back(value); + } + + sig.insert(temp_sig.begin(), temp_sig.end()); + ordered_sig.insert(ordered_sig.end(), temp_sig.begin(), temp_sig.end()); + + int new_width = sig.size(); + int num_entries = 1 << new_width; + RTLIL::Const new_lut(num_entries); + /*initialize lut value to 0*/ + for (int i = 0; i < num_entries; i++) { + new_lut.bits()[i] = RTLIL::State::S0; + } + /* assign final truth table's info to current cell */ + for (int i : modified_bit_int) { + new_lut.bits()[i] = RTLIL::State::S1; + } + + rewire_lut_primitive(cell, ordered_sig, new_lut, new_width); + module->fixup_ports(); + std::set lut_outputs = + cell->getPort(ID::Y).to_sigbit_set(); + } + } + /* This function rewires luts which have internally generated signals as its + * input*/ + void rewire_luts(RTLIL::Module *module, + const std::set &ckbuf_info) { + /* find the lut that has internally generated clock as an output and get its + * io map */ + Yosys::SigMap sigmap(module); + std::map> internal_signal_io_map; + std::map internal_signal_lut; + std::set flattened_io_map; + /*the first for loop constructs the map between internall signal name and + its corresponding fan-ins For example, consider the following netlist: + .names a b internal_clock1 + .names c d internal_clock2 + The internal_signal_io_map stores key-value pairs from all luts like this: + [internal_clock1, {a, b}], [internal_clock2, {c, d}]. + */ + for (const auto cell : module->cells()) { + if ((cell->type) == ID($lut)) { + auto &inputs = cell->getPort(ID::A); + std::string output = str(cell->getPort(ID::Y)); + auto width = cell->parameters.at(ID::WIDTH).as_int(); + log_assert(inputs.size() == width); + if (ckbuf_info.find(output) != ckbuf_info.end()) { + /* We assume that all LUT cells are explicitly named before the + current pass. 
Anonymous cells indicate an unexpected state after + techmapping and are treated as a fatal error. */ + if (cell->name.empty()) { + log_error("cell->name is empty for output=%s\n", output.c_str()); + } + auto io_set = inputs.to_sigbit_set(); + internal_signal_io_map[output] = io_set; + internal_signal_lut[output] = cell->name.c_str(); + flattened_io_map.insert(internal_signal_io_map[output].begin(), + internal_signal_io_map[output].end()); + continue; + } + } + } + + /* This for loop does two things: + 1. detect logic that can be replaced by internal signals + For example, consider the following netlist: + .names a b internal_clock1 + .names c d internal_clock2 + The internal_signal_io_map stores key-value pairs from all luts like + this: [internal_clock1, {a, b}], [internal_clock2, {c, d}]. When we + encounter a netlist like: .names a b c d out we can replace it with .names + internal_clock1 internal_clock2 out + + 2. rewire luts + For example, the following lut will be rewired + .names internal_clock1 internal_clock2 out + as + .names internal_clock1_ckbuf internal_clock2_ckbuf out_ckbuf + where internal_clock1/2 are the original internal signals and + internal_clock1_ckbuf/2_ckbuf are the outputs of ckbuf. 
(.subckt ckbuf + internal_clock1 internal_clock1_ckbuf) + */ + for (auto cell : module->cells()) { + if ((cell->type) == ID($lut)) { + std::string output = str(cell->getPort(ID::Y)); + if (ckbuf_info.find(output) != ckbuf_info.end()) { + continue; /*by pass the luts that generate internal clk/reset */ + } else { + process_cell_rewire_lut(module, cell, internal_signal_io_map, + internal_signal_lut, sigmap); + /*replace internal clk/reset with clk/reset_buf signal */ + } + } + } + } + + void execute(std::vector args, RTLIL::Design *design) override { + log("Arguments to the command rapidflex_insert_clock_buffer:\n"); + std::string top_module_name; + std::string cell_map_file; + for (size_t i = 0; i < args.size(); i++) { + log(" %s\n", args[i].c_str()); + + if (args[i] == "-top" && i + 1 < args.size()) { + top_module_name = args[i + 1]; + } else if (args[i] == "-cell_map_file" && i + 1 < args.size()) { + cell_map_file = args[i + 1]; + } + } + + if (cell_map_file.empty()) { + cell_map_file = "cell_map.xml"; + } + log("cell map location is %s \n", cell_map_file.c_str()); + + /*if top_module_name is empty, get it from design*/ + if (top_module_name.empty()) + for (auto module : design->modules()) + if (module->get_bool_attribute(ID::top)) + top_module_name = module->name.str(); + + if (top_module_name.empty()) { + log_error("No top module detected. 
Insert clock buffer failed."); + } else { + log("Top module in current design: %s \n", top_module_name.c_str()); + } + + /*Insert clock buffer into the top module*/ + design->sort(); + for (auto module : design->modules()) { + /*only insert buffer to top module*/ + if (module->name == RTLIL::escape_id(top_module_name)) { + std::map ckbuf_type; + std::set ckbuf_info = + find_internal_clk_r_signal(module, ckbuf_type); + /*insert ckbuf and rewire dff */ + insert_ckbuf(module, ckbuf_info); + + module->fixup_ports(); + /*rewire luts */ + rewire_luts(module, ckbuf_info); + + module->fixup_ports(); + /* print out cells */ + if (!ckbuf_info.empty()) { + generate_cell_map(cell_map_file.c_str(), ckbuf_info, ckbuf_type); + } else { + log("Ckbuf info is empty. No cell map file will be generated! \n"); + } + break; + } + } + design->check(); + } +} rapidflex_insert_clock_buffer; + +PRIVATE_NAMESPACE_END diff --git a/techlibs/rapidflex/src/rf_dsp_mad.cc b/techlibs/rapidflex/src/rf_dsp_mad.cc new file mode 100644 index 000000000..7f943eadb --- /dev/null +++ b/techlibs/rapidflex/src/rf_dsp_mad.cc @@ -0,0 +1,192 @@ +#include "kernel/sigtools.h" +#include "kernel/yosys.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +#include "rf_dsp_mad_pm.h" + +static void create_rf_mad_dsp(rf_dsp_mad_pm &pm) { + auto &st = pm.st_rf_dsp_mad; + + // Reject if multiplier drives anything else than $add + if (st.mul_nusers > 2) { + return; + } + + // Get port widths + size_t a_width = GetSize(st.mul->getPort(ID(A))); + size_t b_width = GetSize(st.mul->getPort(ID(B))); + size_t c_width = GetSize(st.add->getPort(ID(A))); + if (st.add_ba == ID(B)) { + c_width = GetSize(st.add->getPort(ID(B))); + } + size_t z_width = GetSize(st.add->getPort(ID(Y))); + + size_t min_width = std::min(a_width, b_width); + size_t max_width = std::max(a_width, b_width); + + // Signed / unsigned + bool a_signed = st.mul->getParam(ID(A_SIGNED)).as_bool(); + bool b_signed = st.mul->getParam(ID(B_SIGNED)).as_bool(); + bool 
c_signed = st.add->getParam(ID(A_SIGNED)).as_bool(); + if (st.add_ba == ID(B)) { + c_signed = st.add->getParam(ID(B_SIGNED)).as_bool(); + } + + // Determine DSP type or discard if too narrow / wide + RTLIL::IdString type; + size_t tgt_a_width; + size_t tgt_b_width; + size_t tgt_c_width; + size_t tgt_z_width; + + string cell_base_name = "mad"; + string cell_size_name = ""; + string cell_cfg_name = ""; + string cell_full_name = ""; + + /* The wider multiplier operand is routed to the A port and the narrower one to the B port (see the sig_a / sig_b selection below), so max_width must fit the A width and min_width must fit the B width of the chosen cell. */ + if (min_width <= 2 && max_width <= 2 && z_width <= 4) { + /* Too narrow */ + return; + } else if (max_width <= 12 && min_width <= 10 && z_width <= 22) { + cell_size_name = "12x10x22"; + tgt_a_width = 12; + tgt_b_width = 10; + tgt_c_width = 22; + tgt_z_width = 22; + } else if (max_width <= 24 && min_width <= 20 && z_width <= 44) { + cell_size_name = "24x20x44"; + tgt_a_width = 24; + tgt_b_width = 20; + tgt_c_width = 44; + tgt_z_width = 44; + } else { + /* Too wide */ + return; + } + + cell_full_name = cell_base_name + cell_size_name + cell_cfg_name; + + type = RTLIL::escape_id(cell_full_name); + log("Inferring MAD %zux%zu+%zu->%zu as %s from:\n", a_width, b_width, c_width, + z_width, RTLIL::unescape_id(type).c_str()); + + for (auto cell : {st.mul, st.add}) { + if (cell != nullptr) { + log(" %s (%s)\n", RTLIL::unescape_id(cell->name).c_str(), + RTLIL::unescape_id(cell->type).c_str()); + } + } + + // Build the DSP cell name + std::string name; + name += RTLIL::unescape_id(st.mul->name) + "_"; + name += RTLIL::unescape_id(st.add->name) + "_"; + + // Add the DSP cell + RTLIL::Cell *cell = pm.module->addCell(RTLIL::escape_id(name), type); + + // Set attributes + cell->set_bool_attribute(RTLIL::escape_id("is_inferred"), true); + + // Get input/output data signals + RTLIL::SigSpec sig_a; + RTLIL::SigSpec sig_b; + RTLIL::SigSpec sig_c; + RTLIL::SigSpec sig_z; + + if (a_width >= b_width) { + sig_a = st.mul->getPort(ID(A)); + sig_b = st.mul->getPort(ID(B)); + } else { + sig_a = st.mul->getPort(ID(B)); + sig_b = st.mul->getPort(ID(A)); + } + + sig_c = 
st.add->getPort(ID(A)); + if (st.add_ba == ID(B)) { + sig_c = st.add->getPort(ID(B)); + } + sig_z = st.add->getPort(ID(Y)); + + // Connect input data ports, sign extend / pad with zeros + sig_a.extend_u0(tgt_a_width, a_signed); + sig_b.extend_u0(tgt_b_width, b_signed); + sig_c.extend_u0(tgt_c_width, c_signed); + cell->setPort(RTLIL::escape_id("A0"), sig_a); + cell->setPort(RTLIL::escape_id("B0"), sig_b); + + // Connect input data port, pad if needed + if ((size_t)GetSize(sig_c) < tgt_c_width) { + auto *wire = pm.module->addWire(NEW_ID, tgt_c_width - GetSize(sig_c)); + sig_c.append(wire); + } + cell->setPort(RTLIL::escape_id("C0"), sig_c); + + // Connect output data port, pad if needed + if ((size_t)GetSize(sig_z) < tgt_z_width) { + auto *wire = pm.module->addWire(NEW_ID, tgt_z_width - GetSize(sig_z)); + sig_z.append(wire); + } + cell->setPort(RTLIL::escape_id("Y"), sig_z); + + bool subtract = (st.add->type == RTLIL::escape_id("$sub")); + if (subtract) { + cell->setPort(RTLIL::escape_id("subtract_i"), + RTLIL::SigSpec(subtract ? 
RTLIL::S1 : RTLIL::S0)); + } + + // Mark the cells for removal + pm.autoremove(st.mul); + pm.autoremove(st.add); +} + +struct RfDspMacc : public Pass { + // Local variables + bool show_help; + + RfDspMacc() + : Pass("rf_dsp_mad", "Extract multiply-add and multiply-subtract " + "operators and map to dedicated DSPs") {} + + void help() override { + log("\n"); + log(" rf_dsp_mad [options] [selection]\n"); + log("\n"); + log(" Extract multiply-add and multiply-subtract operators and map to " + "dedicated DSPs\n"); + log("\n"); + log(" -help: show help desk\n"); + log("\n"); + } + + void clear_flags() override { show_help = false; } + + void execute(std::vector a_Args, + RTLIL::Design *a_Design) override { + log_header(a_Design, "Executing RF_DSP_MAD pass.\n"); + + size_t argidx; + for (argidx = 1; argidx < a_Args.size(); argidx++) { + if (a_Args[argidx] == "-help") { + show_help = true; + continue; + } + break; + } + extra_args(a_Args, argidx, a_Design); + if (show_help) { + help(); + return; + } + + for (auto module : a_Design->selected_modules()) { + rf_dsp_mad_pm(module, module->selected_cells()) + .run_rf_dsp_mad(create_rf_mad_dsp); + } + } + +} RfDspMad; + +PRIVATE_NAMESPACE_END diff --git a/techlibs/rapidflex/src/rf_dsp_mad.pmg b/techlibs/rapidflex/src/rf_dsp_mad.pmg new file mode 100644 index 000000000..fa3a75dd3 --- /dev/null +++ b/techlibs/rapidflex/src/rf_dsp_mad.pmg @@ -0,0 +1,26 @@ +pattern rf_dsp_mad + +state add_ba + +state mul_nusers +state add_nusers + +match mul + select mul->type.in($mul) + select nusers(port(mul, \Y)) <= 3 + set mul_nusers nusers(port(mul, \Y)) +endmatch + +match add + select add->type.in($add, $sub) + choice AB {\A, \B} + define BA (AB == \A ? 
\B : \A) + index port(add, AB) === port(mul, \Y) + select nusers(port(add, \Y)) <= 3 + set add_nusers nusers(port(add, \Y)) + set add_ba BA +endmatch + +code + accept; +endcode diff --git a/techlibs/rapidflex/src/rf_new_dsp.cc b/techlibs/rapidflex/src/rf_new_dsp.cc new file mode 100644 index 000000000..50d11ec05 --- /dev/null +++ b/techlibs/rapidflex/src/rf_new_dsp.cc @@ -0,0 +1,441 @@ +#include "kernel/sigtools.h" +#include "kernel/yosys.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +#include "rf_new_dsp_pm.h" + +void swapinput(RTLIL::SigSpec &sigA, RTLIL::SigSpec &sigB) { + if (GetSize(sigA) < GetSize(sigB)) { + RTLIL::SigSpec sigC = sigB; + sigB = sigA; + sigA = sigC; + } +} + +void rf_new_dsp(rf_new_dsp_pm &pm) { + auto &st = pm.st_rf_new_dsp; + + log("mul1: %s\n", log_id(st.mul1, "--")); + log("mul2: %s\n", log_id(st.mul2, "--")); + log("mul3: %s\n", log_id(st.mul3, "--")); + log("mul4: %s\n", log_id(st.mul4, "--")); + log("postAdd1: %s\n", log_id(st.postAdd1, "--")); + log("postAdd2: %s\n", log_id(st.postAdd2, "--")); + log("postAdd3: %s\n", log_id(st.postAdd3, "--")); + log("postAdd4: %s\n", log_id(st.postAdd4, "--")); + + RTLIL::SigSpec sigA, sigB, sigD, sigY; + + // mode + string mode; + if (st.level == 4) { + if (st.dinput) + mode += + "1001"; // 4-level mac with d input: d + a1*b1 + a2*b2 + a3*b3 + a4*b4 + else + mode += + "0000"; // 4-level mac without d input: a1*b1 + a2*b2 + a3*b3 + a4*b4 + } + if (st.level == 3) + mode += "1001"; // 3-level mac with d input: d + a1*b1 + a2*b2 + a3*b3 + if (st.level == 2) { + if (st.dinput) + mode += "0001"; // 2-level mac with d input: d + a1*b1 + a2*b2 + else + mode += "0010"; // 2-level mac without d input: a1*b1 + a2*b2 + } + if (st.level == 1) { + if (st.dinput) + mode += "0101"; // 1-level mac with d input: d + a1*b1 + else + return; + } + + // input size + int n_size = 0; + int m_size = 0; + int d_size = 0; + + string cell_base_name = "mad"; + string cell_size_name = ""; + string cell_cfg_name = ""; + 
string cell_full_name = ""; + + if (st.mul1) { + swapinput(st.sigA1, st.sigB1); + n_size = n_size > GetSize(st.sigA1) ? n_size : GetSize(st.sigA1); + m_size = m_size > GetSize(st.sigB1) ? m_size : GetSize(st.sigB1); + } + + if (st.mul2) { + swapinput(st.sigA2, st.sigB2); + n_size = n_size > GetSize(st.sigA2) ? n_size : GetSize(st.sigA2); + m_size = m_size > GetSize(st.sigB2) ? m_size : GetSize(st.sigB2); + } + + if (st.mul3) { + swapinput(st.sigA3, st.sigB3); + n_size = n_size > GetSize(st.sigA3) ? n_size : GetSize(st.sigA3); + m_size = m_size > GetSize(st.sigB3) ? m_size : GetSize(st.sigB3); + } + + if (st.mul4) { + swapinput(st.sigA4, st.sigB4); + n_size = n_size > GetSize(st.sigA4) ? n_size : GetSize(st.sigA4); + m_size = m_size > GetSize(st.sigB4) ? m_size : GetSize(st.sigB4); + } + + if (st.dinput) + d_size = GetSize(st.sigD); + + if (mode == "0100") { + n_size = (n_size + 1) / 2; + m_size = (m_size + 1) / 2; + } + + if (n_size <= 2 && m_size <= 2 && d_size <= 4) { + // Too narrow + return; + } else if (n_size <= 12 && m_size <= 10 && d_size <= 30) { + cell_size_name = "12x10x22"; + n_size = 12; + m_size = 10; + d_size = 30; + } else if (n_size <= 24 && m_size <= 20 && d_size <= 52) { + cell_size_name = "24x20x44"; + n_size = 24; + m_size = 20; + d_size = 52; + } else { + // Too wide + return; + } + + // cell + cell_full_name = cell_base_name + cell_size_name + cell_cfg_name; + + string cellname; + cellname += "newdsp_" + RTLIL::unescape_id(st.mul1->name); + RTLIL::Cell *cell = pm.module->addCell(RTLIL::escape_id(cellname), + RTLIL::escape_id(cell_full_name)); + + // D input + bool d_signed = false; + if (st.dinput) { + d_signed = st.postAdd1->getParam(ID(A_SIGNED)).as_bool(); + if (mode == "0001") + sigD.extend_u0(d_size); + sigD.append(st.sigD); + } + sigD.extend_u0(2 * d_size, d_signed); + + // output + if (st.multiout2 || st.multiout3) { + auto *wire = pm.module->addWire(NEW_ID, d_size - GetSize(st.sigY2)); + sigY.append(st.sigY2); + sigY.append(wire); + 
sigY.append(st.sigY); + } else if (mode == "0001" || (st.level == 4 && st.dinput)) { + auto *wire = pm.module->addWire(NEW_ID, d_size); + sigY.append(wire); + sigY.append(st.sigY); + } else + sigY.append(st.sigY); + auto *wire = pm.module->addWire(NEW_ID, 2 * d_size - GetSize(sigY)); + sigY.append(wire); + + // input + bool a_signed, b_signed; + if (mode == "0001") { + sigA.extend_u0(2 * n_size); + sigB.extend_u0(2 * m_size); + } + if (st.mul1 && mode != "0100") { + a_signed = st.mul1->getParam(ID(A_SIGNED)).as_bool(); + b_signed = st.mul1->getParam(ID(B_SIGNED)).as_bool(); + st.sigA1.extend_u0(n_size, a_signed); + st.sigB1.extend_u0(m_size, b_signed); + sigA.append(st.sigA1); + sigB.append(st.sigB1); + } + if (mode == "0100") { + a_signed = st.mul1->getParam(ID(A_SIGNED)).as_bool(); + b_signed = st.mul1->getParam(ID(B_SIGNED)).as_bool(); + st.sigA1.extend_u0(2 * n_size, a_signed); + st.sigB1.extend_u0(2 * m_size, b_signed); + sigA.append(st.sigA1); + sigB.append(st.sigB1); + } + if (!st.dinput && st.mul4) { + a_signed = st.mul4->getParam(ID(A_SIGNED)).as_bool(); + b_signed = st.mul4->getParam(ID(B_SIGNED)).as_bool(); + st.sigA4.extend_u0(n_size, a_signed); + st.sigB4.extend_u0(m_size, b_signed); + sigA.append(st.sigA4); + sigB.append(st.sigB4); + } + if (st.mul2) { + a_signed = st.mul2->getParam(ID(A_SIGNED)).as_bool(); + b_signed = st.mul2->getParam(ID(B_SIGNED)).as_bool(); + st.sigA2.extend_u0(n_size, a_signed); + st.sigB2.extend_u0(m_size, b_signed); + sigA.append(st.sigA2); + sigB.append(st.sigB2); + } + if (st.mul3) { + a_signed = st.mul3->getParam(ID(A_SIGNED)).as_bool(); + b_signed = st.mul3->getParam(ID(B_SIGNED)).as_bool(); + st.sigA3.extend_u0(n_size, a_signed); + st.sigB3.extend_u0(m_size, b_signed); + sigA.append(st.sigA3); + sigB.append(st.sigB3); + } + if (st.dinput && st.mul4) { + a_signed = st.mul4->getParam(ID(A_SIGNED)).as_bool(); + b_signed = st.mul4->getParam(ID(B_SIGNED)).as_bool(); + st.sigA4.extend_u0(n_size, a_signed); + 
st.sigB4.extend_u0(m_size, b_signed); + sigA.append(st.sigA4); + sigB.append(st.sigB4); + } + sigA.extend_u0(4 * n_size); + sigB.extend_u0(4 * m_size); + + // reg + mode = "00000" + mode + "0000"; + if (st.level == 1) { + if (st.ffA1 && st.ffB1 && !(st.dinput && !st.ffD)) { + mode[1] = mode[2] = '1'; + pm.autoremove(st.ffA1); + pm.autoremove(st.ffB1); + if (st.dinput) + pm.autoremove(st.ffD); + } + if (st.ffM1 && st.ffD && st.postAdd1) { + mode[11] = '1'; + pm.autoremove(st.ffM1); + pm.autoremove(st.ffD); + } + if (st.ffY1 || (!st.postAdd1 && st.ffM1)) { + mode[3] = mode[4] = '1'; + pm.autoremove(st.ffY1); + pm.autoremove(st.ffM1); + } + } + if (st.level == 2) { + if (st.ffA1 && st.ffB1 && st.ffA2 && st.ffB2 && st.dinput && + st.ffD) // in reg + { + mode[1] = mode[2] = '1'; + pm.autoremove(st.ffA1); + pm.autoremove(st.ffB1); + pm.autoremove(st.ffA2); + pm.autoremove(st.ffB2); + pm.autoremove(st.ffD); + } + if (st.ffA1 && st.ffB1 && st.ffA4 && st.ffB4 && !st.dinput) // in reg + { + mode[1] = mode[2] = '1'; + pm.autoremove(st.ffA1); + pm.autoremove(st.ffB1); + pm.autoremove(st.ffA4); + pm.autoremove(st.ffB4); + } + if (st.ffM1 && ((st.dinput && st.ffM2) || st.ffD)) // mul in reg + { + mode[11] = '1'; + pm.autoremove(st.ffM1); + pm.autoremove(st.ffD); + if (st.dinput) + pm.autoremove(st.ffM2); + } + if (st.ffY1 && st.ffY2 && st.dinput) // mul out reg + { + mode[12] = '1'; + pm.autoremove(st.ffY1); + pm.autoremove(st.ffY2); + } else if (st.ffY1 || st.ffY2) // out reg + { + mode[3] = mode[4] = '1'; + pm.autoremove(st.ffY1); + pm.autoremove(st.ffY2); + } + } + if (st.ffA1 && st.ffB1 && st.ffA2 && st.ffB2 && st.dinput && + st.ffD) // in reg low + { + mode[2] = '1'; + pm.autoremove(st.ffA1); + pm.autoremove(st.ffB1); + pm.autoremove(st.ffA2); + pm.autoremove(st.ffB2); + pm.autoremove(st.ffD); + } + if (st.ffA1 && st.ffB1 && st.ffA4 && st.ffB4 && !st.dinput) // in reg low + { + mode[2] = '1'; + pm.autoremove(st.ffA1); + pm.autoremove(st.ffB1); + pm.autoremove(st.ffA4); + 
pm.autoremove(st.ffB4); + } + if (st.level == 3) { + if (st.ffA3 && st.ffB3 && st.dinput) // in reg high + { + mode[1] = '1'; + pm.autoremove(st.ffA3); + pm.autoremove(st.ffB3); + } + if (st.ffA2 && st.ffB2 && !st.dinput) // in reg high + { + mode[1] = '1'; + pm.autoremove(st.ffA2); + pm.autoremove(st.ffB2); + } + if (st.ffM1 && st.ffM2 && ((st.dinput && st.ffM3) || st.ffD)) // mul in reg + { + mode[11] = '1'; + pm.autoremove(st.ffM1); + pm.autoremove(st.ffD); + pm.autoremove(st.ffM2); + if (st.dinput) + pm.autoremove(st.ffM3); + } + if (st.ffY1 && st.ffY2 && !(st.dinput && !st.ffY3)) // mul out reg + { + mode[12] = '1'; + pm.autoremove(st.ffY1); + pm.autoremove(st.ffY2); + if (st.dinput) + pm.autoremove(st.ffY3); + } else if (st.ffY2 || st.ffY3) // out reg + { + mode[3] = mode[4] = '1'; + pm.autoremove(st.ffY2); + pm.autoremove(st.ffY3); + } + } + if (st.level == 4) { + if (st.ffA3 && st.ffB3 && st.ffA4 && st.ffB4 && st.dinput) // in reg high + { + mode[1] = '1'; + pm.autoremove(st.ffA3); + pm.autoremove(st.ffB3); + pm.autoremove(st.ffA4); + pm.autoremove(st.ffB4); + } + if (st.ffA2 && st.ffB2 && st.ffA3 && st.ffB3 && !st.dinput) // in reg high + { + mode[1] = '1'; + pm.autoremove(st.ffA2); + pm.autoremove(st.ffB2); + pm.autoremove(st.ffA3); + pm.autoremove(st.ffB3); + } + if (st.ffM1 && st.ffM2 && st.ffM3 && + ((st.dinput && st.ffM4) || st.ffD)) // mul in reg + { + mode[11] = '1'; + pm.autoremove(st.ffM1); + pm.autoremove(st.ffD); + pm.autoremove(st.ffM2); + pm.autoremove(st.ffM3); + if (st.dinput) + pm.autoremove(st.ffM4); + } + if (st.ffY1 && st.ffY2 && st.ffY3 && + !(st.dinput && !st.ffY4)) // mul out reg + { + mode[12] = '1'; + pm.autoremove(st.ffY1); + pm.autoremove(st.ffY2); + pm.autoremove(st.ffY3); + if (st.dinput) + pm.autoremove(st.ffY4); + } else if (st.ffY3 || st.ffY4) // out reg high + { + if (st.multiout2 || st.multiout3) + mode[3] = '1'; + else + mode[3] = mode[4] = '1'; + pm.autoremove(st.ffY3); + pm.autoremove(st.ffY4); + } + if (st.ffMY) // out 
reg low + { + mode[4] = '1'; + pm.autoremove(st.ffMY); + } + } + + cell->setPort(RTLIL::escape_id("a_i"), sigA); + cell->setPort(RTLIL::escape_id("b_i"), sigB); + cell->setPort(RTLIL::escape_id("d_i"), sigD); + cell->setPort(RTLIL::escape_id("out_o"), sigY); + cell->setPort(RTLIL::escape_id("mode_i"), Const::from_string(mode)); + if (st.clock != SigBit()) + cell->setPort(RTLIL::escape_id("clk_i"), st.clock); + cell->setPort(RTLIL::escape_id("rst_acc"), RTLIL::SigSpec(0, 1)); + cell->setPort(RTLIL::escape_id("accsel"), RTLIL::SigSpec(0, 1)); + cell->setPort(RTLIL::escape_id("cas_g"), RTLIL::SigSpec(0, 1)); + + pm.autoremove(st.mul1); + pm.autoremove(st.mul2); + pm.autoremove(st.mul3); + pm.autoremove(st.mul4); + pm.autoremove(st.postAdd1); + pm.autoremove(st.postAdd2); + pm.autoremove(st.postAdd3); + pm.autoremove(st.postAdd4); +} + +struct RfNewDSP : public Pass { + bool show_help; + + RfNewDSP() + : Pass("rf_new_dsp", + "Extract multiply-add operators and map to new_dsps") {} + + void help() override { + log("\n"); + log(" rf_new_dsp [options] [selection]\n"); + log("\n"); + log(" Extract multiply-add operators and map to new_dsps\n"); + log("\n"); + log(" -help: show help desk\n"); + log("\n"); + log(" -n_size: specify input n size\n"); + log("\n"); + log(" -m_size: specify input m size\n"); + log("\n"); + } + + void clear_flags() override { show_help = false; } + + void execute(std::vector a_Args, + RTLIL::Design *a_Design) override { + log_header(a_Design, "Executing RF_NEW_DSP pass.\n"); + size_t argidx; + for (argidx = 1; argidx < a_Args.size(); argidx++) { + if (a_Args[argidx] == "-help") { + show_help = true; + continue; + } + break; + } + extra_args(a_Args, argidx, a_Design); + if (show_help) { + help(); + return; + } + + for (auto module : a_Design->selected_modules()) { + rf_new_dsp_pm pm(module, module->selected_cells()); + pm.run_rf_new_dsp(rf_new_dsp); + } + } +} RfNewDsp; + +PRIVATE_NAMESPACE_END diff --git a/techlibs/rapidflex/src/rf_new_dsp.pmg 
// Pattern for chains of up to four multipliers joined by post-adders, with
// optional registers on the A/B/D inputs, the multiplier outputs and the
// adder outputs. Consumed by rf_new_dsp.cc.
pattern rf_new_dsp

state <SigBit> clock
state <SigSpec> sigA1 sigA2 sigA3 sigA4
state <SigSpec> sigB1 sigB2 sigB3 sigB4
state <SigSpec> sigD sigY sigY2 sigM

state <Cell*> ffA1 ffA2 ffA3 ffA4
state <Cell*> ffB1 ffB2 ffB3 ffB4
state <Cell*> ffD
state <Cell*> ffM1 ffM2 ffM3 ffM4
state <Cell*> ffY1 ffY2 ffY3 ffY4 ffMY
state <Cell*> postAdd1 postAdd2 postAdd3 postAdd4
state <Cell*> mul2 mul3 mul4

state <bool> multiout2 multiout3 dinput y_signed y2_signed
state <int> level

// Variables used for subpatterns
state <SigSpec> argQ argD argA argM
udata <SigSpec> dffD dffQ addD addY
udata <SigBit> dffclock
udata <Cell*> dff postadder multiplier
state <IdString> postAddAB

// (1) match multiplier 1
match mul1
    select mul1->type.in($mul)
endmatch

code sigA1 sigB1 sigY multiout2 multiout3 dinput y_signed y2_signed level
    sigA1 = port(mul1, \A);
    sigB1 = port(mul1, \B);
    sigY = port(mul1, \Y);
    multiout2 = false;
    multiout3 = false;
    y_signed = param(mul1, \A_SIGNED).as_bool();
    dinput = false;
    level = 1;
endcode

// (2) Match A input register 1
code argQ ffA1 sigA1 clock
    argQ = sigA1;
    subpattern(in_dffe);
    if (dff) {
        ffA1 = dff;
        clock = dffclock;
        sigA1 = dffD;
    }
endcode

// (3) Match B input register 1
code argQ ffB1 sigB1 clock
    argQ = sigB1;
    subpattern(in_dffe);
    if (dff) {
        ffB1 = dff;
        clock = dffclock;
        sigB1 = dffD;
    }
endcode

// (4) Match mul output register 1
code argD ffM1 sigY clock
    argD = sigY;
    subpattern(out_dffe);
    if (dff) {
        ffM1 = dff;
        clock = dffclock;
        sigY = dffQ;
    }
endcode

// (5) Match post adder 1
code argA postAdd1 sigY sigD y_signed
    argA = sigY;
    subpattern(post_add);
    if (postadder) {
        postAdd1 = postadder;
        sigY = addY;
        sigD = addD;
        y_signed = param(postAdd1, \A_SIGNED).as_bool();
    }
endcode

// (6) Match D input register
code argQ ffD sigD clock
    argQ = sigD;
    subpattern(in_dffe);
    if (dff) {
        ffD = dff;
        clock = dffclock;
        sigD = dffD;
    }
endcode

// (6-1) Match multiplier 4: the other adder operand is either a further
// multiplier or, failing that, a plain D input.
code argM mul4 sigA4 sigB4 sigD level dinput
    argM = sigD;
    subpattern(more_mult);
    if (multiplier) {
        mul4 = multiplier;
        sigA4 = port(mul4, \A);
        sigB4 = port(mul4, \B);
        level += 1;
    }
    else if (postAdd1)
    {
        dinput = true;
    }
endcode

// (7) Match mac output register 1
code argD ffY1 sigY clock
    argD = sigY;
    subpattern(out_dffe);
    if (dff) {
        ffY1 = dff;
        clock = dffclock;
        sigY = dffQ;
    }
endcode

// (8) Match post adder 2
code argA postAdd2 sigM sigY sigD y_signed
    argA = sigY;
    subpattern(post_add);
    if (postadder) {
        postAdd2 = postadder;
        sigY = addY;
        sigM = addD;
        y_signed = param(postAdd2, \A_SIGNED).as_bool();
    }
endcode

// (9) Match mul output register 2
code argQ ffM2 sigM clock
    argQ = sigM;
    subpattern(in_dffe);
    if (dff) {
        ffM2 = dff;
        clock = dffclock;
        sigM = dffD;
    }
endcode

// (10) match multiplier 2
code argM mul2 sigA2 sigB2 sigM level
    argM = sigM;
    subpattern(more_mult);
    if (multiplier) {
        mul2 = multiplier;
        sigA2 = port(mul2, \A);
        sigB2 = port(mul2, \B);
        level += 1;
    }
    sigM.remove(0, GetSize(sigM));
endcode

// (11) Match A input register 2
code argQ ffA2 sigA2 clock
    argQ = sigA2;
    subpattern(in_dffe);
    if (dff) {
        ffA2 = dff;
        clock = dffclock;
        sigA2 = dffD;
    }
endcode

// (12) Match B input register 2
code argQ ffB2 sigB2 clock
    argQ = sigB2;
    subpattern(in_dffe);
    if (dff) {
        ffB2 = dff;
        clock = dffclock;
        sigB2 = dffD;
    }
endcode

// (13) Match mac output register 2
code argD ffY2 sigY clock
    argD = sigY;
    subpattern(out_dffe);
    if (dff) {
        ffY2 = dff;
        clock = dffclock;
        sigY = dffQ;
    }
endcode

// (14) Match post adder 3
code argA postAdd3 sigM sigY sigD y_signed
    argA = sigY;
    subpattern(post_add);
    if (postadder) {
        postAdd3 = postadder;
        sigY = addY;
        sigM = addD;
        y_signed = param(postAdd3, \A_SIGNED).as_bool();
    }
endcode

// (15) Match mul output register 3
code argQ ffM3 sigM clock
    argQ = sigM;
    subpattern(in_dffe);
    if (dff) {
        ffM3 = dff;
        clock = dffclock;
        sigM = dffD;
    }
endcode

// (16) match multiplier 3
code argM mul3 sigA3 sigB3 level sigM
    argM = sigM;
    subpattern(more_mult);
    if (multiplier) {
        mul3 = multiplier;
        sigA3 = port(mul3, \A);
        sigB3 = port(mul3, \B);
        level += 1;
    }
    sigM.remove(0, GetSize(sigM));
endcode

// (17) Match A input register 3
code argQ ffA3 sigA3 clock
    argQ = sigA3;
    subpattern(in_dffe);
    if (dff) {
        ffA3 = dff;
        clock = dffclock;
        sigA3 = dffD;
    }
endcode

// (18) Match B input register 3
code argQ ffB3 sigB3 clock
    argQ = sigB3;
    subpattern(in_dffe);
    if (dff) {
        ffB3 = dff;
        clock = dffclock;
        sigB3 = dffD;
    }
endcode

// (19) Match mac output register 3
code argD ffY3 sigY clock
    argD = sigY;
    subpattern(out_dffe);
    if (dff) {
        ffY3 = dff;
        clock = dffclock;
        sigY = dffQ;
    }
endcode

// (20) Match post adder 4
code argA postAdd4 sigM sigY sigD y_signed
    argA = sigY;
    subpattern(post_add);
    if (postadder) {
        postAdd4 = postadder;
        sigY = addY;
        sigM = addD;
        y_signed = param(postAdd4, \A_SIGNED).as_bool();
    }
endcode

// (21) Match mul output register 4
code argQ ffM4 sigM clock
    argQ = sigM;
    subpattern(in_dffe);
    if (dff) {
        ffM4 = dff;
        clock = dffclock;
        sigM = dffD;
    }
endcode

// (22) match multiplier 4 (only if step 6-1 did not already find it)
code argM mul4 sigA4 sigB4 level sigM
    argM = sigM;
    if (!mul4)
    {
        subpattern(more_mult);
        if (multiplier) {
            mul4 = multiplier;
            sigA4 = port(mul4, \A);
            sigB4 = port(mul4, \B);
            level += 1;
        }
    }
    sigM.remove(0, GetSize(sigM));
endcode

// (23) Match A input register 4
code argQ ffA4 sigA4 clock
    argQ = sigA4;
    subpattern(in_dffe);
    if (dff) {
        ffA4 = dff;
        clock = dffclock;
        sigA4 = dffD;
    }
endcode

// (24) Match B input register 4
code argQ ffB4 sigB4 clock
    argQ = sigB4;
    subpattern(in_dffe);
    if (dff) {
        ffB4 = dff;
        clock = dffclock;
        sigB4 = dffD;
    }
endcode

// (25) Match mac output register 4
code argD ffY4 sigY clock
    argD = sigY;
    subpattern(out_dffe);
    if (dff) {
        ffY4 = dff;
        clock = dffclock;
        sigY = dffQ;
    }
endcode

code
    accept;
endcode

// #######################

// Subpattern for matching against input registers

subpattern in_dffe
arg argQ clock

code
    dff = nullptr;
    if (argQ.empty())
        reject;
    for (const auto &c : argQ.chunks())
    {
        // Abandon matches when 'Q' is a constant
        if (!c.wire)
            reject;
        // Abandon matches when 'Q' has the keep attribute set
        if (c.wire->get_bool_attribute(\keep))
            reject;
    }
endcode

match ff
    select ff->type.in($dff, $dffe, $sdff, $sdffe, $adff, $adffe)

    slice offset GetSize(port(ff, \D))
    index <SigBit> port(ff, \Q)[offset] === argQ[0]

    // Check that the rest of argQ is present
    filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
    filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ

    filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch

code argQ
    SigSpec Q = port(ff, \Q);
    dff = ff;
    dffclock = port(ff, \CLK);
    dffD = argQ;
    SigSpec D = port(ff, \D);
    argQ = Q;
    // to.replace(pattern, with): bits of 'to' found in 'pattern' become the
    // corresponding bits of 'with'.
    dffD.replace(argQ, D);
endcode

// #######################

// Subpattern for matching output registers

subpattern out_dffe
arg argD argQ clock

code
    dff = nullptr;
    for (auto c : argD.chunks())
        // Abandon matches when 'D' has the keep attribute set. Constant
        // chunks carry no wire, so they are skipped rather than dereferenced.
        if (c.wire && c.wire->get_bool_attribute(\keep))
            reject;
endcode

match ff
    select ff->type.in($dff, $dffe, $sdff, $sdffe)

    slice offset GetSize(port(ff, \D))
    index <SigBit> port(ff, \D)[offset] === argD[0]

    // Check that the rest of argD is present
    filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
    filter port(ff, \D).extract(offset, GetSize(argD)) == argD

    filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch

code argQ
    SigSpec D = port(ff, \D);
    SigSpec Q = port(ff, \Q);
    argQ = argD;
    argQ.replace(D, Q);

    dff = ff;
    dffQ = argQ;
    dffclock = port(ff, \CLK);
endcode

// #######################

// Subpattern for matching post adder

subpattern post_add
arg argA

code
    postadder = nullptr;
endcode

match adder
    select adder->type.in($add)
    choice <IdString> AB {\A, \B}
    select nusers(port(adder, AB)) == 2

    index <SigBit> port(adder, AB)[0] === argA[0]
    filter GetSize(port(adder, AB)) >= GetSize(argA)
    filter port(adder, AB).extract(0, GetSize(argA)) == argA

    set postAddAB AB
endmatch

code argA
    SigSpec A = port(adder, postAddAB);
    SigSpec D = port(adder, postAddAB == \A ? \B : \A);
    SigSpec Y = port(adder, \Y);

    postadder = adder;

    addY = argA;
    addY.replace(A, Y);

    addD = D;
endcode

// #######################

// Subpattern for matching multiplier

subpattern more_mult
arg argM

code
    multiplier = nullptr;
endcode

match mult
    select mult->type.in($mul)
    filter GetSize(port(mult, \Y)) <= GetSize(argM)
    filter port(mult, \Y) == argM.extract(0, GetSize(port(mult, \Y)))
endmatch

code
    multiplier = mult;
endcode
\B : \A); + SigSpec Y = port(adder, \Y); + + postadder = adder; + + addY = argA; + addY.replace(A, Y); + + addD = D; +endcode + +// ####################### + +// Subpattern for matching multiplier + +subpattern more_mult +arg argM + +code + multiplier = nullptr; +endcode + +match mult + select mult->type.in($mul) + filter GetSize(port(mult, \Y)) <= GetSize(argM) + filter port(mult, \Y) == argM.extract(0, GetSize(port(mult, \Y))) +endmatch + +code + multiplier = mult; +endcode \ No newline at end of file diff --git a/techlibs/rapidflex/src/synth_rapidflex.cc b/techlibs/rapidflex/src/synth_rapidflex.cc new file mode 100644 index 000000000..4d38cfa89 --- /dev/null +++ b/techlibs/rapidflex/src/synth_rapidflex.cc @@ -0,0 +1,530 @@ +/* + * Copyright 2020-2024 RapidFlex + */ +#include "kernel/celltypes.h" +#include "kernel/log.h" +#include "kernel/register.h" +#include "kernel/rtlil.h" +using namespace std; +/* Constants for device name */ +constexpr const char *ALKDC_DNAME = "alkaidC"; +constexpr const char *ALKDL_DNAME = "alkaidL"; +constexpr const char *ALKDT_DNAME = "alkaidT"; + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +#define XSTR(val) #val +#define STR(val) XSTR(val) + +#ifndef PASS_NAME +#define PASS_NAME synth_rf_alkaid +#endif + +struct SynthRapidFlexPass : public ScriptPass { + + SynthRapidFlexPass() + : ScriptPass(STR(PASS_NAME), "Synthesis for RapidFlex Alkaid FPGAs") {} + + void help() override { + log("\n"); + log(" %s [options]\n", STR(PASS_NAME)); + log("This command runs synthesis for RapidFlex Alkaid FPGAs\n"); + log("\n"); + log(" -top \n"); + log(" use the specified module as top module\n"); + log("\n"); + log(" -family \n"); + log(" run synthesis for the specified RapidFlex architecture\n"); + log(" generate the synthesis netlist for the specified family.\n"); + log(" supported values:\n"); + log(" - alkaidL\n"); + log(" - alkaidT\n"); + log(" - alkaidC\n"); + log("\n"); + log(" -edif \n"); + log(" write the design to the specified edif 
file. Writing of an " + "output file\n"); + log(" is omitted if this parameter is not specified.\n"); + log("\n"); + log(" -blif \n"); + log(" write the design to the specified BLIF file. Writing of an " + "output file\n"); + log(" is omitted if this parameter is not specified.\n"); + log("\n"); + log(" -verilog \n"); + log(" write the design to the specified verilog file. Writing of an " + "output\n"); + log(" file is omitted if this parameter is not specified.\n"); + log("\n"); + log(" -no_dsp\n"); + log(" By default use DSP blocks in output netlist.\n"); + log(" do not use DSP blocks to implement multipliers and associated " + "logic\n"); + log("\n"); + log(" -no_adder\n"); + log(" By default use adder cells in output netlist.\n"); + log(" Specifying this switch turns it off.\n"); + log("\n"); + log(" -no_bram\n"); + log(" By default use Block RAM in output netlist.\n"); + log(" Specifying this switch turns it off.\n"); + log("\n"); + log(" -insert_clock_buffer\n"); + log(" By default no insertion of clock buffer for output " + "netlist.\n"); + log(" -cell_map_file \n"); + log(" write the ckbuf into to the specified XML file. Writing of an " + "output file\n"); + log(" Specifying this switch turns it on.\n"); + log(" -K \n"); + log(" Specify the input size of LUT when running optimization. If " + "not specified, a default value will be applied. Please do not modify " + "this parameter except architecture exploration\n"); + log(" -parse_only\n"); + log(" Only apply verilog parsing. 
This is for rewriting purpose.\n"); + log(" Specifying this switch turns it on.\n"); + log("\n"); + log("The following commands are executed by this synthesis command:\n"); + log("\n"); + log(" -save_block_diagram \n"); + log(" generate a block diagram and save to the specified file\n"); + log(" supported formats: dot, png, svg, eps, pdf\n"); + log("\n"); + help_script(); + } + + std::string top_opt, edif_file, blif_file, family, currmodule, verilog_file, + cell_map_file, lib_path, block_diagram_file; + bool nodsp; + bool no_opt; + bool abc9; + bool inferAdder; + bool inferBram; + bool show_help; + bool insert_clock_buffer; + size_t DEFAULT_K = 5; + size_t MIN_K = 4; + size_t MAX_K = 6; + size_t max_lut_size = DEFAULT_K; + bool parse_only = false; + + void clear_flags() override { + top_opt = "-auto-top"; + edif_file = ""; + blif_file = ""; + cell_map_file = ""; + verilog_file = ""; + currmodule = ""; + family = ALKDL_DNAME; + inferAdder = true; + inferBram = true; + nodsp = false; + no_opt = false; + abc9 = false; + lib_path = "+/rapidflex/"; + show_help = false; + insert_clock_buffer = false; + max_lut_size = DEFAULT_K; + parse_only = false; + block_diagram_file = ""; + } + + void execute(std::vector args, RTLIL::Design *design) override { + string run_from, run_to; + clear_flags(); + lib_path = design->scratchpad_get_string("rf.lib_path", lib_path); + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) { + if (args[argidx] == "-run" && argidx + 1 < args.size()) { + size_t pos = args[argidx + 1].find(':'); + if (pos == std::string::npos) { + run_from = args[++argidx]; + run_to = args[argidx]; + } else { + run_from = args[++argidx].substr(0, pos); + run_to = args[argidx].substr(pos + 1); + } + continue; + } + if (args[argidx] == "-top" && argidx + 1 < args.size()) { + top_opt = "-top " + args[++argidx]; + continue; + } + if (args[argidx] == "-edif" && argidx + 1 < args.size()) { + edif_file = args[++argidx]; + continue; + } + + if (args[argidx] == 
"-family" && argidx + 1 < args.size()) { + family = args[++argidx]; + continue; + } + if (args[argidx] == "-blif" && argidx + 1 < args.size()) { + blif_file = args[++argidx]; + continue; + } + if (args[argidx] == "-cell_map_file" && argidx + 1 < args.size()) { + cell_map_file = args[++argidx]; + continue; + } + if (args[argidx] == "-verilog" && argidx + 1 < args.size()) { + verilog_file = args[++argidx]; + continue; + } + if (args[argidx] == "-K" && argidx + 1 < args.size()) { + max_lut_size = std::stoi(args[++argidx]); + continue; + } + if (args[argidx] == "-no_dsp") { + nodsp = true; + continue; + } + if (args[argidx] == "-no_adder") { + inferAdder = false; + continue; + } + if (args[argidx] == "-no_bram") { + inferBram = false; + continue; + } + if (args[argidx] == "-no_opt") { + no_opt = false; + continue; + } + if (args[argidx] == "-parse_only") { + parse_only = true; + continue; + } + if (args[argidx] == "-help") { + show_help = true; + continue; + } + if (args[argidx] == "-insert_clock_buffer") { + insert_clock_buffer = true; + continue; + } + if (args[argidx] == "-save_block_diagram") { + if (argidx + 1 < args.size() && args[argidx + 1][0] != '-') { + block_diagram_file = args[++argidx]; + } else { + block_diagram_file = ""; + } + continue; + } + break; + } + extra_args(args, argidx, design); + + if (show_help) { + help(); + return; + } + + if (!design->full_selection()) { + log_cmd_error("This command only operates on fully selected designs!\n"); + } + /* Pre-check on family name and confirm on selection*/ + if (family != ALKDL_DNAME && family != ALKDT_DNAME && + family != ALKDC_DNAME) { + log_cmd_error("Invalid family specified: '%s'\n", family.c_str()); + } + log("Selected device family: %s\n", family.c_str()); + /* Force to enable/disable options upon device limits */ + if (family == ALKDL_DNAME || family == ALKDC_DNAME) { + if (!nodsp) { + log_warning("Disabling DSP inference as the selected device " + "does not contain dedicated resources\n"); + nodsp 
= true; + } + if (inferBram) { + log_warning("Disabling RAM inference as the selected device " + "does not contain dedicated resources\n"); + inferBram = false; + } + } + /* By default, no opt should be enabled. Throw a warning if not */ + if (no_opt) { + log_warning("Disabling optimization, which may have a " + "negative impact on QoR\n"); + } + if (abc9 && design->scratchpad_get_int("abc9.D", 0) == 0) { + log_warning("Delay target has not been set via SDC or scratchpad; " + "Assuming 1GHz clock.\n"); + design->scratchpad_set_int("abc9.W", + 1000); // set interconnet delay as 1ns + } + /* Sanity checks on max lut size */ + if (max_lut_size < MIN_K || max_lut_size > MAX_K) { + log_cmd_error( + "The provided K=%ld is out of the acceptable range [%ld, %ld]!\n", + max_lut_size, MIN_K, MAX_K); + return; + } + + log_header(design, "Executing SYNTH_RAPIDFLEX pass.\n"); + log_push(); + + run_script(design, run_from, run_to); + log_pop(); + } + + void script() override { + if (help_mode) { + family = ""; + } + + std::string noDFFArgs; + if (check_label("begin")) { + std::string family_path = " " + lib_path + family; + std::string read_vlog_args; + + // Read simulation library + read_vlog_args = family_path + "/cell_sim.v"; + + // Use -nomem2reg here to prevent Yosys from complaining about + // some block ram cell models. After all the only part of the cells + // library required here is cell port definitions plus specify blocks. + if (parse_only) { + run("read_verilog " + lib_path + "common/cells_sim.v" + read_vlog_args); + } else { + run("read_verilog -lib -specify -nomem2reg " + lib_path + + "common/cells_sim.v" + read_vlog_args); + } + run("logger -werror \"multiple conflicting drivers\""); + run("check"); + run(stringf("hierarchy -check %s", + help_mode ? "-top " : top_opt.c_str())); + run("stat"); + } + + if (check_label("prepare")) { + run("proc"); + run("flatten"); + if (parse_only) { + log("Running parse-only flow. 
Exit after flattening the design\n"); + return; + } + if (help_mode) { + run("tribuf -logic"); + } + if (!no_opt) { + run("opt_expr"); + run("opt_clean"); + } + run("deminout"); + if (!no_opt) { + run("opt -nodffe"); + } + + run("check"); + if (!no_opt) { + run("opt -nodffe"); + run("fsm"); + run("opt -nodffe"); + run("wreduce -keepdc"); + run("peepopt"); + run("pmuxtree"); + run("opt_clean"); + run("share"); + } + } + + if (check_label("map_dsp"), "(skip if -no_dsp)") { + struct DspParams { + size_t a_maxwidth; + size_t b_maxwidth; + size_t a_minwidth; + size_t b_minwidth; + std::string type; + }; + + const std::vector dsp_rules = { + {24, 20, 13, 11, "mult_24x20_map"}, + {12, 10, 2, 2, "mult_12x10_map"}, + }; + + if (help_mode || family == ALKDT_DNAME) { + if (help_mode || !nodsp) { + run("memory_dff", " (for alkaidT)"); + if (!no_opt) { + run("wreduce t:$mul", " (for alkaidT)"); + } + run("rf_new_dsp"); + for (const auto &rule : dsp_rules) { + run(stringf("techmap -map +/mul2dsp.v " + "-map %s/dsp_map.v " + "-D DSP_A_MAXWIDTH=%zu -D DSP_B_MAXWIDTH=%zu " + "-D DSP_A_MINWIDTH=%zu -D DSP_B_MINWIDTH=%zu " + "-D DSP_NAME=%s", + std::string(lib_path + family).c_str(), rule.a_maxwidth, + rule.b_maxwidth, rule.a_minwidth, rule.b_minwidth, + rule.type.c_str())); + /* Without the following command, some multiplier may be skipped */ + run("chtype -set $mul t:$__soft_mul", " (for alkaidT)"); + } + run("select a:mul2dsp", " (for alkaidT)"); + run("setattr -unset mul2dsp", " (for alkaidT)"); + if (!no_opt) { + run("opt_expr -fine", " (for alkaidT)"); + run("wreduce", " (for alkaidT)"); + } + run("select -clear", " (for alkaidT)"); + // Comment out for further development + // run("rf_dsp", " (for + // alkaidT)"); + run("chtype -set $mul t:$__soft_mul", " (for alkaidT)"); + } + } + } + + if (check_label("coarse")) { + run("techmap -map +/cmp2lut.v -D LUT_WIDTH=5"); + if (!no_opt) { + run("opt_expr"); + run("opt_clean"); + } + run("alumacc"); + run("pmuxtree"); + if (!no_opt) { 
+ run("opt -nodffe"); + } + run("memory -nomap"); + if (!no_opt) { + run("opt_clean"); + } + } + + if (check_label("map_bram", "(skip if -no_bram)") && + ((help_mode || family == ALKDT_DNAME) && inferBram)) { + if (help_mode || family == ALKDT_DNAME) { + run("memory_bram -rules " + lib_path + family + "/bram.txt"); + } + /* TODO: Add bram initilization support */ + run("techmap -map " + lib_path + family + "/bram_map.v"); + } + if (check_label("map_ffram")) { + if (!no_opt) { + run("opt -fast -mux_undef -undriven -fine -nodffe"); + } + run("memory_map"); + if (!no_opt) { + run("opt -undriven -fine -nodffe"); + } + } + + if (check_label("map_gates")) { + if (help_mode || + (inferAdder && (family == ALKDL_DNAME || family == ALKDT_DNAME || + family == ALKDC_DNAME))) { + run("techmap -map +/techmap.v -map " + lib_path + family + + "/arith_map.v", + "(unless -no_adder)"); + } else { + run("techmap"); + } + if (!no_opt) { + run("opt -fast -nodffe"); + run("opt_expr"); + run("opt_merge"); + run("opt_clean"); + run("opt -nodffe"); + } + } + + if (check_label("map_ffs")) { + run("memory"); + /* TODO: Support shift-register mapping */ + /* Run 2 times dff mapping incase anything missing */ + run("dfflegalize -cell $_DFF_?_ 01 -cell $_DFF_???_ 01 -cell $_SDFF_???_ " + "01"); + run("techmap -map " + lib_path + family + "/dff_map.v"); + run("dfflegalize -cell $_DFF_?_ 01 -cell $_DFF_???_ 01 -cell $_SDFF_???_ " + "01"); + run("techmap -map " + lib_path + family + "/dff_map.v"); + run("opt_expr -mux_undef"); + run("simplemap"); + run("opt_expr"); + if (!no_opt) { + run("opt_merge"); + run("opt_dff -nodffe"); + run("opt_clean"); + run("opt -nodffe"); + } + } + + if (check_label("map_luts")) { + run("abc -lut " + std::to_string(max_lut_size)); + /* Map dff and adder again since ABC may generate new gates */ + run("techmap -map " + lib_path + family + "/dff_map.v"); + run("techmap -map " + lib_path + family + "/arith_map.v"); + } + + if (check_label("check")) { + run("autoname"); + 
run("hierarchy -check"); + run("stat"); + run("check -noinit"); + } + + if (check_label("finalize")) { + if (!no_opt) { + run("opt_clean -purge"); + } + run("check"); + } + + if (check_label("insert_clock_buffer", "(if -insert_clock_buffer)")) { + if (insert_clock_buffer) { + if (top_opt == "-auto-top") + run(stringf("rapidflex_insert_clock_buffer -cell_map_file %s", + cell_map_file.c_str())); + else + run(stringf("rapidflex_insert_clock_buffer %s -cell_map_file %s", + top_opt.c_str(), cell_map_file.c_str())); + } + } + + if (check_label("blif", "(if -blif)")) { + if (help_mode || !blif_file.empty()) { + run(stringf("write_blif -param %s ", + help_mode ? "" : blif_file.c_str())); + } + } + + if (check_label("verilog", "(if -verilog)")) { + if (help_mode || !verilog_file.empty()) { + run("write_verilog -noattr -nohex " + + (help_mode ? "" : verilog_file)); + } + } + + if (check_label("save_block_diagram", "(if -save_block_diagram)")) { + if (!block_diagram_file.empty()) { + size_t dot_pos = block_diagram_file.find_last_of('.'); + std::string ext, prefix; + if (dot_pos != std::string::npos && + dot_pos + 1 < block_diagram_file.length()) { + ext = block_diagram_file.substr(dot_pos + 1); + prefix = block_diagram_file.substr(0, dot_pos); + } else { + ext = "dot"; + prefix = block_diagram_file; + } + if (ext != "dot" && ext != "png" && ext != "svg" && ext != "eps" && + ext != "pdf") { + log_cmd_error("Unsupported block diagram file format: %s. 
Supported " + "formats: dot, png, svg, eps, pdf\n", + ext.c_str()); + return; + } + run(stringf("show -format %s -prefix %s", ext.c_str(), prefix.c_str())); + } else { + run("show -format dot"); + } + } + } + +} SynthRapidFlexPass; + +PRIVATE_NAMESPACE_END diff --git a/techlibs/rapidflex/util/pcnt_cell_sim_gen.py b/techlibs/rapidflex/util/pcnt_cell_sim_gen.py new file mode 100644 index 000000000..2670d2d83 --- /dev/null +++ b/techlibs/rapidflex/util/pcnt_cell_sim_gen.py @@ -0,0 +1,142 @@ +##################################################################### +# A script to generate cell sim for pcounters +##################################################################### +import os +from os.path import dirname, abspath +import argparse +import logging +import csv +import pcounter_ip_template_generator + +##################################################################### +# Error codes +##################################################################### +error_codes = {"SUCCESS": 0, "ERROR": 1, "FILE_ERROR": 3} + +##################################################################### +# Initialize logger +##################################################################### +logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.ERROR) + + +def generate_file_header(f0): + f0.write("//-------------------------------------------------\n") + f0.write("// IMPORTANT: This file is auto generated!!! DO NOT MODIFY BY HAND!!!\n") + f0.write("//-------------------------------------------------\n") + f0.write("// Pcounter Primitives\n") + f0.write("// Naming convention:\n") + f0.write( + "// pcounter_clk_rst_\n" + ) + f0.write( + "// size: [N | ] ranges from 0 to 31, representing the number of bits. 
N is a parameterized design, which is supposed not be exposed to users\n" + ) + f0.write("// trigger_type: [p|n] denotes [rising edge (posedge) | falling edge (negedge) ]\n") + f0.write("// reset_type: [a|s] denotes [ asynchronous | synchronous ]\n") + f0.write("// reset_polarity: [p|n] denotes [ active-high | active-low ]\n") + f0.write( + "// event_function : [ load | add | sub | sr | sl ] denotes [ load | add | substract | shift right | shift left ] on the data_i values\n" + ) + + +def generate_pcounter_ips(fpath): + pcnt_ip_tmpl_gen = pcounter_ip_template_generator.PcounterIpTemplateGenerator() + cnt = 0 + with open(fpath, "w") as cell_sim_fh: + generate_file_header(cell_sim_fh) + for clk_type in pcnt_ip_tmpl_gen.clock_types(): + for rst_type in pcnt_ip_tmpl_gen.reset_types(): + for rst_polar in pcnt_ip_tmpl_gen.reset_polarities(): + for event_type in pcnt_ip_tmpl_gen.event_types(): + logging.info( + f"Generating IPs: {pcnt_ip_tmpl_gen.ip_template_name(clk_type, rst_type, rst_polar, event_type)} ..." + ) + logging.debug( + f"Generating IP template: {pcnt_ip_tmpl_gen.ip_template_name(clk_type, rst_type, rst_polar, event_type)} ..." + ) + pcnt_ip_tmpl_gen.write_ip_template( + cell_sim_fh, clk_type, rst_type, rst_polar, event_type + ) + logging.debug(f"Done") + cnt += 1 + # Generate full-size (32-bit) version + d_size = pcnt_ip_tmpl_gen.max_data_size() + logging.debug( + f"Generating full-sized counter IP: {pcnt_ip_tmpl_gen.ip_name(clk_type, rst_type, rst_polar, event_type, d_size)} ..." + ) + pcnt_ip_tmpl_gen.write_ip( + cell_sim_fh, clk_type, rst_type, rst_polar, event_type, d_size + ) + logging.debug(f"Done") + cnt += 1 + # Generate half-size (16-bit) version + d_size = int(pcnt_ip_tmpl_gen.max_data_size() / 2) + logging.debug( + f"Generating half-sized counter IP: {pcnt_ip_tmpl_gen.ip_name(clk_type, rst_type, rst_polar, event_type, d_size)} ..." 
+ ) + pcnt_ip_tmpl_gen.write_ip( + cell_sim_fh, clk_type, rst_type, rst_polar, event_type, d_size + ) + logging.debug(f"Done") + cnt += 1 + # Generate CCB + logging.info( + f"Generating ccb-related IPs: {pcnt_ip_tmpl_gen.ccb_ip_template_name(clk_type, rst_type, rst_polar, event_type)} ..." + ) + logging.debug( + f"Generating ccb-related IP template: {pcnt_ip_tmpl_gen.ccb_ip_template_name(clk_type, rst_type, rst_polar, event_type)} ..." + ) + pcnt_ip_tmpl_gen.write_ccb_ip_template( + cell_sim_fh, clk_type, rst_type, rst_polar, event_type + ) + logging.debug(f"Done") + cnt += 1 + d_size = pcnt_ip_tmpl_gen.max_data_size() + logging.debug( + f"Generating ccb-related full-sized IP: {pcnt_ip_tmpl_gen.ccb_ip_name(clk_type, rst_type, rst_polar, event_type, d_size)} ..." + ) + pcnt_ip_tmpl_gen.write_ccb_ip( + cell_sim_fh, clk_type, rst_type, rst_polar, event_type, d_size + ) + logging.debug(f"Done") + cnt += 1 + # Generate half-size (16-bit) version + d_size = int(pcnt_ip_tmpl_gen.max_data_size() / 2) + logging.debug( + f"Generating ccb-related half-sized IP: {pcnt_ip_tmpl_gen.ccb_ip_name(clk_type, rst_type, rst_polar, event_type, d_size)} ..." 
# Constants
CLK_TYPE_RISE = "RISING"
CLK_TYPE_FALL = "FALLING"
RST_TYPE_SYNC = "SYNCHRONOUS"
RST_TYPE_ASYNC = "ASYNCHRONOUS"
RST_POLARITY_L = "ACTIVE-LOW"
RST_POLARITY_H = "ACTIVE-HIGH"
EVENT_TYPE_LOAD = "LOAD"
EVENT_TYPE_ADD = "ADD"
EVENT_TYPE_SUB = "SUBTRACT"
EVENT_TYPE_SR = "SHIFT-RIGHT"
EVENT_TYPE_SL = "SHIFT-LEFT"


class PcounterIpTemplateGenerator:
    """Emit Verilog simulation models for programmable counters (pcounters).

    Two families of modules are produced, each as a width-parameterized
    template ("pcounterN_...") plus fixed-width wrappers ("pcounter<size>_..."):

    * plain IPs, whose load/match values are Verilog parameters, and
    * CCB IPs (name suffix "_ccb"), whose load/match values are input ports.

    Every writer takes a text stream and only calls its ``write`` method.
    Invalid clock/reset/event selectors raise ``ValueError``.
    """

    # Port declarations / connections shared by every generated module.
    _CTRL_INPUT_DECLS = (
        " input clk_i,\n"
        " input rst_i,\n"
        " input up_down_i,\n"
        " input event_i,\n"
        " input enable_i,\n"
    )
    _CTRL_INPUT_CONNS = (
        " .clk_i(clk_i),\n"
        " .rst_i(rst_i),\n"
        " .up_down_i(up_down_i),\n"
        " .event_i(event_i),\n"
        " .enable_i(enable_i),\n"
    )

    def __init__(self):
        # Constants
        self.__CLK_TYPES_ = [CLK_TYPE_RISE, CLK_TYPE_FALL]
        self.__RST_TYPES_ = [RST_TYPE_SYNC, RST_TYPE_ASYNC]
        self.__RST_POLARITIES_ = [RST_POLARITY_L, RST_POLARITY_H]
        self.__EVENT_TYPES_ = [
            EVENT_TYPE_LOAD,
            EVENT_TYPE_ADD,
            EVENT_TYPE_SUB,
            EVENT_TYPE_SR,
            EVENT_TYPE_SL,
        ]
        self.__MAX_DATA_SIZE_ = 32
        self.__CCB_POSTFIX_ = "_ccb"
        # Internal switches
        # When True, the plain IPs also expose the counter value q_o as a port.
        self.__include_q_ = False

    # ------------------------------------------------------------------
    # Accessors for the supported configuration space
    # ------------------------------------------------------------------
    def clock_types(self):
        """Return the list of supported clock edge selectors."""
        return self.__CLK_TYPES_

    def reset_types(self):
        """Return the list of supported reset kinds (sync / async)."""
        return self.__RST_TYPES_

    def reset_polarities(self):
        """Return the list of supported reset polarities."""
        return self.__RST_POLARITIES_

    def event_types(self):
        """Return the list of supported event operations."""
        return self.__EVENT_TYPES_

    def data_sizes(self):
        """Return the range of data widths, 1 up to and including the maximum."""
        return range(1, self.__MAX_DATA_SIZE_ + 1)

    def max_data_size(self):
        """Return the maximum counter width in bits."""
        return self.__MAX_DATA_SIZE_

    # ------------------------------------------------------------------
    # Selector -> Verilog / name-fragment translation
    # ------------------------------------------------------------------
    def __select(self, table, key, what, expected):
        """Look ``key`` up in ``table`` or raise ValueError naming the valid choices."""
        try:
            return table[key]
        except (KeyError, TypeError):
            # TypeError covers unhashable selectors, which are equally invalid.
            raise ValueError(f"Invalid {what} '{key}'. Expect {expected}\n") from None

    def __verilog_clock_edge(self, edge_type):
        """Map a clock type to the Verilog edge keyword."""
        return self.__select(
            {CLK_TYPE_RISE: "posedge", CLK_TYPE_FALL: "negedge"},
            edge_type,
            "clock type",
            self.__CLK_TYPES_,
        )

    def __verilog_reset_edge(self, edge_type):
        """Map a reset polarity to the edge keyword used in an async sensitivity list."""
        return self.__select(
            {RST_POLARITY_H: "posedge", RST_POLARITY_L: "negedge"},
            edge_type,
            "reset type",
            self.__RST_POLARITIES_,
        )

    def __verilog_reset_polarity(self, polarity_type):
        """Map a reset polarity to the prefix applied to rst_i in the reset test."""
        return self.__select(
            {RST_POLARITY_L: "~", RST_POLARITY_H: ""},
            polarity_type,
            "reset type",
            self.__RST_POLARITIES_,
        )

    def __event_expr(self, event_type, operand):
        """Build the right-hand side assigned to q_o when event_i is asserted."""
        template = self.__select(
            {
                EVENT_TYPE_LOAD: "{0}",
                EVENT_TYPE_ADD: "q_o + {0}",
                EVENT_TYPE_SUB: "q_o - {0}",
                EVENT_TYPE_SL: "q_o << {0}",
                EVENT_TYPE_SR: "q_o >> {0}",
            },
            event_type,
            "event type",
            self.__EVENT_TYPES_,
        )
        return template.format(operand)

    def __verilog_event_op_ccb(self, event_type):
        """Event expression for CCB IPs (operand comes from a port)."""
        return self.__event_expr(event_type, "ccb_load_val_i")

    def __verilog_event_op(self, event_type):
        """Event expression for plain IPs (operand is the LOAD_VAL parameter)."""
        return self.__event_expr(event_type, "LOAD_VAL")

    def __clock_type_str(self, edge_type):
        """Module-name fragment for the clock type."""
        return self.__select(
            {CLK_TYPE_RISE: "clkp", CLK_TYPE_FALL: "clkn"},
            edge_type,
            "clock type",
            self.__CLK_TYPES_,
        )

    def __reset_type_str(self, edge_type, polarity_type):
        """Module-name fragment for the reset kind and polarity, e.g. "arstn"."""
        kind = self.__select(
            {RST_TYPE_ASYNC: "arst", RST_TYPE_SYNC: "srst"},
            edge_type,
            "reset type",
            self.__RST_TYPES_,
        )
        polarity = self.__select(
            {RST_POLARITY_L: "n", RST_POLARITY_H: "p"},
            polarity_type,
            "reset type",
            self.__RST_POLARITIES_,
        )
        return kind + polarity

    def __event_type_str(self, event_type):
        """Module-name fragment for the event operation."""
        return self.__select(
            {
                EVENT_TYPE_LOAD: "load",
                EVENT_TYPE_ADD: "add",
                EVENT_TYPE_SUB: "sub",
                EVENT_TYPE_SL: "sl",
                EVENT_TYPE_SR: "sr",
            },
            event_type,
            "event type",
            self.__EVENT_TYPES_,
        )

    # ------------------------------------------------------------------
    # Module names
    # ------------------------------------------------------------------
    def __module_name(self, size_tag, c_type, r_type, r_polar, e_type):
        """Assemble "pcounter<size_tag>_<clk>_<rst>_<event>"."""
        return (
            "pcounter"
            + size_tag
            + "_"
            + self.__clock_type_str(c_type)
            + "_"
            + self.__reset_type_str(r_type, r_polar)
            + "_"
            + self.__event_type_str(e_type)
        )

    def ccb_ip_template_name(self, c_type, r_type, r_polar, e_type):
        """Name of the width-parameterized CCB template module."""
        return self.__module_name("N", c_type, r_type, r_polar, e_type) + self.__CCB_POSTFIX_

    def ip_template_name(self, c_type, r_type, r_polar, e_type):
        """Name of the width-parameterized plain template module."""
        return self.__module_name("N", c_type, r_type, r_polar, e_type)

    def ip_name(self, c_type, r_type, r_polar, e_type, d_size):
        """Name of the plain wrapper module of width ``d_size``."""
        return self.__module_name(str(d_size), c_type, r_type, r_polar, e_type)

    def ccb_ip_name(self, c_type, r_type, r_polar, e_type, d_size):
        """Name of the CCB wrapper module of width ``d_size``."""
        return (
            self.__module_name(str(d_size), c_type, r_type, r_polar, e_type)
            + self.__CCB_POSTFIX_
        )

    # ------------------------------------------------------------------
    # Shared emit helpers
    # ------------------------------------------------------------------
    def __write_template_banner(self, f0, c_type, r_type, r_polar, e_type):
        """Write the descriptive comment block that precedes a template module."""
        f0.write("//-------------------------------------------------\n\n")
        f0.write(
            "// Template of Programmable counter to be counting up or down as well as paused\n\n"
        )
        f0.write(f"// triggered by {c_type.lower()} edge clock\n\n")
        f0.write(f"// with {r_type.lower()} {r_polar.lower()} reset\n\n")
        f0.write(f"// Capable of {e_type.lower()} values from inputs\n\n")
        f0.write("`default_nettype none\n\n")

    def __write_counter_logic(self, f0, c_type, r_type, r_polar, event_expr, match0, match1):
        """Write the q_o register, its always block, the flag assigns and the module end.

        ``event_expr`` is the value loaded on event_i; ``match0``/``match1``
        are the expressions q_o is compared against.
        """
        f0.write(" reg [0 : DATA_WIDTH - 1] q_o;\n")

        clk_edge = self.__verilog_clock_edge(c_type)
        if r_type == RST_TYPE_ASYNC:
            # Asynchronous reset: the reset edge joins the sensitivity list.
            f0.write(
                f" always@({clk_edge} clk_i or {self.__verilog_reset_edge(r_polar)} rst_i) \n"
            )
        elif r_type == RST_TYPE_SYNC:
            f0.write(f" always@({clk_edge} clk_i) \n")
        else:
            raise ValueError(f"Invalid reset type '{r_type}'. Expect {self.__RST_TYPES_}\n")

        f0.write(" begin\n")
        f0.write(
            f" if ({self.__verilog_reset_polarity(r_polar)}rst_i) //Set Counter to Zero\n"
        )
        f0.write(" q_o <= 0;\n")
        f0.write(" else if(event_i)\n")
        f0.write(f" q_o <= {event_expr};\n")
        f0.write(" else if (~enable_i)\n")
        f0.write(" q_o <= q_o; // pause\n")
        f0.write(" else if(up_down_i) //count down\n")
        f0.write(" q_o <= q_o - 1;\n")
        f0.write(" else //count up\n")
        f0.write(" q_o <= q_o + 1;\n")
        f0.write(" end\n")
        f0.write(" assign zero_o = (q_o == 0) ? 1 : 0;\n")
        f0.write(f" assign match0_o = (q_o == {match0}) ? 1 : 0;\n")
        f0.write(f" assign match1_o = (q_o == {match1}) ? 1 : 0;\n")
        f0.write("endmodule\n")
        f0.write("`default_nettype wire\n")

    @staticmethod
    def __padded(signal, padding_size, require_padding):
        """Return ``signal``, zero-extended by ``padding_size`` bits when padding applies."""
        if require_padding and padding_size:
            return "{ " + signal + ", {" + str(padding_size) + "{1'b0}} }"
        return signal

    # ------------------------------------------------------------------
    # Writers
    # ------------------------------------------------------------------
    def write_ccb_ip_template(self, f0, c_type, r_type, r_polar, e_type):
        """Write the width-parameterized CCB counter module to ``f0``.

        Load and match reference values are input ports, and the counter
        value q_o is always an output.
        """
        self.__write_template_banner(f0, c_type, r_type, r_polar, e_type)
        f0.write(f"module {self.ccb_ip_template_name(c_type, r_type, r_polar, e_type)} #(\n")
        f0.write(f" parameter integer DATA_WIDTH = {self.__MAX_DATA_SIZE_}\n")
        f0.write(")(\n")
        f0.write(self._CTRL_INPUT_DECLS)
        f0.write(" input [0 : DATA_WIDTH - 1] ccb_match0_ref_i,\n")
        f0.write(" input [0 : DATA_WIDTH - 1] ccb_match1_ref_i,\n")
        f0.write(" input [0 : DATA_WIDTH - 1] ccb_load_val_i,\n")
        f0.write(" output match0_o,\n")
        f0.write(" output match1_o,\n")
        f0.write(" output zero_o,\n")
        f0.write(" output [0 : DATA_WIDTH - 1] q_o\n")
        f0.write(");\n")
        self.__write_counter_logic(
            f0,
            c_type,
            r_type,
            r_polar,
            self.__verilog_event_op_ccb(e_type),
            "ccb_match0_ref_i",
            "ccb_match1_ref_i",
        )

    def write_ip_template(self, f0, c_type, r_type, r_polar, e_type):
        """Write the width-parameterized plain counter module to ``f0``.

        Load and match reference values are module parameters; q_o is only
        exposed as a port when the internal include-q switch is on.
        """
        self.__write_template_banner(f0, c_type, r_type, r_polar, e_type)
        f0.write(f"module {self.ip_template_name(c_type, r_type, r_polar, e_type)} #(\n")
        f0.write(f" parameter integer DATA_WIDTH = {self.__MAX_DATA_SIZE_},\n")
        f0.write(" parameter [0 : DATA_WIDTH - 1] LOAD_VAL = {DATA_WIDTH{1'b0}},\n")
        f0.write(" parameter [0 : DATA_WIDTH - 1] MATCH0_REF = {DATA_WIDTH{1'b0}},\n")
        f0.write(" parameter [0 : DATA_WIDTH - 1] MATCH1_REF = {DATA_WIDTH{1'b0}}\n")
        f0.write(")(\n")
        f0.write(self._CTRL_INPUT_DECLS)
        f0.write(" output match0_o,\n")
        f0.write(" output match1_o,\n")
        f0.write(" output zero_o")
        if self.__include_q_:
            f0.write(",\n")
            f0.write(" output [0 : DATA_WIDTH - 1] q_o\n")
        f0.write(");\n")
        self.__write_counter_logic(
            f0,
            c_type,
            r_type,
            r_polar,
            self.__verilog_event_op(e_type),
            "MATCH0_REF",
            "MATCH1_REF",
        )

    def write_ccb_ip(self, f0, c_type, r_type, r_polar, e_type, d_size, require_padding=False):
        """Write a fixed-width CCB wrapper that instantiates the CCB template.

        Args:
            f0: Writable text stream.
            c_type, r_type, r_polar, e_type: Configuration selectors.
            d_size: Width in bits of the wrapper's data ports.
            require_padding: When True the core is instantiated at the maximum
                width and the data inputs are zero-extended to that width.
        """
        d_msb = d_size - 1
        f0.write("`default_nettype none\n\n")
        f0.write(f"module {self.ccb_ip_name(c_type, r_type, r_polar, e_type, d_size)} # (\n")
        f0.write(" // Location constraints\n")
        f0.write(" parameter FPGA_LOC_X = 0,\n")
        f0.write(" parameter FPGA_LOC_Y = 0,\n")
        f0.write(" parameter FPGA_LOC_Z = 0)(\n")
        f0.write(self._CTRL_INPUT_DECLS)
        f0.write(f" input [0 : {d_msb}] ccb_match0_ref_i,\n")
        f0.write(f" input [0 : {d_msb}] ccb_match1_ref_i,\n")
        f0.write(f" input [0 : {d_msb}] ccb_load_val_i,\n")
        f0.write(" output match0_o,\n")
        f0.write(" output match1_o,\n")
        f0.write(" output zero_o,\n")
        f0.write(f" output [0 : {d_msb}] q_o\n")
        f0.write(");\n")

        padding_size = self.__MAX_DATA_SIZE_ - d_size
        core_width = self.__MAX_DATA_SIZE_ if require_padding else d_size

        f0.write(f" {self.ccb_ip_template_name(c_type, r_type, r_polar, e_type)} #(\n")
        f0.write(f" .DATA_WIDTH({core_width})\n")
        f0.write(" ) core (\n")
        f0.write(self._CTRL_INPUT_CONNS)
        for port in ("ccb_load_val_i", "ccb_match0_ref_i", "ccb_match1_ref_i"):
            f0.write(f" .{port}({self.__padded(port, padding_size, require_padding)}),\n")
        f0.write(" .match0_o(match0_o),\n")
        f0.write(" .match1_o(match1_o),\n")
        f0.write(" .zero_o(zero_o),\n")
        f0.write(" .q_o(q_o)\n")
        f0.write(" );\n")
        f0.write("endmodule\n")
        f0.write("`default_nettype wire\n")

    def write_ip(self, f0, c_type, r_type, r_polar, e_type, d_size, require_padding=False):
        """Write a fixed-width plain wrapper that instantiates the plain template.

        Args:
            f0: Writable text stream.
            c_type, r_type, r_polar, e_type: Configuration selectors.
            d_size: Width in bits of the wrapper's LOAD_VAL / MATCH*_REF parameters.
            require_padding: When True the core is instantiated at the maximum
                width and the parameters are zero-extended to that width.
        """
        d_msb = d_size - 1
        f0.write("`default_nettype none\n\n")
        f0.write(f"module {self.ip_name(c_type, r_type, r_polar, e_type, d_size)} #(\n")
        f0.write(" // Location constraints\n")
        f0.write(" parameter FPGA_LOC_X = 0,\n")
        f0.write(" parameter FPGA_LOC_Y = 0,\n")
        f0.write(" parameter FPGA_LOC_Z = 0,\n")
        f0.write(" parameter [0 : " + str(d_msb) + "] LOAD_VAL = {" + str(d_size) + "{1'b0}},\n")
        f0.write(" parameter [0 : " + str(d_msb) + "] MATCH0_REF = {" + str(d_size) + "{1'b0}},\n")
        f0.write(" parameter [0 : " + str(d_msb) + "] MATCH1_REF = {" + str(d_size) + "{1'b0}}\n")
        f0.write(")(\n")
        f0.write(self._CTRL_INPUT_DECLS)
        f0.write(" output match0_o,\n")
        f0.write(" output match1_o,\n")
        f0.write(" output zero_o")
        if self.__include_q_:
            f0.write(",\n")
            f0.write(f" output [0 : {d_msb}] q_o\n")
        else:
            f0.write("\n")
        f0.write(");\n")

        padding_size = self.__MAX_DATA_SIZE_ - d_size
        core_width = self.__MAX_DATA_SIZE_ if require_padding else d_size

        f0.write(f" {self.ip_template_name(c_type, r_type, r_polar, e_type)} #(\n")
        # More parameter overrides follow, so this entry always needs its comma.
        f0.write(f" .DATA_WIDTH({core_width}),\n")
        f0.write(f" .LOAD_VAL({self.__padded('LOAD_VAL', padding_size, require_padding)}),\n")
        f0.write(f" .MATCH0_REF({self.__padded('MATCH0_REF', padding_size, require_padding)}),\n")
        f0.write(f" .MATCH1_REF({self.__padded('MATCH1_REF', padding_size, require_padding)})\n")
        f0.write(" ) core (\n")
        f0.write(self._CTRL_INPUT_CONNS)
        f0.write(" .match0_o(match0_o),\n")
        f0.write(" .match1_o(match1_o),\n")
        f0.write(" .zero_o(zero_o)")
        if self.__include_q_:
            f0.write(",\n")
            f0.write(" .q_o(q_o)\n")
        else:
            f0.write("\n")
        f0.write(" );\n")
        f0.write("endmodule\n")
        f0.write("`default_nettype wire\n")

    # Clear all the data
    def clear(self):
        """Reset every setting to its construction-time value."""
        self.__init__()