From d6106f141cc66c116c79cdc85bdf16327c43bed8 Mon Sep 17 00:00:00 2001
From: nella
Date: Wed, 27 May 2026 13:19:51 +0200
Subject: [PATCH] Add matching for fused mac operations for Nexus (fix #5906).

---
 techlibs/lattice/Makefile.inc          |   8 +
 techlibs/lattice/dsp_map_nexus.v       |  95 ++++++++
 techlibs/lattice/lattice_dsp_nexus.cc  |  44 ++++
 techlibs/lattice/lattice_dsp_nexus.pmg | 260 ++++++++++++++++++++++++
 techlibs/lattice/synth_lattice.cc      |   3 +
 tests/arch/nexus/fuse_mac.sv           |  76 +++++++
 tests/arch/nexus/fuse_mac.ys           |  35 ++++
 7 files changed, 521 insertions(+)
 create mode 100644 techlibs/lattice/lattice_dsp_nexus.cc
 create mode 100644 techlibs/lattice/lattice_dsp_nexus.pmg
 create mode 100644 tests/arch/nexus/fuse_mac.sv
 create mode 100644 tests/arch/nexus/fuse_mac.ys

diff --git a/techlibs/lattice/Makefile.inc b/techlibs/lattice/Makefile.inc
index 9084472cf..1fb150e6c 100644
--- a/techlibs/lattice/Makefile.inc
+++ b/techlibs/lattice/Makefile.inc
@@ -1,6 +1,7 @@
 
 OBJS += techlibs/lattice/synth_lattice.o
 OBJS += techlibs/lattice/lattice_gsr.o
+OBJS += techlibs/lattice/lattice_dsp_nexus.o
 
 $(eval $(call add_share_file,share/lattice,techlibs/lattice/cells_ff.vh))
 $(eval $(call add_share_file,share/lattice,techlibs/lattice/cells_io.vh))
@@ -50,3 +51,10 @@ $(eval $(call add_share_file_and_rename,share/ecp5,techlibs/lattice/cells_bb_ecp
 $(eval $(call add_share_file,share/nexus,techlibs/lattice/parse_init.vh))
 $(eval $(call add_share_file_and_rename,share/nexus,techlibs/lattice/cells_sim_nexus.v,cells_sim.v))
 $(eval $(call add_share_file_and_rename,share/nexus,techlibs/lattice/cells_bb_nexus.v,cells_xtra.v))
+
+techlibs/lattice/%_pm.h: passes/pmgen/pmgen.py techlibs/lattice/%.pmg
+	$(P) mkdir -p $(dir $@) && $(PYTHON_EXECUTABLE) $< -o $@ -p $(notdir $*) $(filter-out $<,$^)
+
+GENFILES += techlibs/lattice/lattice_dsp_nexus_pm.h
+techlibs/lattice/lattice_dsp_nexus.o: techlibs/lattice/lattice_dsp_nexus_pm.h
+$(eval $(call add_extra_objs,techlibs/lattice/lattice_dsp_nexus_pm.h))
diff --git a/techlibs/lattice/dsp_map_nexus.v b/techlibs/lattice/dsp_map_nexus.v
--- a/techlibs/lattice/dsp_map_nexus.v
+++ b/techlibs/lattice/dsp_map_nexus.v
@@ -77,3 +77,98 @@ module \$__NX_MUL9X9 (input [8:0] A, input [8:0] B, output [17:0] Y);
 		.Z(Y)
 	);
 endmodule
+
+// Wraps MULTADDSUB18X18 with all registers bypassed. SUBTRACT drives ADDSUB
+// and Y is the low 48 bits of the 54-bit Z output.
+module \$__NX_MAC18X18 (A, B, C, Y);
+
+	parameter A_WIDTH = 18;
+	parameter B_WIDTH = 18;
+	parameter C_WIDTH = 48;
+	parameter Y_WIDTH = 48;
+	parameter A_SIGNED = 0;
+	parameter B_SIGNED = 0;
+	parameter SUBTRACT = 0;
+	input [17:0] A;
+	input [17:0] B;
+	input [47:0] C;
+	output [47:0] Y;
+	wire [53:0] Z_out;
+	assign Y = Z_out[47:0];
+
+	MULTADDSUB18X18 #(
+		.REGINPUTA("BYPASS"),
+		.REGINPUTB("BYPASS"),
+		.REGINPUTC("BYPASS"),
+		.REGOUTPUT("BYPASS")
+	) _TECHMAP_REPLACE_ (
+		.A(A),
+		.B(B),
+		.C({6'b0, C}),
+		.SIGNED(A_SIGNED ? 1'b1 : 1'b0),
+		.ADDSUB(SUBTRACT ? 1'b1 : 1'b0),
+		.Z(Z_out)
+	);
+endmodule
+
+// Wraps MULTPREADD18X18. PIPELINED enables the output register; Y is the
+// 36-bit Z output sign- or zero-extended to 48 bits according to A_SIGNED.
+module \$__NX_PREADD18X18 (A, B, C, Y, CLK);
+
+	parameter PIPELINED = 0;
+	parameter A_SIGNED = 0;
+	parameter B_SIGNED = 0;
+	parameter C_SIGNED = 0;
+	input [17:0] A;
+	input [17:0] B;
+	input [17:0] C;
+	input CLK;
+	output [47:0] Y;
+	wire [35:0] Z_out;
+	assign Y = A_SIGNED ? {{12{Z_out[35]}}, Z_out} : {12'b0, Z_out};
+
+	MULTPREADD18X18 #(
+		.REGINPUTA("BYPASS"),
+		.REGINPUTB("BYPASS"),
+		.REGINPUTC("BYPASS"),
+		.REGOUTPUT(PIPELINED ? "REGISTER" : "BYPASS")
+	) _TECHMAP_REPLACE_ (
+		.A(A),
+		.B(B),
+		.C(C),
+		.CLK(CLK),
+		.SIGNEDA(A_SIGNED ? 1'b1 : 1'b0),
+		.SIGNEDB(B_SIGNED ? 1'b1 : 1'b0),
+		.SIGNEDC(C_SIGNED ? 1'b1 : 1'b0),
+		.Z(Z_out)
+	);
+endmodule
+
+// Wraps MULTADDSUB9X9WIDE with C and ADDSUB tied to zero and all registers
+// bypassed. Y is the low 48 bits of the 54-bit Z output.
+module \$__NX_MAC9X9WIDE_4LANE (A0, B0, A1, B1, A2, B2, A3, B3, Y);
+
+	parameter SIGNED = 0;
+	input [8:0] A0, B0, A1, B1, A2, B2, A3, B3;
+	output [47:0] Y;
+	wire [53:0] Z_out;
+	assign Y = Z_out[47:0];
+
+	MULTADDSUB9X9WIDE #(
+		.REGINPUTAB0("BYPASS"),
+		.REGINPUTAB1("BYPASS"),
+		.REGINPUTAB2("BYPASS"),
+		.REGINPUTAB3("BYPASS"),
+		.REGINPUTC("BYPASS"),
+		.REGOUTPUT("BYPASS")
+	) _TECHMAP_REPLACE_ (
+		.A0(A0), .B0(B0),
+		.A1(A1), .B1(B1),
+		.A2(A2), .B2(B2),
+		.A3(A3), .B3(B3),
+		.C(54'b0),
+		.SIGNED(SIGNED ? 1'b1 : 1'b0),
+		.ADDSUB(4'b0000),
+		.Z(Z_out)
+	);
+endmodule
diff --git a/techlibs/lattice/lattice_dsp_nexus.cc b/techlibs/lattice/lattice_dsp_nexus.cc
new file mode 100644
--- /dev/null
+++ b/techlibs/lattice/lattice_dsp_nexus.cc
@@ -0,0 +1,44 @@
+#include "kernel/yosys.h"
+#include "kernel/sigtools.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+#include "techlibs/lattice/lattice_dsp_nexus_pm.h"
+
+// Runs the pmgen matchers generated from lattice_dsp_nexus.pmg on every
+// selected module. They fuse $mul/$add structures into $__NX_* wrapper cells,
+// which dsp_map_nexus.v then maps onto the Nexus DSP primitives.
+struct LatticeDspNexusPass : public Pass {
+	LatticeDspNexusPass() : Pass("lattice_dsp_nexus", "Lattice Nexus DSP inference") { }
+	void help() override
+	{
+		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("\n");
+		log("    lattice_dsp_nexus [options] [selection]\n");
+		log("\n");
+		log("Infer Lattice Nexus sysDSP macrocells (MULTADDSUB18X18, MULTPREADD18X18,\n");
+		log("MULTADDSUB9X9WIDE) from MAC and dot-product patterns.\n");
+		log("\n");
+	}
+	void execute(std::vector<std::string> args, RTLIL::Design *design) override
+	{
+		log_header(design, "Executing LATTICE_DSP_NEXUS pass.\n");
+		// No pass-specific options: everything after the command name is
+		// handed to extra_args() as the selection.
+		extra_args(args, 1, design);
+
+		for (auto module : design->selected_modules()) {
+			// Restrict matching to the selected cells, as help() advertises.
+			lattice_dsp_nexus_pm pm(module, module->selected_cells());
+
+			// The 4-lane dot product goes first: nexus_mac18 would otherwise
+			// claim one multiplier/adder pair out of the adder tree.
+			pm.run_nexus_mac9_4lane();
+			pm.run_nexus_mac18();
+			pm.run_nexus_preadd18();
+		}
+	}
+} LatticeDspNexusPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/techlibs/lattice/lattice_dsp_nexus.pmg b/techlibs/lattice/lattice_dsp_nexus.pmg
new file mode 100644
--- /dev/null
+++ b/techlibs/lattice/lattice_dsp_nexus.pmg
@@ -0,0 +1,260 @@
+// Pattern matchers for the lattice_dsp_nexus pass.
+//
+// Each pattern fuses generic $mul/$add cells into one of the $__NX_* wrapper
+// cells that dsp_map_nexus.v maps onto a Nexus DSP primitive.
+//
+// NOTE(review): no pattern checks that a $mul output carries the full-width
+// product. Confirm whether truncated products can reach this pass and need
+// to be filtered out before fusing.
+
+pattern nexus_mac18
+
+// Adder port (\A or \B) that is fed by the multiplier.
+state <IdString> add_AB
+
+match mul
+	select mul->type.in($mul)
+	select GetSize(port(mul, \A)) <= 18
+	select GetSize(port(mul, \B)) <= 18
+	select GetSize(port(mul, \Y)) <= 48
+	// The wrapper derives its single SIGNED pin from A_SIGNED alone.
+	select param(mul, \A_SIGNED).as_bool() == param(mul, \B_SIGNED).as_bool()
+endmatch
+
+match add
+	select add->type.in($add, $sub)
+	select GetSize(port(add, \Y)) <= 48
+	choice <IdString> AB {\A, \B}
+	index <SigBit> port(add, AB)[0] === port(mul, \Y)[0]
+	// Bit 0 alone is not enough: the whole product must feed this operand.
+	filter GetSize(port(add, AB)) >= GetSize(port(mul, \Y))
+	filter port(add, AB).extract(0, GetSize(port(mul, \Y))) == port(mul, \Y)
+	// One SUBTRACT flag cannot express both "A*B - C" and "C - A*B".
+	// NOTE(review): assumes SUBTRACT=1 means C - A*B; confirm against the
+	// MULTADDSUB18X18 primitive.
+	filter add->type == $add || AB == \B
+	set add_AB AB
+endmatch
+
+code
+	// The adder operand that is not the product becomes the addend C.
+	IdString add_C = (add_AB == \A) ? \B : \A;
+	IdString sgn_C = (add_C == \A) ? \A_SIGNED : \B_SIGNED;
+	bool ab_signed = param(mul, \A_SIGNED).as_bool();
+
+	// Extend to the wrapper port widths here so that narrow signed operands
+	// are sign-extended rather than left to the techmap width adaptation.
+	SigSpec sig_A = port(mul, \A);
+	SigSpec sig_B = port(mul, \B);
+	SigSpec sig_C = port(add, add_C);
+	sig_A.extend_u0(18, ab_signed);
+	sig_B.extend_u0(18, ab_signed);
+	sig_C.extend_u0(48, param(add, sgn_C).as_bool());
+
+	Cell *mac = module->addCell(NEW_ID, "$__NX_MAC18X18");
+	mac->setPort(\A, sig_A);
+	mac->setPort(\B, sig_B);
+	mac->setPort(\C, sig_C);
+	mac->setPort(\Y, port(add, \Y));
+	mac->setParam(\A_SIGNED, mul->getParam(\A_SIGNED));
+	mac->setParam(\B_SIGNED, mul->getParam(\B_SIGNED));
+	mac->setParam(\SUBTRACT, add->type == $sub ? State::S1 : State::S0);
+
+	// nusers() counts the driver too, so 2 means the adder is the only
+	// reader. A product that is read elsewhere keeps its multiplier.
+	if (nusers(port(mul, \Y)) <= 2)
+		autoremove(mul);
+	autoremove(add);
+
+	accept;
+endcode
+
+pattern nexus_preadd18
+
+// Multiplier port (\A or \B) that is fed by the pre-adder.
+state <IdString> actual_mul_AB
+
+match preadd
+	// Only $add: the $__NX_PREADD18X18 wrapper has no subtract control.
+	select preadd->type.in($add)
+	// Wider operands would not fit the 18-bit wrapper ports.
+	select GetSize(port(preadd, \A)) <= 18
+	select GetSize(port(preadd, \B)) <= 18
+	select GetSize(port(preadd, \Y)) <= 19
+endmatch
+
+match mul
+	select mul->type.in($mul)
+	select GetSize(port(mul, \Y)) <= 48
+	choice <IdString> mul_AB {\A, \B}
+	index <SigBit> port(mul, mul_AB)[0] === port(preadd, \Y)[0]
+	// The whole pre-adder result must feed this multiplier operand.
+	filter GetSize(port(mul, mul_AB)) >= GetSize(port(preadd, \Y))
+	filter port(mul, mul_AB).extract(0, GetSize(port(preadd, \Y))) == port(preadd, \Y)
+	// The remaining operand goes to the 18-bit B port.
+	filter GetSize(port(mul, mul_AB == \A ? \B : \A)) <= 18
+	set actual_mul_AB mul_AB
+endmatch
+
+match pipe_ff
+	// The wrapper only has a CLK input, so a flop with enable or reset
+	// cannot be absorbed without changing behaviour.
+	// NOTE(review): assumes the primitive output register is rising-edge.
+	select pipe_ff->type.in($dff)
+	select param(pipe_ff, \CLK_POLARITY).as_bool()
+	index <SigBit> port(pipe_ff, \D)[0] === port(mul, \Y)[0]
+	filter port(pipe_ff, \D) == port(mul, \Y)
+	optional
+endmatch
+
+code
+	IdString mul_other = (actual_mul_AB == \A) ? \B : \A;
+	IdString sgn_AC = (actual_mul_AB == \A) ? \A_SIGNED : \B_SIGNED;
+	IdString sgn_B = (mul_other == \A) ? \A_SIGNED : \B_SIGNED;
+
+	// Sign-aware extension to the 18-bit wrapper ports; the matcher has
+	// already rejected operands wider than that.
+	// NOTE(review): assumes the signedness of the pre-adder operands agrees
+	// with the multiplier port they feed.
+	SigSpec sig_A = port(preadd, \A);
+	SigSpec sig_C = port(preadd, \B);
+	SigSpec sig_B = port(mul, mul_other);
+	sig_A.extend_u0(18, param(preadd, \A_SIGNED).as_bool());
+	sig_C.extend_u0(18, param(preadd, \B_SIGNED).as_bool());
+	sig_B.extend_u0(18, param(mul, sgn_B).as_bool());
+
+	Cell *mac = module->addCell(NEW_ID, "$__NX_PREADD18X18");
+	mac->setPort(\A, sig_A);
+	mac->setPort(\C, sig_C);
+	mac->setPort(\B, sig_B);
+
+	if (pipe_ff) {
+		mac->setPort(\Y, port(pipe_ff, \Q));
+		mac->setPort(\CLK, port(pipe_ff, \CLK));
+		mac->setParam(\PIPELINED, State::S1);
+	} else {
+		mac->setPort(\Y, port(mul, \Y));
+		mac->setPort(\CLK, State::S0);
+		mac->setParam(\PIPELINED, State::S0);
+	}
+
+	mac->setParam(\A_SIGNED, mul->getParam(sgn_AC));
+	mac->setParam(\B_SIGNED, mul->getParam(sgn_B));
+	mac->setParam(\C_SIGNED, mul->getParam(sgn_AC));
+
+	// Without a pipeline flop the new cell drives the multiplier output
+	// net itself, so the multiplier has to go. With one, the unregistered
+	// product may have other readers (nusers() > 2) that still need it,
+	// and a multiplier that stays still needs its pre-adder.
+	bool drop_mul = !pipe_ff || nusers(port(mul, \Y)) <= 2;
+	bool drop_preadd = drop_mul && nusers(port(preadd, \Y)) <= 2;
+
+	if (pipe_ff)
+		autoremove(pipe_ff);
+	if (drop_mul)
+		autoremove(mul);
+	if (drop_preadd)
+		autoremove(preadd);
+	accept;
+endcode
+
+pattern nexus_mac9_4lane
+
+// Matches ((mul0 + mul1) + mul2) + mul3. Intermediate sums and products
+// must have no other readers because all seven cells are removed.
+
+match add_top
+	select add_top->type == $add
+	// The wrapper output is 48 bits wide.
+	select GetSize(port(add_top, \Y)) <= 48
+endmatch
+
+match add_mid
+	select add_mid->type == $add
+	select nusers(port(add_mid, \Y)) <= 2
+	index <SigBit> port(add_mid, \Y)[0] === port(add_top, \A)[0]
+	filter GetSize(port(add_top, \A)) >= GetSize(port(add_mid, \Y))
+	filter port(add_top, \A).extract(0, GetSize(port(add_mid, \Y))) == port(add_mid, \Y)
+endmatch
+
+match add_bot
+	select add_bot->type == $add
+	select nusers(port(add_bot, \Y)) <= 2
+	index <SigBit> port(add_bot, \Y)[0] === port(add_mid, \A)[0]
+	filter GetSize(port(add_mid, \A)) >= GetSize(port(add_bot, \Y))
+	filter port(add_mid, \A).extract(0, GetSize(port(add_bot, \Y))) == port(add_bot, \Y)
+endmatch
+
+match mul3
+	select mul3->type == $mul
+	select GetSize(port(mul3, \A)) <= 9 && GetSize(port(mul3, \B)) <= 9
+	// The wrapper has one SIGNED flag shared by all operands of all lanes.
+	select param(mul3, \A_SIGNED).as_bool() == param(mul3, \B_SIGNED).as_bool()
+	select nusers(port(mul3, \Y)) <= 2
+	index <SigBit> port(mul3, \Y)[0] === port(add_top, \B)[0]
+	filter GetSize(port(add_top, \B)) >= GetSize(port(mul3, \Y))
+	filter port(add_top, \B).extract(0, GetSize(port(mul3, \Y))) == port(mul3, \Y)
+endmatch
+
+match mul2
+	select mul2->type == $mul
+	select GetSize(port(mul2, \A)) <= 9 && GetSize(port(mul2, \B)) <= 9
+	select param(mul2, \A_SIGNED).as_bool() == param(mul2, \B_SIGNED).as_bool()
+	select nusers(port(mul2, \Y)) <= 2
+	index <SigBit> port(mul2, \Y)[0] === port(add_mid, \B)[0]
+	filter GetSize(port(add_mid, \B)) >= GetSize(port(mul2, \Y))
+	filter port(add_mid, \B).extract(0, GetSize(port(mul2, \Y))) == port(mul2, \Y)
+	filter param(mul2, \A_SIGNED).as_bool() == param(mul3, \A_SIGNED).as_bool()
+endmatch
+
+match mul1
+	select mul1->type == $mul
+	select GetSize(port(mul1, \A)) <= 9 && GetSize(port(mul1, \B)) <= 9
+	select param(mul1, \A_SIGNED).as_bool() == param(mul1, \B_SIGNED).as_bool()
+	select nusers(port(mul1, \Y)) <= 2
+	index <SigBit> port(mul1, \Y)[0] === port(add_bot, \B)[0]
+	filter GetSize(port(add_bot, \B)) >= GetSize(port(mul1, \Y))
+	filter port(add_bot, \B).extract(0, GetSize(port(mul1, \Y))) == port(mul1, \Y)
+	filter param(mul1, \A_SIGNED).as_bool() == param(mul3, \A_SIGNED).as_bool()
+endmatch
+
+match mul0
+	select mul0->type == $mul
+	select GetSize(port(mul0, \A)) <= 9 && GetSize(port(mul0, \B)) <= 9
+	select param(mul0, \A_SIGNED).as_bool() == param(mul0, \B_SIGNED).as_bool()
+	select nusers(port(mul0, \Y)) <= 2
+	index <SigBit> port(mul0, \Y)[0] === port(add_bot, \A)[0]
+	filter GetSize(port(add_bot, \A)) >= GetSize(port(mul0, \Y))
+	filter port(add_bot, \A).extract(0, GetSize(port(mul0, \Y))) == port(mul0, \Y)
+	filter param(mul0, \A_SIGNED).as_bool() == param(mul3, \A_SIGNED).as_bool()
+endmatch
+
+code
+	bool is_signed = param(mul0, \A_SIGNED).as_bool();
+	// Pad every operand to the 9-bit lane width, keeping its sign.
+	auto ext9 = [&](SigSpec s) {
+		s.extend_u0(9, is_signed);
+		return s;
+	};
+
+	Cell *mac = module->addCell(NEW_ID, "$__NX_MAC9X9WIDE_4LANE");
+	mac->setPort(\A0, ext9(port(mul0, \A)));
+	mac->setPort(\B0, ext9(port(mul0, \B)));
+	mac->setPort(\A1, ext9(port(mul1, \A)));
+	mac->setPort(\B1, ext9(port(mul1, \B)));
+	mac->setPort(\A2, ext9(port(mul2, \A)));
+	mac->setPort(\B2, ext9(port(mul2, \B)));
+	mac->setPort(\A3, ext9(port(mul3, \A)));
+	mac->setPort(\B3, ext9(port(mul3, \B)));
+	mac->setPort(\Y, port(add_top, \Y));
+	mac->setParam(\SIGNED, mul0->getParam(\A_SIGNED));
+
+	autoremove(add_top);
+	autoremove(add_mid);
+	autoremove(add_bot);
+	autoremove(mul0);
+	autoremove(mul1);
+	autoremove(mul2);
+	autoremove(mul3);
+	accept;
+endcode
diff --git a/techlibs/lattice/synth_lattice.cc b/techlibs/lattice/synth_lattice.cc
index 382dae3d8..43fb7b1c2 100644
--- a/techlibs/lattice/synth_lattice.cc
+++ b/techlibs/lattice/synth_lattice.cc
@@ -425,9 +425,12 @@ struct SynthLatticePass : public ScriptPass
 			run("opt_clean");
 
 			if (help_mode) {
+				run("lattice_dsp_nexus", "(only if -family lifcl/lfd2nx and unless -nodsp)");
 				run("techmap -map +/mul2dsp.v [...]", "(unless -nodsp)");
 				run("techmap -map +/lattice/dsp_map" + dsp_map + ".v", "(unless -nodsp)");
 			} else if (have_dsp && !nodsp) {
+				if (is_nexus)
+					run("lattice_dsp_nexus");
 				for (const auto &rule : dsp_rules) {
 					run(stringf("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=%d -D DSP_B_MAXWIDTH=%d -D DSP_A_MINWIDTH=%d -D DSP_B_MINWIDTH=%d -D DSP_NAME=%s",
 						rule.a_maxwidth, rule.b_maxwidth, rule.a_minwidth, rule.b_minwidth, rule.prim));
diff --git a/tests/arch/nexus/fuse_mac.sv b/tests/arch/nexus/fuse_mac.sv
new file mode 100644
--- /dev/null
+++ b/tests/arch/nexus/fuse_mac.sv
@@ -0,0 +1,76 @@
+// https://github.com/YosysHQ/yosys/issues/5906
+
+module mac (
+    input bit clk, rst,
+    input bit [17:0] a, b,
+    input bit clear,
+    output bit [47:0] p
+);
+    bit [17:0] a_r, b_r; bit clear_r; bit [47:0] p_r;
+    always_ff @(posedge clk) begin
+        if (rst) begin a_r<=0; b_r<=0; clear_r<=0; p_r<=0; end
+        else begin
+            a_r<=a; b_r<=b; clear_r<=clear;
+            p_r <= clear_r ? 48'(a_r*b_r) : 48'(p_r + 48'(a_r*b_r));
+        end
+    end
+    assign p = p_r;
+endmodule
+
+module madd_pre (
+    input bit clk, rst,
+    input bit [17:0] a, b, c, d,
+    output bit [47:0] p
+);
+    bit [17:0] a_r, b_r, c_r, d_r; bit [47:0] m_r, p_r;
+    always_ff @(posedge clk) begin
+        if (rst) begin a_r<=0; b_r<=0; c_r<=0; d_r<=0; m_r<=0; p_r<=0; end
+        else begin
+            a_r<=a; b_r<=b; c_r<=c; d_r<=d;
+            m_r <= 48'((a_r + d_r) * b_r); // the pre-add wrapper is add-only
+            p_r <= 48'(m_r + 48'(c_r));
+        end
+    end
+    assign p = p_r;
+endmodule
+
+module dot4 (
+    input bit clk, rst,
+    input bit [8:0] a0, b0, a1, b1, a2, b2, a3, b3,
+    output bit [19:0] p
+);
+    bit [8:0] a0_r, b0_r, a1_r, b1_r, a2_r, b2_r, a3_r, b3_r;
+    bit [19:0] p_r;
+    always_ff @(posedge clk) begin
+        if (rst) begin
+            a0_r<=0; b0_r<=0; a1_r<=0; b1_r<=0;
+            a2_r<=0; b2_r<=0; a3_r<=0; b3_r<=0;
+            p_r<=0;
+        end else begin
+            a0_r<=a0; b0_r<=b0; a1_r<=a1; b1_r<=b1;
+            a2_r<=a2; b2_r<=b2; a3_r<=a3; b3_r<=b3;
+            p_r <= 20'(20'(a0_r*b0_r) + 20'(a1_r*b1_r) + 20'(a2_r*b2_r) + 20'(a3_r*b3_r));
+        end
+    end
+    assign p = p_r;
+endmodule
+
+// Oversized 24x24 MAC
+module neg_mac24 (input clk, clear, input [23:0] a, b, output [47:0] p);
+    reg [23:0] a_r, b_r; reg [47:0] p_r; reg clear_r;
+    always_ff @(posedge clk) begin
+        a_r <= a; b_r <= b; clear_r <= clear;
+        p_r <= clear_r ? 48'(a_r*b_r) : 48'(p_r + 48'(a_r*b_r));
+    end
+    assign p = p_r;
+endmodule
+
+// Dot product with mixed 9x9 and 18x18 lanes
+module neg_dot_mixed (input clk, input [8:0] a0,b0,a1,b1, input [17:0] a2, b2, output [35:0] p);
+    reg [8:0] a0_r,b0_r,a1_r,b1_r; reg [17:0] a2_r, b2_r; reg [35:0] p_r;
+    always_ff @(posedge clk) begin
+        a0_r<=a0; b0_r<=b0; a1_r<=a1; b1_r<=b1; a2_r<=a2; b2_r<=b2;
+        p_r <= 36'(36'(a0_r*b0_r) + 36'(a1_r*b1_r) + 36'(a2_r*b2_r));
+    end
+    assign p = p_r;
+endmodule
diff --git a/tests/arch/nexus/fuse_mac.ys b/tests/arch/nexus/fuse_mac.ys
new file mode 100644
index 000000000..e3e117130
--- /dev/null
+++ b/tests/arch/nexus/fuse_mac.ys
@@ -0,0 +1,35 @@
+read_verilog -sv fuse_mac.sv
+
+design -save pristine
+
+# 18x18 MAC
+design -load pristine
+hierarchy -top mac;
+synth_nexus -family lifcl -top mac
+select -assert-count 1 t:MULTADDSUB18X18
+select -assert-count 0 t:CCU2
+
+# 18x18 pre-add MAC
+design -load pristine
+hierarchy -top madd_pre;
+synth_nexus -family lifcl -top madd_pre
+select -assert-count 1 t:MULTPREADD18X18
+
+# 4-lane 9x9 dot product
+design -load pristine
+hierarchy -top dot4;
+synth_nexus -family lifcl -top dot4
+select -assert-count 1 t:MULTADDSUB9X9WIDE
+
+# 24x24 MAC
+design -load pristine
+hierarchy -top neg_mac24;
+synth_nexus -family lifcl -top neg_mac24
+select -assert-count 0 t:MULTADDSUB18X18
+
+# mixed
+design -load pristine
+hierarchy -top neg_dot_mixed;
+synth_nexus -family lifcl -top neg_dot_mixed
+select -assert-count 0 t:MULTADDSUB9X9WIDE
+select -assert-count 2 t:MULTADDSUB18X18