Add matching for fused mac operations for Nexus (fix #5906).

This commit is contained in:
nella 2026-05-27 13:19:51 +02:00 committed by nella
parent 94ec78b6e8
commit d6106f141c
7 changed files with 412 additions and 0 deletions

View File

@ -1,6 +1,7 @@
OBJS += techlibs/lattice/synth_lattice.o
OBJS += techlibs/lattice/lattice_gsr.o
OBJS += techlibs/lattice/lattice_dsp_nexus.o
$(eval $(call add_share_file,share/lattice,techlibs/lattice/cells_ff.vh))
$(eval $(call add_share_file,share/lattice,techlibs/lattice/cells_io.vh))
@ -50,3 +51,10 @@ $(eval $(call add_share_file_and_rename,share/ecp5,techlibs/lattice/cells_bb_ecp
$(eval $(call add_share_file,share/nexus,techlibs/lattice/parse_init.vh))
$(eval $(call add_share_file_and_rename,share/nexus,techlibs/lattice/cells_sim_nexus.v,cells_sim.v))
$(eval $(call add_share_file_and_rename,share/nexus,techlibs/lattice/cells_bb_nexus.v,cells_xtra.v))
# Pattern rule: generate a matcher header (<stem>_pm.h) from the .pmg file of
# the same stem by running pmgen.py. The stem's basename is passed via -p and
# every prerequisite except the script itself is passed as an input file.
techlibs/lattice/%_pm.h: passes/pmgen/pmgen.py techlibs/lattice/%.pmg
$(P) mkdir -p $(dir $@) && $(PYTHON_EXECUTABLE) $< -o $@ -p $(notdir $*) $(filter-out $<,$^)
# Register the generated header and make the pass object depend on it, since
# lattice_dsp_nexus.cc includes lattice_dsp_nexus_pm.h.
GENFILES += techlibs/lattice/lattice_dsp_nexus_pm.h
techlibs/lattice/lattice_dsp_nexus.o: techlibs/lattice/lattice_dsp_nexus_pm.h
$(eval $(call add_extra_objs,techlibs/lattice/lattice_dsp_nexus_pm.h))

View File

@ -77,3 +77,92 @@ module \$__NX_MUL9X9 (input [8:0] A, input [8:0] B, output [17:0] Y);
.Z(Y)
);
endmodule
// Techmap wrapper for the fused multiply-accumulate cell emitted by the
// nexus_mac18 pattern. It instantiates MULTADDSUB18X18 with every register
// stage bypassed, feeds C zero-padded to 54 bits and returns the low 48 bits
// of the 54-bit result.
//
// Only A_SIGNED drives the primitive's SIGNED input; B_SIGNED and the *_WIDTH
// parameters are accepted but not used here.
// NOTE(review): all other inputs of the primitive are left unconnected --
// confirm that their defaults are the intended ones.
module \$__NX_MAC18X18 #(
	parameter A_WIDTH  = 18,
	parameter B_WIDTH  = 18,
	parameter C_WIDTH  = 48,
	parameter Y_WIDTH  = 48,
	parameter A_SIGNED = 0,
	parameter B_SIGNED = 0,
	parameter SUBTRACT = 0
) (
	input  [17:0] A,
	input  [17:0] B,
	input  [47:0] C,
	output [47:0] Y
);
	// Full-width result of the primitive; only bits 47:0 leave this module.
	wire [53:0] acc_result;

	MULTADDSUB18X18 #(
		.REGINPUTA("BYPASS"),
		.REGINPUTB("BYPASS"),
		.REGINPUTC("BYPASS"),
		.REGOUTPUT("BYPASS")
	) _TECHMAP_REPLACE_ (
		.A(A),
		.B(B),
		.C({6'b0, C}),
		.SIGNED(A_SIGNED ? 1'b1 : 1'b0),
		.ADDSUB(SUBTRACT ? 1'b1 : 1'b0),
		.Z(acc_result)
	);

	assign Y = acc_result[47:0];
endmodule
// Techmap wrapper for the pre-add/multiply cell emitted by the nexus_preadd18
// pattern. It instantiates MULTPREADD18X18 with bypassed input registers; the
// output register is enabled only when PIPELINED is non-zero.
//
// The primitive's 36-bit result is widened to 48 bits: sign-extended when
// A_SIGNED is set, zero-extended otherwise.
// NOTE(review): no add/subtract control is passed to the primitive, and its
// clock-enable/reset inputs are left unconnected -- confirm this is intended
// for the PIPELINED case.
module \$__NX_PREADD18X18 #(
	parameter PIPELINED = 0,
	parameter A_SIGNED  = 0,
	parameter B_SIGNED  = 0,
	parameter C_SIGNED  = 0
) (
	input  [17:0] A,
	input  [17:0] B,
	input  [17:0] C,
	output [47:0] Y,
	input         CLK
);
	// Raw 36-bit result of the primitive.
	wire [35:0] product;

	MULTPREADD18X18 #(
		.REGINPUTA("BYPASS"),
		.REGINPUTB("BYPASS"),
		.REGINPUTC("BYPASS"),
		.REGOUTPUT(PIPELINED ? "REGISTER" : "BYPASS")
	) _TECHMAP_REPLACE_ (
		.A(A),
		.B(B),
		.C(C),
		.CLK(CLK),
		.SIGNEDA(A_SIGNED ? 1'b1 : 1'b0),
		.SIGNEDB(B_SIGNED ? 1'b1 : 1'b0),
		.SIGNEDC(C_SIGNED ? 1'b1 : 1'b0),
		.Z(product)
	);

	assign Y = A_SIGNED ? {{12{product[35]}}, product} : {12'b0, product};
endmodule
// Techmap wrapper for the four-lane 9x9 dot-product cell emitted by the
// nexus_mac9_4lane pattern. It instantiates MULTADDSUB9X9WIDE with all register
// stages bypassed, a zero C input and ADDSUB tied to 4'b0000, and returns the
// low 48 bits of the 54-bit result. One SIGNED parameter applies to all lanes.
module \$__NX_MAC9X9WIDE_4LANE #(
	parameter SIGNED = 0
) (
	input  [8:0]  A0,
	input  [8:0]  B0,
	input  [8:0]  A1,
	input  [8:0]  B1,
	input  [8:0]  A2,
	input  [8:0]  B2,
	input  [8:0]  A3,
	input  [8:0]  B3,
	output [47:0] Y
);
	// Full-width result of the primitive; only bits 47:0 leave this module.
	wire [53:0] dot_result;

	MULTADDSUB9X9WIDE #(
		.REGINPUTAB0("BYPASS"),
		.REGINPUTAB1("BYPASS"),
		.REGINPUTAB2("BYPASS"),
		.REGINPUTAB3("BYPASS"),
		.REGINPUTC("BYPASS"),
		.REGOUTPUT("BYPASS")
	) _TECHMAP_REPLACE_ (
		.A0(A0), .B0(B0),
		.A1(A1), .B1(B1),
		.A2(A2), .B2(B2),
		.A3(A3), .B3(B3),
		.C(54'b0),
		.SIGNED(SIGNED ? 1'b1 : 1'b0),
		.ADDSUB(4'b0000),
		.Z(dot_result)
	);

	assign Y = dot_result[47:0];
endmodule

View File

@ -0,0 +1,36 @@
#include "kernel/yosys.h"
#include "kernel/sigtools.h"
USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
#include "techlibs/lattice/lattice_dsp_nexus_pm.h"
/// Yosys pass that runs the pmgen-generated Lattice Nexus DSP matchers
/// (nexus_mac9_4lane, nexus_mac18, nexus_preadd18) on every selected module.
struct LatticeDspNexusPass : public Pass {
	LatticeDspNexusPass() : Pass("lattice_dsp_nexus", "Lattice Nexus DSP inference") { }

	/// Print the command's usage text.
	void help() override
	{
		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
		log("\n");
		log("    lattice_dsp_nexus [options] [selection]\n");
		log("\n");
		log("Infer Lattice Nexus sysDSP macrocells (MULTADDSUB18X18, MULTPREADD18X18,\n");
		log("MULTADDSUB9X9WIDE) from MAC and dot-product patterns.\n");
		log("\n");
	}

	/// Run the three patterns on each selected module.
	///
	/// @param args    command line; everything after the command name is
	///                handed to extra_args() as the selection.
	/// @param design  design to operate on.
	void execute(std::vector<std::string> args, RTLIL::Design *design) override
	{
		log_header(design, "Executing LATTICE_DSP_NEXUS pass.\n");

		extra_args(args, 1, design);

		for (auto module : design->selected_modules()) {
			// Only hand selected cells to the matcher so that the [selection]
			// argument is honoured. Each pattern gets a fresh matcher: the
			// previous pattern may have removed and added cells, and a matcher
			// built beforehand would still carry the old netlist view.
			// The widest pattern (four-lane dot product) runs first so its
			// multipliers are not claimed by the single-lane MAC pattern.
			lattice_dsp_nexus_pm(module, module->selected_cells()).run_nexus_mac9_4lane();
			lattice_dsp_nexus_pm(module, module->selected_cells()).run_nexus_mac18();
			lattice_dsp_nexus_pm(module, module->selected_cells()).run_nexus_preadd18();
		}
	}
} LatticeDspNexusPass;
PRIVATE_NAMESPACE_END

View File

@ -0,0 +1,165 @@
pattern nexus_mac18
//
// Fuse a multiplier that feeds one operand of an adder (Y = A*B + C) into a
// single $__NX_MAC18X18 cell.
//
// add_AB records which adder port carries the product. Choice variables are
// local to their match block, so the port is exported through this state
// variable for use in the code block.
//
state <IdString> add_AB

match mul
	select mul->type.in($mul)
	select GetSize(port(mul, \A)) <= 18
	select GetSize(port(mul, \B)) <= 18
	select GetSize(port(mul, \Y)) <= 48
	// The wrapper drives one SIGNED input from A_SIGNED only, so a multiplier
	// with differing operand signedness cannot be represented.
	select mul->getParam(\A_SIGNED).as_bool() == mul->getParam(\B_SIGNED).as_bool()
endmatch

match add
	// Only plain addition is fused. The wrapper configures subtraction the same
	// way for (product - C) and (C - product), so at most one of them could be
	// right. NOTE(review): re-enable subtraction for the orientation the
	// primitive implements once that is confirmed.
	select add->type.in($add)
	select GetSize(port(add, \Y)) <= 48
	choice <IdString> AB {\A, \B}
	index <SigBit> port(add, AB)[0] === port(mul, \Y)[0]
	// The index only compares bit 0; require the whole product to be present.
	filter GetSize(port(add, AB)) >= GetSize(port(mul, \Y))
	filter port(add, AB).extract(0, GetSize(port(mul, \Y))) == port(mul, \Y)
	// A truncated product may only be fused when the sum is at least as narrow,
	// because the primitive adds the full-width product.
	filter GetSize(port(mul, \Y)) >= GetSize(port(add, \Y)) || GetSize(port(mul, \Y)) >= GetSize(port(mul, \A)) + GetSize(port(mul, \B))
	set add_AB AB
endmatch

code
	// The adder operand that is not the product becomes the C input.
	IdString add_C = (add_AB == \A) ? \B : \A;
	IdString sgn_C = (add_C == \A) ? \A_SIGNED : \B_SIGNED;
	bool mul_signed = mul->getParam(\A_SIGNED).as_bool();

	// Widen the operands here with the right signedness instead of leaving
	// narrow connections to be padded when the wrapper is mapped.
	SigSpec sig_A = port(mul, \A);
	SigSpec sig_B = port(mul, \B);
	SigSpec sig_C = port(add, add_C);
	sig_A.extend_u0(18, mul_signed);
	sig_B.extend_u0(18, mul_signed);
	sig_C.extend_u0(48, add->getParam(sgn_C).as_bool());

	// The product net is used by the multiplier itself plus each reader, so
	// more than two users means something besides this adder reads it.
	bool mul_shared = nusers(port(mul, \Y)) > 2;

	Cell *mac = module->addCell(NEW_ID, "$__NX_MAC18X18");
	mac->setPort(\A, sig_A);
	mac->setPort(\B, sig_B);
	mac->setPort(\C, sig_C);
	mac->setPort(\Y, port(add, \Y));
	mac->setParam(\A_SIGNED, mul->getParam(\A_SIGNED));
	mac->setParam(\B_SIGNED, mul->getParam(\B_SIGNED));
	mac->setParam(\SUBTRACT, State::S0);

	// Keep the multiplier when other logic still reads the product.
	if (!mul_shared)
		autoremove(mul);
	autoremove(add);
	accept;
endcode
pattern nexus_preadd18
//
// Fuse an adder that feeds one operand of a multiplier (Y = (A + C) * B),
// optionally followed by a plain output register, into one $__NX_PREADD18X18
// cell.
//
// preadd_AB records which multiplier port carries the pre-adder result. Choice
// variables are local to their match block, so the port is exported through
// this state variable for use in the code block.
//
state <IdString> preadd_AB

match preadd
	// Only addition is fused: the wrapper passes no add/subtract control, so a
	// subtracting pre-adder would be mapped as an addition.
	select preadd->type.in($add)
	// Operands wider than the 18-bit wrapper ports would be truncated.
	select GetSize(port(preadd, \A)) <= 18
	select GetSize(port(preadd, \B)) <= 18
	select GetSize(port(preadd, \Y)) <= 19
endmatch

match mul
	select mul->type.in($mul)
	select GetSize(port(mul, \Y)) <= 48
	choice <IdString> mul_AB {\A, \B}
	index <SigBit> port(mul, mul_AB)[0] === port(preadd, \Y)[0]
	// The index only compares bit 0; require the whole sum to be present.
	filter GetSize(port(mul, mul_AB)) >= GetSize(port(preadd, \Y))
	filter port(mul, mul_AB).extract(0, GetSize(port(preadd, \Y))) == port(preadd, \Y)
	// The other multiplier operand must fit the 18-bit B port.
	filter GetSize(port(mul, mul_AB == \A ? \B : \A)) <= 18
	set preadd_AB mul_AB
endmatch

match pipe_ff
	// Only a plain rising-edge flip-flop can be absorbed: the wrapper connects
	// nothing but CLK, so an enable or reset would be silently dropped.
	select pipe_ff->type.in($dff)
	select pipe_ff->getParam(\CLK_POLARITY).as_bool()
	index <SigBit> port(pipe_ff, \D)[0] === port(mul, \Y)[0]
	filter port(pipe_ff, \D) == port(mul, \Y)
	// The unregistered product disappears once the register is absorbed, so
	// nothing except the flip-flop may read it.
	filter nusers(port(mul, \Y)) == 2
	optional
endmatch

code
	IdString mul_other = (preadd_AB == \A) ? \B : \A;
	IdString sgn_AC = (mul_other == \A) ? \B_SIGNED : \A_SIGNED;
	IdString sgn_B = (mul_other == \A) ? \A_SIGNED : \B_SIGNED;

	// Widen every operand to 18 bits using its own signedness.
	SigSpec sig_A = port(preadd, \A);
	SigSpec sig_C = port(preadd, \B);
	SigSpec sig_B = port(mul, mul_other);
	sig_A.extend_u0(18, preadd->getParam(\A_SIGNED).as_bool());
	sig_C.extend_u0(18, preadd->getParam(\B_SIGNED).as_bool());
	sig_B.extend_u0(18, mul->getParam(sgn_B).as_bool());

	// The sum net is used by the adder itself plus each reader, so more than
	// two users means something besides this multiplier reads it.
	bool preadd_shared = nusers(port(preadd, \Y)) > 2;

	Cell *mac = module->addCell(NEW_ID, "$__NX_PREADD18X18");
	mac->setPort(\A, sig_A);
	mac->setPort(\C, sig_C);
	mac->setPort(\B, sig_B);
	if (pipe_ff) {
		mac->setPort(\Y, port(pipe_ff, \Q));
		mac->setPort(\CLK, port(pipe_ff, \CLK));
		mac->setParam(\PIPELINED, State::S1);
	} else {
		mac->setPort(\Y, port(mul, \Y));
		mac->setPort(\CLK, State::S0);
		mac->setParam(\PIPELINED, State::S0);
	}
	mac->setParam(\A_SIGNED, mul->getParam(sgn_AC));
	mac->setParam(\B_SIGNED, mul->getParam(sgn_B));
	mac->setParam(\C_SIGNED, mul->getParam(sgn_AC));

	if (pipe_ff)
		autoremove(pipe_ff);
	// The new cell now drives the product (or the register output), so the
	// multiplier always goes away.
	autoremove(mul);
	// Keep the pre-adder when other logic still reads the sum.
	if (!preadd_shared)
		autoremove(preadd);
	accept;
endcode
pattern nexus_mac9_4lane
//
// Fuse a left-leaning sum of four 9x9 products,
//     ((mul0 + mul1) + mul2) + mul3
// into one $__NX_MAC9X9WIDE_4LANE cell.
//
// Each connection is checked over its whole width (the index only compares
// bit 0), all four multipliers must share one signedness because the wrapper
// has a single SIGNED parameter, and truncating intermediates are rejected
// because the primitive sums the full-width values.
// NOTE(review): the signedness used by the adders to widen their operands is
// not compared against the multiplier signedness -- confirm whether that needs
// an extra check.
//

match add_top
	select add_top->type == $add
	// The wrapper returns 48 result bits.
	select GetSize(port(add_top, \Y)) <= 48
endmatch

match add_mid
	select add_mid->type == $add
	index <SigBit> port(add_mid, \Y)[0] === port(add_top, \A)[0]
	filter GetSize(port(add_top, \A)) >= GetSize(port(add_mid, \Y))
	filter port(add_top, \A).extract(0, GetSize(port(add_mid, \Y))) == port(add_mid, \Y)
	// Either as wide as the final sum, or wide enough that it cannot overflow.
	filter GetSize(port(add_mid, \Y)) >= GetSize(port(add_top, \Y)) || GetSize(port(add_mid, \Y)) > std::max(GetSize(port(add_mid, \A)), GetSize(port(add_mid, \B)))
endmatch

match add_bot
	select add_bot->type == $add
	index <SigBit> port(add_bot, \Y)[0] === port(add_mid, \A)[0]
	filter GetSize(port(add_mid, \A)) >= GetSize(port(add_bot, \Y))
	filter port(add_mid, \A).extract(0, GetSize(port(add_bot, \Y))) == port(add_bot, \Y)
	filter GetSize(port(add_bot, \Y)) >= GetSize(port(add_top, \Y)) || GetSize(port(add_bot, \Y)) > std::max(GetSize(port(add_bot, \A)), GetSize(port(add_bot, \B)))
endmatch

match mul3
	select mul3->type == $mul
	select GetSize(port(mul3, \A)) <= 9 && GetSize(port(mul3, \B)) <= 9
	select mul3->getParam(\A_SIGNED).as_bool() == mul3->getParam(\B_SIGNED).as_bool()
	index <SigBit> port(mul3, \Y)[0] === port(add_top, \B)[0]
	filter GetSize(port(add_top, \B)) >= GetSize(port(mul3, \Y))
	filter port(add_top, \B).extract(0, GetSize(port(mul3, \Y))) == port(mul3, \Y)
	// Either as wide as the final sum, or wide enough to hold the full product.
	filter GetSize(port(mul3, \Y)) >= GetSize(port(add_top, \Y)) || GetSize(port(mul3, \Y)) >= GetSize(port(mul3, \A)) + GetSize(port(mul3, \B))
endmatch

match mul2
	select mul2->type == $mul
	select GetSize(port(mul2, \A)) <= 9 && GetSize(port(mul2, \B)) <= 9
	select mul2->getParam(\A_SIGNED).as_bool() == mul2->getParam(\B_SIGNED).as_bool()
	index <SigBit> port(mul2, \Y)[0] === port(add_mid, \B)[0]
	filter mul2->getParam(\A_SIGNED).as_bool() == mul3->getParam(\A_SIGNED).as_bool()
	filter GetSize(port(add_mid, \B)) >= GetSize(port(mul2, \Y))
	filter port(add_mid, \B).extract(0, GetSize(port(mul2, \Y))) == port(mul2, \Y)
	filter GetSize(port(mul2, \Y)) >= GetSize(port(add_top, \Y)) || GetSize(port(mul2, \Y)) >= GetSize(port(mul2, \A)) + GetSize(port(mul2, \B))
endmatch

match mul1
	select mul1->type == $mul
	select GetSize(port(mul1, \A)) <= 9 && GetSize(port(mul1, \B)) <= 9
	select mul1->getParam(\A_SIGNED).as_bool() == mul1->getParam(\B_SIGNED).as_bool()
	index <SigBit> port(mul1, \Y)[0] === port(add_bot, \B)[0]
	filter mul1->getParam(\A_SIGNED).as_bool() == mul3->getParam(\A_SIGNED).as_bool()
	filter GetSize(port(add_bot, \B)) >= GetSize(port(mul1, \Y))
	filter port(add_bot, \B).extract(0, GetSize(port(mul1, \Y))) == port(mul1, \Y)
	filter GetSize(port(mul1, \Y)) >= GetSize(port(add_top, \Y)) || GetSize(port(mul1, \Y)) >= GetSize(port(mul1, \A)) + GetSize(port(mul1, \B))
endmatch

match mul0
	select mul0->type == $mul
	select GetSize(port(mul0, \A)) <= 9 && GetSize(port(mul0, \B)) <= 9
	select mul0->getParam(\A_SIGNED).as_bool() == mul0->getParam(\B_SIGNED).as_bool()
	index <SigBit> port(mul0, \Y)[0] === port(add_bot, \A)[0]
	filter mul0->getParam(\A_SIGNED).as_bool() == mul3->getParam(\A_SIGNED).as_bool()
	filter GetSize(port(add_bot, \A)) >= GetSize(port(mul0, \Y))
	filter port(add_bot, \A).extract(0, GetSize(port(mul0, \Y))) == port(mul0, \Y)
	filter GetSize(port(mul0, \Y)) >= GetSize(port(add_top, \Y)) || GetSize(port(mul0, \Y)) >= GetSize(port(mul0, \A)) + GetSize(port(mul0, \B))
endmatch

code
	// All four multipliers were matched with the same signedness.
	bool is_signed = mul0->getParam(\A_SIGNED).as_bool();

	// Widen a lane operand to the 9-bit port width.
	auto ext9 = [&](SigSpec s) {
		s.extend_u0(9, is_signed);
		return s;
	};

	// Remove an absorbed cell unless its output has a reader outside the
	// matched tree. A private net has two users: its driver and one consumer.
	auto remove_unless_shared = [&](Cell *c) {
		if (nusers(port(c, \Y)) <= 2)
			autoremove(c);
	};

	Cell *mac = module->addCell(NEW_ID, "$__NX_MAC9X9WIDE_4LANE");
	mac->setPort(\A0, ext9(port(mul0, \A)));
	mac->setPort(\B0, ext9(port(mul0, \B)));
	mac->setPort(\A1, ext9(port(mul1, \A)));
	mac->setPort(\B1, ext9(port(mul1, \B)));
	mac->setPort(\A2, ext9(port(mul2, \A)));
	mac->setPort(\B2, ext9(port(mul2, \B)));
	mac->setPort(\A3, ext9(port(mul3, \A)));
	mac->setPort(\B3, ext9(port(mul3, \B)));
	mac->setPort(\Y, port(add_top, \Y));
	mac->setParam(\SIGNED, mul0->getParam(\A_SIGNED));

	// The new cell drives the final sum, so the top adder always goes away.
	autoremove(add_top);
	remove_unless_shared(add_mid);
	remove_unless_shared(add_bot);
	remove_unless_shared(mul0);
	remove_unless_shared(mul1);
	remove_unless_shared(mul2);
	remove_unless_shared(mul3);
	accept;
endcode

View File

@ -425,9 +425,12 @@ struct SynthLatticePass : public ScriptPass
run("opt_clean");
if (help_mode) {
run("lattice_dsp_nexus", "(only if -family lifcl/lfd2nx and unless -nodsp)");
run("techmap -map +/mul2dsp.v [...]", "(unless -nodsp)");
run("techmap -map +/lattice/dsp_map" + dsp_map + ".v", "(unless -nodsp)");
} else if (have_dsp && !nodsp) {
if (is_nexus)
run("lattice_dsp_nexus");
for (const auto &rule : dsp_rules) {
run(stringf("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=%d -D DSP_B_MAXWIDTH=%d -D DSP_A_MINWIDTH=%d -D DSP_B_MINWIDTH=%d -D DSP_NAME=%s",
rule.a_maxwidth, rule.b_maxwidth, rule.a_minwidth, rule.b_minwidth, rule.prim));

View File

@ -0,0 +1,76 @@
// https://github.com/YosysHQ/yosys/issues/5906
// Positive case: registered 18x18 multiply-accumulate.
// a and b are registered, then p_r either accumulates a_r*b_r or, when the
// registered clear flag is set, is reloaded with the product alone.
// All registers are cleared synchronously by rst.
module mac (
input bit clk, rst,
input bit [17:0] a, b,
input bit clear,
output bit [47:0] p
);
bit [17:0] a_r, b_r; bit clear_r; bit [47:0] p_r;
always_ff @(posedge clk) begin
if (rst) begin a_r<=0; b_r<=0; clear_r<=0; p_r<=0; end
else begin
a_r<=a; b_r<=b; clear_r<=clear;
// The product feeds both the clear path and the accumulate path.
p_r <= clear_r ? 48'(a_r*b_r) : 48'(p_r + 48'(a_r*b_r));
end
end
assign p = p_r;
endmodule
// Positive case: pre-add, multiply, registered product, then a registered add.
// The pre-adder is an addition on purpose: the $__NX_PREADD18X18 wrapper passes
// no add/subtract control to MULTPREADD18X18, so a subtracting pre-adder cannot
// be expressed by that cell and must not be what this test expects.
// All registers are cleared synchronously by rst.
module madd_pre (
	input bit clk, rst,
	input bit [17:0] a, b, c, d,
	output bit [47:0] p
);
	bit [17:0] a_r, b_r, c_r, d_r; bit [47:0] m_r, p_r;
	always_ff @(posedge clk) begin
		if (rst) begin a_r<=0; b_r<=0; c_r<=0; d_r<=0; m_r<=0; p_r<=0; end
		else begin
			a_r<=a; b_r<=b; c_r<=c; d_r<=d;
			// Pre-add followed by multiply: the shape nexus_preadd18 looks for.
			m_r <= 48'((a_r + d_r) * b_r);
			p_r <= 48'(m_r + 48'(c_r));
		end
	end
	assign p = p_r;
endmodule
// Positive case: four-lane 9x9 dot product.
// The eight operands are registered, then the sum of the four products is
// registered into the 20-bit p_r. All registers are cleared synchronously by rst.
module dot4 (
input bit clk, rst,
input bit [8:0] a0, b0, a1, b1, a2, b2, a3, b3,
output bit [19:0] p
);
bit [8:0] a0_r, b0_r, a1_r, b1_r, a2_r, b2_r, a3_r, b3_r;
bit [19:0] p_r;
always_ff @(posedge clk) begin
if (rst) begin
a0_r<=0; b0_r<=0; a1_r<=0; b1_r<=0;
a2_r<=0; b2_r<=0; a3_r<=0; b3_r<=0;
p_r<=0;
end else begin
a0_r<=a0; b0_r<=b0; a1_r<=a1; b1_r<=b1;
a2_r<=a2; b2_r<=b2; a3_r<=a3; b3_r<=b3;
// Left-to-right sum of four products, each widened to 20 bits.
p_r <= 20'(20'(a0_r*b0_r) + 20'(a1_r*b1_r) + 20'(a2_r*b2_r) + 20'(a3_r*b3_r));
end
end
assign p = p_r;
endmodule
// Negative case: oversized 24x24 MAC.
// Same multiply-accumulate-with-clear structure as the mac module, but with
// 24-bit operands and no reset. The test script expects no MULTADDSUB18X18
// cell for this module.
module neg_mac24 (input clk, clear, input [23:0] a, b, output [47:0] p);
reg [23:0] a_r, b_r; reg [47:0] p_r; reg clear_r;
always_ff @(posedge clk) begin
a_r <= a; b_r <= b; clear_r <= clear;
p_r <= clear_r ? 48'(a_r*b_r) : 48'(p_r + 48'(a_r*b_r));
end
assign p = p_r;
endmodule
// Negative case for the four-lane matcher: dot product with mixed 9x9 and
// 18x18 lanes. Only three products are summed (two 9x9 and one 18x18), so the
// test script expects no MULTADDSUB9X9WIDE cell and two MULTADDSUB18X18 cells.
module neg_dot_mixed (input clk, input [8:0] a0,b0,a1,b1, input [17:0] a2, b2, output [35:0] p);
reg [8:0] a0_r,b0_r,a1_r,b1_r; reg [17:0] a2_r, b2_r; reg [35:0] p_r;
always_ff @(posedge clk) begin
a0_r<=a0; b0_r<=b0; a1_r<=a1; b1_r<=b1; a2_r<=a2; b2_r<=b2;
p_r <= 36'(36'(a0_r*b0_r) + 36'(a1_r*b1_r) + 36'(a2_r*b2_r));
end
assign p = p_r;
endmodule

View File

@ -0,0 +1,35 @@
# Structural test for Nexus DSP inference: each section reloads the pristine
# design, synthesises one top module for the lifcl family and checks cell counts.
# These checks only count cells; they do not prove functional equivalence.
read_verilog -sv fuse_mac.sv
design -save pristine
# 18x18 MAC: expect one fused MULTADDSUB18X18 and no CCU2 cells
design -load pristine
hierarchy -top mac;
synth_nexus -family lifcl -top mac
select -assert-count 1 t:MULTADDSUB18X18
select -assert-count 0 t:CCU2
# 18x18 pre-add MAC: expect one MULTPREADD18X18
design -load pristine
hierarchy -top madd_pre;
synth_nexus -family lifcl -top madd_pre
select -assert-count 1 t:MULTPREADD18X18
# 4-lane 9x9 dot product: expect one MULTADDSUB9X9WIDE
design -load pristine
hierarchy -top dot4;
synth_nexus -family lifcl -top dot4
select -assert-count 1 t:MULTADDSUB9X9WIDE
# Negative: 24x24 MAC must not produce a MULTADDSUB18X18
design -load pristine
hierarchy -top neg_mac24;
synth_nexus -family lifcl -top neg_mac24
select -assert-count 0 t:MULTADDSUB18X18
# Negative: mixed-width three-lane dot product must not produce the 4-lane
# cell; two MULTADDSUB18X18 cells are expected instead
design -load pristine
hierarchy -top neg_dot_mixed;
synth_nexus -family lifcl -top neg_dot_mixed
select -assert-count 0 t:MULTADDSUB9X9WIDE
select -assert-count 2 t:MULTADDSUB18X18