mirror of https://github.com/YosysHQ/yosys.git
Add matching for fused MAC (multiply-accumulate) operations for Nexus (fixes #5906).
This commit is contained in:
parent
94ec78b6e8
commit
d6106f141c
|
|
@ -1,6 +1,7 @@
|
|||
|
||||
OBJS += techlibs/lattice/synth_lattice.o
|
||||
OBJS += techlibs/lattice/lattice_gsr.o
|
||||
OBJS += techlibs/lattice/lattice_dsp_nexus.o
|
||||
|
||||
$(eval $(call add_share_file,share/lattice,techlibs/lattice/cells_ff.vh))
|
||||
$(eval $(call add_share_file,share/lattice,techlibs/lattice/cells_io.vh))
|
||||
|
|
@ -50,3 +51,10 @@ $(eval $(call add_share_file_and_rename,share/ecp5,techlibs/lattice/cells_bb_ecp
|
|||
$(eval $(call add_share_file,share/nexus,techlibs/lattice/parse_init.vh))
|
||||
$(eval $(call add_share_file_and_rename,share/nexus,techlibs/lattice/cells_sim_nexus.v,cells_sim.v))
|
||||
$(eval $(call add_share_file_and_rename,share/nexus,techlibs/lattice/cells_bb_nexus.v,cells_xtra.v))
|
||||
|
||||
# Run pmgen on a techlibs/lattice/<name>.pmg pattern file to produce
# techlibs/lattice/<name>_pm.h. The first prerequisite is the generator script,
# every remaining prerequisite is passed to it as a pattern file.
techlibs/lattice/%_pm.h: passes/pmgen/pmgen.py techlibs/lattice/%.pmg
	$(P) mkdir -p $(@D) && $(PYTHON_EXECUTABLE) $< -o $@ -p $(notdir $*) $(filter-out $<,$^)

# lattice_dsp_nexus.cc includes the generated matcher header, so the object
# file depends on it and the header is registered as a generated file.
GENFILES += techlibs/lattice/lattice_dsp_nexus_pm.h
techlibs/lattice/lattice_dsp_nexus.o: techlibs/lattice/lattice_dsp_nexus_pm.h
$(eval $(call add_extra_objs,techlibs/lattice/lattice_dsp_nexus_pm.h))
|
||||
|
|
|
|||
|
|
@ -77,3 +77,92 @@ module \$__NX_MUL9X9 (input [8:0] A, input [8:0] B, output [17:0] Y);
|
|||
.Z(Y)
|
||||
);
|
||||
endmodule
|
||||
|
||||
// Techmap wrapper: replaces a $__NX_MAC18X18 cell by one MULTADDSUB18X18
// primitive with all register stages set to "BYPASS".
//   A, B : 18-bit multiplier operands
//   C    : 48-bit addend, zero-padded to the 54-bit C input of the primitive
//   Y    : low 48 bits of the 54-bit Z output
// Signed mode follows A_SIGNED only (B_SIGNED is accepted but not used here);
// SUBTRACT drives the ADDSUB input.
// NOTE(review): only A, B, C, SIGNED, ADDSUB and Z are connected. Confirm that
// leaving every other input of the primitive unconnected is valid for this
// bypassed configuration.
module \$__NX_MAC18X18 (input [17:0] A, input [17:0] B, input [47:0] C, output [47:0] Y);

	parameter A_WIDTH = 18;
	parameter B_WIDTH = 18;
	parameter C_WIDTH = 48;
	parameter Y_WIDTH = 48;
	parameter A_SIGNED = 0;
	parameter B_SIGNED = 0;
	parameter SUBTRACT = 0;

	// Constant control bits derived from the cell parameters.
	localparam [0:0] SIGNED_BIT = A_SIGNED ? 1'b1 : 1'b0;
	localparam [0:0] ADDSUB_BIT = SUBTRACT ? 1'b1 : 1'b0;

	wire [53:0] addend = {6'b0, C};
	wire [53:0] result;

	assign Y = result[47:0];

	MULTADDSUB18X18 #(
		.REGINPUTA("BYPASS"),
		.REGINPUTB("BYPASS"),
		.REGINPUTC("BYPASS"),
		.REGOUTPUT("BYPASS")
	) _TECHMAP_REPLACE_ (
		.A(A),
		.B(B),
		.C(addend),
		.SIGNED(SIGNED_BIT),
		.ADDSUB(ADDSUB_BIT),
		.Z(result)
	);

endmodule
|
||||
|
||||
// Techmap wrapper: replaces a $__NX_PREADD18X18 cell by one MULTPREADD18X18
// primitive. The input registers are always bypassed; the output register is
// enabled when PIPELINED is non-zero.
//   A, C : 18-bit operands wired to the A and C inputs of the primitive
//   B    : 18-bit operand wired to the B input of the primitive
//   Y    : 36-bit Z output widened to 48 bits, sign-extended when A_SIGNED
//          is set and zero-extended otherwise
// NOTE(review): the clock-enable and reset inputs of the primitive are not
// connected; confirm this is valid when REGOUTPUT is "REGISTER".
module \$__NX_PREADD18X18 (input [17:0] A, input [17:0] B, input [17:0] C, output [47:0] Y, input CLK);

	parameter PIPELINED = 0;
	parameter A_SIGNED = 0;
	parameter B_SIGNED = 0;
	parameter C_SIGNED = 0;

	wire [35:0] product;

	// Bit replicated into the upper 12 bits of Y.
	wire ext_bit = A_SIGNED ? product[35] : 1'b0;
	assign Y = {{12{ext_bit}}, product};

	MULTPREADD18X18 #(
		.REGINPUTA("BYPASS"),
		.REGINPUTB("BYPASS"),
		.REGINPUTC("BYPASS"),
		.REGOUTPUT(PIPELINED ? "REGISTER" : "BYPASS")
	) _TECHMAP_REPLACE_ (
		.A(A),
		.B(B),
		.C(C),
		.CLK(CLK),
		.SIGNEDA(A_SIGNED ? 1'b1 : 1'b0),
		.SIGNEDB(B_SIGNED ? 1'b1 : 1'b0),
		.SIGNEDC(C_SIGNED ? 1'b1 : 1'b0),
		.Z(product)
	);

endmodule
|
||||
|
||||
// Techmap wrapper: replaces a $__NX_MAC9X9WIDE_4LANE cell by one
// MULTADDSUB9X9WIDE primitive with all register stages bypassed.
//   A0..A3, B0..B3 : four pairs of 9-bit operands, one pair per lane
//   Y              : low 48 bits of the 54-bit Z output
// The C input is tied to zero and all four ADDSUB bits are 0. One SIGNED
// parameter controls the SIGNED input shared by every lane.
module \$__NX_MAC9X9WIDE_4LANE (
	input [8:0] A0, input [8:0] B0,
	input [8:0] A1, input [8:0] B1,
	input [8:0] A2, input [8:0] B2,
	input [8:0] A3, input [8:0] B3,
	output [47:0] Y
);

	parameter SIGNED = 0;

	localparam [0:0] SIGNED_BIT = SIGNED ? 1'b1 : 1'b0;

	wire [53:0] result;
	assign Y = result[47:0];

	MULTADDSUB9X9WIDE #(
		.REGINPUTAB0("BYPASS"),
		.REGINPUTAB1("BYPASS"),
		.REGINPUTAB2("BYPASS"),
		.REGINPUTAB3("BYPASS"),
		.REGINPUTC("BYPASS"),
		.REGOUTPUT("BYPASS")
	) _TECHMAP_REPLACE_ (
		.A0(A0), .B0(B0),
		.A1(A1), .B1(B1),
		.A2(A2), .B2(B2),
		.A3(A3), .B3(B3),
		.C(54'b0),
		.SIGNED(SIGNED_BIT),
		.ADDSUB(4'b0000),
		.Z(result)
	);

endmodule
|
||||
|
|
|
|||
|
|
@ -0,0 +1,36 @@
|
|||
#include "kernel/yosys.h"
|
||||
#include "kernel/sigtools.h"
|
||||
|
||||
USING_YOSYS_NAMESPACE
|
||||
PRIVATE_NAMESPACE_BEGIN
|
||||
|
||||
#include "techlibs/lattice/lattice_dsp_nexus_pm.h"
|
||||
|
||||
struct LatticeDspNexusPass : public Pass {
|
||||
LatticeDspNexusPass() : Pass("lattice_dsp_nexus", "Lattice Nexus DSP inference") { }
|
||||
void help() override
|
||||
{
|
||||
// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
|
||||
log("\n");
|
||||
log(" lattice_dsp_nexus [options] [selection]\n");
|
||||
log("\n");
|
||||
log("Infer Lattice Nexus sysDSP macrocells (MULTADDSUB18X18, MULTPREADD18X18,\n");
|
||||
log("MULTADDSUB9X9WIDE) from MAC and dot-product patterns.\n");
|
||||
log("\n");
|
||||
}
|
||||
void execute(std::vector<std::string> args, RTLIL::Design *design) override
|
||||
{
|
||||
log_header(design, "Executing LATTICE_DSP_NEXUS pass.\n");
|
||||
extra_args(args, 1, design);
|
||||
|
||||
for (auto module : design->selected_modules()) {
|
||||
lattice_dsp_nexus_pm pm(module, module->cells());
|
||||
|
||||
pm.run_nexus_mac9_4lane();
|
||||
pm.run_nexus_mac18();
|
||||
pm.run_nexus_preadd18();
|
||||
}
|
||||
}
|
||||
} LatticeDspNexusPass;
|
||||
|
||||
PRIVATE_NAMESPACE_END
|
||||
|
|
@ -0,0 +1,165 @@
|
|||
// Pattern nexus_mac18
//
// Fuses a $mul whose product feeds one input of a $add/$sub into a single
// $__NX_MAC18X18 cell computing Y = C + A*B ($add) or Y = C - A*B ($sub).
//
// Fixes relative to the first version:
//  * the adder input driven by the product is recorded in the add_AB state
//    variable; it used to be an empty local, so the addend was always taken
//    from port A, even when port A carried the product itself
//  * the product must have no other user, because the $mul is removed
//  * for $sub the product must be the subtrahend (port B)
//  * the whole product signal must match, not only bit 0
//  * operands are padded to the cell port widths using their own signedness
//
// NOTE(review): assumes the primitive extends the product according to the
// signedness of the multiplier; a case where the adder widens the product with
// a different signedness is not filtered out.
pattern nexus_mac18

state <IdString> add_AB

match mul
	select mul->type.in($mul)
	select GetSize(port(mul, \A)) <= 18
	select GetSize(port(mul, \B)) <= 18
	select GetSize(port(mul, \Y)) <= 48
	// Driver plus exactly one reader: removing the $mul must not leave
	// another consumer of the product undriven.
	select nusers(port(mul, \Y)) == 2
endmatch

match add
	select add->type.in($add, $sub)
	select GetSize(port(add, \Y)) <= 48
	choice <IdString> AB {\A, \B}
	index <SigBit> port(add, AB)[0] === port(mul, \Y)[0]
	// The adder input has to be the complete product.
	filter port(add, AB) == port(mul, \Y)
	// A truncated product is only equivalent while the sum is no wider than it.
	filter GetSize(port(mul, \Y)) >= GetSize(port(add, \Y)) || GetSize(port(mul, \Y)) >= GetSize(port(mul, \A)) + GetSize(port(mul, \B))
	// A*B - C cannot be expressed by the cell, only C - A*B.
	filter add->type == $add || AB == \B
	set add_AB AB
endmatch

code
	// add_AB is the adder input driven by the product, the opposite input is the addend.
	IdString add_C = (add_AB == \A) ? \B : \A;

	SigSpec sig_A = port(mul, \A);
	SigSpec sig_B = port(mul, \B);
	SigSpec sig_C = port(add, add_C);

	// Pad (or trim) every operand to the fixed cell port width with its own signedness.
	sig_A.extend_u0(18, param(mul, \A_SIGNED).as_bool());
	sig_B.extend_u0(18, param(mul, \B_SIGNED).as_bool());
	sig_C.extend_u0(48, param(add, add_C == \A ? \A_SIGNED : \B_SIGNED).as_bool());

	Cell *mac = module->addCell(NEW_ID, "$__NX_MAC18X18");

	mac->setPort(\A, sig_A);
	mac->setPort(\B, sig_B);
	mac->setPort(\C, sig_C);
	mac->setPort(\Y, port(add, \Y));
	mac->setParam(\A_SIGNED, mul->getParam(\A_SIGNED));
	mac->setParam(\B_SIGNED, mul->getParam(\B_SIGNED));
	mac->setParam(\SUBTRACT, add->type == $sub ? State::S1 : State::S0);

	autoremove(mul);
	autoremove(add);

	accept;
endcode
|
||||
|
||||
// Pattern nexus_preadd18
//
// Fuses a pre-adder feeding one input of a $mul, optionally followed by an
// output register, into a single $__NX_PREADD18X18 cell computing (A + C) * B.
//
// Fixes relative to the first version:
//  * the multiplier input driven by the pre-adder is recorded in the mul_AB
//    state variable; it used to be an empty local, so the direct operand was
//    always taken from port A, even when port A carried the pre-adder sum
//  * $sub pre-adders are rejected: $__NX_PREADD18X18 has no subtract control,
//    so they were silently mapped as additions
//  * only a plain positive-edge $dff is absorbed as output register; the
//    enable and synchronous-reset inputs of $dffe/$sdff/$sdffe were dropped
//  * removed cells must not have other users, and whole signals must match
//  * operands wider than the 18-bit cell ports are rejected instead of being
//    truncated, and narrower ones are padded using their own signedness
//
// NOTE(review): assumes the primitive keeps the full 19-bit pre-adder sum and
// registers on the rising clock edge - confirm against the primitive model.
pattern nexus_preadd18

state <IdString> mul_AB

match preadd
	select preadd->type.in($add)
	select GetSize(port(preadd, \A)) <= 18
	select GetSize(port(preadd, \B)) <= 18
	select GetSize(port(preadd, \Y)) <= 19
	// The sum may only feed the multiplier, the $add is removed on accept.
	select nusers(port(preadd, \Y)) == 2
endmatch

match mul
	select mul->type.in($mul)
	select GetSize(port(mul, \Y)) <= 48
	choice <IdString> AB {\A, \B}
	index <SigBit> port(mul, AB)[0] === port(preadd, \Y)[0]
	// The multiplier input has to be the complete pre-adder sum.
	filter port(mul, AB) == port(preadd, \Y)
	// The direct operand must fit the 18-bit B port.
	filter GetSize(port(mul, AB == \A ? \B : \A)) <= 18
	set mul_AB AB
endmatch

match pipe_ff
	select pipe_ff->type.in($dff)
	select param(pipe_ff, \CLK_POLARITY).as_bool()
	index <SigBit> port(pipe_ff, \D)[0] === port(mul, \Y)[0]
	// The register has to capture the complete product ...
	filter port(pipe_ff, \D) == port(mul, \Y)
	// ... and be its only reader, otherwise the unregistered product is still needed.
	filter nusers(port(mul, \Y)) == 2
	optional
endmatch

code
	// mul_AB is the multiplier input fed by the pre-adder, the opposite input is the direct operand.
	IdString mul_other = (mul_AB == \A) ? \B : \A;
	IdString sgn_AC = (mul_AB == \A) ? \A_SIGNED : \B_SIGNED;
	IdString sgn_B = (mul_AB == \A) ? \B_SIGNED : \A_SIGNED;

	SigSpec sig_A = port(preadd, \A);
	SigSpec sig_C = port(preadd, \B);
	SigSpec sig_B = port(mul, mul_other);

	// Pad every operand to the 18-bit cell ports using its own signedness.
	sig_A.extend_u0(18, param(preadd, \A_SIGNED).as_bool());
	sig_C.extend_u0(18, param(preadd, \B_SIGNED).as_bool());
	sig_B.extend_u0(18, param(mul, sgn_B).as_bool());

	Cell *mac = module->addCell(NEW_ID, "$__NX_PREADD18X18");

	mac->setPort(\A, sig_A);
	mac->setPort(\C, sig_C);
	mac->setPort(\B, sig_B);

	if (pipe_ff) {
		// Registered result: the cell drives the flip-flop output directly.
		mac->setPort(\Y, port(pipe_ff, \Q));
		mac->setPort(\CLK, port(pipe_ff, \CLK));
		mac->setParam(\PIPELINED, State::S1);
	} else {
		mac->setPort(\Y, port(mul, \Y));
		mac->setPort(\CLK, State::S0);
		mac->setParam(\PIPELINED, State::S0);
	}

	mac->setParam(\A_SIGNED, mul->getParam(sgn_AC));
	mac->setParam(\B_SIGNED, mul->getParam(sgn_B));
	mac->setParam(\C_SIGNED, mul->getParam(sgn_AC));

	if (pipe_ff)
		autoremove(pipe_ff);
	autoremove(mul);
	autoremove(preadd);

	accept;
endcode
|
||||
|
||||
// Pattern nexus_mac9_4lane
//
// Fuses the adder chain ((mul0 + mul1) + mul2) + mul3 of four 9x9 products
// into a single $__NX_MAC9X9WIDE_4LANE cell. Only this operand order is
// recognised: add_bot = mul0 + mul1, add_mid = add_bot + mul2,
// add_top = add_mid + mul3.
//
// Fixes relative to the first version:
//  * every removed intermediate signal (partial sums and products) must have
//    no other user, otherwise that user loses its driver
//  * whole signals are compared, not only bit 0
//  * all four multipliers must agree on signedness, because the cell has one
//    shared SIGNED control that used to be copied from lane 0 only
//  * the final sum must fit the 48-bit Y port of the cell
//
// NOTE(review): partial sums or products that are truncated before being
// widened again by a later adder are not filtered out; confirm whether such
// netlists can reach this pass.
pattern nexus_mac9_4lane

match add_top
	select add_top->type == $add
	select GetSize(port(add_top, \Y)) <= 48
endmatch

match add_mid
	select add_mid->type == $add
	select nusers(port(add_mid, \Y)) == 2
	index <SigBit> port(add_mid, \Y)[0] === port(add_top, \A)[0]
	filter port(add_mid, \Y) == port(add_top, \A)
endmatch

match add_bot
	select add_bot->type == $add
	select nusers(port(add_bot, \Y)) == 2
	index <SigBit> port(add_bot, \Y)[0] === port(add_mid, \A)[0]
	filter port(add_bot, \Y) == port(add_mid, \A)
endmatch

match mul3
	select mul3->type == $mul
	select GetSize(port(mul3, \A)) <= 9 && GetSize(port(mul3, \B)) <= 9
	select nusers(port(mul3, \Y)) == 2
	index <SigBit> port(mul3, \Y)[0] === port(add_top, \B)[0]
	filter port(mul3, \Y) == port(add_top, \B)
endmatch

match mul2
	select mul2->type == $mul
	select GetSize(port(mul2, \A)) <= 9 && GetSize(port(mul2, \B)) <= 9
	select nusers(port(mul2, \Y)) == 2
	index <SigBit> port(mul2, \Y)[0] === port(add_mid, \B)[0]
	filter port(mul2, \Y) == port(add_mid, \B)
endmatch

match mul1
	select mul1->type == $mul
	select GetSize(port(mul1, \A)) <= 9 && GetSize(port(mul1, \B)) <= 9
	select nusers(port(mul1, \Y)) == 2
	index <SigBit> port(mul1, \Y)[0] === port(add_bot, \B)[0]
	filter port(mul1, \Y) == port(add_bot, \B)
endmatch

match mul0
	select mul0->type == $mul
	select GetSize(port(mul0, \A)) <= 9 && GetSize(port(mul0, \B)) <= 9
	select nusers(port(mul0, \Y)) == 2
	index <SigBit> port(mul0, \Y)[0] === port(add_bot, \A)[0]
	filter port(mul0, \Y) == port(add_bot, \A)
	// One SIGNED control serves all lanes, so the lanes have to agree.
	filter param(mul1, \A_SIGNED).as_bool() == param(mul0, \A_SIGNED).as_bool()
	filter param(mul2, \A_SIGNED).as_bool() == param(mul0, \A_SIGNED).as_bool()
	filter param(mul3, \A_SIGNED).as_bool() == param(mul0, \A_SIGNED).as_bool()
endmatch

code
	bool is_signed = mul0->getParam(\A_SIGNED).as_bool();

	// Pads an operand to the 9-bit lane width using the shared signedness.
	auto ext9 = [&](SigSpec s) {
		s.extend_u0(9, is_signed);
		return s;
	};

	Cell *mac = module->addCell(NEW_ID, "$__NX_MAC9X9WIDE_4LANE");

	mac->setPort(\A0, ext9(port(mul0, \A)));
	mac->setPort(\B0, ext9(port(mul0, \B)));
	mac->setPort(\A1, ext9(port(mul1, \A)));
	mac->setPort(\B1, ext9(port(mul1, \B)));
	mac->setPort(\A2, ext9(port(mul2, \A)));
	mac->setPort(\B2, ext9(port(mul2, \B)));
	mac->setPort(\A3, ext9(port(mul3, \A)));
	mac->setPort(\B3, ext9(port(mul3, \B)));
	mac->setPort(\Y, port(add_top, \Y));
	mac->setParam(\SIGNED, mul0->getParam(\A_SIGNED));

	autoremove(add_top);
	autoremove(add_mid);
	autoremove(add_bot);
	autoremove(mul0);
	autoremove(mul1);
	autoremove(mul2);
	autoremove(mul3);

	accept;
endcode
|
||||
|
|
@ -425,9 +425,12 @@ struct SynthLatticePass : public ScriptPass
|
|||
run("opt_clean");
|
||||
|
||||
if (help_mode) {
|
||||
run("lattice_dsp_nexus", "(only if -family lifcl/lfd2nx and unless -nodsp)");
|
||||
run("techmap -map +/mul2dsp.v [...]", "(unless -nodsp)");
|
||||
run("techmap -map +/lattice/dsp_map" + dsp_map + ".v", "(unless -nodsp)");
|
||||
} else if (have_dsp && !nodsp) {
|
||||
if (is_nexus)
|
||||
run("lattice_dsp_nexus");
|
||||
for (const auto &rule : dsp_rules) {
|
||||
run(stringf("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=%d -D DSP_B_MAXWIDTH=%d -D DSP_A_MINWIDTH=%d -D DSP_B_MINWIDTH=%d -D DSP_NAME=%s",
|
||||
rule.a_maxwidth, rule.b_maxwidth, rule.a_minwidth, rule.b_minwidth, rule.prim));
|
||||
|
|
|
|||
|
|
@ -0,0 +1,76 @@
|
|||
// https://github.com/YosysHQ/yosys/issues/5906
|
||||
|
||||
// 18x18 multiply-accumulate with registered inputs, synchronous reset and a
// registered "clear" that restarts the accumulation from the current product.
module mac (
	input bit clk, rst,
	input bit [17:0] a, b,
	input bit clear,
	output bit [47:0] p
);
	bit [17:0] a_q, b_q;
	bit clear_q;
	bit [47:0] acc_q;

	always_ff @(posedge clk) begin
		if (rst) begin
			a_q <= 0;
			b_q <= 0;
			clear_q <= 0;
			acc_q <= 0;
		end
		else begin
			a_q <= a;
			b_q <= b;
			clear_q <= clear;
			acc_q <= clear_q ? 48'(a_q*b_q) : 48'(acc_q + 48'(a_q*b_q));
		end
	end

	assign p = acc_q;
endmodule
|
||||
|
||||
// Pre-adder test design: m = (a - d) * b is registered, then c is added in a
// second registered stage. All registers have a synchronous reset.
module madd_pre (
	input bit clk, rst,
	input bit [17:0] a, b, c, d,
	output bit [47:0] p
);
	bit [17:0] a_q, b_q, c_q, d_q;
	bit [47:0] prod_q, sum_q;

	always_ff @(posedge clk) begin
		if (rst) begin
			a_q <= 0;
			b_q <= 0;
			c_q <= 0;
			d_q <= 0;
			prod_q <= 0;
			sum_q <= 0;
		end
		else begin
			a_q <= a;
			b_q <= b;
			c_q <= c;
			d_q <= d;
			prod_q <= 48'((a_q - d_q) * b_q);
			sum_q <= 48'(prod_q + 48'(c_q));
		end
	end

	assign p = sum_q;
endmodule
|
||||
|
||||
// Four-lane 9x9 dot product with registered inputs and a registered 20-bit
// result; all registers have a synchronous reset.
module dot4 (
	input bit clk, rst,
	input bit [8:0] a0, b0, a1, b1, a2, b2, a3, b3,
	output bit [19:0] p
);
	bit [8:0] a0_q, b0_q, a1_q, b1_q, a2_q, b2_q, a3_q, b3_q;
	bit [19:0] sum_q;

	always_ff @(posedge clk) begin
		if (rst) begin
			a0_q <= 0; b0_q <= 0;
			a1_q <= 0; b1_q <= 0;
			a2_q <= 0; b2_q <= 0;
			a3_q <= 0; b3_q <= 0;
			sum_q <= 0;
		end else begin
			a0_q <= a0; b0_q <= b0;
			a1_q <= a1; b1_q <= b1;
			a2_q <= a2; b2_q <= b2;
			a3_q <= a3; b3_q <= b3;
			sum_q <= 20'(20'(a0_q*b0_q) + 20'(a1_q*b1_q) + 20'(a2_q*b2_q) + 20'(a3_q*b3_q));
		end
	end

	assign p = sum_q;
endmodule
|
||||
|
||||
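// Negative test: a 24x24 MAC has operands wider than 18 bits, so the test
// script expects no MULTADDSUB18X18 to be inferred for it.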
|
||||
// 24x24 multiply-accumulate with registered inputs and a registered "clear";
// there is no reset.
module neg_mac24 (input clk, clear, input [23:0] a, b, output [47:0] p);
	reg [23:0] a_q, b_q;
	reg [47:0] acc_q;
	reg clear_q;

	always_ff @(posedge clk) begin
		a_q <= a;
		b_q <= b;
		clear_q <= clear;
		acc_q <= clear_q ? 48'(a_q*b_q) : 48'(acc_q + 48'(a_q*b_q));
	end

	assign p = acc_q;
endmodule
|
||||
|
||||
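// Negative test for the 4-lane pattern: a three-term dot product mixing two
// 9x9 lanes with one 18x18 lane; the test script expects no MULTADDSUB9X9WIDE.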
|
||||
// Sum of two 9x9 products and one 18x18 product with registered inputs and a
// registered 36-bit result; there is no reset.
module neg_dot_mixed (input clk, input [8:0] a0,b0,a1,b1, input [17:0] a2, b2, output [35:0] p);
	reg [8:0] a0_q, b0_q, a1_q, b1_q;
	reg [17:0] a2_q, b2_q;
	reg [35:0] sum_q;

	always_ff @(posedge clk) begin
		a0_q <= a0;
		b0_q <= b0;
		a1_q <= a1;
		b1_q <= b1;
		a2_q <= a2;
		b2_q <= b2;
		sum_q <= 36'(36'(a0_q*b0_q) + 36'(a1_q*b1_q) + 36'(a2_q*b2_q));
	end

	assign p = sum_q;
endmodule
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
# Checks which Nexus DSP primitives synth_nexus produces for the designs in
# fuse_mac.sv. Each case reloads the untouched design before synthesis.
read_verilog -sv fuse_mac.sv
design -save pristine

# 18x18 MAC: exactly one MULTADDSUB18X18 and no CCU2 cells
design -load pristine
hierarchy -top mac
synth_nexus -family lifcl -top mac
select -assert-count 1 t:MULTADDSUB18X18
select -assert-count 0 t:CCU2

# 18x18 pre-add MAC: exactly one MULTPREADD18X18
# NOTE(review): madd_pre uses a subtracting pre-adder, while the
# $__NX_PREADD18X18 techmap wrapper has no subtract control; this only counts
# cells, so the mapping should also be checked for functional equivalence.
design -load pristine
hierarchy -top madd_pre
synth_nexus -family lifcl -top madd_pre
select -assert-count 1 t:MULTPREADD18X18

# 4-lane 9x9 dot product: exactly one MULTADDSUB9X9WIDE
design -load pristine
hierarchy -top dot4
synth_nexus -family lifcl -top dot4
select -assert-count 1 t:MULTADDSUB9X9WIDE

# 24x24 MAC (negative test): no MULTADDSUB18X18
design -load pristine
hierarchy -top neg_mac24
synth_nexus -family lifcl -top neg_mac24
select -assert-count 0 t:MULTADDSUB18X18

# mixed 9x9 / 18x18 dot product (negative test for the 4-lane cell):
# no MULTADDSUB9X9WIDE, two MULTADDSUB18X18
design -load pristine
hierarchy -top neg_dot_mixed
synth_nexus -family lifcl -top neg_dot_mixed
select -assert-count 0 t:MULTADDSUB9X9WIDE
select -assert-count 2 t:MULTADDSUB18X18
|
||||
Loading…
Reference in New Issue