// Listing metadata: 396 lines, 11 KiB, Verilog
////////////////////////////////////////////////////////////////////////////////
//
// Filename:	cpuops.v
// {{{
// Project:	10Gb Ethernet switch
//
// Purpose:	This is the ZipCPU ALU function. It handles all of the
//		instruction opcodes 0-13. (14-15 are divide opcodes).
//
//
// Creator:	Dan Gisselquist, Ph.D.
//		Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
// }}}
// Copyright (C) 2023, Gisselquist Technology, LLC
// {{{
// This file is part of the ETH10G project.
//
// The ETH10G project contains free software and gateware, licensed under the
// Apache License, Version 2.0 (the "License"). You may not use this project,
// or this file, except in compliance with the License. You may obtain a copy
// of the License at
// }}}
//	http://www.apache.org/licenses/LICENSE-2.0
// {{{
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
//
////////////////////////////////////////////////////////////////////////////////
//
`default_nettype none
// }}}
//
// cpuops -- registered ALU
//
// On any clock where i_stb is high, i_op selects an operation on i_a and i_b
// and the result is registered into o_c, with the flags in o_f.
//
// Parameters
//	OPT_MPY		Forwarded to the multiply unit.  Within this module,
//			values of 0 or 1 make o_valid simply follow i_stb by
//			one clock and keep o_busy low; values greater than 1
//			make multiply requests raise o_busy and delay o_valid
//			until the multiply unit reports it is done.
//	OPT_SHIFTS	When set, LSR/LSL/ASR shift by the amount in i_b.
//			When clear, each of the three shifts moves i_a by
//			exactly one bit, regardless of i_b.
//	OPT_LOWPOWER	Forwarded to the multiply unit.  Within this module,
//			when set, o_c is only overwritten with a multiply
//			result on the clock the multiply unit reports done;
//			when clear, o_c tracks the multiply result on every
//			clock without an i_stb.
//
// Ports
//	i_stb		Request strobe: i_op, i_a and i_b are sampled when high
//	i_op		Operation select (see the case statement below)
//	i_a, i_b	Operands
//	o_c		Registered result
//	o_f		Flags, ordered { V, N, C, Z } from bit 3 down to bit 0
//	o_valid		Registered: high when o_c holds a new result
//	o_busy		Registered: high while a multi-cycle multiply is pending
//
module	cpuops #(
		// {{{
		parameter		OPT_MPY = 3, // == 0 (no mpy),1-4,36
		parameter	[0:0]	OPT_SHIFTS = 1'b1,
		parameter	[0:0]	OPT_LOWPOWER = 1'b1
		// }}}
	) (
		// {{{
		input	wire		i_clk, i_reset, i_stb,
		input	wire	[3:0]	i_op,
		input	wire	[31:0]	i_a, i_b,
		output	reg	[31:0]	o_c,
		output	wire	[3:0]	o_f,
		output	reg		o_valid,
`ifdef	VMPY_TB
		// {{{
		// Define some wires used to peek at internal values during
		// simulation.  These are *ONLY* used by the ZipCPU mpy_tb
		// simulation testbench.  They are *NOT* used during synthesis,
		// and not intended to be used outside of the ZipCPU setup.
		//
		output	wire	[5:0]	OPT_MULTIPLY,
		output	wire	[31:0]	mpy_a_input, mpy_b_input,
		output	wire	[63:0]	mpy_output,
		output	wire	[2:0]	mpy_pipe,
		// }}}
`endif
		output	wire		o_busy
		// }}}
	);

	// Declarations
	// {{{
	wire	[31:0]	w_brev_result;
	wire		z, n, v, vx;
	reg		c, pre_sign, set_ovfl, keep_sgn_on_ovfl;

	// The shift results are 33 bits wide: 32 result bits plus the carry
	// bit that was shifted out (see how they are unpacked into o_c and c
	// in the master case statement).
	wire	[32:0]	w_lsr_result, w_asr_result, w_lsl_result;

	wire	[63:0]	mpy_result; // Where we dump the multiply result
	wire		mpyhi;	// Return the high half of the multiply
	wire		mpybusy; // The multiply is busy if true
	wire		mpydone; // True if we'll be valid on the next clock;
	wire		this_is_a_multiply_op;
	reg		r_busy;

	genvar	k;
	// }}}

	// Shift register pre-logic
	// {{{
	generate if (OPT_SHIFTS)
	begin : IMPLEMENT_SHIFTS
		wire	signed	[32:0]	w_pre_asr_input, w_pre_asr_shifted;

		// ASR: append a zero below i_a so that the last bit shifted
		// out lands in bit 0, then do a signed (arithmetic) shift.
		// Any shift amount of 32 or more yields 33 copies of the sign.
		assign	w_pre_asr_input = { i_a, 1'b0 };
		assign	w_pre_asr_shifted = w_pre_asr_input >>> i_b[4:0];
		assign	w_asr_result = (|i_b[31:5])? {(33){i_a[31]}}
					: w_pre_asr_shifted;// ASR

		// LSR: shifts of 33 or more give zero; a shift of exactly 32
		// leaves only i_a[31], in the carry (lowest) position.
		assign	w_lsr_result = ((|i_b[31:6])||(i_b[5]&&(i_b[4:0]!=0)))? 33'h00
				:((i_b[5])?{32'h0,i_a[31]}
				: ( { i_a, 1'b0 } >> (i_b[4:0]) ));// LSR

		// LSL: shifts of 33 or more give zero; a shift of exactly 32
		// leaves only i_a[0], in the carry (highest) position.
		assign	w_lsl_result = ((|i_b[31:6])||(i_b[5]&&(i_b[4:0]!=0)))? 33'h00
				:((i_b[5])?{i_a[0], 32'h0}
				: ({1'b0, i_a } << i_b[4:0]));	// LSL
	end else begin : NO_SHIFTS

		// Without shift support, every shift moves by one bit only
		assign	w_asr_result = {   i_a[31], i_a[31:0] };
		assign	w_lsr_result = {      1'b0, i_a[31:0] };
		assign	w_lsl_result = { i_a[31:0],      1'b0 };

	end endgenerate
	// }}}

	//
	// Bit reversal pre-logic
	// {{{
	// Bit k of the result is bit (31-k) of i_b
	generate
		for(k=0; k<32; k=k+1)
		begin : bit_reversal_cpuop
			assign w_brev_result[k] = i_b[31-k];
		end endgenerate
	// }}}

	// Prelogic for our flags registers : set_ovfl and keep_sgn_on_ovfl
	// {{{
	// set_ovfl records whether the requested operation is one for which
	// the V flag may be raised; the flag itself is resolved from the
	// registered result in the flags section further below.
	always @(posedge i_clk)
	if (i_stb) // 1 LUT
		set_ovfl<=(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP
			||((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD
			||(i_op == 4'h6) // LSL
			||(i_op == 4'h5)); // LSR

	// keep_sgn_on_ovfl covers the SUB/CMP and ADD cases only.  It is used
	// below to invert N when the result's sign differs from i_a's sign.
	always @(posedge i_clk)
	if (i_stb) // 1 LUT
		keep_sgn_on_ovfl<=
			(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP
			||((i_op==4'h2)&&(i_a[31] == i_b[31]))); // ADD
	// }}}

	////////////////////////////////////////////////////////////////////////
	//
	// Multiply handling
	// {{{
	////////////////////////////////////////////////////////////////////////
	//
	//

	// A 4-way multiplexer can be done in one 6-LUT.
	// A 16-way multiplexer can therefore be done in 4x 6-LUT's with
	//	the Xilinx multiplexer fabric that follows.
	// Given that we wish to apply this multiplexer approach to 33-bits,
	// this will cost a minimum of 132 6-LUTs.

	// Opcodes 4'ha, 4'hb and 4'hc are the multiplies.  Note that this
	// signal already includes i_stb.
	assign	this_is_a_multiply_op = (i_stb)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'hc));

	// Under formal verification an alternate multiply module is
	// substituted for the real one.
`ifdef	FORMAL
`define	MPYOP	abs_mpy
`else
`define	MPYOP	mpyop
`endif
	`MPYOP #(
		// {{{
		.OPT_MPY(OPT_MPY),
		.OPT_LOWPOWER(OPT_LOWPOWER)
		// }}}
	) thempy(
		// {{{
		.i_clk(i_clk), .i_reset(i_reset),
		.i_stb(this_is_a_multiply_op), .i_op(i_op[1:0]),
		.i_a(i_a), .i_b(i_b), .o_valid(mpydone),
		.o_busy(mpybusy), .o_result(mpy_result), .o_hi(mpyhi)
		// }}}
	);
	// }}}
	////////////////////////////////////////////////////////////////////////
	//
	// The master ALU case statement
	// {{{
	always @(posedge i_clk)
	if (i_stb)
	begin
		// Remember the sign of i_a for the overflow calculation
		pre_sign <= (i_a[31]);
		// Carry defaults to clear; the arithmetic and shift cases
		// below override it.
		c <= 1'b0;
		casez(i_op)
		4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB
		4'b0001:   o_c   <= i_a & i_b;		// BTST/And
		// Operands are widened explicitly so the carry out is
		// captured without depending on implicit context sizing.
		4'b0010:{c,o_c } <= {1'b0,i_a}+{1'b0,i_b};// Add
		4'b0011:   o_c   <= i_a | i_b;		// Or
		4'b0100:   o_c   <= i_a ^ i_b;		// Xor
		// Right shifts carry out of the bottom, left shifts out of
		// the top--hence the differing concatenation order.
		4'b0101:{o_c,c } <= w_lsr_result[32:0];	// LSR
		4'b0110:{c,o_c } <= w_lsl_result[32:0];	// LSL
		4'b0111:{o_c,c } <= w_asr_result[32:0];	// ASR
		4'b1000:   o_c   <= w_brev_result;	// BREV
		4'b1001:   o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO
		4'b1010:   o_c   <= mpy_result[63:32];	// MPYHU
		4'b1011:   o_c   <= mpy_result[63:32];	// MPYHS
		4'b1100:   o_c   <= mpy_result[31:0];	// MPY
		default:   o_c   <= i_b;		// MOV, LDI
		endcase
	end else if (!OPT_LOWPOWER || mpydone)
		// set the output based upon the multiply result
		o_c <= (mpyhi)?mpy_result[63:32]:mpy_result[31:0];
	// }}}

	// o_busy, r_busy
	// {{{
	// Busy is only ever raised when OPT_MPY > 1.  It goes high the clock
	// after a multiply request, and remains high while the multiply unit
	// reports busy.
	initial	r_busy = 1'b0;
	always @(posedge i_clk)
	if (i_reset)
		r_busy <= 1'b0;
	else if (OPT_MPY > 1)
		// this_is_a_multiply_op is already qualified by i_stb
		r_busy <= (this_is_a_multiply_op)||mpybusy;
	else
		r_busy <= 1'b0;

	assign	o_busy = (r_busy); // ||((OPT_MPY>1)&&(this_is_a_multiply_op));
	// }}}

	// Flags assignment and determination
	// {{{
	// All flags are derived combinatorially from the registered result
	// and the registered pre-logic above.
	assign	z = (o_c == 32'h0000);
	assign	n = (o_c[31]);
	assign	v = (set_ovfl)&&(pre_sign != o_c[31]);
	assign	vx = (keep_sgn_on_ovfl)&&(pre_sign != o_c[31]);

	assign	o_f = { v, n^vx, c, z };
	// }}}

	// o_valid
	// {{{
	// With OPT_MPY <= 1, every request is answered on the next clock.
	// Otherwise non-multiply requests are answered on the next clock,
	// while multiplies become valid the clock after mpydone.
	initial	o_valid = 1'b0;
	always @(posedge i_clk)
	if (i_reset)
		o_valid <= 1'b0;
	else if (OPT_MPY <= 1)
		o_valid <= (i_stb);
	else
		o_valid <=((i_stb)&&(!this_is_a_multiply_op))||(mpydone);
	// }}}

`ifdef	VMPY_TB
	// {{{
	// Testbench-only taps into the multiply unit.  The hierarchical
	// paths below depend on the generate block names used inside the
	// multiply module.
	assign	OPT_MULTIPLY = OPT_MPY;
	generate if (OPT_MPY == 0)
	begin : VGEN0
		assign	mpy_a_input = 0;
		assign	mpy_b_input = 0;
		assign	mpy_pipe = 3'b0;
	end else if (OPT_MPY == 1)
	begin : VGEN1
		assign	mpy_a_input = thempy.IMPY.MPY1CK.w_mpy_a_input[31:0];
		assign	mpy_b_input = thempy.IMPY.MPY1CK.w_mpy_b_input[31:0];
		assign	mpy_pipe = 3'b0;
	end else if (OPT_MPY == 2)
	begin : VGEN2
		assign	mpy_a_input = thempy.IMPY.MPN1.MPY2CK.r_mpy_a_input[31:0];
		assign	mpy_b_input = thempy.IMPY.MPN1.MPY2CK.r_mpy_b_input[31:0];
		assign	mpy_pipe = { 2'b0, thempy.IMPY.MPN1.MPY2CK.mpypipe };
	end else if (OPT_MPY == 3)
	begin : VGEN_NORMAL
		assign	mpy_a_input = thempy.IMPY.MPN1.MPN2.MPY3CK.r_mpy_a_input;
		assign	mpy_b_input = thempy.IMPY.MPN1.MPN2.MPY3CK.r_mpy_b_input;
		assign	mpy_pipe = { 1'b0, thempy.IMPY.MPN1.MPN2.MPY3CK.mpypipe };
	end else if (OPT_MPY == 4)
	begin : VGEN_PARTIAL
		assign	mpy_a_input = thempy.IMPY.MPN1.MPN2.MPN3.MPY4CK.r_mpy_a_input;
		assign	mpy_b_input = thempy.IMPY.MPN1.MPN2.MPN3.MPY4CK.r_mpy_b_input;
		assign	mpy_pipe = thempy.IMPY.MPN1.MPN2.MPN3.MPY4CK.mpypipe;
	end else begin : VGEN_SLOW
		assign	mpy_a_input = thempy.IMPY.MPN1.MPN2.MPN3.MPYSLOW.slowmpyi.i_a[31:0];
		assign	mpy_b_input = thempy.IMPY.MPN1.MPN2.MPN3.MPYSLOW.slowmpyi.i_b[31:0];
		assign	mpy_pipe = {(3){mpybusy}};
	end endgenerate

	assign	mpy_output = mpy_result;
	// }}}
`endif
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
// Formal properties
// {{{
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
`ifdef	FORMAL
	// Declarations
	// {{{
	initial	assume(i_reset);
	reg	f_past_valid;

	// f_past_valid guards every use of $past() below: it is low on the
	// first clock only.
	initial	f_past_valid = 1'b0;
	always @(posedge i_clk)
		f_past_valid <= 1'b1;
	// }}}

	// Input properties are assumed only when CPUOPS is defined; otherwise
	// they are asserted instead.
`define	ASSERT	assert
`ifdef	CPUOPS
`define	ASSUME	assume
`else
`define	ASSUME	assert
`endif

	// No request should be given us if/while we are busy
	// {{{
	always @(posedge i_clk)
	if (o_busy)
		`ASSUME(!i_stb);
	// }}}

	// Following any request other than a multiply request, we should
	// respond in the next cycle
	// {{{
	always @(posedge i_clk)
	if ((f_past_valid)&&(!$past(o_busy))&&(!$past(this_is_a_multiply_op)))
		`ASSERT(!o_busy);
	// }}}

	// Valid and busy can never both be asserted
	// {{{
	always @(posedge i_clk)
		`ASSERT((!o_valid)||(!r_busy));
	// }}}

	// Following any busy, we should always become valid
	// {{{
	always @(posedge i_clk)
	if ((f_past_valid)&&($past(o_busy))&&(!o_busy))
		`ASSERT($past(i_reset) || o_valid);
	// }}}

	// Check the shift values
	// {{{
	// Each assertion has the form (op is not this shift) || (result is as
	// expected), so that it only constrains the named shift operation.
	always @(posedge i_clk)
	if ((f_past_valid)&&($past(i_stb)))
	begin
		// Shift amounts above 32, and then of exactly 32
		if (($past(|i_b[31:6]))||($past(i_b[5:0])>6'd32))
		begin
			assert(($past(i_op)!=4'h5)
					||({o_c,c}=={(33){1'b0}}));
			assert(($past(i_op)!=4'h6)
					||({c,o_c}=={(33){1'b0}}));
			assert(($past(i_op)!=4'h7)
					||({o_c,c}=={(33){$past(i_a[31])}}));
		end else if ($past(i_b[5:0]==6'd32))
		begin
			assert(($past(i_op)!=4'h5)
					||(o_c=={(32){1'b0}}));
			assert(($past(i_op)!=4'h6)
					||(o_c=={(32){1'b0}}));
			assert(($past(i_op)!=4'h7)
					||(o_c=={(32){$past(i_a[31])}}));
		end

		// The remaining checks are independent of the chain above:
		// spot checks of shifts by 0, 1, 2 and 31 bits.
		if ($past(i_b)==0)
		begin
			assert(($past(i_op)!=4'h5)
					||({o_c,c}=={$past(i_a), 1'b0}));
			assert(($past(i_op)!=4'h6)
					||({c,o_c}=={1'b0, $past(i_a)}));
			assert(($past(i_op)!=4'h7)
					||({o_c,c}=={$past(i_a), 1'b0}));
		end

		if ($past(i_b)==1)
		begin
			assert(($past(i_op)!=4'h5)
					||({o_c,c}=={1'b0, $past(i_a)}));
			assert(($past(i_op)!=4'h6)
					||({c,o_c}=={$past(i_a),1'b0}));
			assert(($past(i_op)!=4'h7)
					||({o_c,c}=={$past(i_a[31]),$past(i_a)}));
		end

		if ($past(i_b)==2)
		begin
			assert(($past(i_op)!=4'h5)
					||({o_c,c}=={2'b0, $past(i_a[31:1])}));
			assert(($past(i_op)!=4'h6)
					||({c,o_c}=={$past(i_a[30:0]),2'b0}));
			assert(($past(i_op)!=4'h7)
					||({o_c,c}=={{(2){$past(i_a[31])}},$past(i_a[31:1])}));
		end

		if ($past(i_b)==31)
		begin
			assert(($past(i_op)!=4'h5)
					||({o_c,c}=={31'b0, $past(i_a[31:30])}));
			assert(($past(i_op)!=4'h6)
					||({c,o_c}=={$past(i_a[1:0]),31'b0}));
			assert(($past(i_op)!=4'h7)
					||({o_c,c}=={{(31){$past(i_a[31])}},$past(i_a[31:30])}));
		end
	end
	// }}}
`endif
// }}}
endmodule
//
// iCE40	NoMPY,w/Shift	NoMPY,w/o Shift
//  SB_CARRY		 64		 64
//  SB_DFFE		  3		  3
//  SB_DFFESR		  1		  1
//  SB_DFFSR		 33		 33
//  SB_LUT4		748		323