mirror of https://github.com/zachjs/sv2v.git
Merge 687e921994 into 3dac47da17
This commit is contained in:
commit
9b0133c8dc
|
|
@ -0,0 +1,223 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 16.05.2017
|
||||
// Description: Instruction Tracer Main Class
|
||||
|
||||
`ifndef VERILATOR
|
||||
//pragma translate_off
|
||||
`include "ex_trace_item.svh"
|
||||
`include "instr_trace_item.svh"
|
||||
|
||||
module instr_tracer (
|
||||
instr_tracer_if tracer_if,
|
||||
input logic[riscv::XLEN-1:0] hart_id_i
|
||||
);
|
||||
|
||||
// Instruction words captured at decode, waiting to be issued.
logic [31:0] decode_queue [$];
// Instruction words that were issued, waiting to be committed.
logic [31:0] issue_queue [$];
// Scoreboard entries captured at issue, kept in step with issue_queue.
ariane_pkg::scoreboard_entry_t issue_sbe_queue [$];
// Scoreboard entry most recently popped from issue_sbe_queue.
ariane_pkg::scoreboard_entry_t issue_sbe;
// Resolved branches (including mis-predictions), consumed at commit.
ariane_pkg::bp_resolve_t bp [$];
// Shadow copies of the general purpose and floating point register files.
logic [63:0] gp_reg_file [32];
logic [63:0] fp_reg_file [32];
// 64 bit clock tick counter.
longint unsigned clk_ticks;
// File descriptors of the trace log and of the commit log.
int f;
int commit_log;
// Address mapping: physical addresses recorded for stores and loads, plus the
// entry most recently popped for a committing load/store.
logic [63:0] store_mapping [$];
logic [63:0] load_mapping [$];
logic [63:0] address_mapping;

// static uvm_cmdline_processor uvcl = uvm_cmdline_processor::get_inst();
|
||||
|
||||
// Build the per-hart log file names and open the log files for writing.
//   hart_id - hart identifier embedded in both file names
// Side effects:
//   - prints the trace file name to the simulator console
//   - stores the trace log descriptor in `f`
//   - stores the commit log descriptor in `commit_log`, but only when
//     ariane_pkg::ENABLE_SPIKE_COMMIT_LOG is set
//   - emits a warning when a file could not be opened ($fopen returned 0)
function void create_file(logic [63:0] hart_id);
  string fn, fn_commit_log;

  // hart_id is an integral value: print it with an integer format specifier
  // instead of relying on an implicit conversion to real.
  $sformat(fn, "trace_hart_%0d.log", hart_id);
  $sformat(fn_commit_log, "trace_hart_%0d_commit.log", hart_id);
  $display("[TRACER] Output filename is: %s", fn);

  f = $fopen(fn, "w");
  if (f == 0) begin
    $warning("[TRACER] Could not open trace file: %s", fn);
  end

  if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG) begin
    commit_log = $fopen(fn_commit_log, "w");
    if (commit_log == 0) begin
      $warning("[TRACER] Could not open commit log file: %s", fn_commit_log);
    end
  end
endfunction : create_file
|
||||
|
||||
// Main tracing loop; this task never returns.
// After clearing the shadow register files it waits for every event of the
// tracer_if.pck clocking block. While rstn is not 1 all queues are flushed.
// Otherwise one cycle is processed in this order: decode capture, issue
// capture, load/store address capture, resolved-branch capture, commit
// (printing one trace line per acknowledged commit port), exception printing,
// shadow register file update and finally the flush requests.
task trace();
  automatic logic [31:0] decoded_word, issued_word, retired_word;
  automatic ariane_pkg::scoreboard_entry_t retired_entry;
  // value reported as the result of the retiring instruction
  automatic logic [63:0] retired_result;

  // start with all-zero shadow register files
  gp_reg_file = '{default: 0};
  fp_reg_file = '{default: 0};

  forever begin
    // re-initialised on every pass through the loop body
    automatic ariane_pkg::bp_resolve_t resolved_branch = '0;

    // wait for the next clocking event; nothing is traced unless reset is de-asserted
    @(tracer_if.pck);
    if (tracer_if.pck.rstn !== 1'b1) begin
      flush();
      continue;
    end

    // one more traced cycle
    clk_ticks++;

    // -------------------
    // Instruction Decode
    // -------------------
    // remember every instruction word handed over by the fetch handshake
    if (tracer_if.pck.fetch_valid && tracer_if.pck.fetch_ack) begin
      decoded_word = tracer_if.pck.instruction;
      decode_queue.push_back(decoded_word);
    end

    // -------------------
    // Instruction Issue
    // -------------------
    // an acknowledged issue moves the oldest decoded word to the issue queue
    if (tracer_if.pck.issue_ack && !tracer_if.pck.flush_unissued) begin
      issued_word = decode_queue.pop_front();
      issue_queue.push_back(issued_word);
      // the scoreboard entry travels in its own queue
      issue_sbe_queue.push_back(ariane_pkg::scoreboard_entry_t'(tracer_if.pck.issue_sbe));
    end

    // --------------------
    // Address Translation
    // --------------------
    if (tracer_if.pck.st_valid) begin
      store_mapping.push_back(tracer_if.pck.st_paddr);
    end

    // killed loads are not recorded
    if (tracer_if.pck.ld_valid && !tracer_if.pck.ld_kill) begin
      load_mapping.push_back(tracer_if.pck.ld_paddr);
    end

    // ----------------------
    // Store predictions
    // ----------------------
    if (tracer_if.pck.resolve_branch.valid) begin
      bp.push_back(tracer_if.pck.resolve_branch);
    end

    // --------------
    // Commit
    // --------------
    // handle both commit ports, port 0 first
    for (int port = 0; port < 2; port++) begin
      if (tracer_if.pck.commit_ack[port]) begin
        retired_entry = ariane_pkg::scoreboard_entry_t'(tracer_if.pck.commit_instr[port]);
        retired_word  = issue_queue.pop_front();
        issue_sbe     = issue_sbe_queue.pop_front();

        // loads and stores consume the physical address recorded for them;
        // for every other instruction address_mapping keeps its previous value
        case (tracer_if.pck.commit_instr[port].fu)
          ariane_pkg::LOAD:  address_mapping = load_mapping.pop_front();
          ariane_pkg::STORE: address_mapping = store_mapping.pop_front();
          default: ;
        endcase

        // control flow instructions consume the oldest resolved branch
        if (tracer_if.pck.commit_instr[port].fu == ariane_pkg::CTRL_FLOW) begin
          resolved_branch = bp.pop_front();
        end

        // The scoreboard issue entry still contains the immediate value as a
        // result. Use the write-back data when a register write is valid,
        // otherwise source the result from the matching shadow register file,
        // which holds the most recent version of that register.
        if (tracer_if.pck.we_gpr[port] || tracer_if.pck.we_fpr[port]) begin
          retired_result = tracer_if.pck.wdata[port];
        end else if (ariane_pkg::is_rd_fpr(retired_entry.op)) begin
          retired_result = fp_reg_file[retired_entry.rd];
        end else begin
          retired_result = gp_reg_file[retired_entry.rd];
        end

        printInstr(issue_sbe, retired_word, retired_result, address_mapping,
                   tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, resolved_branch);
      end
    end

    // --------------
    // Exceptions
    // --------------
    // breakpoint exceptions raised in debug mode are not printed
    if (tracer_if.pck.exception.valid &&
        !(tracer_if.pck.debug_mode && tracer_if.pck.exception.cause == riscv::BREAKPOINT)) begin
      printException(tracer_if.pck.commit_instr[0].pc, tracer_if.pck.exception.cause,
                     tracer_if.pck.exception.tval);
    end

    // ----------------------
    // Commit Registers
    // ----------------------
    // mirror register writes into the shadow register files; GPR 0 is never written
    for (int port = 0; port < 2; port++) begin
      if (tracer_if.pck.we_gpr[port] && tracer_if.pck.waddr[port] != 5'b0) begin
        gp_reg_file[tracer_if.pck.waddr[port]] = tracer_if.pck.wdata[port];
      end else if (tracer_if.pck.we_fpr[port]) begin
        fp_reg_file[tracer_if.pck.waddr[port]] = tracer_if.pck.wdata[port];
      end
    end

    // --------------
    // Flush Signals
    // --------------
    // drop instructions that were decoded but not issued
    if (tracer_if.pck.flush_unissued) begin
      flushDecode();
    end
    // drop everything that is still in flight
    if (tracer_if.pck.flush) begin
      flush();
    end
  end

endtask
|
||||
|
||||
// Discard every instruction word that was decoded but not yet issued.
function void flushDecode ();
  decode_queue.delete();
endfunction
|
||||
|
||||
// Flush everything (we took an exception/interrupt): empties the decode
// queue, the issue queues, the load/store address mappings and the queue of
// resolved branches.
function void flush ();
  flushDecode();
  // issued instructions and their scoreboard entries
  issue_queue.delete();
  issue_sbe_queue.delete();
  // recorded physical addresses
  store_mapping.delete();
  load_mapping.delete();
  // resolved branches
  bp.delete();
endfunction
|
||||
|
||||
// Write one trace line for a committed instruction to the trace log `f` and,
// when ariane_pkg::ENABLE_SPIKE_COMMIT_LOG is set and the core is not in debug
// mode, one line to the commit log.
//   sbe        - scoreboard entry of the instruction
//   instr      - instruction word
//   result     - result value reported for the instruction
//   paddr      - physical address forwarded to the trace item
//   priv_lvl   - current privilege level
//   debug_mode - set while the core is in debug mode
//   bp         - branch resolution forwarded to the trace item
function void printInstr(ariane_pkg::scoreboard_entry_t sbe,
                         logic [31:0] instr,
                         logic [63:0] result,
                         logic [riscv::PLEN-1:0] paddr,
                         riscv::priv_lvl_t priv_lvl,
                         logic debug_mode,
                         ariane_pkg::bp_resolve_t bp);
  automatic instr_trace_item trace_item = new ($time, clk_ticks, sbe, instr, gp_reg_file, fp_reg_file, result, paddr, priv_lvl, debug_mode, bp);
  // textual form of the instruction, produced by the trace item
  automatic string trace_line = trace_item.printInstr();

  if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG) begin
    if (!debug_mode) begin
      $fwrite(commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result, ariane_pkg::is_rd_fpr(sbe.op)));
    end
  end

  $fwrite(f, {trace_line, "\n"});
endfunction
|
||||
|
||||
// Write one line describing an exception to the trace log `f`.
//   pc    - program counter passed to the exception trace item
//   cause - exception cause
//   tval  - trap value
function void printException(logic [riscv::VLEN-1:0] pc, logic [63:0] cause, logic [63:0] tval);
  automatic ex_trace_item exception_item = new (pc, cause, tval);
  automatic string exception_line = exception_item.printException();
  $fwrite(f, {exception_line, "\n"});
endfunction
|
||||
|
||||
// Close the log files that were opened: the trace log when its descriptor is
// non-zero, and the commit log when ariane_pkg::ENABLE_SPIKE_COMMIT_LOG is
// set and its descriptor is non-zero.
function void close();
  if (f != 0) begin
    $fclose(f);
  end
  if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG) begin
    if (commit_log != 0) begin
      $fclose(commit_log);
    end
  end
endfunction
|
||||
|
||||
|
||||
// Start-up: wait 15ns, open the log files for this hart and enter the
// tracing loop (trace() does not return).
initial begin : p_run_tracer
  #15ns;
  create_file(hart_id_i);
  trace();
end

// End of simulation: close the log files.
final begin : p_close_logs
  close();
end
|
||||
|
||||
endmodule : instr_tracer
|
||||
//pragma translate_on
|
||||
`endif
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 16.05.2017
|
||||
// Description: Instruction Tracer Interface
|
||||
|
||||
`ifndef VERILATOR
|
||||
`ifndef INSTR_TRACER_IF_SV
|
||||
`define INSTR_TRACER_IF_SV
|
||||
// Bundle of the signals sampled by the instruction tracer. The clocking
// block `pck` samples every signal on the rising edge of `clk` and declares
// all of them as inputs, i.e. nothing is driven through it.
interface instr_tracer_if (
    input clk
);

  // reset (active low) and flush requests
  logic                                rstn;
  logic                                flush_unissued;
  logic                                flush;

  // Decode
  logic [31:0]                         instruction;
  logic                                fetch_valid;
  logic                                fetch_ack;

  // Issue stage
  logic                                issue_ack;      // issue acknowledged
  ariane_pkg::scoreboard_entry_t       issue_sbe;      // issue scoreboard entry

  // WB stage, one entry per write port
  logic [1:0][4:0]                     waddr;
  logic [1:0][63:0]                    wdata;
  logic [1:0]                          we_gpr;
  logic [1:0]                          we_fpr;

  // Commit stage, one entry per commit port
  ariane_pkg::scoreboard_entry_t [1:0] commit_instr;   // commit instruction
  logic [1:0]                          commit_ack;

  // Address translation: stores
  logic                                st_valid;
  logic [riscv::PLEN-1:0]              st_paddr;

  // Address translation: loads
  logic                                ld_valid;
  logic                                ld_kill;
  logic [riscv::PLEN-1:0]              ld_paddr;

  // Branch resolution (misprediction)
  ariane_pkg::bp_resolve_t             resolve_branch;

  // Exceptions
  ariane_pkg::exception_t              exception;

  // Current privilege level and debug mode
  riscv::priv_lvl_t                    priv_lvl;
  logic                                debug_mode;

  // The tracer only observes: every signal is an input of the clocking block.

  //pragma translate_off
  clocking pck @(posedge clk);
    input rstn, flush_unissued, flush;
    input instruction, fetch_valid, fetch_ack;
    input issue_ack, issue_sbe;
    input waddr, wdata, we_gpr, we_fpr;
    input commit_instr, commit_ack;
    input st_valid, st_paddr;
    input ld_valid, ld_kill, ld_paddr;
    input resolve_branch;
    input exception;
    input priv_lvl, debug_mode;
  endclocking
  //pragma translate_on

endinterface
|
||||
`endif
|
||||
`endif
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 15.08.2018
|
||||
// Description: SRAM wrapper for FPGA (requires the fpga-support submodule)
|
||||
//
|
||||
// Note: the wrapped module contains two different implementations for
|
||||
// ALTERA and XILINX tools, since these follow different coding styles for
|
||||
// inferrable RAMS with byte enable. define `FPGA_TARGET_XILINX or
|
||||
// `FPGA_TARGET_ALTERA in your build environment (default is ALTERA)
|
||||
|
||||
// Single-port SRAM built from 64 bit wide tc_sram_wrapper cuts.
// The write data is zero-padded to a multiple of 64 bits and one cut is
// instantiated per 64 bit slice. When USER_EN > 0 a second cut per slice
// stores the user bits; otherwise the user read data is tied to zero.
//
// Parameters:
//   DATA_WIDTH - width of wdata_i / rdata_o
//   USER_WIDTH - width of wuser_i / ruser_o
//   USER_EN    - instantiate the user memory when greater than 0
//   NUM_WORDS  - number of words per cut
//   SIM_INIT   - forwarded to the SimInit parameter of every cut
//   OUT_REGS   - NOTE(review): not referenced inside this module; every cut
//                is instantiated with Latency = 1
module sram #(
    parameter DATA_WIDTH = 64,
    parameter USER_WIDTH = 1,
    parameter USER_EN    = 0,
    parameter NUM_WORDS  = 1024,
    parameter SIM_INIT   = "none",
    parameter OUT_REGS   = 0     // enables output registers in FPGA macro (read lat = 2)
)(
   input  logic                          clk_i,
   input  logic                          rst_ni,
   input  logic                          req_i,
   input  logic                          we_i,
   input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
   input  logic [USER_WIDTH-1:0]         wuser_i,
   input  logic [DATA_WIDTH-1:0]         wdata_i,
   input  logic [(DATA_WIDTH+7)/8-1:0]   be_i,
   output logic [USER_WIDTH-1:0]         ruser_o,
   output logic [DATA_WIDTH-1:0]         rdata_o
);

  // number of byte enables on the port (same expression as the be_i range)
  localparam BE_WIDTH           = (DATA_WIDTH+7)/8;
  localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH+63)/64)*64;
  localparam USER_WIDTH_ALIGNED = DATA_WIDTH_ALIGNED; // To be fine tuned to reduce memory size
  localparam BE_WIDTH_ALIGNED   = (((DATA_WIDTH+7)/8+7)/8)*8;

  logic [DATA_WIDTH_ALIGNED-1:0]  wdata_aligned;
  logic [USER_WIDTH_ALIGNED-1:0]  wuser_aligned;
  logic [BE_WIDTH_ALIGNED-1:0]    be_aligned;
  logic [DATA_WIDTH_ALIGNED-1:0]  rdata_aligned;
  logic [USER_WIDTH_ALIGNED-1:0]  ruser_aligned;

  // align to 64 bits for inferrable macro below
  always_comb begin : p_align
    // padding bits are zero
    wdata_aligned = '0;
    wuser_aligned = '0;
    be_aligned    = '0;
    wdata_aligned[DATA_WIDTH-1:0] = wdata_i;
    wuser_aligned[USER_WIDTH-1:0] = wuser_i;
    // explicit slice of the port width; the upper byte enables stay zero
    be_aligned[BE_WIDTH-1:0]      = be_i;

    // strip the padding again on the read side
    rdata_o = rdata_aligned[DATA_WIDTH-1:0];
    ruser_o = ruser_aligned[USER_WIDTH-1:0];
  end

  for (genvar k = 0; k<(DATA_WIDTH+63)/64; k++) begin : gen_cut
    // data cut for bits [k*64 +: 64]
    // unused byte-enable segments (8bits) are culled by the tool
    tc_sram_wrapper #(
      .NumWords(NUM_WORDS),           // Number of Words in data array
      .DataWidth(64),                 // Data signal width
      .ByteWidth(32'd8),              // Width of a data byte
      .NumPorts(32'd1),               // Number of read and write ports
      .Latency(32'd1),                // Latency when the read data is available
      .SimInit(SIM_INIT),             // Simulation initialization
      .PrintSimCfg(1'b0)              // Print configuration
    ) i_tc_sram_wrapper (
      .clk_i    ( clk_i                     ),
      .rst_ni   ( rst_ni                    ),
      .req_i    ( req_i                     ),
      .we_i     ( we_i                      ),
      .be_i     ( be_aligned[k*8 +: 8]      ),
      .wdata_i  ( wdata_aligned[k*64 +: 64] ),
      .addr_i   ( addr_i                    ),
      .rdata_o  ( rdata_aligned[k*64 +: 64] )
    );
    if (USER_EN > 0) begin : gen_mem_user
      // user cut; shares request, address and byte enables with the data cut
      tc_sram_wrapper #(
        .NumWords(NUM_WORDS),           // Number of Words in data array
        .DataWidth(64),                 // Data signal width
        .ByteWidth(32'd8),              // Width of a data byte
        .NumPorts(32'd1),               // Number of read and write ports
        .Latency(32'd1),                // Latency when the read data is available
        .SimInit(SIM_INIT),             // Simulation initialization
        .PrintSimCfg(1'b0)              // Print configuration
      ) i_tc_sram_wrapper_user (
        .clk_i    ( clk_i                     ),
        .rst_ni   ( rst_ni                    ),
        .req_i    ( req_i                     ),
        .we_i     ( we_i                      ),
        .be_i     ( be_aligned[k*8 +: 8]      ),
        .wdata_i  ( wuser_aligned[k*64 +: 64] ),
        .addr_i   ( addr_i                    ),
        .rdata_o  ( ruser_aligned[k*64 +: 64] )
      );
    end else begin
      // no user memory: user read data is constant zero
      assign ruser_aligned[k*64 +: 64] = '0;
    end
  end
endmodule : sram
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
// Copyright 2022 Thales DIS design services SAS
|
||||
//
|
||||
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Original Author: Jean-Roch COULON - Thales
|
||||
|
||||
// Wrapper around tc_sram: forwards every parameter and port unchanged to a
// single tc_sram instance. The instance sits between
// `synthesis translate_off` / `synthesis translate_on` pragmas.
module tc_sram_wrapper #(
  parameter int unsigned NumWords    = 32'd1024,  // Number of Words in data array
  parameter int unsigned DataWidth   = 32'd128,   // Data signal width
  parameter int unsigned ByteWidth   = 32'd8,     // Width of a data byte
  parameter int unsigned NumPorts    = 32'd2,     // Number of read and write ports
  parameter int unsigned Latency     = 32'd1,     // Latency when the read data is available
  parameter              SimInit     = "none",    // Simulation initialization
  parameter bit          PrintSimCfg = 1'b0,      // Print configuration
  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
  parameter type         addr_t    = logic [AddrWidth-1:0],
  parameter type         data_t    = logic [DataWidth-1:0],
  parameter type         be_t      = logic [BeWidth-1:0]
) (
  input  logic                 clk_i,    // Clock
  input  logic                 rst_ni,   // Asynchronous reset active low
  // request side, one entry per port
  input  logic  [NumPorts-1:0] req_i,    // request
  input  logic  [NumPorts-1:0] we_i,     // write enable
  input  addr_t [NumPorts-1:0] addr_i,   // request address
  input  data_t [NumPorts-1:0] wdata_i,  // write data
  input  be_t   [NumPorts-1:0] be_i,     // write byte enable
  // response side, one entry per port
  output data_t [NumPorts-1:0] rdata_o   // read data
);

// synthesis translate_off

  // the wrapped memory; parameters and ports are passed through one to one
  tc_sram #(
    .NumWords    ( NumWords    ),
    .DataWidth   ( DataWidth   ),
    .ByteWidth   ( ByteWidth   ),
    .NumPorts    ( NumPorts    ),
    .Latency     ( Latency     ),
    .SimInit     ( SimInit     ),
    .PrintSimCfg ( PrintSimCfg )
  ) i_tc_sram (
    .clk_i   ( clk_i   ),
    .rst_ni  ( rst_ni  ),
    .req_i   ( req_i   ),
    .we_i    ( we_i    ),
    .be_i    ( be_i    ),
    .wdata_i ( wdata_i ),
    .addr_i  ( addr_i  ),
    .rdata_o ( rdata_o )
  );

// synthesis translate_on

endmodule
|
||||
|
|
@ -0,0 +1,423 @@
|
|||
// Copyright 2020 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Authors: Matheus Cavalcante, ETH Zurich
|
||||
// Nils Wistoff, ETH Zurich
|
||||
// Date: 20.11.2020
|
||||
// Description: Functional unit that dispatches CVA6 instructions to accelerators.
|
||||
|
||||
module acc_dispatcher
|
||||
import ariane_pkg::*;
|
||||
import riscv::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter type acc_req_t = acc_pkg::accelerator_req_t,
|
||||
parameter type acc_resp_t = acc_pkg::accelerator_resp_t,
|
||||
parameter type acc_cfg_t = logic,
|
||||
parameter acc_cfg_t AccCfg = '0
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
// Interface with the CSR regfile
|
||||
input logic acc_cons_en_i, // Accelerator memory consistent mode
|
||||
output logic acc_fflags_valid_o,
|
||||
output logic [4:0] acc_fflags_o,
|
||||
// Interface with the CSRs
|
||||
input priv_lvl_t ld_st_priv_lvl_i,
|
||||
input logic sum_i,
|
||||
input pmpcfg_t [15:0] pmpcfg_i,
|
||||
input logic [15:0][PLEN-3:0] pmpaddr_i,
|
||||
input logic [2:0] fcsr_frm_i,
|
||||
output logic dirty_v_state_o,
|
||||
// Interface with the issue stage
|
||||
input scoreboard_entry_t issue_instr_i,
|
||||
input logic issue_instr_hs_i,
|
||||
output logic issue_stall_o,
|
||||
input fu_data_t fu_data_i,
|
||||
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
|
||||
output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
|
||||
output xlen_t acc_result_o,
|
||||
output logic acc_valid_o,
|
||||
output exception_t acc_exception_o,
|
||||
// Interface with the execute stage
|
||||
output logic acc_valid_ex_o, // FU executed
|
||||
// Interface with the commit stage
|
||||
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
|
||||
input logic commit_st_barrier_i, // A store barrier was commited
|
||||
// Interface with the load/store unit
|
||||
output logic acc_stall_st_pending_o,
|
||||
input logic acc_no_st_pending_i,
|
||||
input dcache_req_i_t [2:0] dcache_req_ports_i,
|
||||
// Interface with the controller
|
||||
output logic ctrl_halt_o,
|
||||
input logic flush_unissued_instr_i,
|
||||
input logic flush_ex_i,
|
||||
output logic flush_pipeline_o,
|
||||
// Interface with cache subsystem
|
||||
output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
|
||||
input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
|
||||
input logic inval_ready_i,
|
||||
output logic inval_valid_o,
|
||||
output logic [63:0] inval_addr_o,
|
||||
// Accelerator interface
|
||||
output acc_req_t acc_req_o,
|
||||
input acc_resp_t acc_resp_i
|
||||
);
|
||||
|
||||
`include "common_cells/registers.svh"
|
||||
|
||||
import cf_math_pkg::idx_width;
|
||||
|
||||
/***********************
|
||||
* Common signals *
|
||||
***********************/
|
||||
|
||||
logic acc_ready;
|
||||
logic acc_valid_d, acc_valid_q;
|
||||
|
||||
/**************************
|
||||
* Accelerator issue *
|
||||
**************************/
|
||||
|
||||
// Issue accelerator instructions
|
||||
`FF(acc_valid_q, acc_valid_d, '0)
|
||||
|
||||
assign acc_valid_ex_o = acc_valid_q;
|
||||
assign acc_valid_d = ~issue_instr_i.ex.valid &
|
||||
issue_instr_hs_i &
|
||||
(issue_instr_i.fu == ACCEL) &
|
||||
~flush_unissued_instr_i;
|
||||
|
||||
// Accelerator load/store pending signals
|
||||
logic acc_no_ld_pending;
|
||||
logic acc_no_st_pending;
|
||||
|
||||
// Issue stage stall, selected by the functional unit of the instruction
// being issued. There are three stall cases; any other functional unit
// never stalls.
always_comb begin : stall_issue
  // default: no stall
  issue_stall_o = 1'b0;

  unique case (issue_instr_i.fu)
    // 1. Accelerator instruction, but the dispatcher is not ready yet.
    ACCEL: issue_stall_o = ~acc_ready;
    // 2. Scalar load while an accelerator store is in flight
    //    (only in memory consistent mode).
    LOAD: issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
    // 3. Scalar store while an accelerator load or store is in flight
    //    (only in memory consistent mode).
    STORE: issue_stall_o = acc_cons_en_i & ~(acc_no_st_pending & acc_no_ld_pending);
    default: ;
  endcase
end
|
||||
|
||||
/***********************
|
||||
* Instruction queue *
|
||||
***********************/
|
||||
|
||||
localparam InstructionQueueDepth = 3;
|
||||
|
||||
fu_data_t acc_data;
|
||||
fu_data_t acc_insn_queue_o;
|
||||
logic acc_insn_queue_pop;
|
||||
logic acc_insn_queue_empty;
|
||||
logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage;
|
||||
logic acc_commit;
|
||||
logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id;
|
||||
|
||||
assign acc_data = acc_valid_ex_o ? fu_data_i : '0;
|
||||
|
||||
fifo_v3 #(
|
||||
.DEPTH (InstructionQueueDepth),
|
||||
.FALL_THROUGH(1'b1),
|
||||
.dtype (fu_data_t)
|
||||
) i_acc_insn_queue (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (flush_ex_i),
|
||||
.testmode_i(1'b0),
|
||||
.data_i (fu_data_i),
|
||||
.push_i (acc_valid_q),
|
||||
.full_o ( /* Unused */),
|
||||
.data_o (acc_insn_queue_o),
|
||||
.pop_i (acc_insn_queue_pop),
|
||||
.empty_o (acc_insn_queue_empty),
|
||||
.usage_o (acc_insn_queue_usage)
|
||||
);
|
||||
|
||||
// We are ready if the instruction queue is able to accept at least one more entry.
|
||||
assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
|
||||
|
||||
/**********************************
|
||||
* Non-speculative instructions *
|
||||
**********************************/
|
||||
|
||||
// Keep track of the instructions that were received by the dispatcher.
|
||||
logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q;
|
||||
`FF(insn_pending_q, insn_pending_d, '0)
|
||||
|
||||
// Only non-speculative instructions can be issued to the accelerators.
|
||||
// The following block keeps track of which transaction IDs reached the
|
||||
// top of the scoreboard, and are therefore no longer speculative.
|
||||
logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
|
||||
`FF(insn_ready_q, insn_ready_d, '0)
|
||||
|
||||
// Next-state logic of the per-transaction-ID `pending` and `ready` flags.
// The order of the updates matters: later statements override earlier ones
// for the same bit.
always_comb begin : p_non_speculative_ff
  // hold the current state unless one of the events below occurs
  insn_ready_d   = insn_ready_q;
  insn_pending_d = insn_pending_q;

  // a new instruction was received: mark its transaction ID as pending
  if (acc_valid_q) begin
    insn_pending_d[acc_data.trans_id] = 1'b1;
  end

  // a flush drops every received instruction
  if (flush_ex_i) begin
    insn_pending_d = '0;
  end

  // a pending accelerator instruction is no longer speculative:
  // move it from pending to ready
  if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
    insn_pending_d[acc_commit_trans_id] = 1'b0;
    insn_ready_d[acc_commit_trans_id]   = 1'b1;
  end

  // the instruction was issued to the accelerator: clear its ready flag
  if (acc_req_o.req_valid) begin
    insn_ready_d[acc_req_o.trans_id] = 1'b0;
  end
end : p_non_speculative_ff
|
||||
|
||||
/*************************
|
||||
* Accelerator request *
|
||||
*************************/
|
||||
|
||||
acc_pkg::accelerator_req_t acc_req;
|
||||
logic acc_req_valid;
|
||||
logic acc_req_ready;
|
||||
|
||||
acc_pkg::accelerator_req_t acc_req_int;
|
||||
fall_through_register #(
|
||||
.T(acc_pkg::accelerator_req_t)
|
||||
) i_accelerator_req_register (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clr_i (1'b0),
|
||||
.testmode_i(1'b0),
|
||||
.data_i (acc_req),
|
||||
.valid_i (acc_req_valid),
|
||||
.ready_o (acc_req_ready),
|
||||
.data_o (acc_req_int),
|
||||
.valid_o (acc_req_o.req_valid),
|
||||
.ready_i (acc_resp_i.req_ready)
|
||||
);
|
||||
|
||||
assign acc_req_o.insn = acc_req_int.insn;
|
||||
assign acc_req_o.rs1 = acc_req_int.rs1;
|
||||
assign acc_req_o.rs2 = acc_req_int.rs2;
|
||||
assign acc_req_o.frm = acc_req_int.frm;
|
||||
assign acc_req_o.trans_id = acc_req_int.trans_id;
|
||||
assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
|
||||
assign acc_req_o.acc_cons_en = acc_cons_en_i;
|
||||
assign acc_req_o.inval_ready = inval_ready_i;
|
||||
|
||||
// Build the accelerator request from the head of the instruction queue.
// While the queue is empty the request is all zeros, not valid, and nothing
// is popped.
always_comb begin : accelerator_req_dispatcher
  // defaults: empty request, no pop from the instruction queue
  acc_req            = '0;
  acc_req_valid      = 1'b0;
  acc_insn_queue_pop = 1'b0;

  if (!acc_insn_queue_empty) begin
    // Unpack fu_data_t into accelerator_req_t.
    //  - insn: the instruction is forwarded from the decoder as an immediate
    //  - frm:  the rounding mode is up to date during a valid request to the
    //          accelerator. The scoreboard synchronizes it with previous fcsr
    //          writes, and future fcsr writes do not take place until the
    //          accelerator answers (Ariane commits in-order).
    acc_req = '{
      insn    : acc_insn_queue_o.imm[31:0],
      rs1     : acc_insn_queue_o.operand_a,
      rs2     : acc_insn_queue_o.operand_b,
      frm     : fpnew_pkg::roundmode_e'(fcsr_frm_i),
      trans_id: acc_insn_queue_o.trans_id,
      default : '0
    };

    // The request is held back until the instruction is no longer
    // speculative: either a pending instruction is committing in this cycle,
    // or the head of the queue was already marked ready.
    acc_req_valid = (acc_commit && insn_pending_q[acc_commit_trans_id]) ||
                    insn_ready_q[acc_insn_queue_o.trans_id];

    // pop the queue once the request register accepts the request
    acc_insn_queue_pop = acc_req_valid && acc_req_ready;
  end
end
|
||||
|
||||
/**************************
|
||||
* Accelerator response *
|
||||
**************************/
|
||||
|
||||
logic acc_ld_disp;
|
||||
logic acc_st_disp;
|
||||
|
||||
// Unpack the accelerator response
|
||||
assign acc_trans_id_o = acc_resp_i.trans_id;
|
||||
assign acc_result_o = acc_resp_i.result;
|
||||
assign acc_valid_o = acc_resp_i.resp_valid;
|
||||
assign acc_exception_o = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
|
||||
assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
|
||||
assign acc_fflags_o = acc_resp_i.fflags;
|
||||
// Always ready to receive responses
|
||||
assign acc_req_o.resp_ready = 1'b1;
|
||||
|
||||
// Signal dispatched load/store to issue stage
|
||||
assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
|
||||
assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
|
||||
|
||||
// Cache invalidation
|
||||
assign inval_valid_o = acc_resp_i.inval_valid;
|
||||
assign inval_addr_o = acc_resp_i.inval_addr;
|
||||
|
||||
/**************************
|
||||
* Accelerator commit *
|
||||
**************************/
|
||||
|
||||
// An instruction can be issued to the (in-order) back-end once it reached
// the top of the scoreboard and has not been issued yet. acc_commit is set
// when commit port 0 holds a not-yet-valid ACCEL instruction, or when port 0
// is valid and commit port 1 holds a not-yet-valid ACCEL instruction.
always_comb begin : accelerator_commit
  acc_commit = 1'b0;

  if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) begin
    acc_commit = 1'b1;
  end else if (commit_instr_i[0].valid && !commit_instr_i[1].valid &&
               commit_instr_i[1].fu == ACCEL) begin
    acc_commit = 1'b1;
  end
end
|
||||
|
||||
// The V state becomes dirty when any commit port acknowledges an instruction
// whose functional unit is ACCEL, i.e. whenever we commit anything related
// to the vector accelerator.
always_comb begin : dirty_v_state
  dirty_v_state_o = 1'b0;
  // OR-reduce over all commit ports
  for (int port = 0; port < CVA6Cfg.NrCommitPorts; port++) begin
    dirty_v_state_o = dirty_v_state_o | (commit_ack_i[port] & (commit_instr_i[port].fu == ACCEL));
  end
end
|
||||
|
||||
assign acc_commit_trans_id = !commit_instr_i[0].valid ? commit_instr_i[0].trans_id
|
||||
: commit_instr_i[1].trans_id;
|
||||
|
||||
/**************************
|
||||
* Accelerator barriers *
|
||||
**************************/
|
||||
|
||||
// On a store barrier (i.e. any barrier that requires preceding stores to complete
|
||||
// before continuing execution), halt execution while there are pending stores in
|
||||
// the accelerator pipeline.
|
||||
logic wait_acc_store_d, wait_acc_store_q;
|
||||
`FF(wait_acc_store_q, wait_acc_store_d, '0)
|
||||
|
||||
// Set on store barrier. Clear when no store is pending.
|
||||
assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending;
|
||||
assign ctrl_halt_o = wait_acc_store_q;
|
||||
|
||||
/**************************
|
||||
* Load/Store tracking *
|
||||
**************************/
|
||||
|
||||
// Loads
|
||||
logic acc_spec_loads_overflow;
|
||||
logic [2:0] acc_spec_loads_pending;
|
||||
logic acc_disp_loads_overflow;
|
||||
logic [2:0] acc_disp_loads_pending;
|
||||
|
||||
assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0);
|
||||
|
||||
// Count speculative loads. These can still be flushed.
|
||||
counter #(
|
||||
.WIDTH (3),
|
||||
.STICKY_OVERFLOW(0)
|
||||
) i_acc_spec_loads (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clear_i (flush_ex_i),
|
||||
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),
|
||||
.load_i (1'b0),
|
||||
.down_i (acc_ld_disp),
|
||||
.d_i ('0),
|
||||
.q_o (acc_spec_loads_pending),
|
||||
.overflow_o(acc_spec_loads_overflow)
|
||||
);
|
||||
|
||||
// Count dispatched loads. These cannot be flushed anymore.
|
||||
counter #(
|
||||
.WIDTH (3),
|
||||
.STICKY_OVERFLOW(0)
|
||||
) i_acc_disp_loads (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clear_i (1'b0),
|
||||
.en_i (acc_ld_disp ^ acc_resp_i.load_complete),
|
||||
.load_i (1'b0),
|
||||
.down_i (acc_resp_i.load_complete),
|
||||
.d_i ('0),
|
||||
.q_o (acc_disp_loads_pending),
|
||||
.overflow_o(acc_disp_loads_overflow)
|
||||
);
|
||||
|
||||
acc_dispatcher_no_load_overflow :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
|
||||
else $error("[acc_dispatcher] Too many pending loads.");
|
||||
|
||||
// Stores
|
||||
logic acc_spec_stores_overflow;
|
||||
logic [2:0] acc_spec_stores_pending;
|
||||
logic acc_disp_stores_overflow;
|
||||
logic [2:0] acc_disp_stores_pending;
|
||||
|
||||
assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0);
|
||||
|
||||
// Count speculative stores. These can still be flushed.
|
||||
counter #(
|
||||
.WIDTH (3),
|
||||
.STICKY_OVERFLOW(0)
|
||||
) i_acc_spec_stores (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clear_i (flush_ex_i),
|
||||
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
|
||||
.load_i (1'b0),
|
||||
.down_i (acc_st_disp),
|
||||
.d_i ('0),
|
||||
.q_o (acc_spec_stores_pending),
|
||||
.overflow_o(acc_spec_stores_overflow)
|
||||
);
|
||||
|
||||
// Count dispatched stores. These cannot be flushed anymore.
|
||||
counter #(
|
||||
.WIDTH (3),
|
||||
.STICKY_OVERFLOW(0)
|
||||
) i_acc_disp_stores (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clear_i (1'b0),
|
||||
.en_i (acc_st_disp ^ acc_resp_i.store_complete),
|
||||
.load_i (1'b0),
|
||||
.down_i (acc_resp_i.store_complete),
|
||||
.d_i ('0),
|
||||
.q_o (acc_disp_stores_pending),
|
||||
.overflow_o(acc_disp_stores_overflow)
|
||||
);
|
||||
|
||||
acc_dispatcher_no_store_overflow :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
|
||||
else $error("[acc_dispatcher] Too many pending stores.");
|
||||
|
||||
/**************************
|
||||
* Tie Off Unused Signals *
|
||||
**************************/
|
||||
|
||||
assign acc_stall_st_pending_o = 1'b0;
|
||||
assign flush_pipeline_o = 1'b0;
|
||||
assign acc_dcache_req_ports_o = '0;
|
||||
|
||||
endmodule : acc_dispatcher
|
||||
|
|
@ -0,0 +1,423 @@
|
|||
// Copyright 2020 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Authors: Matheus Cavalcante, ETH Zurich
|
||||
// Nils Wistoff, ETH Zurich
|
||||
// Date: 20.11.2020
|
||||
// Description: Functional unit that dispatches CVA6 instructions to accelerators.
|
||||
|
||||
module acc_dispatcher
|
||||
import ariane_pkg::*;
|
||||
import riscv::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter type acc_req_t = acc_pkg::accelerator_req_t,
|
||||
parameter type acc_resp_t = acc_pkg::accelerator_resp_t,
|
||||
parameter type acc_cfg_t = logic,
|
||||
parameter acc_cfg_t AccCfg = '0
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
// Interface with the CSR regfile
|
||||
input logic acc_cons_en_i, // Accelerator memory consistent mode
|
||||
output logic acc_fflags_valid_o,
|
||||
output logic [4:0] acc_fflags_o,
|
||||
// Interface with the CSRs
|
||||
input priv_lvl_t ld_st_priv_lvl_i,
|
||||
input logic sum_i,
|
||||
input pmpcfg_t [15:0] pmpcfg_i,
|
||||
input logic [15:0][PLEN-3:0] pmpaddr_i,
|
||||
input logic [2:0] fcsr_frm_i,
|
||||
output logic dirty_v_state_o,
|
||||
// Interface with the issue stage
|
||||
input scoreboard_entry_t issue_instr_i,
|
||||
input logic issue_instr_hs_i,
|
||||
output logic issue_stall_o,
|
||||
input fu_data_t fu_data_i,
|
||||
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
|
||||
output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
|
||||
output xlen_t acc_result_o,
|
||||
output logic acc_valid_o,
|
||||
output exception_t acc_exception_o,
|
||||
// Interface with the execute stage
|
||||
output logic acc_valid_ex_o, // FU executed
|
||||
// Interface with the commit stage
|
||||
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
|
||||
input logic commit_st_barrier_i, // A store barrier was commited
|
||||
// Interface with the load/store unit
|
||||
output logic acc_stall_st_pending_o,
|
||||
input logic acc_no_st_pending_i,
|
||||
input dcache_req_i_t [2:0] dcache_req_ports_i,
|
||||
// Interface with the controller
|
||||
output logic ctrl_halt_o,
|
||||
input logic flush_unissued_instr_i,
|
||||
input logic flush_ex_i,
|
||||
output logic flush_pipeline_o,
|
||||
// Interface with cache subsystem
|
||||
output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
|
||||
input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
|
||||
input logic inval_ready_i,
|
||||
output logic inval_valid_o,
|
||||
output logic [63:0] inval_addr_o,
|
||||
// Accelerator interface
|
||||
output acc_pkg::accelerator_req_t acc_req_o,
|
||||
input acc_pkg::accelerator_resp_t acc_resp_i
|
||||
);
|
||||
|
||||
`include "common_cells/registers.svh"
|
||||
|
||||
import cf_math_pkg::idx_width;
|
||||
|
||||
/***********************
|
||||
* Common signals *
|
||||
***********************/
|
||||
|
||||
logic acc_ready;
|
||||
logic acc_valid_d, acc_valid_q;
|
||||
|
||||
/**************************
|
||||
* Accelerator issue *
|
||||
**************************/
|
||||
|
||||
// Issue accelerator instructions
|
||||
`FF(acc_valid_q, acc_valid_d, '0)
|
||||
|
||||
assign acc_valid_ex_o = acc_valid_q;
|
||||
assign acc_valid_d = ~issue_instr_i.ex.valid &
|
||||
issue_instr_hs_i &
|
||||
(issue_instr_i.fu == ACCEL) &
|
||||
~flush_unissued_instr_i;
|
||||
|
||||
// Accelerator load/store pending signals
|
||||
logic acc_no_ld_pending;
|
||||
logic acc_no_st_pending;
|
||||
|
||||
// Stall issue stage in three cases:
|
||||
always_comb begin : stall_issue
  // The issue stage runs freely unless the functional unit of the instruction
  // being issued hits one of the hazards listed below.
  issue_stall_o = 1'b0;

  unique case (issue_instr_i.fu)
    // Accelerator instruction: hold it back while the dispatcher is not ready.
    ACCEL: issue_stall_o = ~acc_ready;
    // Scalar load: in memory-consistent mode, wait for in-flight accelerator stores.
    LOAD: issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
    // Scalar store: in memory-consistent mode, wait until neither accelerator
    // loads nor accelerator stores are in flight.
    STORE: issue_stall_o = acc_cons_en_i & ~(acc_no_st_pending & acc_no_ld_pending);
    // Any other functional unit keeps the default (no stall).
    default: ;
  endcase
end
|
||||
|
||||
/***********************
|
||||
* Instruction queue *
|
||||
***********************/
|
||||
|
||||
localparam InstructionQueueDepth = 3;
|
||||
|
||||
fu_data_t acc_data;
|
||||
fu_data_t acc_insn_queue_o;
|
||||
logic acc_insn_queue_pop;
|
||||
logic acc_insn_queue_empty;
|
||||
logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage;
|
||||
logic acc_commit;
|
||||
logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id;
|
||||
|
||||
assign acc_data = acc_valid_ex_o ? fu_data_i : '0;
|
||||
|
||||
fifo_v3 #(
|
||||
.DEPTH (InstructionQueueDepth),
|
||||
.FALL_THROUGH(1'b1),
|
||||
.dtype (fu_data_t)
|
||||
) i_acc_insn_queue (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (flush_ex_i),
|
||||
.testmode_i(1'b0),
|
||||
.data_i (fu_data_i),
|
||||
.push_i (acc_valid_q),
|
||||
.full_o ( /* Unused */),
|
||||
.data_o (acc_insn_queue_o),
|
||||
.pop_i (acc_insn_queue_pop),
|
||||
.empty_o (acc_insn_queue_empty),
|
||||
.usage_o (acc_insn_queue_usage)
|
||||
);
|
||||
|
||||
// We are ready if the instruction queue is able to accept at least one more entry.
|
||||
assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
|
||||
|
||||
/**********************************
|
||||
* Non-speculative instructions *
|
||||
**********************************/
|
||||
|
||||
// Keep track of the instructions that were received by the dispatcher.
|
||||
logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q;
|
||||
`FF(insn_pending_q, insn_pending_d, '0)
|
||||
|
||||
// Only non-speculative instructions can be issued to the accelerators.
|
||||
// The following block keeps track of which transaction IDs reached the
|
||||
// top of the scoreboard, and are therefore no longer speculative.
|
||||
logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
|
||||
`FF(insn_ready_q, insn_ready_d, '0)
|
||||
|
||||
// Next-state logic for two bitmaps indexed by transaction ID:
//   insn_pending - instruction was received by the dispatcher and is still speculative
//   insn_ready   - instruction is no longer speculative but has not been sent out yet
// The statements below are order-dependent: a later assignment overrides an
// earlier one made in the same evaluation.
always_comb begin : p_non_speculative_ff
|
||||
// Hold the registered state unless one of the events below changes it
|
||||
insn_pending_d = insn_pending_q;
|
||||
insn_ready_d = insn_ready_q;
|
||||
|
||||
// An instruction is being pushed into the instruction queue: mark its ID as pending
|
||||
if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1;
|
||||
// A flush clears every pending ID, including one set just above
|
||||
if (flush_ex_i) insn_pending_d = '0;
|
||||
|
||||
// An accelerator instruction is no longer speculative: move its ID from
// pending to ready. The check reads the registered insn_pending_q, so it
// still fires (and sets the ready bit) in a cycle where flush_ex_i is high.
|
||||
if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
|
||||
insn_ready_d[acc_commit_trans_id] = 1'b1;
|
||||
insn_pending_d[acc_commit_trans_id] = 1'b0;
|
||||
end
|
||||
|
||||
// A request is being presented to the accelerator: its ID is no longer waiting.
// Only req_valid is checked here, not the accelerator's req_ready.
|
||||
if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
|
||||
end : p_non_speculative_ff
|
||||
|
||||
/*************************
|
||||
* Accelerator request *
|
||||
*************************/
|
||||
|
||||
acc_pkg::accelerator_req_t acc_req;
|
||||
logic acc_req_valid;
|
||||
logic acc_req_ready;
|
||||
|
||||
acc_pkg::accelerator_req_t acc_req_int;
|
||||
fall_through_register #(
|
||||
.T(acc_pkg::accelerator_req_t)
|
||||
) i_accelerator_req_register (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clr_i (1'b0),
|
||||
.testmode_i(1'b0),
|
||||
.data_i (acc_req),
|
||||
.valid_i (acc_req_valid),
|
||||
.ready_o (acc_req_ready),
|
||||
.data_o (acc_req_int),
|
||||
.valid_o (acc_req_o.req_valid),
|
||||
.ready_i (acc_resp_i.req_ready)
|
||||
);
|
||||
|
||||
assign acc_req_o.insn = acc_req_int.insn;
|
||||
assign acc_req_o.rs1 = acc_req_int.rs1;
|
||||
assign acc_req_o.rs2 = acc_req_int.rs2;
|
||||
assign acc_req_o.frm = acc_req_int.frm;
|
||||
assign acc_req_o.trans_id = acc_req_int.trans_id;
|
||||
assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
|
||||
assign acc_req_o.acc_cons_en = acc_cons_en_i;
|
||||
assign acc_req_o.inval_ready = inval_ready_i;
|
||||
|
||||
always_comb begin : accelerator_req_dispatcher
  // Idle defaults: an all-zero request that is not valid, and the head of the
  // instruction queue stays in place.
  acc_req            = '0;
  acc_req_valid      = 1'b0;
  acc_insn_queue_pop = 1'b0;

  if (!acc_insn_queue_empty) begin
    // Repack the fu_data_t at the head of the queue as an accelerator request.
    // Request fields that are not written here keep the zero default from above.

    // The decoder forwards the instruction word through the immediate field.
    acc_req.insn     = acc_insn_queue_o.imm[31:0];
    acc_req.rs1      = acc_insn_queue_o.operand_a;
    acc_req.rs2      = acc_insn_queue_o.operand_b;
    // The rounding mode is sampled live from the CSR. It is up to date during a
    // valid request: the scoreboard orders it after earlier fcsr writes, and
    // later fcsr writes wait for the accelerator's answer (in-order commit).
    acc_req.frm      = fpnew_pkg::roundmode_e'(fcsr_frm_i);
    acc_req.trans_id = acc_insn_queue_o.trans_id;

    // The request is only offered once the instruction stopped being
    // speculative: either its ready bit is already set, or a pending
    // accelerator instruction is reaching the commit condition right now.
    acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] ||
                    (acc_commit && insn_pending_q[acc_commit_trans_id]);

    // Drop the queue head once the request register has taken it.
    acc_insn_queue_pop = acc_req_valid && acc_req_ready;
  end
end
|
||||
|
||||
/**************************
|
||||
* Accelerator response *
|
||||
**************************/
|
||||
|
||||
logic acc_ld_disp;
|
||||
logic acc_st_disp;
|
||||
|
||||
// Unpack the accelerator response
|
||||
assign acc_trans_id_o = acc_resp_i.trans_id;
|
||||
assign acc_result_o = acc_resp_i.result;
|
||||
assign acc_valid_o = acc_resp_i.resp_valid;
|
||||
assign acc_exception_o = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
|
||||
assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
|
||||
assign acc_fflags_o = acc_resp_i.fflags;
|
||||
// Always ready to receive responses
|
||||
assign acc_req_o.resp_ready = 1'b1;
|
||||
|
||||
// Signal dispatched load/store to issue stage
|
||||
assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
|
||||
assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
|
||||
|
||||
// Cache invalidation
|
||||
assign inval_valid_o = acc_resp_i.inval_valid;
|
||||
assign inval_addr_o = acc_resp_i.inval_addr;
|
||||
|
||||
/**************************
|
||||
* Accelerator commit *
|
||||
**************************/
|
||||
|
||||
// Instruction can be issued to the (in-order) back-end if
|
||||
// it reached the top of the scoreboard and it hasn't been
|
||||
// issued yet
|
||||
always_comb begin : accelerator_commit
  // acc_commit is raised when the oldest scoreboard entry whose valid flag is
  // still clear is an accelerator instruction. Only the first two commit
  // ports are inspected:
  //   - port 0 is not valid and belongs to ACCEL, or
  //   - port 0 is valid, port 1 is not valid and belongs to ACCEL.
  acc_commit = 1'b0;
  if ((!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) ||
      (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)) begin
    acc_commit = 1'b1;
  end
end
|
||||
|
||||
// Dirty the V state if we are committing anything related to the vector accelerator
|
||||
always_comb begin : dirty_v_state
  // OR-reduce over all commit ports: the flag is set as soon as any port
  // acknowledges the commit of an accelerator instruction.
  dirty_v_state_o = 1'b0;
  for (int port = 0; port < CVA6Cfg.NrCommitPorts; port++) begin
    dirty_v_state_o = dirty_v_state_o | (commit_ack_i[port] & (commit_instr_i[port].fu == ACCEL));
  end
end
|
||||
|
||||
assign acc_commit_trans_id = !commit_instr_i[0].valid ? commit_instr_i[0].trans_id
|
||||
: commit_instr_i[1].trans_id;
|
||||
|
||||
/**************************
|
||||
* Accelerator barriers *
|
||||
**************************/
|
||||
|
||||
// On a store barrier (i.e. any barrier that requires preceding stores to complete
|
||||
// before continuing execution), halt execution while there are pending stores in
|
||||
// the accelerator pipeline.
|
||||
logic wait_acc_store_d, wait_acc_store_q;
|
||||
`FF(wait_acc_store_q, wait_acc_store_d, '0)
|
||||
|
||||
// Set on store barrier. Clear when no store is pending.
|
||||
assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending;
|
||||
assign ctrl_halt_o = wait_acc_store_q;
|
||||
|
||||
/**************************
|
||||
* Load/Store tracking *
|
||||
**************************/
|
||||
|
||||
// Loads
|
||||
logic acc_spec_loads_overflow;
|
||||
logic [2:0] acc_spec_loads_pending;
|
||||
logic acc_disp_loads_overflow;
|
||||
logic [2:0] acc_disp_loads_pending;
|
||||
|
||||
assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0);
|
||||
|
||||
// Count speculative loads. These can still be flushed.
|
||||
counter #(
|
||||
.WIDTH (3),
|
||||
.STICKY_OVERFLOW(0)
|
||||
) i_acc_spec_loads (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clear_i (flush_ex_i),
|
||||
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),
|
||||
.load_i (1'b0),
|
||||
.down_i (acc_ld_disp),
|
||||
.d_i ('0),
|
||||
.q_o (acc_spec_loads_pending),
|
||||
.overflow_o(acc_spec_loads_overflow)
|
||||
);
|
||||
|
||||
// Count dispatched loads. These cannot be flushed anymore.
|
||||
counter #(
|
||||
.WIDTH (3),
|
||||
.STICKY_OVERFLOW(0)
|
||||
) i_acc_disp_loads (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clear_i (1'b0),
|
||||
.en_i (acc_ld_disp ^ acc_resp_i.load_complete),
|
||||
.load_i (1'b0),
|
||||
.down_i (acc_resp_i.load_complete),
|
||||
.d_i ('0),
|
||||
.q_o (acc_disp_loads_pending),
|
||||
.overflow_o(acc_disp_loads_overflow)
|
||||
);
|
||||
|
||||
acc_dispatcher_no_load_overflow :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
|
||||
else $error("[acc_dispatcher] Too many pending loads.");
|
||||
|
||||
// Stores
|
||||
logic acc_spec_stores_overflow;
|
||||
logic [2:0] acc_spec_stores_pending;
|
||||
logic acc_disp_stores_overflow;
|
||||
logic [2:0] acc_disp_stores_pending;
|
||||
|
||||
assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0);
|
||||
|
||||
// Count speculative stores. These can still be flushed.
|
||||
counter #(
|
||||
.WIDTH (3),
|
||||
.STICKY_OVERFLOW(0)
|
||||
) i_acc_spec_stores (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clear_i (flush_ex_i),
|
||||
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
|
||||
.load_i (1'b0),
|
||||
.down_i (acc_st_disp),
|
||||
.d_i ('0),
|
||||
.q_o (acc_spec_stores_pending),
|
||||
.overflow_o(acc_spec_stores_overflow)
|
||||
);
|
||||
|
||||
// Count dispatched stores. These cannot be flushed anymore.
|
||||
counter #(
|
||||
.WIDTH (3),
|
||||
.STICKY_OVERFLOW(0)
|
||||
) i_acc_disp_stores (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.clear_i (1'b0),
|
||||
.en_i (acc_st_disp ^ acc_resp_i.store_complete),
|
||||
.load_i (1'b0),
|
||||
.down_i (acc_resp_i.store_complete),
|
||||
.d_i ('0),
|
||||
.q_o (acc_disp_stores_pending),
|
||||
.overflow_o(acc_disp_stores_overflow)
|
||||
);
|
||||
|
||||
acc_dispatcher_no_store_overflow :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
|
||||
else $error("[acc_dispatcher] Too many pending stores.");
|
||||
|
||||
/**************************
|
||||
* Tie Off Unused Signals *
|
||||
**************************/
|
||||
|
||||
assign acc_stall_st_pending_o = 1'b0;
|
||||
assign flush_pipeline_o = 1'b0;
|
||||
assign acc_dcache_req_ports_o = '0;
|
||||
|
||||
endmodule : acc_dispatcher
|
||||
|
|
@ -0,0 +1,359 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Matthias Baer <baermatt@student.ethz.ch>
|
||||
// Author: Igor Loi <igor.loi@unibo.it>
|
||||
// Author: Andreas Traber <atraber@student.ethz.ch>
|
||||
// Author: Lukas Mueller <lukasmue@student.ethz.ch>
|
||||
// Author: Florian Zaruba <zaruabf@iis.ee.ethz.ch>
|
||||
//
|
||||
// Date: 19.03.2017
|
||||
// Description: Ariane ALU based on RI5CY's ALU
|
||||
|
||||
|
||||
module alu
|
||||
import ariane_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input fu_data_t fu_data_i,
|
||||
output riscv::xlen_t result_o,
|
||||
output logic alu_branch_res_o
|
||||
);
|
||||
|
||||
riscv::xlen_t operand_a_rev;
|
||||
logic [ 31:0] operand_a_rev32;
|
||||
logic [ riscv::XLEN:0] operand_b_neg;
|
||||
logic [riscv::XLEN+1:0] adder_result_ext_o;
|
||||
logic less; // handles both signed and unsigned forms
|
||||
logic [ 31:0] rolw; // Rotate Left Word
|
||||
logic [ 31:0] rorw; // Rotate Right Word
|
||||
logic [31:0] orcbw, rev8w;
|
||||
logic [ $clog2(riscv::XLEN) : 0] cpop; // Count Population
|
||||
logic [$clog2(riscv::XLEN)-1 : 0] lz_tz_count; // Count Leading Zeros
|
||||
logic [ 4:0] lz_tz_wcount; // Count Leading Zeros Word
|
||||
logic lz_tz_empty, lz_tz_wempty;
|
||||
riscv::xlen_t orcbw_result, rev8w_result;
|
||||
|
||||
// bit reverse operand_a for left shifts and bit counting
|
||||
generate
|
||||
genvar k;
|
||||
for (k = 0; k < riscv::XLEN; k++)
|
||||
assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k];
|
||||
|
||||
for (k = 0; k < 32; k++) assign operand_a_rev32[k] = fu_data_i.operand_a[31-k];
|
||||
endgenerate
|
||||
|
||||
// ------
|
||||
// Adder
|
||||
// ------
|
||||
logic adder_op_b_negate;
|
||||
logic adder_z_flag;
|
||||
logic [riscv::XLEN:0] adder_in_a, adder_in_b;
|
||||
riscv::xlen_t adder_result;
|
||||
logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx;
|
||||
|
||||
always_comb begin
  // Decide whether operand b is bitwise inverted before it is used.
  // The inverted value (operand_b_neg) feeds the adder for the subtract and
  // equality operations and the logic unit for ANDN / ORN / XNOR.
  unique case (fu_data_i.operation)
    EQ, NE, SUB, SUBW, ANDN, ORN, XNOR: adder_op_b_negate = 1'b1;
    default: adder_op_b_negate = 1'b0;
  endcase
end
|
||||
|
||||
// Pre-process operand a for the bit-manipulation operations. The result feeds
// the adder (adder_in_a) as well as the popcount and leading-zero counters.
// Without RVB, or for any operation not listed below, operand a passes through.
always_comb begin
|
||||
// Default: unmodified operand a
operand_a_bitmanip = fu_data_i.operand_a;
|
||||
|
||||
if (CVA6Cfg.RVB) begin
|
||||
// Word-sized variants, only present on 64-bit configurations
if (riscv::IS_XLEN64) begin
|
||||
unique case (fu_data_i.operation)
|
||||
// Shift-and-add on the low word of operand a (upper bits are not used)
SH1ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1;
|
||||
SH2ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2;
|
||||
SH3ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3;
|
||||
// Trailing zeros of the word: use the bit-reversed low word
CTZW: operand_a_bitmanip = operand_a_rev32;
|
||||
// Only the low word of operand a takes part in these operations
ADDUW, CPOPW, CLZW: operand_a_bitmanip = fu_data_i.operand_a[31:0];
|
||||
default: ;
|
||||
endcase
|
||||
end
|
||||
// Full-width variants
unique case (fu_data_i.operation)
|
||||
// Shift-and-add: pre-shift operand a by 1, 2 or 3 bits
SH1ADD: operand_a_bitmanip = fu_data_i.operand_a << 1;
|
||||
SH2ADD: operand_a_bitmanip = fu_data_i.operand_a << 2;
|
||||
SH3ADD: operand_a_bitmanip = fu_data_i.operand_a << 3;
|
||||
// Trailing zeros: use the bit-reversed operand
CTZ: operand_a_bitmanip = operand_a_rev;
|
||||
default: ;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// prepare operand a
|
||||
assign adder_in_a = {operand_a_bitmanip, 1'b1};
|
||||
|
||||
// prepare operand b
|
||||
assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN + 1{adder_op_b_negate}};
|
||||
assign adder_in_b = operand_b_neg;
|
||||
|
||||
// actual adder
|
||||
assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
|
||||
assign adder_result = adder_result_ext_o[riscv::XLEN:1];
|
||||
assign adder_z_flag = ~|adder_result;
|
||||
|
||||
// get the right branch comparison result
|
||||
always_comb begin : branch_resolve
  // Translate the adder zero flag and the comparator output into the outcome
  // of the branch comparison.
  case (fu_data_i.operation)
    // (in)equality is read off the zero flag of a - b
    EQ: alu_branch_res_o = adder_z_flag;
    NE: alu_branch_res_o = ~adder_z_flag;
    // ordering comes from the shared signed/unsigned comparator
    LTS, LTU: alu_branch_res_o = less;
    GES, GEU: alu_branch_res_o = ~less;
    // every other operation reports the comparison as true
    default: alu_branch_res_o = 1'b1;
  endcase
end
|
||||
|
||||
// ---------
|
||||
// Shifts
|
||||
// ---------
|
||||
|
||||
// TODO: this can probably be optimized significantly
|
||||
logic shift_left; // should we shift left
|
||||
logic shift_arithmetic;
|
||||
|
||||
riscv::xlen_t shift_amt; // amount of shift, to the right
|
||||
riscv::xlen_t shift_op_a; // input of the shifter
|
||||
logic [ 31:0] shift_op_a32; // input to the 32 bit shift operation
|
||||
|
||||
riscv::xlen_t shift_result;
|
||||
logic [ 31:0] shift_result32;
|
||||
|
||||
logic [riscv::XLEN:0] shift_right_result;
|
||||
logic [ 32:0] shift_right_result32;
|
||||
|
||||
riscv::xlen_t shift_left_result;
|
||||
logic [ 31:0] shift_left_result32;
|
||||
|
||||
assign shift_amt = fu_data_i.operand_b;
|
||||
|
||||
assign shift_left = (fu_data_i.operation == SLL) | (fu_data_i.operation == SLLW);
|
||||
|
||||
assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW);
|
||||
|
||||
// right shifts, we let the synthesizer optimize this
|
||||
logic [riscv::XLEN:0] shift_op_a_64;
|
||||
logic [32:0] shift_op_a_32;
|
||||
|
||||
// choose the bit reversed or the normal input for shift operand a
|
||||
assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a;
|
||||
assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
|
||||
|
||||
assign shift_op_a_64 = {shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a};
|
||||
assign shift_op_a_32 = {shift_arithmetic & shift_op_a[31], shift_op_a32};
|
||||
|
||||
assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]);
|
||||
|
||||
assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]);
|
||||
// bit reverse the shift_right_result for left shifts
|
||||
genvar j;
|
||||
generate
|
||||
for (j = 0; j < riscv::XLEN; j++)
|
||||
assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j];
|
||||
|
||||
for (j = 0; j < 32; j++) assign shift_left_result32[j] = shift_right_result32[31-j];
|
||||
|
||||
endgenerate
|
||||
|
||||
assign shift_result = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0];
|
||||
assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0];
|
||||
|
||||
// ------------
|
||||
// Comparisons
|
||||
// ------------
|
||||
|
||||
always_comb begin
  logic sgn;

  // The signed operations compare sign-extended operands; everything else is
  // treated as unsigned.
  case (fu_data_i.operation)
    SLTS, LTS, GES, MAX, MIN: sgn = 1'b1;
    default: sgn = 1'b0;
  endcase

  // Both operands are widened by one bit: the extra bit replicates the sign for
  // signed compares and is zero otherwise, so one signed comparator covers
  // both cases.
  less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) <
          $signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b}));
end
|
||||
|
||||
if (CVA6Cfg.RVB) begin : gen_bitmanip
  // Population count over the pre-processed operand a. The word variant is
  // served by the same instance because operand_a_bitmanip then carries only
  // the low word.
  popcount #(.INPUT_WIDTH(riscv::XLEN)) i_cpop_count (
    .popcount_o(cpop),
    .data_i    (operand_a_bitmanip)
  );

  // Full-width zero counter. It serves both the leading and the trailing
  // count: for trailing zeros operand_a_bitmanip holds the bit-reversed
  // operand. NOTE(review): MODE(1) is presumably lzc's leading-zero mode -
  // confirm against the lzc implementation.
  lzc #(.MODE(1), .WIDTH(riscv::XLEN)) i_clz_64b (
    .in_i   (operand_a_bitmanip),
    .empty_o(lz_tz_empty),
    .cnt_o  (lz_tz_count)
  );

  if (riscv::IS_XLEN64) begin
    // Word-sized zero counter for the *W variants, working on the low word.
    lzc #(.MODE(1), .WIDTH(32)) i_clz_32b (
      .in_i   (operand_a_bitmanip[31:0]),
      .empty_o(lz_tz_wempty),
      .cnt_o  (lz_tz_wcount)
    );
  end
end
|
||||
|
||||
if (CVA6Cfg.RVB) begin : gen_orcbw_rev8w_results
  // Low word, one byte lane at a time:
  //   orcbw - every bit of a lane is the OR of that lane of operand a
  //   rev8w - lane b takes lane (3 - b) of operand a (byte order reversed)
  for (genvar b = 0; b < 4; b++) begin : gen_word_lanes
    assign orcbw[8*b+:8] = {8{|fu_data_i.operand_a[8*b+:8]}};
    assign rev8w[8*b+:8] = fu_data_i.operand_a[8*(3-b)+:8];
  end

  if (riscv::IS_XLEN64) begin : gen_64b
    // OR-combine: the low half is the word result, the upper four lanes are
    // computed the same way from bits 63:32 of operand a.
    assign orcbw_result[31:0] = orcbw;
    // Byte reverse: the reversed low word moves to the top half, the upper
    // four lanes of operand a land reversed in the bottom half.
    assign rev8w_result[63:32] = rev8w;
    for (genvar b = 0; b < 4; b++) begin : gen_upper_lanes
      assign orcbw_result[32+8*b+:8] = {8{|fu_data_i.operand_a[32+8*b+:8]}};
      assign rev8w_result[8*b+:8]    = fu_data_i.operand_a[32+8*(3-b)+:8];
    end
  end else begin : gen_32b
    // On 32-bit configurations the word results are the full results.
    assign orcbw_result = orcbw;
    assign rev8w_result = rev8w;
  end
end
|
||||
|
||||
// -----------
|
||||
// Result MUX
|
||||
// -----------
|
||||
always_comb begin
  // Value reported for operations that none of the selectors below match.
  result_o = '0;

  // The selectors are evaluated in sequence and a later match overrides an
  // earlier one, so their order must be kept.

  // Word operations that only exist on 64-bit configurations
  if (riscv::IS_XLEN64) begin
    unique case (fu_data_i.operation)
      // 32-bit add/sub: keep the low word of the sum and sign-extend it
      ADDW, SUBW:
      result_o = {{riscv::XLEN - 32{adder_result[31]}}, adder_result[31:0]};
      // Shift-and-add on the low word: operand a was pre-processed before the adder
      SH1ADDUW, SH2ADDUW, SH3ADDUW:
      result_o = adder_result;
      // 32-bit shifts: sign-extend the output of the word shifter
      SLLW, SRLW, SRAW:
      result_o = {{riscv::XLEN - 32{shift_result32[31]}}, shift_result32[31:0]};
      default: ;
    endcase
  end

  // Base integer operations
  unique case (fu_data_i.operation)
    // Logic unit; operand_b_neg already carries the inversion for ANDN/ORN/XNOR
    ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1];
    ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1];
    XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1];
    // Adder
    ADD, SUB, ADDUW, SH1ADD, SH2ADD, SH3ADD: result_o = adder_result;
    // Shifter: full-width shifter on 64-bit configurations, word shifter otherwise
    SLL, SRL, SRA: result_o = (riscv::IS_XLEN64) ? shift_result : shift_result32;
    // Set-less-than: zero-extend the comparator bit
    SLTS, SLTU: result_o = {{riscv::XLEN - 1{1'b0}}, less};
    default: ;  // nothing to do, keeps the unique case complete
  endcase

  if (CVA6Cfg.RVB) begin
    // One-hot mask of the bit addressed by the low bits of operand b
    // (consumed by the single-bit instructions below)
    bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN - 1));
    // Word rotates (rolw / rorw / roriw) of the low word of operand a
    rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0])
         | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
    rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0])
         | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0]));

    // Word variants, 64-bit configurations only
    if (riscv::IS_XLEN64) begin
      unique case (fu_data_i.operation)
        // Zero count of the word; an all-zero word reports 32
        CLZW, CTZW:
        result_o = (lz_tz_wempty) ? 32 : {{riscv::XLEN - 5{1'b0}}, lz_tz_wcount};
        // Rotated word, sign-extended
        ROLW: result_o = {{riscv::XLEN - 32{rolw[31]}}, rolw};
        RORW, RORIW: result_o = {{riscv::XLEN - 32{rorw[31]}}, rorw};
        default: ;
      endcase
    end

    unique case (fu_data_i.operation)
      // Integer maximum / minimum; signedness is already folded into `less`
      MAX, MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
      MIN, MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;

      // Single-bit clear / extract / invert / set
      BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx;
      BEXT, BEXTI: result_o = {{riscv::XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)};
      BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx;
      BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx;

      // Leading / trailing zero count. When the counter flags an all-zero
      // input the count is incremented by one.
      // NOTE(review): presumably the counter reports XLEN-1 in that case so
      // the sum is XLEN - confirm against the lzc implementation.
      CLZ, CTZ:
      result_o = (lz_tz_empty) ? ({{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count} + 1) :
          {{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count};

      // Population count, zero-extended
      CPOP, CPOPW: result_o = {{(riscv::XLEN - ($clog2(riscv::XLEN) + 1)) {1'b0}}, cpop};

      // Sign / zero extension of the low byte or half-word
      SEXTB: result_o = {{riscv::XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]};
      SEXTH: result_o = {{riscv::XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]};
      ZEXTH: result_o = {{riscv::XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]};

      // Full-width rotates; the rotate amount is 6 bits wide on 64-bit
      // configurations and 5 bits wide otherwise
      ROL:
      result_o = (riscv::IS_XLEN64)
          ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0])))
          : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0])));

      ROR, RORI:
      result_o = (riscv::IS_XLEN64)
          ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0])))
          : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0])));

      // Byte-wise OR-combine and byte reversal
      ORCB: result_o = orcbw_result;
      REV8: result_o = rev8w_result;

      // Left shift of the zero-extended low word (64-bit configurations only)
      default:
      if (fu_data_i.operation == SLLIUW && riscv::IS_XLEN64)
        result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0];
    endcase
  end

  if (CVA6Cfg.ZiCondExtEn) begin
    unique case (fu_data_i.operation)
      // Conditional zero: the result is zero when rs2 is zero, rs1 otherwise
      CZERO_EQZ: result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0;
      // Conditional zero: the result is zero when rs2 is nonzero, rs1 otherwise
      CZERO_NEZ: result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a;
      default: ;  // nothing to do, keeps the unique case complete
    endcase
  end
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 20.09.2018
|
||||
// Description: Buffers AMO requests
|
||||
// This unit buffers an atomic memory operation for the cache subsystem.
|
||||
// Furthermore it handles interfacing with the commit stage
|
||||
|
||||
// amo_buffer: holds one pending atomic memory operation (AMO) in a depth-1
// FIFO and presents it to the cache subsystem once the commit stage has
// reached the AMO and no store is pending any more.
module amo_buffer #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic                   clk_i,               // Clock
    input  logic                   rst_ni,              // Asynchronous reset active low
    input  logic                   flush_i,             // pipeline flush

    input  logic                   valid_i,             // AMO is valid (pushes into the buffer)
    output logic                   ready_o,             // AMO unit is ready (driven by the FIFO's empty_o)
    input  ariane_pkg::amo_t       amo_op_i,            // AMO operation
    input  logic [riscv::PLEN-1:0] paddr_i,             // physical address of the AMO placed in the buffer
    input  riscv::xlen_t           data_i,              // data which is placed in the buffer
    input  logic [            1:0] data_size_i,         // type of request we are making (e.g.: bytes to write)
    // D$
    output ariane_pkg::amo_req_t   amo_req_o,           // request to cache subsystem
    input  ariane_pkg::amo_resp_t  amo_resp_i,          // response from cache subsystem
    // Auxiliary signals
    input  logic                   amo_valid_commit_i,  // we have a valid AMO in the commit stage
    input  logic                   no_st_pending_i      // there is currently no store pending anymore
);

  // Everything that is remembered about the buffered AMO.
  typedef struct packed {
    ariane_pkg::amo_t       op;
    logic [riscv::PLEN-1:0] paddr;
    riscv::xlen_t           data;
    logic [1:0]             size;
  } amo_op_t;

  amo_op_t amo_data_in, amo_data_out;

  logic flush_amo_buffer;
  logic amo_valid;  // FIFO full flag, i.e. the single slot is occupied

  // Pack the incoming operation into one FIFO word.
  assign amo_data_in = '{op: amo_op_i, paddr: paddr_i, data: data_i, size: data_size_i};

  // Only flush while the AMO is not being committed: once it sits in the
  // commit stage it is no longer speculative and must survive the flush.
  assign flush_amo_buffer = flush_i & ~amo_valid_commit_i;

  // Forward the buffered operation to the cache. Address and data are
  // zero-extended to the 64-bit operand fields of the request.
  assign amo_req_o.amo_op = amo_data_out.op;
  assign amo_req_o.size = amo_data_out.size;
  assign amo_req_o.operand_a = {{64 - riscv::PLEN{1'b0}}, amo_data_out.paddr};
  assign amo_req_o.operand_b = {{64 - riscv::XLEN{1'b0}}, amo_data_out.data};

  // Raise the request as soon as all stores have drained, the AMO is in the
  // commit stage and the buffer actually holds an operation.
  assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid;

  // Depth-1 FIFO used as the storage element; it is popped by the cache's
  // acknowledge.
  fifo_v3 #(
      .DEPTH(1),
      .dtype(amo_op_t)
  ) i_amo_fifo (
      .clk_i     (clk_i),
      .rst_ni    (rst_ni),
      .flush_i   (flush_amo_buffer),
      .testmode_i(1'b0),
      .full_o    (amo_valid),
      .empty_o   (ready_o),
      .usage_o   (),                  // left open
      .data_i    (amo_data_in),
      .push_i    (valid_i),
      .data_o    (amo_data_out),
      .pop_i     (amo_resp_i.ack)
  );

endmodule
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Engineer: Francesco Conti - f.conti@unibo.it
|
||||
//
|
||||
// Additional contributions by:
|
||||
// Markus Wegmann - markus.wegmann@technokrat.ch
|
||||
//
|
||||
// Design Name: RISC-V register file
|
||||
// Project Name: zero-riscy
|
||||
// Language: SystemVerilog
|
||||
//
|
||||
// Description: Register file with 32 registers, each DATA_WIDTH bits wide.
|
||||
// Register 0 is fixed to 0 when ZERO_REG_ZERO is set. This register file is based on
|
||||
// flip flops.
|
||||
//
|
||||
|
||||
// ariane_regfile: flip-flop based register file with 2**5 = 32 entries of
// DATA_WIDTH bits, NR_READ_PORTS combinational read ports and
// CVA6Cfg.NrCommitPorts synchronous write ports. When ZERO_REG_ZERO is set,
// entry 0 is overwritten with zero on every clock edge.
module ariane_regfile #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg       = config_pkg::cva6_cfg_empty,
    parameter int unsigned           DATA_WIDTH    = 32,
    parameter int unsigned           NR_READ_PORTS = 2,
    parameter bit                    ZERO_REG_ZERO = 0
) (
    // clock and reset
    input  logic                                             clk_i,
    input  logic                                             rst_ni,
    // disable clock gates for testing (not used inside this module)
    input  logic                                             test_en_i,
    // read port
    input  logic [        NR_READ_PORTS-1:0][           4:0] raddr_i,
    output logic [        NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
    // write port
    input  logic [CVA6Cfg.NrCommitPorts-1:0][           4:0] waddr_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0]                 we_i
);

  localparam ADDR_WIDTH = 5;
  localparam NUM_WORDS = 2 ** ADDR_WIDTH;

  // register storage and the per-port one-hot write enables
  logic [            NUM_WORDS-1:0][DATA_WIDTH-1:0] mem;
  logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec;

  // Decode each write address into a one-hot enable vector: the bit that
  // matches the port's write address takes the port's write enable, all
  // other bits stay low.
  always_comb begin : we_decoder
    for (int unsigned port = 0; port < CVA6Cfg.NrCommitPorts; port++) begin
      we_dec[port] = '0;
      for (int unsigned word = 0; word < NUM_WORDS; word++) begin
        if (waddr_i[port] == word) we_dec[port][word] = we_i[port];
      end
    end
  end

  // Write process. All NUM_WORDS entries (including entry 0) are visited;
  // ports are processed in ascending order, so on a same-address collision
  // the highest-numbered port's data is the one that is stored. The
  // ZERO_REG_ZERO override is applied after the port's writes and therefore
  // wins over any write to entry 0.
  always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral
    if (~rst_ni) begin
      mem <= '{default: '0};
    end else begin
      for (int unsigned port = 0; port < CVA6Cfg.NrCommitPorts; port++) begin
        for (int unsigned word = 0; word < NUM_WORDS; word++) begin
          if (we_dec[port][word]) begin
            mem[word] <= wdata_i[port];
          end
        end
        if (ZERO_REG_ZERO) begin
          mem[0] <= '0;
        end
      end
    end
  end

  // Combinational read: every read port indexes the storage directly.
  for (genvar rd = 0; rd < NR_READ_PORTS; rd++) begin
    assign rdata_o[rd] = mem[raddr_i[rd]];
  end

endmodule
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Engineer: Francesco Conti - f.conti@unibo.it
|
||||
//
|
||||
// Additional contributions by:
|
||||
// Markus Wegmann - markus.wegmann@technokrat.ch
|
||||
// Noam Gallmann - gnoam@live.com
|
||||
// Felipe Lisboa Malaquias
|
||||
// Henry Suzukawa
|
||||
//
|
||||
//
|
||||
// Description: This register file is optimized for implementation on
|
||||
// FPGAs. The register file features one distributed RAM block per implemented
|
||||
// sync-write port, each with a parametrized number of async-read ports.
|
||||
// Read-accesses are multiplexed from the relevant block depending on which block
|
||||
// was last written to. For that purpose an additional array of registers is
|
||||
// maintained keeping track of write accesses.
|
||||
//
|
||||
|
||||
// ariane_regfile_fpga: register file built from one RAM block per write
// port. Each block is written synchronously by its own port and read
// asynchronously by every read port. A per-register selector
// (mem_block_sel_q) remembers which block received the most recent write to
// a given register, and the read mux picks the data from that block.
module ariane_regfile_fpga #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg       = config_pkg::cva6_cfg_empty,
    parameter int unsigned           DATA_WIDTH    = 32,
    parameter int unsigned           NR_READ_PORTS = 2,
    parameter bit                    ZERO_REG_ZERO = 0
) (
    // clock and reset
    input  logic                                             clk_i,
    input  logic                                             rst_ni,
    // disable clock gates for testing (not used inside this module)
    input  logic                                             test_en_i,
    // read port
    input  logic [        NR_READ_PORTS-1:0][           4:0] raddr_i,
    output logic [        NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
    // write port
    input  logic [CVA6Cfg.NrCommitPorts-1:0][           4:0] waddr_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0]                 we_i
);

  localparam ADDR_WIDTH = 5;
  localparam NUM_WORDS = 2 ** ADDR_WIDTH;
  // selector width; kept at 1 bit for a single write port so the vector is never empty
  localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts);

  // Distributed RAM usually supports one write port per block - duplicate for each write port.
  logic [            NUM_WORDS-1:0][        DATA_WIDTH-1:0] mem             [CVA6Cfg.NrCommitPorts];

  logic [CVA6Cfg.NrCommitPorts-1:0][         NUM_WORDS-1:0] we_dec;
  logic [            NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel;
  logic [            NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q;

  // write address decoder (for block selector)
  always_comb begin
    for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
      for (int unsigned i = 0; i < NUM_WORDS; i++) begin
        if (waddr_i[j] == i) begin
          we_dec[j][i] = we_i[j];
        end else begin
          we_dec[j][i] = 1'b0;
        end
      end
    end
  end

  // update block selector:
  // signal mem_block_sel records where the current valid value is stored.
  // if multiple ports try to write to the same address simultaneously, the port with the highest
  // index has priority.
  always_comb begin
    mem_block_sel = mem_block_sel_q;
    for (int i = 0; i < NUM_WORDS; i++) begin
      for (int j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
        if (we_dec[j][i] == 1'b1) begin
          mem_block_sel[i] = LOG_NR_WRITE_PORTS'(j);
        end
      end
    end
  end

  // block selector flops
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      mem_block_sel_q <= '0;
    end else begin
      mem_block_sel_q <= mem_block_sel;
    end
  end

  // distributed RAM blocks
  logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts];
  for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block
    always_ff @(posedge clk_i) begin
      // Every enabled write is stored, including writes to address 0: the
      // block selector above is updated for every decoded write, so the RAM
      // contents must follow it, and the ZERO_REG_ZERO case is handled in
      // the output mux below.
      // The guard used to read `we_i[j] && ~waddr_i[j] != 0`. Since `~`
      // binds tighter than `!=` and the operand is widened to the 32-bit
      // width of the literal before inversion, that comparison is always
      // true; it is dropped here so that a tool evaluating it at 5 bits
      // cannot suppress writes to register 31.
      if (we_i[j]) begin
        mem[j][waddr_i[j]] <= wdata_i[j];
      end
    end
    for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read
      assign mem_read[j][k] = mem[j][raddr_i[k]];
    end
  end

  // output MUX: pick the block that was written last for the addressed
  // register; with ZERO_REG_ZERO set, address 0 always reads as zero.
  logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr;
  for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port
    assign block_addr[k] = mem_block_sel_q[raddr_i[k]];
    assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k];
  end

  // random initialization of the memory to suppress assert warnings on Questa.
  initial begin
    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
      for (int j = 0; j < NUM_WORDS; j++) begin
        mem[i][j] = $random();
      end
    end
  end

endmodule
|
||||
|
|
@ -0,0 +1,310 @@
|
|||
/* Copyright 2018 ETH Zurich and University of Bologna.
|
||||
* Copyright and related rights are licensed under the Solderpad Hardware
|
||||
* License, Version 0.51 (the “License”); you may not use this file except in
|
||||
* compliance with the License. You may obtain a copy of the License at
|
||||
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
* or agreed to in writing, software, hardware and materials distributed under
|
||||
* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations under the License.
|
||||
*
|
||||
* File: axi_shim.sv
|
||||
* Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>
|
||||
* Florian Zaruba <zarubaf@iis.ee.ethz.ch>
|
||||
* Date: 1.8.2018
|
||||
*
|
||||
* Description: Manages communication with the AXI Bus. Note that this unit does not
|
||||
* buffer requests or register the signals.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
// axi_shim: thin adapter between a simple request/grant interface and an AXI
// master port. The read side is purely combinational pass-through; the write
// side uses a small FSM plus a beat counter to sequence the AW and W
// channels. Requests are not buffered: the request inputs drive the AXI
// payload fields directly.
module axi_shim #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    // data width in dwords, this is also the maximum burst length, must be >=2
    // (note: the elaboration-time assertion at the end of this module only checks >= 1)
    parameter int unsigned AxiNumWords = 4,
    parameter type axi_req_t = logic,
    parameter type axi_rsp_t = logic
) (
    input  logic                                                         clk_i,        // Clock
    input  logic                                                         rst_ni,       // Asynchronous reset active low
    // read channel
    // request
    input  logic                                                         rd_req_i,
    output logic                                                         rd_gnt_o,
    input  logic [CVA6Cfg.AxiAddrWidth-1:0]                              rd_addr_i,
    input  logic [$clog2(AxiNumWords)-1:0]                               rd_blen_i,    // axi convention: LEN-1
    input  logic [2:0]                                                   rd_size_i,
    input  logic [CVA6Cfg.AxiIdWidth-1:0]                                rd_id_i,      // use same ID for reads, or make sure you only have one outstanding read tx
    input  logic                                                         rd_lock_i,
    // read response (we have to unconditionally sink the response)
    input  logic                                                         rd_rdy_i,
    output logic                                                         rd_last_o,
    output logic                                                         rd_valid_o,
    output logic [CVA6Cfg.AxiDataWidth-1:0]                              rd_data_o,
    output logic [CVA6Cfg.AxiUserWidth-1:0]                              rd_user_o,
    output logic [CVA6Cfg.AxiIdWidth-1:0]                                rd_id_o,
    output logic                                                         rd_exokay_o,  // indicates whether exclusive tx succeeded
    // write channel
    input  logic                                                         wr_req_i,
    output logic                                                         wr_gnt_o,
    input  logic [CVA6Cfg.AxiAddrWidth-1:0]                              wr_addr_i,
    input  logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0]             wr_data_i,
    input  logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0]             wr_user_i,
    input  logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0]         wr_be_i,
    input  logic [$clog2(AxiNumWords)-1:0]                               wr_blen_i,    // axi convention: LEN-1
    input  logic [2:0]                                                   wr_size_i,
    input  logic [CVA6Cfg.AxiIdWidth-1:0]                                wr_id_i,
    input  logic                                                         wr_lock_i,
    input  logic [5:0]                                                   wr_atop_i,
    // write response
    input  logic                                                         wr_rdy_i,
    output logic                                                         wr_valid_o,
    output logic [CVA6Cfg.AxiIdWidth-1:0]                                wr_id_o,
    output logic                                                         wr_exokay_o,  // indicates whether exclusive tx succeeded
    // AXI port
    output axi_req_t                                                     axi_req_o,
    input  axi_rsp_t                                                     axi_resp_i
);
  // width of the beat counter; at least one bit even for a single-word configuration
  localparam AddrIndex = ($clog2(AxiNumWords) > 0) ? $clog2(AxiNumWords) : 1;

  ///////////////////////////////////////////////////////
  // write channel
  ///////////////////////////////////////////////////////

  enum logic [3:0] {
    IDLE,
    WAIT_AW_READY,
    WAIT_LAST_W_READY,
    WAIT_LAST_W_READY_AW_READY,
    WAIT_AW_READY_BURST
  }
      wr_state_q, wr_state_d;

  // AXI tx counter: index of the data beat currently presented on the W channel
  logic [AddrIndex-1:0] wr_cnt_d, wr_cnt_q;
  logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en;

  // a burst length of zero (LEN-1 convention) is a single-beat write
  assign wr_single_req = (wr_blen_i == 0);

  // AW channel payload: request inputs are passed through, the remaining
  // fields are tied to constants
  assign axi_req_o.aw.id = wr_id_i;
  assign axi_req_o.aw.addr = wr_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
  assign axi_req_o.aw.len = wr_blen_i;
  assign axi_req_o.aw.size = wr_size_i;
  assign axi_req_o.aw.burst = axi_pkg::BURST_INCR;  // Use BURST_INCR for AXI regular transaction
  assign axi_req_o.aw.lock = wr_lock_i;
  assign axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE;
  assign axi_req_o.aw.prot = 3'b0;
  assign axi_req_o.aw.qos = 4'b0;
  assign axi_req_o.aw.region = 4'b0;
  assign axi_req_o.aw.atop = wr_atop_i;
  assign axi_req_o.aw.user = '0;

  // W channel payload: the beat counter selects the word that is on the bus
  assign axi_req_o.w.data = wr_data_i[wr_cnt_q];
  assign axi_req_o.w.user = wr_user_i[wr_cnt_q];
  assign axi_req_o.w.strb = wr_be_i[wr_cnt_q];
  assign axi_req_o.w.last = wr_cnt_done;

  // B channel (write response) is passed straight through
  assign axi_req_o.b_ready = wr_rdy_i;
  assign wr_valid_o = axi_resp_i.b_valid;
  assign wr_id_o = axi_resp_i.b.id;
  assign wr_exokay_o = (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY);

  // tx counter: clear has priority over increment; the counter only ever
  // advances when burst writes are enabled in the configuration
  assign wr_cnt_done = (wr_cnt_q == wr_blen_i);
  assign wr_cnt_d =
      (wr_cnt_clr) ? '0 : (wr_cnt_en && CVA6Cfg.AxiBurstWriteEn) ? wr_cnt_q + 1 : wr_cnt_q;

  // Write FSM. The two-bit case selectors below are {aw_ready, w_ready}.
  always_comb begin : p_axi_write_fsm
    // defaults: hold state, nothing valid, no grant, counter untouched
    wr_state_d         = wr_state_q;

    axi_req_o.aw_valid = 1'b0;
    axi_req_o.w_valid  = 1'b0;
    wr_gnt_o           = 1'b0;

    wr_cnt_en          = 1'b0;
    wr_cnt_clr         = 1'b0;

    case (wr_state_q)
      ///////////////////////////////////
      IDLE: begin
        // we have an incoming write request: offer address and first data beat together
        if (wr_req_i) begin
          axi_req_o.aw_valid = 1'b1;
          axi_req_o.w_valid  = 1'b1;

          if (CVA6Cfg.AxiBurstWriteEn && !wr_single_req) begin
            // multi-beat request: advance to the next beat whenever one is accepted
            wr_cnt_en = axi_resp_i.w_ready;

            case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
              // address accepted (with or without the first beat): only data is left
              2'b11, 2'b10: wr_state_d = WAIT_LAST_W_READY;
              // first beat accepted but the address is still pending
              2'b01: wr_state_d = WAIT_LAST_W_READY_AW_READY;
              default: ;
            endcase
          end else if (wr_single_req) begin  // its a single write
            wr_cnt_clr = 1'b1;
            // single req can be granted here when both channels accept in this cycle
            wr_gnt_o   = axi_resp_i.aw_ready & axi_resp_i.w_ready;
            case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
              2'b01:   wr_state_d = WAIT_AW_READY;      // data accepted, address pending
              2'b10:   wr_state_d = WAIT_LAST_W_READY;  // address accepted, data pending
              default: wr_state_d = IDLE;
            endcase
          end
        end
      end
      ///////////////////////////////////
      // ~> from single write: data has been accepted, the address is still pending
      WAIT_AW_READY: begin
        axi_req_o.aw_valid = 1'b1;

        if (axi_resp_i.aw_ready) begin
          wr_state_d = IDLE;
          wr_gnt_o   = 1'b1;
        end
      end
      ///////////////////////////////////
      // ~> from write, the address has been accepted and data beats are outstanding
      WAIT_LAST_W_READY: begin
        axi_req_o.w_valid = 1'b1;

        if (CVA6Cfg.AxiBurstWriteEn && axi_resp_i.w_ready && !wr_cnt_done) begin
          // an intermediate beat was accepted, move on to the next one
          wr_cnt_en = 1'b1;
        end else if (wr_cnt_done && axi_resp_i.w_ready) begin
          // the last beat was accepted: the write is complete
          wr_state_d = IDLE;
          wr_cnt_clr = 1'b1;
          wr_gnt_o   = 1'b1;
        end
      end
      ///////////////////////////////////
      // remaining states are only meaningful when burst writes are enabled
      default: begin
        if (!CVA6Cfg.AxiBurstWriteEn) begin
          wr_state_d = IDLE;
        end else begin
          ///////////////////////////////////
          // ~> we need to wait for an aw_ready and there is at least one outstanding write
          if (wr_state_q == WAIT_LAST_W_READY_AW_READY) begin
            axi_req_o.w_valid  = 1'b1;
            axi_req_o.aw_valid = 1'b1;

            case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
              // a data beat was accepted, the address is still pending
              2'b01: begin
                if (wr_cnt_done) begin
                  // that was the last beat: only the address remains
                  wr_state_d = WAIT_AW_READY_BURST;
                  wr_cnt_clr = 1'b1;
                end else begin
                  // more beats to send: advance the beat counter and stay here
                  wr_cnt_en = 1'b1;
                end
              end
              // the address was accepted, data beats remain
              2'b10: wr_state_d = WAIT_LAST_W_READY;
              // both the address and a data beat were accepted
              2'b11: begin
                if (wr_cnt_done) begin
                  // we are finished
                  wr_state_d = IDLE;
                  wr_gnt_o   = 1'b1;
                  wr_cnt_clr = 1'b1;
                end else begin
                  // there are outstanding data beats
                  wr_state_d = WAIT_LAST_W_READY;
                  wr_cnt_en  = 1'b1;
                end
              end
              default: ;
            endcase
          end  ///////////////////////////////////
          // ~> all data has already been sent, we are only waiting for the aw_ready
          else if (wr_state_q == WAIT_AW_READY_BURST) begin
            axi_req_o.aw_valid = 1'b1;

            if (axi_resp_i.aw_ready) begin
              wr_state_d = IDLE;
              wr_gnt_o   = 1'b1;
            end
          end
        end
      end
    endcase
  end


  ///////////////////////////////////////////////////////
  // read channel
  ///////////////////////////////////////////////////////

  // AR channel payload
  // in case of a wrapping transfer we can simply begin at the address, if we want to request a cache-line
  // with an incremental transfer we need to output the corresponding base address of the cache line
  assign axi_req_o.ar.id = rd_id_i;
  assign axi_req_o.ar.addr = rd_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
  assign axi_req_o.ar.len = rd_blen_i;
  assign axi_req_o.ar.size = rd_size_i;
  assign axi_req_o.ar.burst = axi_pkg::BURST_INCR;  // Use BURST_INCR for AXI regular transaction
  assign axi_req_o.ar.lock = rd_lock_i;
  assign axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE;
  assign axi_req_o.ar.prot = 3'b0;
  assign axi_req_o.ar.qos = 4'b0;
  assign axi_req_o.ar.region = 4'b0;
  assign axi_req_o.ar.user = '0;

  // make the read request; it is granted in the cycle the AR channel accepts it
  assign axi_req_o.ar_valid = rd_req_i;
  assign rd_gnt_o = rd_req_i & axi_resp_i.ar_ready;

  // return path: R channel is passed straight through
  assign axi_req_o.r_ready = rd_rdy_i;
  assign rd_valid_o = axi_resp_i.r_valid;
  assign rd_last_o = axi_resp_i.r.last;
  assign rd_data_o = axi_resp_i.r.data;
  assign rd_id_o = axi_resp_i.r.id;
  assign rd_exokay_o = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY);
  // the user field is only forwarded when AXI user signalling is enabled
  if (ariane_pkg::AXI_USER_EN) begin
    assign rd_user_o = axi_resp_i.r.user;
  end else begin
    assign rd_user_o = '0;
  end


  // ----------------
  // Registers
  // ----------------
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
      // come out of reset idle with the beat counter cleared
      wr_state_q <= IDLE;
      wr_cnt_q   <= '0;
    end else begin
      wr_state_q <= wr_state_d;
      wr_cnt_q   <= wr_cnt_d;
    end
  end

  // ----------------
  // Assertions
  // ----------------

  //pragma translate_off
  initial begin
    assert (AxiNumWords >= 1)
    else $fatal(1, "[axi adapter] AxiNumWords must be >= 1");
    assert (CVA6Cfg.AxiIdWidth >= 2)
    else $fatal(1, "[axi adapter] AXI id width must be at least 2 bit wide");
  end
  //pragma translate_on

endmodule  // axi_shim
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 09.05.2017
|
||||
// Description: Branch target calculation and comparison
|
||||
|
||||
// branch_unit: computes the target of a control-flow instruction, compares
// the actual outcome with the front-end prediction and reports the
// resolution (including mispredicts) as well as an instruction-address-
// misaligned exception for taken jumps to a misaligned target.
module branch_unit #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic                             clk_i,
    input  logic                             rst_ni,
    input  logic                             debug_mode_i,
    input  ariane_pkg::fu_data_t             fu_data_i,
    input  logic [riscv::VLEN-1:0]           pc_i,                   // PC of instruction
    input  logic                             is_compressed_instr_i,
    input  logic                             fu_valid_i,             // any functional unit is valid, check that there is no accidental mis-predict
    input  logic                             branch_valid_i,
    input  logic                             branch_comp_res_i,      // branch comparison result from ALU
    output logic [riscv::VLEN-1:0]           branch_result_o,

    input  ariane_pkg::branchpredict_sbe_t   branch_predict_i,       // this is the address we predicted
    output ariane_pkg::bp_resolve_t          resolved_branch_o,      // this is the actual address we are targeting
    output logic                             resolve_branch_o,       // to ID to clear that we resolved the branch and we can
                                                                     // accept new entries to the scoreboard
    output ariane_pkg::exception_t           branch_exception_o      // branch exception out
);
  logic [riscv::VLEN-1:0] target_address;
  logic [riscv::VLEN-1:0] next_pc;

  // Resolve the branch and detect the various kinds of mis-predicts.
  always_comb begin : mispredict_handler
    // the jump base: for JALR it comes from the register operand, every other
    // control flow instruction is relative to the current PC
    automatic logic [riscv::VLEN-1:0] jump_base;
    automatic logic                   is_jalr;

    is_jalr = (fu_data_i.operation == ariane_pkg::JALR);
    // TODO(zarubaf): The ALU can be used to calculate the branch target
    jump_base = (is_jalr) ? fu_data_i.operand_a[riscv::VLEN-1:0] : pc_i;

    // defaults: nothing resolved, no mispredict, control-flow type as predicted
    target_address                   = {riscv::VLEN{1'b0}};
    resolve_branch_o                 = 1'b0;
    resolved_branch_o.target_address = {riscv::VLEN{1'b0}};
    resolved_branch_o.is_taken       = 1'b0;
    resolved_branch_o.valid          = branch_valid_i;
    resolved_branch_o.is_mispredict  = 1'b0;
    resolved_branch_o.cf_type        = branch_predict_i.cf;

    // fall-through PC: +2 for a compressed instruction, +4 otherwise
    // TODO(zarubaf): We already calculate this a couple of times, maybe re-use?
    next_pc = pc_i + ((is_compressed_instr_i) ? {{riscv::VLEN-2{1'b0}}, 2'h2} : {{riscv::VLEN-3{1'b0}}, 3'h4});

    // target address = jump base + immediate (signed addition)
    target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[riscv::VLEN-1:0]));
    // on a JALR we are supposed to reset the LSB to 0 (according to the specification)
    if (is_jalr) target_address[0] = 1'b0;

    // the value handed back as this unit's result is the fall-through PC
    branch_result_o      = next_pc;
    resolved_branch_o.pc = pc_i;

    // There are only two sources of mispredicts:
    // 1. Branches
    // 2. Jumps to register addresses
    if (branch_valid_i) begin
      // target address which goes to PC Gen: the computed target if taken,
      // otherwise the fall-through PC
      resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;
      resolved_branch_o.is_taken       = branch_comp_res_i;

      // check the outcome of the branch speculation
      if (ariane_pkg::op_is_branch(fu_data_i.operation)) begin
        // Report the resolution as `Branch`, this will update the BHT.
        resolved_branch_o.cf_type = ariane_pkg::Branch;
        // Mispredicted when the ALU comparison disagrees with whether a
        // branch was predicted.
        resolved_branch_o.is_mispredict = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch);
      end

      // JALR: mispredicted when nothing was predicted at all or when the
      // predicted address differs from the computed target
      if (is_jalr
          && (branch_predict_i.cf == ariane_pkg::NoCF || target_address != branch_predict_i.predict_address)) begin
        resolved_branch_o.is_mispredict = 1'b1;
        // update BTB only if this wasn't a return
        if (branch_predict_i.cf != ariane_pkg::Return)
          resolved_branch_o.cf_type = ariane_pkg::JumpR;
      end

      // to resolve the branch in ID
      resolve_branch_o = 1'b1;
    end
  end

  // Use the exception output to flag instruction fetch exceptions when the
  // target address is misaligned: bit 0 set, or bit 1 set while compressed
  // instructions (RVC) are not enabled.
  logic jump_taken;
  always_comb begin : exception_handling
    automatic logic is_cond_branch;

    is_cond_branch = ariane_pkg::op_is_branch(fu_data_i.operation);

    // A jump happens for an unconditional jump (JAL | JALR) or for a `taken` conditional branch
    jump_taken = !(is_cond_branch) || ((is_cond_branch) && branch_comp_res_i);

    // exception payload: cause is fixed, tval carries the PC extended to XLEN
    // by replicating its top bit
    branch_exception_o.cause = riscv::INSTR_ADDR_MISALIGNED;
    branch_exception_o.valid = 1'b0;
    branch_exception_o.tval  = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};

    // Only throw the misaligned exception for a `taken` conditional branch or
    // an unconditional jump
    if (branch_valid_i && (target_address[0] || (!CVA6Cfg.RVC && target_address[1])) && jump_taken) begin
      branch_exception_o.valid = 1'b1;
    end
  end
endmodule
|
||||
|
|
@ -0,0 +1,520 @@
|
|||
/* Copyright 2018 ETH Zurich and University of Bologna.
|
||||
* Copyright and related rights are licensed under the Solderpad Hardware
|
||||
* License, Version 0.51 (the “License”); you may not use this file except in
|
||||
* compliance with the License. You may obtain a copy of the License at
|
||||
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
* or agreed to in writing, software, hardware and materials distributed under
|
||||
* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations under the License.
|
||||
*
|
||||
* File: axi_adapter.sv
|
||||
* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
|
||||
* Date: 1.8.2018
|
||||
*
|
||||
* Description: Manages communication with the AXI Bus
|
||||
*/
|
||||
//import std_cache_pkg::*;
|
||||
|
||||
// axi_adapter: translates a simple request/grant port into AXI transactions on
// axi_req_o / axi_resp_i.
//
// A request (req_i) is a write when we_i is set and a read otherwise. With
// type_i == ariane_pkg::SINGLE_REQ exactly one beat is transferred; any other
// type transfers DATA_WIDTH bits as a burst of DATA_WIDTH / CVA6Cfg.AxiDataWidth
// beats. amo_i selects an atomic operation (only allowed for single requests).
//
// Parameters:
//   CVA6Cfg               - core configuration; AxiDataWidth, AxiIdWidth and
//                           MaxOutstandingStores are read from it
//   DATA_WIDTH            - width in bits of wdata_i / rdata_o
//   CRITICAL_WORD_FIRST   - when set, burst reads use BURST_WRAP and start at
//                           addr_i; otherwise BURST_INCR from the line base
//   CACHELINE_BYTE_OFFSET - number of low address bits cleared to form the line
//                           base address for non-wrapping burst reads
//   axi_req_t, axi_rsp_t  - AXI request / response struct types
//
// Handshake:
//   gnt_o   - asserted in the cycle the request has been fully handed to AXI
//   valid_o - asserted when a response (read data or write response) is ready;
//             id_o and rdata_o are meaningful in that cycle
//   critical_word_o / critical_word_valid_o - forward the beat that contains the
//             requested word of a burst read as soon as it arrives
module axi_adapter #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned DATA_WIDTH = 256,
    parameter logic CRITICAL_WORD_FIRST = 0,  // the AXI subsystem needs to support wrapping reads for this feature
    parameter int unsigned CACHELINE_BYTE_OFFSET = 8,
    parameter type axi_req_t = logic,
    parameter type axi_rsp_t = logic
) (
    input logic clk_i,  // Clock
    input logic rst_ni,  // Asynchronous reset active low

    input logic req_i,
    input ariane_pkg::ad_req_t type_i,
    input ariane_pkg::amo_t amo_i,
    output logic gnt_o,
    input logic [riscv::XLEN-1:0] addr_i,
    input logic we_i,
    input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i,
    input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i,
    input logic [1:0] size_i,
    input logic [CVA6Cfg.AxiIdWidth-1:0] id_i,
    // read port
    output logic valid_o,
    output logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] rdata_o,
    output logic [CVA6Cfg.AxiIdWidth-1:0] id_o,
    // critical word - read port
    output logic [CVA6Cfg.AxiDataWidth-1:0] critical_word_o,
    output logic critical_word_valid_o,
    // AXI port
    output axi_req_t axi_req_o,
    input axi_rsp_t axi_resp_i
);
  // Number of beats of a full-width burst minus one (this is also the value
  // driven on aw.len / ar.len for bursts).
  localparam BURST_SIZE = (DATA_WIDTH / CVA6Cfg.AxiDataWidth) - 1;
  // Width of the beat counter / beat index, never less than one bit.
  localparam ADDR_INDEX = ($clog2(
      DATA_WIDTH / CVA6Cfg.AxiDataWidth
  ) > 0) ? $clog2(
      DATA_WIDTH / CVA6Cfg.AxiDataWidth
  ) : 1;
  // Upper bound of the outstanding-write counter and the width needed to hold it.
  localparam MAX_OUTSTANDING_AW = CVA6Cfg.MaxOutstandingStores;
  localparam MAX_OUTSTANDING_AW_CNT_WIDTH = $clog2(
      MAX_OUTSTANDING_AW + 1
  ) > 0 ? $clog2(
      MAX_OUTSTANDING_AW + 1
  ) : 1;

  typedef logic [MAX_OUTSTANDING_AW_CNT_WIDTH-1:0] outstanding_aw_cnt_t;

  enum logic [3:0] {
    IDLE,
    WAIT_B_VALID,
    WAIT_AW_READY,
    WAIT_LAST_W_READY,
    WAIT_LAST_W_READY_AW_READY,
    WAIT_AW_READY_BURST,
    WAIT_R_VALID,
    WAIT_R_VALID_MULTIPLE,
    COMPLETE_READ,
    WAIT_AMO_R_VALID
  }
      state_q, state_d;

  // counter for AXI transfers (beats still to go)
  logic [ADDR_INDEX-1:0] cnt_d, cnt_q;
  // assembly buffer for the beats of a read
  logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0]
      cache_line_d, cache_line_q;
  // save the address for a read, as we allow for non-cacheline aligned accesses
  // NOTE(review): only ADDR_INDEX bits are ever written into this register (see
  // the IDLE read path), so the declared width looks wider than needed - confirm
  // before narrowing.
  logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0] addr_offset_d, addr_offset_q;
  logic [CVA6Cfg.AxiIdWidth-1:0] id_d, id_q;
  // position inside cache_line_d of the beat currently being received
  logic [ADDR_INDEX-1:0] index;
  // save the atomic operation and size
  ariane_pkg::amo_t amo_d, amo_q;
  logic [1:0] size_d, size_q;
  // outstanding write transactions counter
  outstanding_aw_cnt_t outstanding_aw_cnt_q, outstanding_aw_cnt_d;
  logic any_outstanding_aw;

  assign any_outstanding_aw = outstanding_aw_cnt_q != '0;

  // Main state machine. All outputs and next-state values get a default first;
  // the case statement then overrides what the current state needs. The
  // B-response bookkeeping after the case intentionally runs in every state.
  always_comb begin : axi_fsm
    // Default assignments
    axi_req_o.aw_valid = 1'b0;
    // Cast to AXI address width
    axi_req_o.aw.addr = addr_i;
    axi_req_o.aw.prot = 3'b0;
    axi_req_o.aw.region = 4'b0;
    axi_req_o.aw.len = 8'b0;
    axi_req_o.aw.size = {1'b0, size_i};  // 1, 2, 4 or 8 bytes
    axi_req_o.aw.burst = axi_pkg::BURST_INCR;  // Use BURST_INCR for AXI regular transaction
    axi_req_o.aw.lock = 1'b0;
    axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE;
    axi_req_o.aw.qos = 4'b0;
    axi_req_o.aw.id = id_i;
    axi_req_o.aw.atop = atop_from_amo(amo_i);
    axi_req_o.aw.user = '0;

    axi_req_o.ar_valid = 1'b0;
    // Cast to AXI address width
    axi_req_o.ar.addr = addr_i;
    // in case of a single request or wrapping transfer we can simply begin at the address, if we want to request a cache-line
    // with an incremental transfer we need to output the corresponding base address of the cache line
    if (!CRITICAL_WORD_FIRST && type_i != ariane_pkg::SINGLE_REQ) begin
      axi_req_o.ar.addr[CACHELINE_BYTE_OFFSET-1:0] = '0;
    end
    axi_req_o.ar.prot = 3'b0;
    axi_req_o.ar.region = 4'b0;
    axi_req_o.ar.len = 8'b0;
    axi_req_o.ar.size = {1'b0, size_i};  // 1, 2, 4 or 8 bytes
    axi_req_o.ar.burst = (CRITICAL_WORD_FIRST ? axi_pkg::BURST_WRAP : axi_pkg::BURST_INCR);  // wrapping transfer in case of a critical word first strategy
    axi_req_o.ar.lock = 1'b0;
    axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE;
    axi_req_o.ar.qos = 4'b0;
    axi_req_o.ar.id = id_i;
    axi_req_o.ar.user = '0;

    axi_req_o.w_valid = 1'b0;
    axi_req_o.w.data = wdata_i[0];
    axi_req_o.w.strb = be_i[0];
    axi_req_o.w.last = 1'b0;
    axi_req_o.w.user = '0;

    axi_req_o.b_ready = 1'b0;
    axi_req_o.r_ready = 1'b0;

    gnt_o = 1'b0;
    valid_o = 1'b0;
    id_o = axi_resp_i.r.id;

    critical_word_o = axi_resp_i.r.data;
    critical_word_valid_o = 1'b0;
    rdata_o = cache_line_q;

    state_d = state_q;
    cnt_d = cnt_q;
    cache_line_d = cache_line_q;
    addr_offset_d = addr_offset_q;
    id_d = id_q;
    amo_d = amo_q;
    size_d = size_q;
    index = '0;

    outstanding_aw_cnt_d = outstanding_aw_cnt_q;

    case (state_q)

      IDLE: begin
        cnt_d = '0;
        // we have an incoming request
        if (req_i) begin
          // is this a read or write?
          // write
          if (we_i) begin
            // multiple outstanding write transactions are only
            // allowed if they are guaranteed not to be reordered
            // i.e. same ID
            if (!any_outstanding_aw || ((id_i == id_q) && (amo_i == ariane_pkg::AMO_NONE))) begin
              // the data is valid
              axi_req_o.aw_valid = 1'b1;
              axi_req_o.w_valid = 1'b1;
              // store-conditional requires exclusive access
              axi_req_o.aw.lock = amo_i == ariane_pkg::AMO_SC;
              // its a single write
              if (type_i == ariane_pkg::SINGLE_REQ) begin
                // only a single write so the data is already the last one
                axi_req_o.w.last = 1'b1;
                // single req can be granted here
                gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
                // continue with whichever channel has not been accepted yet
                case ({
                  axi_resp_i.aw_ready, axi_resp_i.w_ready
                })
                  2'b11: state_d = WAIT_B_VALID;
                  2'b01: state_d = WAIT_AW_READY;
                  2'b10: state_d = WAIT_LAST_W_READY;
                  default: state_d = IDLE;
                endcase

                // remember id/amo/size once the address has been accepted
                if (axi_resp_i.aw_ready) begin
                  id_d = id_i;
                  amo_d = amo_i;
                  size_d = size_i;
                end

                // its a request for the whole cache line
              end else begin
                // bursts of AMOs unsupported
                // ($fatal takes the finish_number first, then the message)
                assert (amo_i == ariane_pkg::AMO_NONE)
                else $fatal(1, "Bursts of atomic operations are not supported");

                axi_req_o.aw.len = BURST_SIZE[7:0];  // beats of the burst minus one
                axi_req_o.w.data = wdata_i[0];
                axi_req_o.w.strb = be_i[0];

                // the first beat may already be accepted in this cycle
                if (axi_resp_i.w_ready) cnt_d = BURST_SIZE[ADDR_INDEX-1:0] - 1;
                else cnt_d = BURST_SIZE[ADDR_INDEX-1:0];

                case ({
                  axi_resp_i.aw_ready, axi_resp_i.w_ready
                })
                  2'b11: state_d = WAIT_LAST_W_READY;
                  2'b01: state_d = WAIT_LAST_W_READY_AW_READY;
                  2'b10: state_d = WAIT_LAST_W_READY;
                  default: ;
                endcase
              end
            end
            // read
          end else begin
            // only multiple outstanding write transactions are allowed
            if (!any_outstanding_aw) begin

              axi_req_o.ar_valid = 1'b1;
              // load-reserved requires exclusive access
              axi_req_o.ar.lock = amo_i == ariane_pkg::AMO_LR;

              gnt_o = axi_resp_i.ar_ready;
              if (type_i != ariane_pkg::SINGLE_REQ) begin
                // ($fatal takes the finish_number first, then the message)
                assert (amo_i == ariane_pkg::AMO_NONE)
                else $fatal(1, "Bursts of atomic operations are not supported");

                axi_req_o.ar.len = BURST_SIZE[7:0];
                cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
              end

              if (axi_resp_i.ar_ready) begin
                state_d = (type_i == ariane_pkg::SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE;
                // NOTE(review): the fixed [..+3:3] slice presumably assumes
                // 8-byte AXI beats - confirm for other AxiDataWidth values.
                addr_offset_d = addr_i[ADDR_INDEX-1+3:3];
              end
            end
          end
        end
      end

      // ~> from single write
      WAIT_AW_READY: begin
        axi_req_o.aw_valid = 1'b1;

        if (axi_resp_i.aw_ready) begin
          gnt_o = 1'b1;
          state_d = WAIT_B_VALID;
          id_d = id_i;
          amo_d = amo_i;
          size_d = size_i;
        end
      end

      // ~> we need to wait for an aw_ready and there is at least one outstanding write
      WAIT_LAST_W_READY_AW_READY: begin
        axi_req_o.w_valid = 1'b1;
        axi_req_o.w.last = (cnt_q == '0);
        if (type_i == ariane_pkg::SINGLE_REQ) begin
          axi_req_o.w.data = wdata_i[0];
          axi_req_o.w.strb = be_i[0];
        end else begin
          axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
          axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
        end
        axi_req_o.aw_valid = 1'b1;
        // we are here because we want to write a cache line
        axi_req_o.aw.len = BURST_SIZE[7:0];
        // react to whichever of the two channels handshakes this cycle
        case ({
          axi_resp_i.aw_ready, axi_resp_i.w_ready
        })
          // only the W beat was accepted, the address is still pending
          2'b01: begin
            // was this the last beat? then only the address is missing
            if (cnt_q == 0) state_d = WAIT_AW_READY_BURST;
            else  // more beats to send, so reduce the count and stay here
              cnt_d = cnt_q - 1;
          end
          // only the address was accepted, keep sending data
          2'b10: state_d = WAIT_LAST_W_READY;
          // both address and W beat were accepted
          2'b11: begin
            // we are finished
            if (cnt_q == 0) begin
              state_d = WAIT_B_VALID;
              gnt_o = 1'b1;
              // there are beats left to send
            end else begin
              state_d = WAIT_LAST_W_READY;
              cnt_d = cnt_q - 1;
            end
          end
          default: ;
        endcase

      end

      // ~> all data has already been sent, we are only waiting for the aw_ready
      WAIT_AW_READY_BURST: begin
        axi_req_o.aw_valid = 1'b1;
        axi_req_o.aw.len = BURST_SIZE[7:0];

        if (axi_resp_i.aw_ready) begin
          state_d = WAIT_B_VALID;
          gnt_o = 1'b1;
        end
      end

      // ~> from write, there is an outstanding write
      WAIT_LAST_W_READY: begin
        axi_req_o.w_valid = 1'b1;

        if (type_i != ariane_pkg::SINGLE_REQ) begin
          axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
          axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
        end

        // this is the last write
        if (cnt_q == '0) begin
          axi_req_o.w.last = 1'b1;
          if (axi_resp_i.w_ready) begin
            state_d = WAIT_B_VALID;
            gnt_o = 1'b1;
          end
        end else if (axi_resp_i.w_ready) begin
          cnt_d = cnt_q - 1;
        end
      end

      // ~> finish write transaction
      WAIT_B_VALID: begin
        id_o = axi_resp_i.b.id;

        // Write is valid
        if (axi_resp_i.b_valid && !any_outstanding_aw) begin
          axi_req_o.b_ready = 1'b1;

          // some atomics must wait for read data
          // we only accept it after accepting bvalid
          if (amo_returns_data(amo_q)) begin
            if (axi_resp_i.r_valid) begin
              // return read data if valid
              valid_o = 1'b1;
              axi_req_o.r_ready = 1'b1;
              state_d = IDLE;
              rdata_o = axi_resp_i.r.data;
            end else begin
              // wait otherwise
              state_d = WAIT_AMO_R_VALID;
            end
          end else begin
            valid_o = 1'b1;
            state_d = IDLE;

            // store-conditional response
            if (amo_q == ariane_pkg::AMO_SC) begin
              if (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY) begin
                // success -> return 0
                rdata_o = 'b0;
              end else begin
                // failure -> when request is 64-bit, return 1;
                // when request is 32-bit place a 1 in both upper
                // and lower half words. The right word will be
                // realigned/masked externally
                rdata_o = size_q == 2'b10 ? (1'b1 << 32) | 64'b1 : 64'b1;
              end
            end
          end
          // if the request was not an atomic we can possibly issue
          // other requests while waiting for the response
        end else begin
          if ((amo_q == ariane_pkg::AMO_NONE) && (outstanding_aw_cnt_q != MAX_OUTSTANDING_AW)) begin
            state_d = IDLE;
            outstanding_aw_cnt_d = outstanding_aw_cnt_q + 1;
          end
        end
      end

      // ~> some atomics wait for read data
      WAIT_AMO_R_VALID: begin
        // acknowledge data and terminate atomic
        if (axi_resp_i.r_valid) begin
          axi_req_o.r_ready = 1'b1;
          state_d = IDLE;
          valid_o = 1'b1;
          rdata_o = axi_resp_i.r.data;
        end
      end

      // ~> cacheline read, single read
      WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin
        // slot of the incoming beat: with a wrapping burst the first beat is
        // the requested one, so the saved offset is added (index wraps by its
        // width); an incrementing burst simply counts up from slot 0
        if (CRITICAL_WORD_FIRST) index = addr_offset_q + (BURST_SIZE[ADDR_INDEX-1:0] - cnt_q);
        else index = BURST_SIZE[ADDR_INDEX-1:0] - cnt_q;

        // always ready to take read data (bursts wrap only if CRITICAL_WORD_FIRST)
        axi_req_o.r_ready = 1'b1;
        // a beat arrived - check whether it is the critical word
        if (axi_resp_i.r_valid) begin
          if (CRITICAL_WORD_FIRST) begin
            // this is the first word of a cacheline read, e.g.: the word which was causing the miss
            if (state_q == WAIT_R_VALID_MULTIPLE && cnt_q == BURST_SIZE) begin
              critical_word_valid_o = 1'b1;
              critical_word_o = axi_resp_i.r.data;
            end
          end else begin
            // check if the address offset matches - then we are getting the critical word
            if (index == addr_offset_q) begin
              critical_word_valid_o = 1'b1;
              critical_word_o = axi_resp_i.r.data;
            end
          end

          // this is the last read
          if (axi_resp_i.r.last) begin
            id_d = axi_resp_i.r.id;
            state_d = COMPLETE_READ;
          end

          // save the word
          if (state_q == WAIT_R_VALID_MULTIPLE) begin
            cache_line_d[index] = axi_resp_i.r.data;

          end else cache_line_d[0] = axi_resp_i.r.data;

          // Decrease the counter
          cnt_d = cnt_q - 1;
        end
      end
      // ~> read is complete
      COMPLETE_READ: begin
        valid_o = 1'b1;
        state_d = IDLE;
        id_o = id_q;
      end

      default: state_d = IDLE;
    endcase

    // This process handles B responses when accepting
    // multiple outstanding write transactions
    if (any_outstanding_aw && axi_resp_i.b_valid) begin
      axi_req_o.b_ready = 1'b1;
      valid_o = 1'b1;
      // Right hand side contains non-registered signal as we want
      // to preserve a possible increment from the WAIT_B_VALID state
      outstanding_aw_cnt_d = outstanding_aw_cnt_d - 1;
    end
  end

  // ----------------
  // Registers
  // ----------------
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
      // come out of reset idle, with cleared buffers and no outstanding writes
      state_q <= IDLE;
      cnt_q <= '0;
      cache_line_q <= '0;
      addr_offset_q <= '0;
      id_q <= '0;
      amo_q <= ariane_pkg::AMO_NONE;
      size_q <= '0;
      outstanding_aw_cnt_q <= '0;
    end else begin
      state_q <= state_d;
      cnt_q <= cnt_d;
      cache_line_q <= cache_line_d;
      addr_offset_q <= addr_offset_d;
      id_q <= id_d;
      amo_q <= amo_d;
      size_q <= size_d;
      outstanding_aw_cnt_q <= outstanding_aw_cnt_d;
    end
  end

  // Maps an ariane_pkg atomic operation onto the value driven on aw.atop.
  // AMO_NONE, the unsupported AMO_CAS1/AMO_CAS2 and every value not listed
  // explicitly yield the "no atomic" encoding.
  function automatic axi_pkg::atop_t atop_from_amo(ariane_pkg::amo_t amo);
    axi_pkg::atop_t result = 6'b000000;

    unique case (amo)
      ariane_pkg::AMO_NONE: result = {axi_pkg::ATOP_NONE, 4'b0000};
      ariane_pkg::AMO_SWAP: result = {axi_pkg::ATOP_ATOMICSWAP};
      ariane_pkg::AMO_ADD:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
      ariane_pkg::AMO_AND:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
      ariane_pkg::AMO_OR:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
      ariane_pkg::AMO_XOR:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
      ariane_pkg::AMO_MAX:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
      ariane_pkg::AMO_MAXU:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
      ariane_pkg::AMO_MIN:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
      ariane_pkg::AMO_MINU:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
      ariane_pkg::AMO_CAS1: result = {axi_pkg::ATOP_NONE, 4'b0000};  // Unsupported
      ariane_pkg::AMO_CAS2: result = {axi_pkg::ATOP_NONE, 4'b0000};  // Unsupported
      default: result = 6'b000000;
    endcase

    return result;
  endfunction

  // Returns 1 when the atomic operation delivers read data in addition to the
  // write response, i.e. when the top two bits of its atop encoding select an
  // atomic load or an atomic swap/compare.
  function automatic logic amo_returns_data(ariane_pkg::amo_t amo);
    axi_pkg::atop_t atop = atop_from_amo(amo);
    logic is_load = atop[5:4] == axi_pkg::ATOP_ATOMICLOAD;
    logic is_swap_or_cmp = atop[5:4] == axi_pkg::ATOP_ATOMICSWAP[5:4];
    return is_load || is_swap_or_cmp;
  endfunction

endmodule
|
||||
|
|
@ -0,0 +1,475 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// File: cache_ctrl.svh
|
||||
// Author: Florian Zaruba <zarubaf@ethz.ch>
|
||||
// Date: 14.10.2017
|
||||
//
|
||||
// Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
// All rights reserved.
|
||||
//
|
||||
// Description: Cache controller
|
||||
|
||||
|
||||
module cache_ctrl
|
||||
import ariane_pkg::*;
|
||||
import std_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic flush_i,
|
||||
input logic bypass_i, // enable cache
|
||||
output logic busy_o,
|
||||
// Core request ports
|
||||
input dcache_req_i_t req_port_i,
|
||||
output dcache_req_o_t req_port_o,
|
||||
// SRAM interface
|
||||
output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid
|
||||
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
|
||||
input logic gnt_i,
|
||||
output cache_line_t data_o,
|
||||
output cl_be_t be_o,
|
||||
output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later
|
||||
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
|
||||
output logic we_o,
|
||||
input logic [DCACHE_SET_ASSOC-1:0] hit_way_i,
|
||||
// Miss handling
|
||||
output miss_req_t miss_req_o,
|
||||
// return
|
||||
input logic miss_gnt_i,
|
||||
input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss
|
||||
input logic [63:0] critical_word_i,
|
||||
input logic critical_word_valid_i,
|
||||
// bypass ports
|
||||
input logic bypass_gnt_i,
|
||||
input logic bypass_valid_i,
|
||||
input logic [63:0] bypass_data_i,
|
||||
// check MSHR for aliasing
|
||||
output logic [55:0] mshr_addr_o,
|
||||
input logic mshr_addr_matches_i,
|
||||
input logic mshr_index_matches_i
|
||||
);
|
||||
|
||||
// Cache controller FSM state: state_q is the registered state, state_d the
// next state computed by the cache_ctrl_fsm process.
enum logic [3:0] {
  IDLE,                // 0: no request in flight
  WAIT_TAG,            // 1: cache enabled, waiting for the tag
  WAIT_TAG_BYPASSED,   // 2: bypass mode, waiting for the tag
  WAIT_GNT,            // 3: lost the memory grant while waiting for the tag
  WAIT_GNT_SAVED,      // 4: as WAIT_GNT, tag already saved
  STORE_REQ,           // 5: second memory access of a store
  WAIT_REFILL_VALID,   // 6: waiting for the bypass response
  WAIT_REFILL_GNT,     // 7: waiting for a grant from the miss unit
  WAIT_TAG_SAVED,      // 8: as WAIT_TAG, tag already saved
  WAIT_MSHR,           // 9: miss unit is serving a matching request
  WAIT_CRITICAL_WORD   // 10: waiting for the critical word of a refill
} state_d, state_q;
|
||||
|
||||
// Snapshot of the request currently handled by the controller.
typedef struct packed {
  logic [DCACHE_INDEX_WIDTH-1:0] index;   // captured from req_port_i.address_index
  logic [DCACHE_TAG_WIDTH-1:0]   tag;     // captured from req_port_i.address_tag
  logic [DCACHE_TID_WIDTH-1:0]   id;      // captured from req_port_i.data_id
  logic [7:0]                    be;      // captured from req_port_i.data_be
  logic [1:0]                    size;    // captured from req_port_i.data_size
  logic                          we;      // captured from req_port_i.data_we
  logic [63:0]                   wdata;   // captured from req_port_i.data_wdata
  logic                          bypass;  // request is forwarded with miss_req_o.bypass set
  logic                          killed;  // kill_req was seen; suppresses data_rvalid
} mem_req_t;
|
||||
|
||||
logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;
|
||||
|
||||
mem_req_t mem_req_d, mem_req_q;
|
||||
|
||||
assign busy_o = (state_q != IDLE);
|
||||
assign tag_o = mem_req_d.tag;
|
||||
|
||||
logic [DCACHE_LINE_WIDTH-1:0] cl_i;
|
||||
|
||||
// Picks the cache line of the hitting way. With no hit the result is all
// zeros; if several bits of hit_way_i were set, the highest way would win.
always_comb begin : way_select
  cl_i = '0;
  for (int unsigned way = 0; way < DCACHE_SET_ASSOC; way++) begin
    if (hit_way_i[way]) begin
      cl_i = data_i[way].data;
    end
  end

  // cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
end
|
||||
|
||||
// --------------
|
||||
// Cache FSM
|
||||
// --------------
|
||||
// Next-state and output logic of the cache controller. Every output and
// next-state value receives a default first; the per-state code then overrides
// it. Later assignments win over earlier ones, so the order of the checks
// inside a state is significant.
always_comb begin : cache_ctrl_fsm
  automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
  // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
  // bit offset of the addressed 64-bit word inside the line -> multiple of 64
  cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6;  // shift by 6 to the left

  // ---- defaults: hold state and the saved request ----
  state_d   = state_q;
  mem_req_d = mem_req_q;
  hit_way_d = hit_way_q;

  // ---- defaults: core-side outputs ----
  req_port_o.data_gnt    = 1'b0;
  req_port_o.data_rvalid = 1'b0;
  req_port_o.data_rdata  = '0;
  req_port_o.data_rid    = mem_req_q.id;
  miss_req_o             = '0;
  mshr_addr_o            = '0;

  // ---- defaults: memory array interface ----
  req_o  = '0;
  addr_o = req_port_i.address_index;
  data_o = '0;
  be_o   = '0;
  we_o   = '0;

  // a kill is sticky until a new request overwrites the saved one
  mem_req_d.killed |= req_port_i.kill_req;

  case (state_q)

    IDLE: begin
      // a new request arrived
      if (req_port_i.data_req && !flush_i) begin
        // speculatively read all ways of the addressed set
        req_o = '1;

        // capture the request: index, id, be, size, we, wdata
        mem_req_d.index  = req_port_i.address_index;
        mem_req_d.id     = req_port_i.data_id;
        mem_req_d.be     = req_port_i.data_be;
        mem_req_d.size   = req_port_i.data_size;
        mem_req_d.we     = req_port_i.data_we;
        mem_req_d.wdata  = req_port_i.data_wdata;
        mem_req_d.killed = req_port_i.kill_req;

        // Bypass mode, check for uncacheable address here as well
        if (bypass_i) begin
          state_d = WAIT_TAG_BYPASSED;
          // grant this access only if it was a load
          req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1;
          mem_req_d.bypass = 1'b1;
          // ------------------
          // Cache is enabled
          // ------------------
        end else begin
          // proceed only once the memory array has granted the access
          if (gnt_i) begin
            state_d = WAIT_TAG;
            mem_req_d.bypass = 1'b0;
            // only a read is granted right away
            if (!req_port_i.data_we) req_port_o.data_gnt = 1'b1;
          end
        end
      end
    end

    // cache enabled and waiting for tag
    WAIT_TAG, WAIT_TAG_SAVED: begin
      // the request is still wanted and a tag is available (either valid now,
      // saved earlier, or the request is a store)
      if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin
        // latch the tag unless it was saved before
        if (state_q != WAIT_TAG_SAVED) mem_req_d.tag = req_port_i.address_tag;

        // we speculatively request another transfer
        if (req_port_i.data_req && !flush_i) req_o = '1;

        // ------------
        // HIT CASE
        // ------------
        if (|hit_way_i) begin
          // a load that hit allows the next request to be taken immediately
          if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
            state_d          = WAIT_TAG;  // switch back to WAIT_TAG
            mem_req_d.index  = req_port_i.address_index;
            mem_req_d.id     = req_port_i.data_id;
            mem_req_d.be     = req_port_i.data_be;
            mem_req_d.size   = req_port_i.data_size;
            mem_req_d.we     = req_port_i.data_we;
            mem_req_d.wdata  = req_port_i.data_wdata;
            mem_req_d.killed = req_port_i.kill_req;
            mem_req_d.bypass = 1'b0;

            req_port_o.data_gnt = gnt_i;

            // without a memory grant the new request is not accepted
            if (!gnt_i) state_d = IDLE;
          end else begin
            state_d = IDLE;
          end

          // this is timing critical
          req_port_o.data_rdata = cl_i[cl_offset+:64];

          if (!mem_req_q.we) begin
            // report data for a read
            req_port_o.data_rvalid = ~mem_req_q.killed;
          end else begin
            // a store needs an extra step to write the line
            state_d   = STORE_REQ;
            hit_way_d = hit_way_i;
          end
          // ------------
          // MISS CASE
          // ------------
        end else begin
          // make a miss request
          state_d = WAIT_REFILL_GNT;
        end

        // ----------------------------------------------
        // Check MSHR - Miss Status Handling Register
        // ----------------------------------------------
        mshr_addr_o = {tag_o, mem_req_q.index};
        // 1. The index matches the MSHR while we are on the store path: the
        //    miss controller is currently evicting our cache-line. As the
        //    store is non-atomic we need to keep checking whether we match
        //    the address the miss handler is serving. The whole index has to
        //    be compared because a completely different memory line could
        //    alias with the cache-line being evicted.
        // 2. We are loading and the address matches the exact cache-line the
        //    miss controller is currently serving. We then have to wait for
        //    the miss controller to finish before serving this line;
        //    otherwise the line would be fetched again, potentially losing
        //    content written so far. As a consequence there cannot be a hit
        //    on that line, so req_port_o.data_rvalid will be de-asserted.
        if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) state_d = WAIT_MSHR;

        // -------------------------
        // Check for cache-ability
        // -------------------------
        if (!config_pkg::is_inside_cacheable_regions(
                CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}}
            )) begin
          mem_req_d.bypass = 1'b1;
          state_d = WAIT_REFILL_GNT;
        end

        // we are still waiting for a valid tag
      end else begin
        // keep requesting the cache line of the saved index
        addr_o = mem_req_q.index;
        req_o  = '1;

        // the memory grant got lost in the meantime
        if (!gnt_i) state_d = WAIT_GNT;
      end
    end

    // ~> we already granted the request but lost the memory grant while waiting for the tag
    WAIT_GNT, WAIT_GNT_SAVED: begin
      // keep requesting the cache line of the saved index
      addr_o = mem_req_q.index;
      req_o  = '1;

      // if we get a valid tag while waiting for the memory grant, save it
      if (req_port_i.tag_valid) begin
        mem_req_d.tag = req_port_i.address_tag;
        state_d = WAIT_GNT_SAVED;
      end

      // grant is back ~> return to the tag wait state; state_d (not state_q)
      // is examined so that a tag saved in this very cycle is accounted for
      if (gnt_i) state_d = (state_d == WAIT_GNT) ? WAIT_TAG : WAIT_TAG_SAVED;
    end

    // ~> we are here as we need a second round of memory access for a store
    STORE_REQ: begin
      // check if the MSHR still doesn't match
      mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

      // We need to re-check for MSHR aliasing here as the store requires at least
      // two memory look-ups on a single-ported SRAM and therefore is non-atomic
      if (!mshr_index_matches_i) begin
        // store data, write dirty bit
        req_o  = hit_way_q;
        addr_o = mem_req_q.index;
        we_o   = 1'b1;

        be_o.vldrty = hit_way_q;

        // byte enables and data go to the addressed 64-bit word of the line
        be_o.data[cl_offset>>3+:8] = mem_req_q.be;
        data_o.data[cl_offset+:64] = mem_req_q.wdata;
        // ~> the line becomes valid and dirty
        data_o.dirty = 1'b1;
        data_o.valid = 1'b1;

        // got a grant ~> this is finished now
        if (gnt_i) begin
          req_port_o.data_gnt = 1'b1;
          state_d = IDLE;
        end
      end else begin
        state_d = WAIT_MSHR;
      end
    end  // case: STORE_REQ

    // we've got a match on MSHR ~> miss unit is currently serving a request
    WAIT_MSHR: begin
      mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
      // once the index no longer matches, the look-up can be restarted
      if (!mshr_index_matches_i) begin
        req_o  = '1;
        addr_o = mem_req_q.index;

        if (gnt_i) state_d = WAIT_TAG_SAVED;
      end
    end

    // its for sure a miss
    WAIT_TAG_BYPASSED: begin
      // the request is still wanted and a tag is available (stores do not
      // wait for tag_valid)
      if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin
        // save tag
        mem_req_d.tag = req_port_i.address_tag;
        state_d = WAIT_REFILL_GNT;
      end
    end

    // ~> wait for grant from miss unit
    WAIT_REFILL_GNT: begin

      mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

      // present the saved request to the miss unit
      miss_req_o.valid  = 1'b1;
      miss_req_o.bypass = mem_req_q.bypass;
      miss_req_o.addr   = {mem_req_q.tag, mem_req_q.index};
      miss_req_o.be     = mem_req_q.be;
      miss_req_o.size   = mem_req_q.size;
      miss_req_o.we     = mem_req_q.we;
      miss_req_o.wdata  = mem_req_q.wdata;

      // got a bypass grant so go to valid
      if (bypass_gnt_i) begin
        state_d = WAIT_REFILL_VALID;
        // if this was a write we still need to give a grant to the store unit.
        // We can also avoid waiting for the response valid, this signal is
        // currently not used by the store unit
        if (mem_req_q.we) begin
          req_port_o.data_gnt = 1'b1;
          state_d = IDLE;
        end
      end

      // miss grant: a load waits for its critical word, a store is done
      if (miss_gnt_i && !mem_req_q.we) state_d = WAIT_CRITICAL_WORD;
      else if (miss_gnt_i) begin
        state_d = IDLE;
        req_port_o.data_gnt = 1'b1;
      end

      // it can be the case that the miss unit is currently serving a
      // request which matches ours
      // so we need to check the MSHR for matching continuously
      // if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
      if (mshr_addr_matches_i && !active_serving_i) state_d = WAIT_MSHR;
    end

    // ~> wait for critical word to arrive
    WAIT_CRITICAL_WORD: begin
      // speculatively request another word
      if (req_port_i.data_req) begin
        // request the cache line
        req_o = '1;
      end

      if (critical_word_valid_i) begin
        req_port_o.data_rvalid = ~mem_req_q.killed;
        req_port_o.data_rdata  = critical_word_i;
        // we can make another request
        if (req_port_i.data_req && !flush_i) begin
          // capture the request: index, id, be, size, we, wdata
          mem_req_d.index  = req_port_i.address_index;
          mem_req_d.id     = req_port_i.data_id;
          mem_req_d.be     = req_port_i.data_be;
          mem_req_d.size   = req_port_i.data_size;
          mem_req_d.we     = req_port_i.data_we;
          mem_req_d.wdata  = req_port_i.data_wdata;
          mem_req_d.killed = req_port_i.kill_req;

          state_d = IDLE;

          // the new request is only accepted with a memory array grant
          if (gnt_i) begin
            state_d = WAIT_TAG;
            mem_req_d.bypass = 1'b0;
            req_port_o.data_gnt = 1'b1;
          end
        end else begin
          state_d = IDLE;
        end
      end
    end
    // ~> wait until the bypass request is valid
    WAIT_REFILL_VALID: begin
      // got a valid answer
      if (bypass_valid_i) begin
        req_port_o.data_rdata  = bypass_data_i;
        req_port_o.data_rvalid = ~mem_req_q.killed;
        state_d = IDLE;
      end
    end
  endcase

  // A kill always answers with data_rvalid and aborts the request, except in
  // the two states that wait on the miss unit.
  if (req_port_i.kill_req) begin
    req_port_o.data_rvalid = 1'b1;
    if (!(state_q inside {WAIT_REFILL_GNT, WAIT_CRITICAL_WORD})) state_d = IDLE;
  end
end
|
||||
|
||||
// --------------
|
||||
// Registers
|
||||
// --------------
|
||||
// Sequential state of the controller: the FSM state (state_q), the latched
// memory request (mem_req_q) and hit_way_q. The asynchronous active-low reset
// puts the FSM in IDLE and clears the other two; otherwise each register takes
// its next-state (*_d) value on the rising clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (~rst_ni) begin
    mem_req_q <= '0;
    hit_way_q <= '0;
    state_q   <= IDLE;
  end else begin
    mem_req_q <= mem_req_d;
    hit_way_q <= hit_way_d;
    state_q   <= state_d;
  end
end
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
// Checked once at the start of simulation: only 128-bit cache lines are
// supported by this controller.
initial begin
  assert (DCACHE_LINE_WIDTH == 128)
  else
    $error(
        "Cacheline width has to be 128 for the moment. But only small changes required in data select logic"
    );
end

// Whenever the full MSHR address matches, the partial (index) match must be
// set as well.
partial_full_mshr_match :
assert property (@(posedge clk_i) disable iff (~rst_ni)
                 mshr_addr_matches_i -> mshr_index_matches_i)
else $fatal(1, "partial mshr index doesn't match");

// While the MSHR matches and this port is not being actively served, no read
// response may be signalled unless the request is being killed.
no_valid_on_mshr_match :
assert property (@(posedge clk_i) disable iff (~rst_ni)
                 (mshr_addr_matches_i && !active_serving_i) ->
                 !req_port_o.data_rvalid || req_port_i.kill_req)
else $fatal(1, "rvalid_o should not be set on MSHR match");
|
||||
`endif
|
||||
//pragma translate_on
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,200 @@
|
|||
// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
|
||||
// Alternatives (CEA)
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Authors: Cesar Fuguet
|
||||
// Date: February, 2023
|
||||
// Description: Interface adapter for the CVA6 core
|
||||
module cva6_hpdcache_if_adapter
  import hpdcache_pkg::*;

//  Parameters
//  {{{
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    //  1'b1: the adapter forwards load requests (tag, abort and PMA are sent
    //        on the dedicated side-band outputs)
    //  1'b0: the adapter forwards store and AMO requests (physically indexed)
    parameter bit is_load_port = 1'b1
)
//  }}}

//  Ports
//  {{{
(
    //  Clock and active-low reset pins
    input logic clk_i,
    input logic rst_ni,

    //  Port ID
    input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i,

    //  Request/response ports from/to the CVA6 core
    input  ariane_pkg::dcache_req_i_t cva6_req_i,
    output ariane_pkg::dcache_req_o_t cva6_req_o,
    input  ariane_pkg::amo_req_t      cva6_amo_req_i,
    output ariane_pkg::amo_resp_t     cva6_amo_resp_o,

    //  Request port to the L1 Dcache
    output logic                        hpdcache_req_valid_o,
    input  logic                        hpdcache_req_ready_i,
    output hpdcache_pkg::hpdcache_req_t hpdcache_req_o,
    output logic                        hpdcache_req_abort_o,
    output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o,
    output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o,

    //  Response port from the L1 Dcache
    input logic                        hpdcache_rsp_valid_i,
    input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i
);
  //  }}}

  //  Internal nets and registers
  //  {{{
  logic forward_store, forward_amo;
  logic hpdcache_req_is_uncacheable;

  //  Zero padding placed above the tag when rebuilding the address used for
  //  the cacheability lookup. Subtracting the index width as well keeps
  //  {pad, tag, index} at 64 bits (assuming the tag is DCACHE_TAG_WIDTH bits
  //  wide) instead of 64 + DCACHE_INDEX_WIDTH bits.
  localparam int AddrPadWidth =
      64 - ariane_pkg::DCACHE_TAG_WIDTH - ariane_pkg::DCACHE_INDEX_WIDTH;
  //  }}}

  //  Request forwarding
  //  {{{
  generate
    //  LOAD request
    //  {{{
    if (is_load_port == 1'b1) begin : load_port_gen
      //  Cacheability is derived from the tag only (index bits forced to 0)
      assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
          CVA6Cfg,
          {
            {AddrPadWidth{1'b0}}
            , cva6_req_i.address_tag
            , {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
          }
      );

      //  A load port never forwards stores or AMOs: tie off the store/AMO
      //  nets (read by the assertion at the bottom of the module) and the AMO
      //  response so that nothing is left undriven in this configuration.
      assign forward_store = 1'b0, forward_amo = 1'b0;
      assign cva6_amo_resp_o = '0;

      //  Request forwarding
      assign hpdcache_req_valid_o = cva6_req_i.data_req,
          hpdcache_req_o.addr_offset = cva6_req_i.address_index,
          hpdcache_req_o.wdata = '0,
          hpdcache_req_o.op = hpdcache_pkg::HPDCACHE_REQ_LOAD,
          hpdcache_req_o.be = cva6_req_i.data_be,
          hpdcache_req_o.size = cva6_req_i.data_size,
          hpdcache_req_o.sid = hpdcache_req_sid_i,
          hpdcache_req_o.tid = cva6_req_i.data_id,
          hpdcache_req_o.need_rsp = 1'b1,
          hpdcache_req_o.phys_indexed = 1'b0,
          hpdcache_req_o.addr_tag = '0,  // unused on virtually indexed request
          hpdcache_req_o.pma = '0;  // unused on virtually indexed request

      //  Side-band signals of the virtually indexed request: kill, tag and PMA
      assign hpdcache_req_abort_o = cva6_req_i.kill_req,
          hpdcache_req_tag_o = cva6_req_i.address_tag,
          hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable,
          hpdcache_req_pma_o.io = 1'b0;

      //  Response forwarding
      assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i,
          cva6_req_o.data_rdata = hpdcache_rsp_i.rdata,
          cva6_req_o.data_rid = hpdcache_rsp_i.tid,
          cva6_req_o.data_gnt = hpdcache_req_ready_i;
    end  //  }}}

    //  {{{
    else begin : store_amo_gen
      //  STORE/AMO request
      hpdcache_req_addr_t   amo_addr;
      hpdcache_req_offset_t amo_addr_offset;
      hpdcache_tag_t        amo_tag;
      logic amo_is_word, amo_is_word_hi;
      hpdcache_req_data_t   amo_data;
      hpdcache_req_be_t     amo_data_be;
      hpdcache_req_op_t     amo_op;
      logic [31:0]          amo_resp_word;

      //  AMO logic
      //  {{{
      always_comb begin : amo_op_comb
        //  Split the AMO address into cache offset and tag
        amo_addr = cva6_amo_req_i.operand_a;
        amo_addr_offset = amo_addr[0+:HPDCACHE_REQ_OFFSET_WIDTH];
        amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH+:HPDCACHE_TAG_WIDTH];
        amo_is_word = (cva6_amo_req_i.size == 2'b10);
        //  Address bit 2 selects the upper 32-bit half of the 64-bit word
        amo_is_word_hi = cva6_amo_req_i.operand_a[2];

        //  Word operands are replicated on both halves; the byte enable then
        //  selects the half that is actually accessed
        amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0+:32]}} : cva6_amo_req_i.operand_b;

        amo_data_be = amo_is_word_hi ? 8'hf0 : amo_is_word ? 8'h0f : 8'hff;

        //  Translate the CVA6 AMO opcode into the HPDcache request opcode
        unique case (cva6_amo_req_i.amo_op)
          ariane_pkg::AMO_LR:   amo_op = HPDCACHE_REQ_AMO_LR;
          ariane_pkg::AMO_SC:   amo_op = HPDCACHE_REQ_AMO_SC;
          ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP;
          ariane_pkg::AMO_ADD:  amo_op = HPDCACHE_REQ_AMO_ADD;
          ariane_pkg::AMO_AND:  amo_op = HPDCACHE_REQ_AMO_AND;
          ariane_pkg::AMO_OR:   amo_op = HPDCACHE_REQ_AMO_OR;
          ariane_pkg::AMO_XOR:  amo_op = HPDCACHE_REQ_AMO_XOR;
          ariane_pkg::AMO_MAX:  amo_op = HPDCACHE_REQ_AMO_MAX;
          ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU;
          ariane_pkg::AMO_MIN:  amo_op = HPDCACHE_REQ_AMO_MIN;
          ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU;
          default:              amo_op = HPDCACHE_REQ_LOAD;
        endcase
      end

      //  32-bit half of the response that corresponds to the accessed word
      assign amo_resp_word = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32]
                                            : hpdcache_rsp_i.rdata[0][0 +: 32];
      //  }}}

      //  Request forwarding
      //  {{{
      //  Cacheability is derived from the forwarded tag (index bits forced to 0)
      assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
          CVA6Cfg,
          {
            {AddrPadWidth{1'b0}}
            , hpdcache_req_o.addr_tag,
            {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
          }
      );

      assign forward_store = cva6_req_i.data_req, forward_amo = cva6_amo_req_i.req;

      //  When both are requested the AMO fields win in every multiplexer; an
      //  all-ones transaction ID marks the request as an AMO so that its
      //  response can be told apart below
      assign hpdcache_req_valid_o = forward_store | forward_amo,
          hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset : cva6_req_i.address_index,
          hpdcache_req_o.wdata = forward_amo ? amo_data : cva6_req_i.data_wdata,
          hpdcache_req_o.op = forward_amo ? amo_op : hpdcache_pkg::HPDCACHE_REQ_STORE,
          hpdcache_req_o.be = forward_amo ? amo_data_be : cva6_req_i.data_be,
          hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size : cva6_req_i.data_size,
          hpdcache_req_o.sid = hpdcache_req_sid_i,
          hpdcache_req_o.tid = forward_amo ? '1 : '0,
          hpdcache_req_o.need_rsp = forward_amo,
          hpdcache_req_o.phys_indexed = 1'b1,
          hpdcache_req_o.addr_tag = forward_amo ? amo_tag : cva6_req_i.address_tag,
          hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable,
          hpdcache_req_o.pma.io = 1'b0,
          hpdcache_req_abort_o = 1'b0,  // unused on physically indexed requests
          hpdcache_req_tag_o = '0,  // unused on physically indexed requests
          hpdcache_req_pma_o = '0;  // unused on physically indexed requests
      //  }}}

      //  Response forwarding
      //  {{{
      //  Responses carrying the all-ones ID belong to the AMO interface, all
      //  others to the regular request port
      assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1),
          cva6_req_o.data_rdata = hpdcache_rsp_i.rdata,
          cva6_req_o.data_rid = hpdcache_rsp_i.tid,
          cva6_req_o.data_gnt = hpdcache_req_ready_i;

      //  Word results are sign-extended to 64 bits
      assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1),
          cva6_amo_resp_o.result = amo_is_word ? {{32{amo_resp_word[31]}}, amo_resp_word}
                                               : hpdcache_rsp_i.rdata[0][63:0];
      //  }}}
    end
    //  }}}
  endgenerate
  //  }}}

  //  Assertions
  //  {{{
  //  pragma translate_off
  //  A store and an AMO must never be requested in the same cycle
  forward_one_request_assert :
  assert property (@(posedge clk_i) disable iff (!rst_ni)
                   ($onehot0({forward_store, forward_amo})))
  else $error("Only one request shall be forwarded");
  //  pragma translate_on
  //  }}}
endmodule
|
||||
|
|
@ -0,0 +1,609 @@
|
|||
// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
|
||||
// Alternatives (CEA)
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Authors: Cesar Fuguet
|
||||
// Date: February, 2023
|
||||
// Description: CVA6 cache subsystem integrating standard CVA6's
|
||||
// instruction cache and the Core-V High-Performance L1
|
||||
// data cache (CV-HPDcache).
|
||||
|
||||
module cva6_hpdcache_subsystem
|
||||
// Parameters
|
||||
// {{{
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter int NumPorts = 4,
|
||||
parameter int NrHwPrefetchers = 4,
|
||||
parameter type noc_req_t = logic,
|
||||
parameter type noc_resp_t = logic,
|
||||
parameter type cmo_req_t = logic,
|
||||
parameter type cmo_rsp_t = logic
|
||||
)
|
||||
// }}}
|
||||
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// I$
|
||||
// {{{
|
||||
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
|
||||
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
|
||||
output logic icache_miss_o, // to performance counter
|
||||
// address translation requests
|
||||
input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend
|
||||
output ariane_pkg::icache_arsp_t icache_areq_o,
|
||||
// data requests
|
||||
input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend
|
||||
output ariane_pkg::icache_drsp_t icache_dreq_o,
|
||||
// }}}
|
||||
|
||||
// D$
|
||||
// {{{
|
||||
// Cache management
|
||||
input logic dcache_enable_i, // from CSR
|
||||
input logic dcache_flush_i, // high until acknowledged
|
||||
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
|
||||
output logic dcache_miss_o, // we missed on a ld/st
|
||||
|
||||
// AMO interface
|
||||
input ariane_pkg::amo_req_t dcache_amo_req_i, // from LSU
|
||||
output ariane_pkg::amo_resp_t dcache_amo_resp_o, // to LSU
|
||||
// CMO interface
|
||||
input cmo_req_t dcache_cmo_req_i, // from CMO FU
|
||||
output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU
|
||||
// Request ports
|
||||
input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU
|
||||
output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU
|
||||
// Write Buffer status
|
||||
output logic wbuffer_empty_o,
|
||||
output logic wbuffer_not_ni_o,
|
||||
|
||||
// Hardware memory prefetcher configuration
|
||||
input logic [NrHwPrefetchers-1:0] hwpf_base_set_i,
|
||||
input logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i,
|
||||
output logic [NrHwPrefetchers-1:0][63:0] hwpf_base_o,
|
||||
input logic [NrHwPrefetchers-1:0] hwpf_param_set_i,
|
||||
input logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i,
|
||||
output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o,
|
||||
input logic [NrHwPrefetchers-1:0] hwpf_throttle_set_i,
|
||||
input logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i,
|
||||
output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o,
|
||||
output logic [ 63:0] hwpf_status_o,
|
||||
// }}}
|
||||
|
||||
// AXI port to upstream memory/peripherals
|
||||
// {{{
|
||||
output noc_req_t noc_req_o,
|
||||
input noc_resp_t noc_resp_i
|
||||
// }}}
|
||||
);
|
||||
// }}}
|
||||
|
||||
`include "axi/typedef.svh"
|
||||
|
||||
// I$ instantiation
|
||||
// {{{
|
||||
logic icache_miss_valid, icache_miss_ready;
|
||||
wt_cache_pkg::icache_req_t icache_miss;
|
||||
|
||||
logic icache_miss_resp_valid;
|
||||
wt_cache_pkg::icache_rtrn_t icache_miss_resp;
|
||||
|
||||
localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1);
|
||||
|
||||
cva6_icache #(
|
||||
.CVA6Cfg(CVA6Cfg),
|
||||
.RdTxId (ICACHE_RDTXID)
|
||||
) i_cva6_icache (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (icache_flush_i),
|
||||
.en_i (icache_en_i),
|
||||
.miss_o (icache_miss_o),
|
||||
.areq_i (icache_areq_i),
|
||||
.areq_o (icache_areq_o),
|
||||
.dreq_i (icache_dreq_i),
|
||||
.dreq_o (icache_dreq_o),
|
||||
.mem_rtrn_vld_i(icache_miss_resp_valid),
|
||||
.mem_rtrn_i (icache_miss_resp),
|
||||
.mem_data_req_o(icache_miss_valid),
|
||||
.mem_data_ack_i(icache_miss_ready),
|
||||
.mem_data_o (icache_miss)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// D$ instantiation
|
||||
// {{{
|
||||
`include "hpdcache_typedef.svh"
|
||||
|
||||
// 0: Page-Table Walk (PTW)
|
||||
// 1: Load unit
|
||||
// 2: Accelerator load
|
||||
// 3: Store/AMO
|
||||
// .
|
||||
// .
|
||||
// .
|
||||
// NumPorts: CMO
|
||||
// NumPorts + 1: Hardware Memory Prefetcher (hwpf)
|
||||
localparam int HPDCACHE_NREQUESTERS = NumPorts + 2;
|
||||
|
||||
typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t;
|
||||
typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t;
|
||||
typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t;
|
||||
typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t;
|
||||
`HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t);
|
||||
`HPDCACHE_TYPEDEF_MEM_RESP_R_T(hpdcache_mem_resp_r_t, hpdcache_mem_id_t, hpdcache_mem_data_t);
|
||||
`HPDCACHE_TYPEDEF_MEM_REQ_W_T(hpdcache_mem_req_w_t, hpdcache_mem_data_t, hpdcache_mem_be_t);
|
||||
`HPDCACHE_TYPEDEF_MEM_RESP_W_T(hpdcache_mem_resp_w_t, hpdcache_mem_id_t);
|
||||
|
||||
typedef logic [63:0] hwpf_stride_param_t;
|
||||
|
||||
logic dcache_req_valid[HPDCACHE_NREQUESTERS-1:0];
|
||||
logic dcache_req_ready[HPDCACHE_NREQUESTERS-1:0];
|
||||
hpdcache_pkg::hpdcache_req_t dcache_req [HPDCACHE_NREQUESTERS-1:0];
|
||||
logic dcache_req_abort[HPDCACHE_NREQUESTERS-1:0];
|
||||
hpdcache_pkg::hpdcache_tag_t dcache_req_tag [HPDCACHE_NREQUESTERS-1:0];
|
||||
hpdcache_pkg::hpdcache_pma_t dcache_req_pma [HPDCACHE_NREQUESTERS-1:0];
|
||||
logic dcache_rsp_valid[HPDCACHE_NREQUESTERS-1:0];
|
||||
hpdcache_pkg::hpdcache_rsp_t dcache_rsp [HPDCACHE_NREQUESTERS-1:0];
|
||||
logic dcache_read_miss, dcache_write_miss;
|
||||
|
||||
logic [ 2:0] snoop_valid;
|
||||
logic [ 2:0] snoop_abort;
|
||||
hpdcache_pkg::hpdcache_req_offset_t [ 2:0] snoop_addr_offset;
|
||||
hpdcache_pkg::hpdcache_tag_t [ 2:0] snoop_addr_tag;
|
||||
logic [ 2:0] snoop_phys_indexed;
|
||||
|
||||
logic dcache_cmo_req_is_prefetch;
|
||||
|
||||
logic dcache_miss_ready;
|
||||
logic dcache_miss_valid;
|
||||
hpdcache_mem_req_t dcache_miss;
|
||||
|
||||
logic dcache_miss_resp_ready;
|
||||
logic dcache_miss_resp_valid;
|
||||
hpdcache_mem_resp_r_t dcache_miss_resp;
|
||||
|
||||
logic dcache_wbuf_ready;
|
||||
logic dcache_wbuf_valid;
|
||||
hpdcache_mem_req_t dcache_wbuf;
|
||||
|
||||
logic dcache_wbuf_data_ready;
|
||||
logic dcache_wbuf_data_valid;
|
||||
hpdcache_mem_req_w_t dcache_wbuf_data;
|
||||
|
||||
logic dcache_wbuf_resp_ready;
|
||||
logic dcache_wbuf_resp_valid;
|
||||
hpdcache_mem_resp_w_t dcache_wbuf_resp;
|
||||
|
||||
logic dcache_uc_read_ready;
|
||||
logic dcache_uc_read_valid;
|
||||
hpdcache_mem_req_t dcache_uc_read;
|
||||
|
||||
logic dcache_uc_read_resp_ready;
|
||||
logic dcache_uc_read_resp_valid;
|
||||
hpdcache_mem_resp_r_t dcache_uc_read_resp;
|
||||
|
||||
logic dcache_uc_write_ready;
|
||||
logic dcache_uc_write_valid;
|
||||
hpdcache_mem_req_t dcache_uc_write;
|
||||
|
||||
logic dcache_uc_write_data_ready;
|
||||
logic dcache_uc_write_data_valid;
|
||||
hpdcache_mem_req_w_t dcache_uc_write_data;
|
||||
|
||||
logic dcache_uc_write_resp_ready;
|
||||
logic dcache_uc_write_resp_valid;
|
||||
hpdcache_mem_resp_w_t dcache_uc_write_resp;
|
||||
|
||||
hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_in;
|
||||
hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out;
|
||||
|
||||
generate
|
||||
ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0];
|
||||
|
||||
for (genvar r = 0; r < (NumPorts - 1); r++) begin : cva6_hpdcache_load_if_adapter_gen
|
||||
assign dcache_req_ports[r] = dcache_req_ports_i[r];
|
||||
|
||||
cva6_hpdcache_if_adapter #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.is_load_port(1'b1)
|
||||
) i_cva6_hpdcache_load_if_adapter (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(r)),
|
||||
|
||||
.cva6_req_i (dcache_req_ports[r]),
|
||||
.cva6_req_o (dcache_req_ports_o[r]),
|
||||
.cva6_amo_req_i ('0),
|
||||
.cva6_amo_resp_o( /* unused */),
|
||||
|
||||
.hpdcache_req_valid_o(dcache_req_valid[r]),
|
||||
.hpdcache_req_ready_i(dcache_req_ready[r]),
|
||||
.hpdcache_req_o (dcache_req[r]),
|
||||
.hpdcache_req_abort_o(dcache_req_abort[r]),
|
||||
.hpdcache_req_tag_o (dcache_req_tag[r]),
|
||||
.hpdcache_req_pma_o (dcache_req_pma[r]),
|
||||
|
||||
.hpdcache_rsp_valid_i(dcache_rsp_valid[r]),
|
||||
.hpdcache_rsp_i (dcache_rsp[r])
|
||||
);
|
||||
end
|
||||
|
||||
cva6_hpdcache_if_adapter #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.is_load_port(1'b0)
|
||||
) i_cva6_hpdcache_store_if_adapter (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts - 1)),
|
||||
|
||||
.cva6_req_i (dcache_req_ports_i[NumPorts-1]),
|
||||
.cva6_req_o (dcache_req_ports_o[NumPorts-1]),
|
||||
.cva6_amo_req_i (dcache_amo_req_i),
|
||||
.cva6_amo_resp_o(dcache_amo_resp_o),
|
||||
|
||||
.hpdcache_req_valid_o(dcache_req_valid[NumPorts-1]),
|
||||
.hpdcache_req_ready_i(dcache_req_ready[NumPorts-1]),
|
||||
.hpdcache_req_o (dcache_req[NumPorts-1]),
|
||||
.hpdcache_req_abort_o(dcache_req_abort[NumPorts-1]),
|
||||
.hpdcache_req_tag_o (dcache_req_tag[NumPorts-1]),
|
||||
.hpdcache_req_pma_o (dcache_req_pma[NumPorts-1]),
|
||||
|
||||
.hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts-1]),
|
||||
.hpdcache_rsp_i (dcache_rsp[NumPorts-1])
|
||||
);
|
||||
|
||||
`ifdef HPDCACHE_ENABLE_CMO
|
||||
cva6_hpdcache_cmo_if_adapter #(
|
||||
.cmo_req_t(cmo_req_t),
|
||||
.cmo_rsp_t(cmo_rsp_t)
|
||||
) i_cva6_hpdcache_cmo_if_adapter (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.dcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)),
|
||||
|
||||
.cva6_cmo_req_i (dcache_cmo_req_i),
|
||||
.cva6_cmo_resp_o(dcache_cmo_resp_o),
|
||||
|
||||
.dcache_req_valid_o(dcache_req_valid[NumPorts]),
|
||||
.dcache_req_ready_i(dcache_req_ready[NumPorts]),
|
||||
.dcache_req_o (dcache_req[NumPorts]),
|
||||
.dcache_req_abort_o(dcache_req_abort[NumPorts]),
|
||||
.dcache_req_tag_o (dcache_req_tag[NumPorts]),
|
||||
.dcache_req_pma_o (dcache_req_pma[NumPorts]),
|
||||
|
||||
.dcache_rsp_valid_i(dcache_rsp_valid[NumPorts]),
|
||||
.dcache_rsp_i (dcache_rsp[NumPorts])
|
||||
);
|
||||
`else
|
||||
assign dcache_req_valid[NumPorts] = 1'b0,
|
||||
dcache_req[NumPorts] = '0,
|
||||
dcache_req_abort[NumPorts] = 1'b0,
|
||||
dcache_req_tag[NumPorts] = '0,
|
||||
dcache_req_pma[NumPorts] = '0;
|
||||
`endif
|
||||
endgenerate
|
||||
|
||||
// Snoop load port
|
||||
assign snoop_valid[0]        = dcache_req_valid[1] & dcache_req_ready[1];
assign snoop_abort[0]        = dcache_req_abort[1];
assign snoop_addr_offset[0]  = dcache_req[1].addr_offset;
assign snoop_addr_tag[0]     = dcache_req_tag[1];
assign snoop_phys_indexed[0] = dcache_req[1].phys_indexed;

//  Snoop Store/AMO port: slot 1 observes requester NumPorts-1. A request is
//  reported only in the cycle where valid and ready are both set.
assign snoop_valid[1]        = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1];
assign snoop_abort[1]        = dcache_req_abort[NumPorts-1];
assign snoop_addr_offset[1]  = dcache_req[NumPorts-1].addr_offset;
assign snoop_addr_tag[1]     = dcache_req_tag[NumPorts-1];
assign snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed;

`ifdef HPDCACHE_ENABLE_CMO
//  Snoop CMO port: slot 2 observes requester NumPorts, but only for accepted
//  requests that is_cmo_prefetch() classifies as prefetches.
assign dcache_cmo_req_is_prefetch = hpdcache_pkg::is_cmo_prefetch(
    dcache_req[NumPorts].op, dcache_req[NumPorts].size
);
assign snoop_valid[2]        = dcache_req_valid[NumPorts]
                             & dcache_req_ready[NumPorts]
                             & dcache_cmo_req_is_prefetch;
assign snoop_abort[2]        = dcache_req_abort[NumPorts];
assign snoop_addr_offset[2]  = dcache_req[NumPorts].addr_offset;
assign snoop_addr_tag[2]     = dcache_req_tag[NumPorts];
assign snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed;
`else
//  Without CMO support the third snoop slot is tied off.
assign snoop_valid[2]        = 1'b0;
assign snoop_abort[2]        = 1'b0;
assign snoop_addr_offset[2]  = '0;
assign snoop_addr_tag[2]     = '0;
assign snoop_phys_indexed[2] = 1'b0;
`endif
|
||||
|
||||
generate
  //  Per-prefetcher conversion between the 64-bit throttle configuration ports
  //  of this module and the typed throttle signals wired to the prefetcher
  //  wrapper.
  for (genvar h = 0; h < NrHwPrefetchers; h++) begin : hwpf_throttle_gen
    assign hwpf_throttle_in[h] = hwpf_stride_pkg::hwpf_stride_throttle_t'(hwpf_throttle_i[h]);
    assign hwpf_throttle_o[h]  = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]);
  end
endgenerate
|
||||
|
||||
hwpf_stride_wrapper #(
|
||||
.NUM_HW_PREFETCH(NrHwPrefetchers),
|
||||
.NUM_SNOOP_PORTS(3)
|
||||
) i_hwpf_stride_wrapper (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.hwpf_stride_base_set_i (hwpf_base_set_i),
|
||||
.hwpf_stride_base_i (hwpf_base_i),
|
||||
.hwpf_stride_base_o (hwpf_base_o),
|
||||
.hwpf_stride_param_set_i (hwpf_param_set_i),
|
||||
.hwpf_stride_param_i (hwpf_param_i),
|
||||
.hwpf_stride_param_o (hwpf_param_o),
|
||||
.hwpf_stride_throttle_set_i(hwpf_throttle_set_i),
|
||||
.hwpf_stride_throttle_i (hwpf_throttle_in),
|
||||
.hwpf_stride_throttle_o (hwpf_throttle_out),
|
||||
.hwpf_stride_status_o (hwpf_status_o),
|
||||
|
||||
.snoop_valid_i (snoop_valid),
|
||||
.snoop_abort_i (snoop_abort),
|
||||
.snoop_addr_offset_i (snoop_addr_offset),
|
||||
.snoop_addr_tag_i (snoop_addr_tag),
|
||||
.snoop_phys_indexed_i(snoop_phys_indexed),
|
||||
|
||||
.hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts + 1)),
|
||||
|
||||
.hpdcache_req_valid_o(dcache_req_valid[NumPorts+1]),
|
||||
.hpdcache_req_ready_i(dcache_req_ready[NumPorts+1]),
|
||||
.hpdcache_req_o (dcache_req[NumPorts+1]),
|
||||
.hpdcache_req_abort_o(dcache_req_abort[NumPorts+1]),
|
||||
.hpdcache_req_tag_o (dcache_req_tag[NumPorts+1]),
|
||||
.hpdcache_req_pma_o (dcache_req_pma[NumPorts+1]),
|
||||
.hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts+1]),
|
||||
.hpdcache_rsp_i (dcache_rsp[NumPorts+1])
|
||||
);
|
||||
|
||||
hpdcache #(
|
||||
.NREQUESTERS (HPDCACHE_NREQUESTERS),
|
||||
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
|
||||
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
|
||||
.hpdcache_mem_req_t (hpdcache_mem_req_t),
|
||||
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
|
||||
.hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
|
||||
.hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t)
|
||||
) i_hpdcache (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.wbuf_flush_i(dcache_flush_i),
|
||||
|
||||
.core_req_valid_i(dcache_req_valid),
|
||||
.core_req_ready_o(dcache_req_ready),
|
||||
.core_req_i (dcache_req),
|
||||
.core_req_abort_i(dcache_req_abort),
|
||||
.core_req_tag_i (dcache_req_tag),
|
||||
.core_req_pma_i (dcache_req_pma),
|
||||
|
||||
.core_rsp_valid_o(dcache_rsp_valid),
|
||||
.core_rsp_o (dcache_rsp),
|
||||
|
||||
.mem_req_miss_read_ready_i(dcache_miss_ready),
|
||||
.mem_req_miss_read_valid_o(dcache_miss_valid),
|
||||
.mem_req_miss_read_o (dcache_miss),
|
||||
|
||||
.mem_resp_miss_read_ready_o(dcache_miss_resp_ready),
|
||||
.mem_resp_miss_read_valid_i(dcache_miss_resp_valid),
|
||||
.mem_resp_miss_read_i (dcache_miss_resp),
|
||||
|
||||
.mem_req_wbuf_write_ready_i(dcache_wbuf_ready),
|
||||
.mem_req_wbuf_write_valid_o(dcache_wbuf_valid),
|
||||
.mem_req_wbuf_write_o (dcache_wbuf),
|
||||
|
||||
.mem_req_wbuf_write_data_ready_i(dcache_wbuf_data_ready),
|
||||
.mem_req_wbuf_write_data_valid_o(dcache_wbuf_data_valid),
|
||||
.mem_req_wbuf_write_data_o (dcache_wbuf_data),
|
||||
|
||||
.mem_resp_wbuf_write_ready_o(dcache_wbuf_resp_ready),
|
||||
.mem_resp_wbuf_write_valid_i(dcache_wbuf_resp_valid),
|
||||
.mem_resp_wbuf_write_i (dcache_wbuf_resp),
|
||||
|
||||
.mem_req_uc_read_ready_i(dcache_uc_read_ready),
|
||||
.mem_req_uc_read_valid_o(dcache_uc_read_valid),
|
||||
.mem_req_uc_read_o (dcache_uc_read),
|
||||
|
||||
.mem_resp_uc_read_ready_o(dcache_uc_read_resp_ready),
|
||||
.mem_resp_uc_read_valid_i(dcache_uc_read_resp_valid),
|
||||
.mem_resp_uc_read_i (dcache_uc_read_resp),
|
||||
|
||||
.mem_req_uc_write_ready_i(dcache_uc_write_ready),
|
||||
.mem_req_uc_write_valid_o(dcache_uc_write_valid),
|
||||
.mem_req_uc_write_o (dcache_uc_write),
|
||||
|
||||
.mem_req_uc_write_data_ready_i(dcache_uc_write_data_ready),
|
||||
.mem_req_uc_write_data_valid_o(dcache_uc_write_data_valid),
|
||||
.mem_req_uc_write_data_o (dcache_uc_write_data),
|
||||
|
||||
.mem_resp_uc_write_ready_o(dcache_uc_write_resp_ready),
|
||||
.mem_resp_uc_write_valid_i(dcache_uc_write_resp_valid),
|
||||
.mem_resp_uc_write_i (dcache_uc_write_resp),
|
||||
|
||||
.evt_cache_write_miss_o(dcache_write_miss),
|
||||
.evt_cache_read_miss_o (dcache_read_miss),
|
||||
.evt_uncached_req_o ( /* unused */),
|
||||
.evt_cmo_req_o ( /* unused */),
|
||||
.evt_write_req_o ( /* unused */),
|
||||
.evt_read_req_o ( /* unused */),
|
||||
.evt_prefetch_req_o ( /* unused */),
|
||||
.evt_req_on_hold_o ( /* unused */),
|
||||
.evt_rtab_rollback_o ( /* unused */),
|
||||
.evt_stall_refill_o ( /* unused */),
|
||||
.evt_stall_o ( /* unused */),
|
||||
|
||||
.wbuf_empty_o(wbuffer_empty_o),
|
||||
|
||||
.cfg_enable_i (dcache_enable_i),
|
||||
.cfg_wbuf_threshold_i (4'd2),
|
||||
.cfg_wbuf_reset_timecnt_on_write_i (1'b1),
|
||||
.cfg_wbuf_sequential_waw_i (1'b0),
|
||||
.cfg_wbuf_inhibit_write_coalescing_i(1'b0),
|
||||
.cfg_prefetch_updt_plru_i (1'b1),
|
||||
.cfg_error_on_cacheable_amo_i (1'b0),
|
||||
.cfg_rtab_single_entry_i (1'b0)
|
||||
);
|
||||
|
||||
//  Status outputs: only the read-miss event drives the miss output, and the
//  write-buffer "not non-idempotent" flag mirrors the write-buffer-empty flag.
assign dcache_miss_o    = dcache_read_miss;
assign wbuffer_not_ni_o = wbuffer_empty_o;

//  Flush acknowledge: set for one cycle after a cycle in which the flush
//  request is high and the acknowledge is low; cleared by reset.
always_ff @(posedge clk_i or negedge rst_ni) begin : dcache_flush_ff
  if (!rst_ni) begin
    dcache_flush_ack_o <= 1'b0;
  end else begin
    dcache_flush_ack_o <= dcache_flush_i & ~dcache_flush_ack_o;
  end
end
|
||||
|
||||
// }}}
|
||||
|
||||
// AXI arbiter instantiation
|
||||
// {{{
|
||||
typedef logic [CVA6Cfg.AxiAddrWidth-1:0] axi_addr_t;
|
||||
typedef logic [CVA6Cfg.AxiDataWidth-1:0] axi_data_t;
|
||||
typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] axi_strb_t;
|
||||
typedef logic [CVA6Cfg.AxiIdWidth-1:0] axi_id_t;
|
||||
typedef logic [CVA6Cfg.AxiUserWidth-1:0] axi_user_t;
|
||||
`AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, axi_addr_t, axi_id_t, axi_user_t)
|
||||
`AXI_TYPEDEF_W_CHAN_T(axi_w_chan_t, axi_data_t, axi_strb_t, axi_user_t)
|
||||
`AXI_TYPEDEF_B_CHAN_T(axi_b_chan_t, axi_id_t, axi_user_t)
|
||||
`AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, axi_addr_t, axi_id_t, axi_user_t)
|
||||
`AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t)
|
||||
|
||||
cva6_hpdcache_subsystem_axi_arbiter #(
|
||||
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
|
||||
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
|
||||
.hpdcache_mem_req_t (hpdcache_mem_req_t),
|
||||
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
|
||||
.hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
|
||||
.hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
|
||||
|
||||
.AxiAddrWidth (CVA6Cfg.AxiAddrWidth),
|
||||
.AxiDataWidth (CVA6Cfg.AxiDataWidth),
|
||||
.AxiIdWidth (CVA6Cfg.AxiIdWidth),
|
||||
.AxiUserWidth (CVA6Cfg.AxiUserWidth),
|
||||
.axi_ar_chan_t(axi_ar_chan_t),
|
||||
.axi_aw_chan_t(axi_aw_chan_t),
|
||||
.axi_w_chan_t (axi_w_chan_t),
|
||||
.axi_req_t (noc_req_t),
|
||||
.axi_rsp_t (noc_resp_t)
|
||||
) i_axi_arbiter (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.icache_miss_valid_i(icache_miss_valid),
|
||||
.icache_miss_ready_o(icache_miss_ready),
|
||||
.icache_miss_i (icache_miss),
|
||||
.icache_miss_id_i (hpdcache_mem_id_t'(ICACHE_RDTXID)),
|
||||
|
||||
.icache_miss_resp_valid_o(icache_miss_resp_valid),
|
||||
.icache_miss_resp_o (icache_miss_resp),
|
||||
|
||||
.dcache_miss_ready_o(dcache_miss_ready),
|
||||
.dcache_miss_valid_i(dcache_miss_valid),
|
||||
.dcache_miss_i (dcache_miss),
|
||||
|
||||
.dcache_miss_resp_ready_i(dcache_miss_resp_ready),
|
||||
.dcache_miss_resp_valid_o(dcache_miss_resp_valid),
|
||||
.dcache_miss_resp_o (dcache_miss_resp),
|
||||
|
||||
.dcache_wbuf_ready_o(dcache_wbuf_ready),
|
||||
.dcache_wbuf_valid_i(dcache_wbuf_valid),
|
||||
.dcache_wbuf_i (dcache_wbuf),
|
||||
|
||||
.dcache_wbuf_data_ready_o(dcache_wbuf_data_ready),
|
||||
.dcache_wbuf_data_valid_i(dcache_wbuf_data_valid),
|
||||
.dcache_wbuf_data_i (dcache_wbuf_data),
|
||||
|
||||
.dcache_wbuf_resp_ready_i(dcache_wbuf_resp_ready),
|
||||
.dcache_wbuf_resp_valid_o(dcache_wbuf_resp_valid),
|
||||
.dcache_wbuf_resp_o (dcache_wbuf_resp),
|
||||
|
||||
.dcache_uc_read_ready_o(dcache_uc_read_ready),
|
||||
.dcache_uc_read_valid_i(dcache_uc_read_valid),
|
||||
.dcache_uc_read_i (dcache_uc_read),
|
||||
.dcache_uc_read_id_i ('1),
|
||||
|
||||
.dcache_uc_read_resp_ready_i(dcache_uc_read_resp_ready),
|
||||
.dcache_uc_read_resp_valid_o(dcache_uc_read_resp_valid),
|
||||
.dcache_uc_read_resp_o (dcache_uc_read_resp),
|
||||
|
||||
.dcache_uc_write_ready_o(dcache_uc_write_ready),
|
||||
.dcache_uc_write_valid_i(dcache_uc_write_valid),
|
||||
.dcache_uc_write_i (dcache_uc_write),
|
||||
.dcache_uc_write_id_i ('1),
|
||||
|
||||
.dcache_uc_write_data_ready_o(dcache_uc_write_data_ready),
|
||||
.dcache_uc_write_data_valid_i(dcache_uc_write_data_valid),
|
||||
.dcache_uc_write_data_i (dcache_uc_write_data),
|
||||
|
||||
.dcache_uc_write_resp_ready_i(dcache_uc_write_resp_ready),
|
||||
.dcache_uc_write_resp_valid_o(dcache_uc_write_resp_valid),
|
||||
.dcache_uc_write_resp_o (dcache_uc_write_resp),
|
||||
|
||||
.axi_req_o (noc_req_o),
|
||||
.axi_resp_i(noc_resp_i)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// {{{
|
||||
// pragma translate_off
|
||||
initial
|
||||
assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS))
|
||||
else $fatal(1, "HPDCACHE_REQ_SRC_ID_WIDTH is not wide enough");
|
||||
|
||||
// Warn when the I$ flags fetch data as valid while that data reduces to X.
// The reduction-OR is X only if no data bit is 1 and at least one bit is X/Z,
// so this is a coarse check rather than a per-bit one.
// $warning takes only a $display-style argument list (it has no finish number,
// unlike $fatal), so the format string must be the first argument.
// NOTE(review): the message tag says "l1 dcache" although the checked signal is
// the I$ response; the tag is kept unchanged in case log filters match on it.
a_invalid_instruction_fetch :
assert property (
  @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
else
  $warning(
      "[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
      icache_dreq_o.vaddr,
      icache_dreq_o.data
  );
|
||||
|
||||
// Warn when request port 2 issues a data request with at least one byte enable
// set while the write data reduces to X.
// The reduction-OR is X only if no data bit is 1 and at least one bit is X/Z,
// so this is a coarse check rather than a per-bit one.
// $warning takes only a $display-style argument list (it has no finish number,
// unlike $fatal), so the format string must be the first argument.
a_invalid_write_data :
assert property (
  @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX)
else
  $warning(
      "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
      // reported address is the concatenation of the tag and index fields
      {dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index},
      dcache_req_ports_i[2].data_be,
      dcache_req_ports_i[2].data_wdata
  );
|
||||
|
||||
// One read-data check per request port j in {0, 1}: warn when the port returns
// valid read data that reduces to X and the request was not killed.
// The reduction-OR is X only if no data bit is 1 and at least one bit is X/Z,
// so this is a coarse check rather than a per-bit one.
// $warning takes only a $display-style argument list (it has no finish number,
// unlike $fatal), so the format string must be the first argument.
for (genvar j = 0; j < 2; j++) begin : gen_assertion
  a_invalid_read_data :
  assert property (
    @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
  else
    $warning(
        "[l1 dcache] reading invalid data on port %01d: data=%016X",
        j,
        dcache_req_ports_o[j].data_rdata
    );
end
|
||||
// pragma translate_on
|
||||
// }}}
|
||||
|
||||
endmodule : cva6_hpdcache_subsystem
|
||||
|
|
@ -0,0 +1,586 @@
|
|||
// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
|
||||
// Alternatives (CEA)
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Authors: Cesar Fuguet
|
||||
// Date: February, 2023
|
||||
// Description: AXI arbiter for the CVA6 cache subsystem integrating standard
|
||||
// CVA6's instruction cache and the Core-V High-Performance
|
||||
// L1 Dcache (CV-HPDcache).
|
||||
|
||||
module cva6_hpdcache_subsystem_axi_arbiter
|
||||
// Parameters
|
||||
// {{{
|
||||
#(
|
||||
parameter int HPDcacheMemIdWidth = 8,
|
||||
parameter int HPDcacheMemDataWidth = 512,
|
||||
parameter type hpdcache_mem_req_t = logic,
|
||||
parameter type hpdcache_mem_req_w_t = logic,
|
||||
parameter type hpdcache_mem_resp_r_t = logic,
|
||||
parameter type hpdcache_mem_resp_w_t = logic,
|
||||
|
||||
parameter int unsigned AxiAddrWidth = 1,
|
||||
parameter int unsigned AxiDataWidth = 1,
|
||||
parameter int unsigned AxiIdWidth = 1,
|
||||
parameter int unsigned AxiUserWidth = 1,
|
||||
parameter type axi_ar_chan_t = logic,
|
||||
parameter type axi_aw_chan_t = logic,
|
||||
parameter type axi_w_chan_t = logic,
|
||||
parameter type axi_req_t = logic,
|
||||
parameter type axi_rsp_t = logic,
|
||||
|
||||
localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
|
||||
)
|
||||
// }}}
|
||||
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// Interfaces from/to I$
|
||||
// {{{
|
||||
input logic icache_miss_valid_i,
|
||||
output logic icache_miss_ready_o,
|
||||
input wt_cache_pkg::icache_req_t icache_miss_i,
|
||||
input hpdcache_mem_id_t icache_miss_id_i,
|
||||
|
||||
output logic icache_miss_resp_valid_o,
|
||||
output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o,
|
||||
// }}}
|
||||
|
||||
// Interfaces from/to D$
|
||||
// {{{
|
||||
output logic dcache_miss_ready_o,
|
||||
input logic dcache_miss_valid_i,
|
||||
input hpdcache_mem_req_t dcache_miss_i,
|
||||
|
||||
input logic dcache_miss_resp_ready_i,
|
||||
output logic dcache_miss_resp_valid_o,
|
||||
output hpdcache_mem_resp_r_t dcache_miss_resp_o,
|
||||
|
||||
// Write-buffer write interface
|
||||
output logic dcache_wbuf_ready_o,
|
||||
input logic dcache_wbuf_valid_i,
|
||||
input hpdcache_mem_req_t dcache_wbuf_i,
|
||||
|
||||
output logic dcache_wbuf_data_ready_o,
|
||||
input logic dcache_wbuf_data_valid_i,
|
||||
input hpdcache_mem_req_w_t dcache_wbuf_data_i,
|
||||
|
||||
input logic dcache_wbuf_resp_ready_i,
|
||||
output logic dcache_wbuf_resp_valid_o,
|
||||
output hpdcache_mem_resp_w_t dcache_wbuf_resp_o,
|
||||
|
||||
// Uncached read interface
|
||||
output logic dcache_uc_read_ready_o,
|
||||
input logic dcache_uc_read_valid_i,
|
||||
input hpdcache_mem_req_t dcache_uc_read_i,
|
||||
input hpdcache_mem_id_t dcache_uc_read_id_i,
|
||||
|
||||
input logic dcache_uc_read_resp_ready_i,
|
||||
output logic dcache_uc_read_resp_valid_o,
|
||||
output hpdcache_mem_resp_r_t dcache_uc_read_resp_o,
|
||||
|
||||
// Uncached write interface
|
||||
output logic dcache_uc_write_ready_o,
|
||||
input logic dcache_uc_write_valid_i,
|
||||
input hpdcache_mem_req_t dcache_uc_write_i,
|
||||
input hpdcache_mem_id_t dcache_uc_write_id_i,
|
||||
|
||||
output logic dcache_uc_write_data_ready_o,
|
||||
input logic dcache_uc_write_data_valid_i,
|
||||
input hpdcache_mem_req_w_t dcache_uc_write_data_i,
|
||||
|
||||
input logic dcache_uc_write_resp_ready_i,
|
||||
output logic dcache_uc_write_resp_valid_o,
|
||||
output hpdcache_mem_resp_w_t dcache_uc_write_resp_o,
|
||||
// }}}
|
||||
|
||||
// AXI port to upstream memory/peripherals
|
||||
// {{{
|
||||
output axi_req_t axi_req_o,
|
||||
input axi_rsp_t axi_resp_i
|
||||
// }}}
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Internal type definitions
|
||||
// {{{
|
||||
typedef struct packed {
|
||||
logic [AxiIdWidth-1:0] id;
|
||||
logic [AxiDataWidth-1:0] data;
|
||||
axi_pkg::resp_t resp;
|
||||
logic last;
|
||||
logic [AxiUserWidth-1:0] user;
|
||||
} axi_r_chan_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [AxiIdWidth-1:0] id;
|
||||
axi_pkg::resp_t resp;
|
||||
logic [AxiUserWidth-1:0] user;
|
||||
} axi_b_chan_t;
|
||||
|
||||
localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth);
|
||||
typedef hpdcache_mem_id_t [MEM_RESP_RT_DEPTH-1:0] mem_resp_rt_t;
|
||||
typedef logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] icache_resp_data_t;
|
||||
// }}}
|
||||
|
||||
// Adapt the I$ interface to the HPDcache memory interface
|
||||
// {{{
|
||||
localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH / 64;
|
||||
localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS);
|
||||
localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);
|
||||
localparam int ICACHE_WORD_SIZE = 3;
|
||||
localparam int ICACHE_MEM_REQ_CL_LEN =
|
||||
(ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1)/HPDcacheMemDataWidth;
|
||||
localparam int ICACHE_MEM_REQ_CL_SIZE =
|
||||
(HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ?
|
||||
$clog2(
|
||||
HPDcacheMemDataWidth / 8
|
||||
) : ICACHE_CL_SIZE;
|
||||
|
||||
// I$ request
|
||||
hpdcache_mem_req_t icache_miss_req_wdata;
|
||||
logic icache_miss_req_w, icache_miss_req_wok;
|
||||
|
||||
hpdcache_mem_req_t icache_miss_req_rdata;
|
||||
logic icache_miss_req_r, icache_miss_req_rok;
|
||||
|
||||
logic icache_miss_pending_q;
|
||||
|
||||
// This FIFO has two functionalities:
|
||||
// - Stabilize the ready-valid protocol. The ICACHE can abort a valid
|
||||
// transaction without receiving the corresponding ready signal. This
|
||||
// behavior is not supported by AXI.
|
||||
// - Cut a possible long timing path.
|
||||
hpdcache_fifo_reg #(
|
||||
.FIFO_DEPTH (1),
|
||||
.fifo_data_t(hpdcache_mem_req_t)
|
||||
) i_icache_miss_req_fifo (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.w_i (icache_miss_req_w),
|
||||
.wok_o (icache_miss_req_wok),
|
||||
.wdata_i(icache_miss_req_wdata),
|
||||
|
||||
.r_i (icache_miss_req_r),
|
||||
.rok_o (icache_miss_req_rok),
|
||||
.rdata_o(icache_miss_req_rdata)
|
||||
);
|
||||
|
||||
assign icache_miss_req_w = icache_miss_valid_i, icache_miss_ready_o = icache_miss_req_wok;
|
||||
|
||||
assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr,
|
||||
icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1,
|
||||
icache_miss_req_wdata.mem_req_size = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE,
|
||||
icache_miss_req_wdata.mem_req_id = icache_miss_i.tid,
|
||||
icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ,
|
||||
icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0),
|
||||
icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc;
|
||||
|
||||
|
||||
// I$ response
|
||||
logic icache_miss_resp_w, icache_miss_resp_wok;
|
||||
hpdcache_mem_resp_r_t icache_miss_resp_wdata;
|
||||
|
||||
logic icache_miss_resp_data_w, icache_miss_resp_data_wok;
|
||||
logic icache_miss_resp_data_r, icache_miss_resp_data_rok;
|
||||
icache_resp_data_t icache_miss_resp_data_rdata;
|
||||
|
||||
logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok;
|
||||
logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok;
|
||||
hpdcache_mem_id_t icache_miss_resp_meta_id;
|
||||
|
||||
icache_resp_data_t icache_miss_rdata;
|
||||
|
||||
generate
|
||||
if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin
|
||||
hpdcache_fifo_reg #(
|
||||
.FIFO_DEPTH (1),
|
||||
.fifo_data_t(hpdcache_mem_id_t)
|
||||
) i_icache_refill_meta_fifo (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.w_i (icache_miss_resp_meta_w),
|
||||
.wok_o (icache_miss_resp_meta_wok),
|
||||
.wdata_i(icache_miss_resp_wdata.mem_resp_r_id),
|
||||
|
||||
.r_i (icache_miss_resp_meta_r),
|
||||
.rok_o (icache_miss_resp_meta_rok),
|
||||
.rdata_o(icache_miss_resp_meta_id)
|
||||
);
|
||||
|
||||
hpdcache_data_upsize #(
|
||||
.WR_WIDTH(HPDcacheMemDataWidth),
|
||||
.RD_WIDTH(ariane_pkg::ICACHE_LINE_WIDTH),
|
||||
.DEPTH (1)
|
||||
) i_icache_hpdcache_data_upsize (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.w_i (icache_miss_resp_data_w),
|
||||
.wlast_i(icache_miss_resp_wdata.mem_resp_r_last),
|
||||
.wok_o (icache_miss_resp_data_wok),
|
||||
.wdata_i(icache_miss_resp_wdata.mem_resp_r_data),
|
||||
|
||||
.r_i (icache_miss_resp_data_r),
|
||||
.rok_o (icache_miss_resp_data_rok),
|
||||
.rdata_o(icache_miss_resp_data_rdata)
|
||||
);
|
||||
|
||||
assign icache_miss_resp_meta_r = 1'b1, icache_miss_resp_data_r = 1'b1;
|
||||
|
||||
assign icache_miss_resp_meta_w = icache_miss_resp_w & icache_miss_resp_wdata.mem_resp_r_last;
|
||||
|
||||
assign icache_miss_resp_data_w = icache_miss_resp_w;
|
||||
|
||||
assign icache_miss_resp_wok = icache_miss_resp_data_wok & (
|
||||
icache_miss_resp_meta_wok | ~icache_miss_resp_wdata.mem_resp_r_last);
|
||||
|
||||
assign icache_miss_rdata = icache_miss_resp_data_rdata;
|
||||
|
||||
end else begin
|
||||
assign icache_miss_resp_data_rok = icache_miss_resp_w;
|
||||
assign icache_miss_resp_meta_rok = icache_miss_resp_w;
|
||||
assign icache_miss_resp_wok = 1'b1;
|
||||
assign icache_miss_resp_meta_id = icache_miss_resp_wdata.mem_resp_r_id;
|
||||
assign icache_miss_resp_data_rdata = icache_miss_resp_wdata.mem_resp_r_data;
|
||||
|
||||
// In the case of uncacheable accesses, the Icache expects the data to be right-aligned
|
||||
// Selects the data word returned to the I$ (icache_miss_rdata).
//  - Uncacheable request: pick the addressed 64-bit word out of the memory
//    response and zero-extend it to the full I$ line width.
//  - Cacheable request: forward the memory response data unchanged.
// The request attributes are read from icache_miss_req_rdata, i.e. the entry
// currently visible at the output of the I$ miss request FIFO.
always_comb begin : icache_miss_resp_data_comb
|
||||
if (!icache_miss_req_rdata.mem_req_cacheable) begin
|
||||
automatic logic [ICACHE_CL_WORD_INDEX - 1:0] icache_miss_word_index;
|
||||
automatic logic [63:0] icache_miss_word;
|
||||
// address bits [3 +: ICACHE_CL_WORD_INDEX] index the 64-bit word within the line
icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3+:ICACHE_CL_WORD_INDEX];
|
||||
// extract that 64-bit word from the response data
icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64+:64];
|
||||
// place the word in the low 64 bits and clear all upper bits
icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH - 64{1'b0}}, icache_miss_word};
|
||||
end else begin
|
||||
icache_miss_rdata = icache_miss_resp_data_rdata;
|
||||
end
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
assign icache_miss_resp_valid_o = icache_miss_resp_meta_rok,
|
||||
icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK,
|
||||
icache_miss_resp_o.user = '0,
|
||||
icache_miss_resp_o.inv = '0,
|
||||
icache_miss_resp_o.tid = icache_miss_resp_meta_id,
|
||||
icache_miss_resp_o.data = icache_miss_rdata;
|
||||
|
||||
// consume the Icache miss on the arrival of the response. The request
|
||||
// metadata is decoded to forward the correct word in case of uncacheable
|
||||
// Icache access
|
||||
assign icache_miss_req_r = icache_miss_resp_meta_rok;
|
||||
// }}}
|
||||
|
||||
// Read request arbiter
|
||||
// {{{
|
||||
logic mem_req_read_ready [2:0];
|
||||
logic mem_req_read_valid [2:0];
|
||||
hpdcache_mem_req_t mem_req_read [2:0];
|
||||
|
||||
logic mem_req_read_ready_arb;
|
||||
logic mem_req_read_valid_arb;
|
||||
hpdcache_mem_req_t mem_req_read_arb;
|
||||
|
||||
assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q,
|
||||
mem_req_read[0] = icache_miss_req_rdata;
|
||||
|
||||
assign dcache_miss_ready_o = mem_req_read_ready[1],
|
||||
mem_req_read_valid[1] = dcache_miss_valid_i,
|
||||
mem_req_read[1] = dcache_miss_i;
|
||||
|
||||
assign dcache_uc_read_ready_o = mem_req_read_ready[2],
|
||||
mem_req_read_valid[2] = dcache_uc_read_valid_i,
|
||||
mem_req_read[2] = dcache_uc_read_i;
|
||||
|
||||
hpdcache_mem_req_read_arbiter #(
|
||||
.N (3),
|
||||
.hpdcache_mem_req_t(hpdcache_mem_req_t)
|
||||
) i_mem_req_read_arbiter (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.mem_req_read_ready_o(mem_req_read_ready),
|
||||
.mem_req_read_valid_i(mem_req_read_valid),
|
||||
.mem_req_read_i (mem_req_read),
|
||||
|
||||
.mem_req_read_ready_i(mem_req_read_ready_arb),
|
||||
.mem_req_read_valid_o(mem_req_read_valid_arb),
|
||||
.mem_req_read_o (mem_req_read_arb)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Read response demultiplexor
|
||||
// {{{
|
||||
logic mem_resp_read_ready;
|
||||
logic mem_resp_read_valid;
|
||||
hpdcache_mem_resp_r_t mem_resp_read;
|
||||
|
||||
logic mem_resp_read_ready_arb[2:0];
|
||||
logic mem_resp_read_valid_arb[2:0];
|
||||
hpdcache_mem_resp_r_t mem_resp_read_arb [2:0];
|
||||
|
||||
mem_resp_rt_t mem_resp_read_rt;
|
||||
|
||||
// Build the read-response routing table: one entry per possible memory
// transaction ID, holding the index of the demux output that receives responses
// carrying that ID.
//   0 -> I$ miss response path      (ID equals icache_miss_id_i)
//   2 -> D$ uncached read response  (ID equals dcache_uc_read_id_i)
//   1 -> D$ miss response           (every other ID)
// The I$ ID is tested first, so it wins if both IDs are equal.
always_comb begin
  for (int id = 0; id < MEM_RESP_RT_DEPTH; id++) begin
    if (id == int'(icache_miss_id_i)) begin
      mem_resp_read_rt[id] = 0;
    end else if (id == int'(dcache_uc_read_id_i)) begin
      mem_resp_read_rt[id] = 2;
    end else begin
      mem_resp_read_rt[id] = 1;
    end
  end
end
|
||||
|
||||
hpdcache_mem_resp_demux #(
|
||||
.N (3),
|
||||
.resp_t (hpdcache_mem_resp_r_t),
|
||||
.resp_id_t(hpdcache_mem_id_t)
|
||||
) i_mem_resp_read_demux (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.mem_resp_ready_o(mem_resp_read_ready),
|
||||
.mem_resp_valid_i(mem_resp_read_valid),
|
||||
.mem_resp_id_i (mem_resp_read.mem_resp_r_id),
|
||||
.mem_resp_i (mem_resp_read),
|
||||
|
||||
.mem_resp_ready_i(mem_resp_read_ready_arb),
|
||||
.mem_resp_valid_o(mem_resp_read_valid_arb),
|
||||
.mem_resp_o (mem_resp_read_arb),
|
||||
|
||||
.mem_resp_rt_i(mem_resp_read_rt)
|
||||
);
|
||||
|
||||
assign icache_miss_resp_w = mem_resp_read_valid_arb[0],
|
||||
icache_miss_resp_wdata = mem_resp_read_arb[0],
|
||||
mem_resp_read_ready_arb[0] = icache_miss_resp_wok;
|
||||
|
||||
assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1],
|
||||
dcache_miss_resp_o = mem_resp_read_arb[1],
|
||||
mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i;
|
||||
|
||||
assign dcache_uc_read_resp_valid_o = mem_resp_read_valid_arb[2],
|
||||
dcache_uc_read_resp_o = mem_resp_read_arb[2],
|
||||
mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i;
|
||||
// }}}
|
||||
|
||||
// Write request arbiter
|
||||
// {{{
|
||||
logic mem_req_write_ready [1:0];
|
||||
logic mem_req_write_valid [1:0];
|
||||
hpdcache_mem_req_t mem_req_write [1:0];
|
||||
|
||||
logic mem_req_write_data_ready [1:0];
|
||||
logic mem_req_write_data_valid [1:0];
|
||||
hpdcache_mem_req_w_t mem_req_write_data [1:0];
|
||||
|
||||
logic mem_req_write_ready_arb;
|
||||
logic mem_req_write_valid_arb;
|
||||
hpdcache_mem_req_t mem_req_write_arb;
|
||||
|
||||
logic mem_req_write_data_ready_arb;
|
||||
logic mem_req_write_data_valid_arb;
|
||||
hpdcache_mem_req_w_t mem_req_write_data_arb;
|
||||
|
||||
assign dcache_wbuf_ready_o = mem_req_write_ready[0],
|
||||
mem_req_write_valid[0] = dcache_wbuf_valid_i,
|
||||
mem_req_write[0] = dcache_wbuf_i;
|
||||
|
||||
assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0],
|
||||
mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i,
|
||||
mem_req_write_data[0] = dcache_wbuf_data_i;
|
||||
|
||||
assign dcache_uc_write_ready_o = mem_req_write_ready[1],
|
||||
mem_req_write_valid[1] = dcache_uc_write_valid_i,
|
||||
mem_req_write[1] = dcache_uc_write_i;
|
||||
|
||||
assign dcache_uc_write_data_ready_o = mem_req_write_data_ready[1],
|
||||
mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i,
|
||||
mem_req_write_data[1] = dcache_uc_write_data_i;
|
||||
|
||||
hpdcache_mem_req_write_arbiter #(
|
||||
.N (2),
|
||||
.hpdcache_mem_req_t (hpdcache_mem_req_t),
|
||||
.hpdcache_mem_req_w_t(hpdcache_mem_req_w_t)
|
||||
) i_mem_req_write_arbiter (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.mem_req_write_ready_o(mem_req_write_ready),
|
||||
.mem_req_write_valid_i(mem_req_write_valid),
|
||||
.mem_req_write_i (mem_req_write),
|
||||
|
||||
.mem_req_write_data_ready_o(mem_req_write_data_ready),
|
||||
.mem_req_write_data_valid_i(mem_req_write_data_valid),
|
||||
.mem_req_write_data_i (mem_req_write_data),
|
||||
|
||||
.mem_req_write_ready_i(mem_req_write_ready_arb),
|
||||
.mem_req_write_valid_o(mem_req_write_valid_arb),
|
||||
.mem_req_write_o (mem_req_write_arb),
|
||||
|
||||
.mem_req_write_data_ready_i(mem_req_write_data_ready_arb),
|
||||
.mem_req_write_data_valid_o(mem_req_write_data_valid_arb),
|
||||
.mem_req_write_data_o (mem_req_write_data_arb)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Write response demultiplexor
|
||||
// {{{
|
||||
logic mem_resp_write_ready;
|
||||
logic mem_resp_write_valid;
|
||||
hpdcache_mem_resp_w_t mem_resp_write;
|
||||
|
||||
logic mem_resp_write_ready_arb[1:0];
|
||||
logic mem_resp_write_valid_arb[1:0];
|
||||
hpdcache_mem_resp_w_t mem_resp_write_arb [1:0];
|
||||
|
||||
mem_resp_rt_t mem_resp_write_rt;
|
||||
|
||||
// Build the write-response routing table: one entry per possible memory
// transaction ID, holding the index of the demux output that receives responses
// carrying that ID.
//   1 -> D$ uncached write response (ID equals dcache_uc_write_id_i)
//   0 -> D$ write-buffer response   (every other ID)
always_comb begin
  for (int id = 0; id < MEM_RESP_RT_DEPTH; id++) begin
    if (id == int'(dcache_uc_write_id_i)) begin
      mem_resp_write_rt[id] = 1;
    end else begin
      mem_resp_write_rt[id] = 0;
    end
  end
end
|
||||
|
||||
hpdcache_mem_resp_demux #(
|
||||
.N (2),
|
||||
.resp_t (hpdcache_mem_resp_w_t),
|
||||
.resp_id_t(hpdcache_mem_id_t)
|
||||
) i_hpdcache_mem_resp_write_demux (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.mem_resp_ready_o(mem_resp_write_ready),
|
||||
.mem_resp_valid_i(mem_resp_write_valid),
|
||||
.mem_resp_id_i (mem_resp_write.mem_resp_w_id),
|
||||
.mem_resp_i (mem_resp_write),
|
||||
|
||||
.mem_resp_ready_i(mem_resp_write_ready_arb),
|
||||
.mem_resp_valid_o(mem_resp_write_valid_arb),
|
||||
.mem_resp_o (mem_resp_write_arb),
|
||||
|
||||
.mem_resp_rt_i(mem_resp_write_rt)
|
||||
);
|
||||
|
||||
assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0],
|
||||
dcache_wbuf_resp_o = mem_resp_write_arb[0],
|
||||
mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i;
|
||||
|
||||
assign dcache_uc_write_resp_valid_o = mem_resp_write_valid_arb[1],
|
||||
dcache_uc_write_resp_o = mem_resp_write_arb[1],
|
||||
mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i;
|
||||
// }}}
|
||||
|
||||
// I$ miss pending
|
||||
// {{{
|
||||
// Tracks whether the I$ miss held in the request FIFO has already been accepted
// by the read arbiter and is still waiting to be consumed.
// While the flag is set, mem_req_read_valid[0] is masked, so the same FIFO entry
// is not presented to the arbiter a second time.
always_ff @(posedge clk_i or negedge rst_ni) begin : icache_miss_pending_ff
|
||||
if (!rst_ni) begin
|
||||
// asynchronous active-low reset: no miss pending
icache_miss_pending_q <= 1'b0;
|
||||
end else begin
|
||||
// set: a request is available at the FIFO output, the arbiter is ready on
// port 0 and no miss is pending yet
icache_miss_pending_q <= ( (icache_miss_req_rok & mem_req_read_ready[0]) & ~icache_miss_pending_q) |
|
||||
// hold: stay pending until the request FIFO entry is read (r and rok both high)
(~(icache_miss_req_r & icache_miss_req_rok) & icache_miss_pending_q);
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// AXI adapters
|
||||
// {{{
|
||||
axi_req_t axi_req;
|
||||
axi_rsp_t axi_resp;
|
||||
|
||||
hpdcache_mem_to_axi_write #(
|
||||
.hpdcache_mem_req_t (hpdcache_mem_req_t),
|
||||
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
|
||||
.hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
|
||||
.aw_chan_t (axi_aw_chan_t),
|
||||
.w_chan_t (axi_w_chan_t),
|
||||
.b_chan_t (axi_b_chan_t)
|
||||
) i_hpdcache_mem_to_axi_write (
|
||||
.req_ready_o(mem_req_write_ready_arb),
|
||||
.req_valid_i(mem_req_write_valid_arb),
|
||||
.req_i (mem_req_write_arb),
|
||||
|
||||
.req_data_ready_o(mem_req_write_data_ready_arb),
|
||||
.req_data_valid_i(mem_req_write_data_valid_arb),
|
||||
.req_data_i (mem_req_write_data_arb),
|
||||
|
||||
.resp_ready_i(mem_resp_write_ready),
|
||||
.resp_valid_o(mem_resp_write_valid),
|
||||
.resp_o (mem_resp_write),
|
||||
|
||||
.axi_aw_valid_o(axi_req.aw_valid),
|
||||
.axi_aw_o (axi_req.aw),
|
||||
.axi_aw_ready_i(axi_resp.aw_ready),
|
||||
|
||||
.axi_w_valid_o(axi_req.w_valid),
|
||||
.axi_w_o (axi_req.w),
|
||||
.axi_w_ready_i(axi_resp.w_ready),
|
||||
|
||||
.axi_b_valid_i(axi_resp.b_valid),
|
||||
.axi_b_i (axi_resp.b),
|
||||
.axi_b_ready_o(axi_req.b_ready)
|
||||
);
|
||||
|
||||
hpdcache_mem_to_axi_read #(
|
||||
.hpdcache_mem_req_t (hpdcache_mem_req_t),
|
||||
.hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
|
||||
.ar_chan_t (axi_ar_chan_t),
|
||||
.r_chan_t (axi_r_chan_t)
|
||||
) i_hpdcache_mem_to_axi_read (
|
||||
.req_ready_o(mem_req_read_ready_arb),
|
||||
.req_valid_i(mem_req_read_valid_arb),
|
||||
.req_i (mem_req_read_arb),
|
||||
|
||||
.resp_ready_i(mem_resp_read_ready),
|
||||
.resp_valid_o(mem_resp_read_valid),
|
||||
.resp_o (mem_resp_read),
|
||||
|
||||
.axi_ar_valid_o(axi_req.ar_valid),
|
||||
.axi_ar_o (axi_req.ar),
|
||||
.axi_ar_ready_i(axi_resp.ar_ready),
|
||||
|
||||
.axi_r_valid_i(axi_resp.r_valid),
|
||||
.axi_r_i (axi_resp.r),
|
||||
.axi_r_ready_o(axi_req.r_ready)
|
||||
);
|
||||
|
||||
assign axi_req_o = axi_req;
|
||||
assign axi_resp = axi_resp_i;
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// {{{
|
||||
// pragma translate_off
|
||||
// Parameter sanity checks, evaluated once at the start of simulation.
// $fatal takes the finish number as its first argument and the message after
// it; the finish number 1 matches the $fatal call in cva6_hpdcache_subsystem.

// The memory interface ID must fit in the AXI ID field.
initial
  assert (HPDcacheMemIdWidth <= AxiIdWidth)
  else $fatal(1, "HPDcacheMemIdWidth shall be less or equal to AxiIdWidth");

// Read IDs: MSHR set + way bits, plus one extra bit.
initial
  assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1))
  else
    $fatal(
        1,
        "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses"
    );

// Write IDs: write-buffer directory pointer bits, plus one extra bit.
initial
  assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1))
  else
    $fatal(
        1,
        "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes"
    );

// The memory data bus must not be wider than an I$ line.
initial
  assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH)
  else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Icache line");

// The memory data bus must not be wider than a D$ line.
initial
  assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH)
  else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line");
|
||||
// pragma translate_on
|
||||
// }}}
|
||||
|
||||
endmodule : cva6_hpdcache_subsystem_axi_arbiter
|
||||
|
|
@ -0,0 +1,584 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 15.08.2018
|
||||
// Description: Instruction cache that is compatible with openpiton.
|
||||
//
|
||||
// Some notes:
|
||||
//
|
||||
// 1) refills always have the size of one cache line, except for accesses to the I/O region, which is mapped
|
||||
// to the top half of the physical address space (bit 39 = 1). the data width of the interface has the width
|
||||
// of one cache line, and hence the ifills can be transferred in a single cycle. note that the ifills must be
|
||||
// consumed unconditionally.
|
||||
//
|
||||
// 2) instruction fetches are always assumed to be aligned to 32bit (lower 2 bits are ignored)
|
||||
//
|
||||
// 3) NC accesses to I/O space are expected to return 32bit from memory.
|
||||
//
|
||||
|
||||
|
||||
module cva6_icache
|
||||
import ariane_pkg::*;
|
||||
import wt_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
/// ID to be used for read transactions
|
||||
parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
/// flush the icache, flush and kill have to be asserted together
|
||||
input logic flush_i,
|
||||
/// enable icache
|
||||
input logic en_i,
|
||||
/// to performance counter
|
||||
output logic miss_o,
|
||||
// address translation requests
|
||||
input icache_areq_t areq_i,
|
||||
output icache_arsp_t areq_o,
|
||||
// data requests
|
||||
input icache_dreq_t dreq_i,
|
||||
output icache_drsp_t dreq_o,
|
||||
// refill port
|
||||
input logic mem_rtrn_vld_i,
|
||||
input icache_rtrn_t mem_rtrn_i,
|
||||
output logic mem_data_req_o,
|
||||
input logic mem_data_ack_i,
|
||||
output icache_req_t mem_data_o
|
||||
);
|
||||
|
||||
// functions
|
||||
// Converts a binary way index into a one-hot way mask.
//   in      : binary-encoded way index
//   returns : ICACHE_SET_ASSOC-bit vector with only bit `in` set
function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh(
    input logic [L1I_WAY_WIDTH-1:0] in);
  // build the mask directly in the implicit return variable
  icache_way_bin2oh = '0;
  icache_way_bin2oh[in] = 1'b1;
endfunction
|
||||
|
||||
// signals
|
||||
logic cache_en_d, cache_en_q; // cache is enabled
|
||||
logic [riscv::VLEN-1:0] vaddr_d, vaddr_q;
|
||||
logic paddr_is_nc; // asserted if physical address is non-cacheable
|
||||
logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare
|
||||
logic cache_rden; // triggers cache lookup
|
||||
logic cache_wren; // triggers write to cacheline
|
||||
logic
|
||||
cmp_en_d,
|
||||
cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal.
|
||||
logic flush_d, flush_q; // used to register and signal pending flushes
|
||||
|
||||
// replacement strategy
|
||||
logic update_lfsr; // shift the LFSR
|
||||
logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered
|
||||
logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement
|
||||
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace
|
||||
logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot)
|
||||
logic all_ways_valid; // we need to switch repl strategy since all are valid
|
||||
|
||||
// invalidations / flushing
|
||||
logic inv_en; // incoming invalidations
|
||||
logic inv_d, inv_q; // invalidation in progress
|
||||
logic flush_en, flush_done; // used to flush cache entries
|
||||
logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries
|
||||
|
||||
// mem arrays
|
||||
logic cl_we; // write enable to memory array
|
||||
logic [ ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array
|
||||
logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array
|
||||
logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line
|
||||
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag
|
||||
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem
|
||||
logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache
|
||||
logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache
|
||||
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; // selected word from each cacheline
|
||||
logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline
|
||||
logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
|
||||
logic vld_we; // valid bits write enable
|
||||
logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
|
||||
logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs
|
||||
logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
|
||||
|
||||
// controller FSM
|
||||
typedef enum logic [2:0] {
|
||||
FLUSH,
|
||||
IDLE,
|
||||
READ,
|
||||
MISS,
|
||||
KILL_ATRANS,
|
||||
KILL_MISS
|
||||
} state_e;
|
||||
state_e state_d, state_q;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// address -> cl_index mapping, interface plumbing
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// extract tag from physical address, check if NC
|
||||
assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q;
|
||||
|
||||
// noncacheable if request goes to I/O space, or if cache is disabled
|
||||
assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions(
|
||||
CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}
|
||||
));
|
||||
|
||||
// pass exception through
|
||||
assign dreq_o.ex = areq_i.fetch_exception;
|
||||
|
||||
// latch this in case we have to stall later on
|
||||
// make sure this is 32bit aligned
|
||||
assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q;
|
||||
assign areq_o.fetch_vaddr = {vaddr_q[riscv::VLEN-1:2], 2'b0};
|
||||
|
||||
// split virtual address into index and offset to address cache arrays
|
||||
assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH];
|
||||
|
||||
|
||||
// Selects, per NoC type, how the in-line offset (cl_offset_d) and the refill
// request address (mem_data_o.paddr) are formed.
// In both variants the offset is captured with its low 2 bits cleared when a
// new request is accepted (dreq_o.ready & dreq_i.req), and cacheable refills
// use the cache-line-aligned address.
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
|
||||
// if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory
|
||||
// priority: (1) new request accepted -> take the offset from the incoming vaddr
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr[ICACHE_OFFSET_WIDTH-1:2], 2'b0} :
|
||||
// (2) non-cacheable memory request -> keep only bit 2 of the stored offset (result is 0 or 4)
( paddr_is_nc & mem_data_req_o ) ? {{ICACHE_OFFSET_WIDTH-1{1'b0}}, cl_offset_q[2]}<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case
|
||||
// (3) otherwise hold the registered offset
cl_offset_q;
|
||||
// request word address instead of cl address in case of NC access
|
||||
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit
|
||||
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
|
||||
end else begin : gen_piton_offset
|
||||
// icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
|
||||
// since the piton cache system replicates the data, we can always index the full CL
|
||||
// the concatenation is wider than cl_offset_d; the assignment keeps only its low bits
assign cl_offset_d = (dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr >> 2, 2'b0} : cl_offset_q;
|
||||
|
||||
// request word address instead of cl address in case of NC access
|
||||
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit
|
||||
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
|
||||
end
|
||||
|
||||
|
||||
assign mem_data_o.tid = RdTxId;
|
||||
|
||||
assign mem_data_o.nc = paddr_is_nc;
|
||||
// way that is being replaced
|
||||
assign mem_data_o.way = repl_way;
|
||||
assign dreq_o.vaddr = vaddr_q;
|
||||
|
||||
// invalidations take two cycles
|
||||
assign inv_d = inv_en;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// main control logic
|
||||
///////////////////////////////////////////////////////
|
||||
// Set when the (zero-extended) physical fetch address lies inside a
// non-idempotent region. Speculative fetches to such an address are held back
// in the READ state below when CVA6Cfg.NonIdemPotenceEn is set.
logic addr_ni;
assign addr_ni = config_pkg::is_inside_nonidempotent_regions(
    CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, areq_i.fetch_paddr}
);

// Main control FSM: FLUSH -> IDLE -> READ -> (MISS) -> IDLE, with the two
// KILL_* states draining a killed translation / killed refill.
always_comb begin : p_fsm
  // ---- default assignments -------------------------------------------------
  state_d          = state_q;
  flush_d          = flush_q | flush_i;  // register incoming flush until it is served
  // disabling the cache is always possible, enabling has to go via a flush
  cache_en_d       = cache_en_q & en_i;
  flush_en         = 1'b0;
  inv_en           = 1'b0;
  cmp_en_d         = 1'b0;
  cache_rden       = 1'b0;
  cache_wren       = 1'b0;

  // interfaces
  areq_o.fetch_req = 1'b0;
  dreq_o.ready     = 1'b0;
  dreq_o.valid     = 1'b0;
  mem_data_req_o   = 1'b0;

  // performance counter
  miss_o           = 1'b0;

  // Invalidations are handled unconditionally, in every state. They are
  // mutually exclusive with ifills since both arrive over the same interface,
  // but the states below must not trigger a cache readout at the same time.
  if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_INV_REQ) begin
    inv_en = 1'b1;
  end

  unique case (state_q)
    //////////////////////////////////
    // FLUSH: clear all valid bits, one index per cycle
    FLUSH: begin
      flush_en = 1'b1;
      if (flush_done) begin
        state_d    = IDLE;
        flush_d    = 1'b0;
        // pick up the enable here in case the cache was not enabled before
        cache_en_d = en_i;
      end
    end

    //////////////////////////////////
    // IDLE: wait for an incoming request
    IDLE: begin
      // tag comparison is only enabled while the cache is enabled
      cmp_en_d = cache_en_q;

      if (flush_d || (en_i && !cache_en_q)) begin
        // pending flush, or the cache is being enabled and must be cleared first
        state_d = FLUSH;
      end else begin
        // a valid memory return can only be an invalidation here;
        // do not accept a request (and read the arrays) in that cycle
        if (!mem_rtrn_vld_i) begin
          dreq_o.ready = 1'b1;
          if (dreq_i.req) begin
            // new request: start the array readout
            cache_rden = 1'b1;
            state_d    = READ;
          end
        end
        if (dreq_i.kill_s1) begin
          state_d = IDLE;
        end
      end
    end

    //////////////////////////////////
    // READ: check whether we have a hit. If the cache is disabled or the
    // address is non-cacheable, the miss mechanism is reused for the request.
    READ: begin
      areq_o.fetch_req = 1'b1;
      // tag comparison is only enabled while the cache is enabled
      cmp_en_d         = cache_en_q;
      // speculative readout
      cache_rden       = cache_en_q;

      // Proceed once the translation is valid. A speculative fetch additionally
      // requires that non-idempotence checking is off or the address is idempotent.
      if (areq_i.fetch_valid &&
          (!dreq_i.spec || !CVA6Cfg.NonIdemPotenceEn || !addr_ni)) begin
        if (flush_d) begin
          // a flush is pending: drop the request
          state_d = IDLE;
        end else if ((((|cl_hit) && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin
          // hit or exception: output a valid result, unless it is being killed
          dreq_o.valid = ~dreq_i.kill_s2;
          state_d      = IDLE;

          // We can accept another request and stay in READ, but only if no
          // inval is coming in (no ifill return packets are expected here).
          if (!mem_rtrn_vld_i) begin
            dreq_o.ready = 1'b1;
            if (dreq_i.req) begin
              state_d = READ;
            end
          end
          // a request killed at this stage has to bail out to IDLE
          if (dreq_i.kill_s1) begin
            state_d = IDLE;
          end
        end else if (dreq_i.kill_s2) begin
          // killed before a refill was requested
          state_d = IDLE;
        end else if (!inv_q) begin
          // miss / NC transaction: send out the ifill request
          cmp_en_d       = 1'b0;
          mem_data_req_o = 1'b1;
          if (mem_data_ack_i) begin
            // only cacheable accesses are reported as a miss
            miss_o  = ~paddr_is_nc;
            state_d = MISS;
          end
        end
      end else if (dreq_i.kill_s2 || flush_d) begin
        // killed while the translation is still outstanding
        state_d = KILL_ATRANS;
      end
    end

    //////////////////////////////////
    // MISS: wait until the memory transaction returns. The arrays are not
    // written if the access is non-cacheable.
    MISS: begin
      // ifill acks are mutually exclusive with ICACHE_INV_REQ,
      // so invalidations need no special handling here
      if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
        state_d = IDLE;
        // only return (and allocate) data if the request is not being killed
        if (!(dreq_i.kill_s2 || flush_d)) begin
          dreq_o.valid = 1'b1;
          cache_wren   = ~paddr_is_nc;
        end
      end else if (dreq_i.kill_s2 || flush_d) begin
        // killed while the refill is outstanding
        state_d = KILL_MISS;
      end
    end

    //////////////////////////////////
    // KILL_ATRANS: killed address translation, wait until the translation
    // completes and go back to idle
    KILL_ATRANS: begin
      areq_o.fetch_req = 1'b1;
      if (areq_i.fetch_valid) begin
        state_d = IDLE;
      end
    end

    //////////////////////////////////
    // KILL_MISS: killed miss, wait until memory responds and go back to idle
    KILL_MISS: begin
      if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
        state_d = IDLE;
      end
    end

    default: begin
      // we should never get here; recover via a flush
      state_d = FLUSH;
    end
  endcase  // state_q
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// valid bit invalidation and replacement strategy
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// An invalidation and a cache line replacement cannot arrive in the same cycle,
// since both requests come in over the same interface. Flushes take precedence
// over invalidations (ignoring the inval is fine, the cache is cleared anyway).

// flush counter: walks over all sets while flush_en is asserted
assign flush_done  = (flush_cnt_q == (ICACHE_NUM_WORDS - 1));
assign flush_cnt_d = (flush_done) ? '0 : (flush_en) ? flush_cnt_q + 1 : flush_cnt_q;

// address for the tag/valid arrays: flush index first, then the index of an
// incoming invalidation, otherwise the index of the current request
assign vld_addr =
    (flush_en) ? flush_cnt_q :
    (inv_en)   ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] :
                 cl_index;

// way select for the tag/valid arrays: all ways for a flush, a readout or an
// "invalidate all"; a single way for a targeted invalidation; otherwise the
// registered replacement way
assign vld_req =
    (flush_en || cache_rden || (mem_rtrn_i.inv.all && inv_en)) ? '1 :
    (mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(mem_rtrn_i.inv.way) :
                                     repl_way_oh_q;

// valid bits are set on a line fill and cleared by flushes / invalidations
assign vld_we    = (cache_wren | inv_en | flush_en);
assign vld_wdata = (cache_wren) ? '1 : '0;

// replacement: use the invalid way found by the lzc, or a random way if all
// ways are valid (the lfsr only advances when a random way is actually used)
assign repl_way      = (all_ways_valid) ? rnd_way : inv_way;
assign update_lfsr   = cache_wren & all_ways_valid;
assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;

// enable signals for the data arrays: read all ways, write only the replaced way
assign cl_we  = cache_wren;
assign cl_req = (cache_rden) ? '1 : (cache_wren) ? repl_way_oh_q : '0;
|
||||
|
||||
|
||||
// Search the inverted valid bits for a way that is not valid yet; empty_o is
// asserted when no such way exists, i.e. all ways are valid.
lzc #(
    .WIDTH(ICACHE_SET_ASSOC)
) i_lzc (
    .in_i   (~vld_rdata),
    .empty_o(all_ways_valid),
    .cnt_o  (inv_way)
);
|
||||
|
||||
// Pseudo-random way index, used as replacement way when all ways are valid.
// The lfsr only advances when update_lfsr is asserted.
lfsr #(
    .LfsrWidth(8),
    .OutWidth ($clog2(ariane_pkg::ICACHE_SET_ASSOC))
) i_lfsr (
    .clk_i (clk_i),
    .rst_ni(rst_ni),
    .en_i  (update_lfsr),
    .out_o (rnd_way)
);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// tag comparison, hit generation
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// index of the hitting way
logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx;

// Per way: compare the stored tag against the request tag (qualified by the
// valid bit) and pre-select the fetch word / user bits at the registered
// word offset (byte offset converted to a bit offset).
for (genvar way = 0; way < ICACHE_SET_ASSOC; way++) begin : gen_tag_cmpsel
  assign cl_hit[way]  = (cl_tag_rdata[way] == cl_tag_d) & vld_rdata[way];
  assign cl_sel[way]  = cl_rdata[way][{cl_offset_q, 3'b0}+:FETCH_WIDTH];
  assign cl_user[way] = cl_ruser[way][{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
end
|
||||
|
||||
|
||||
// Convert the per-way hit vector into the index of the hitting way.
lzc #(
    .WIDTH(ICACHE_SET_ASSOC)
) i_lzc_hit (
    .in_i   (cl_hit),
    .empty_o(),
    .cnt_o  (hit_idx)
);

// Response data mux: by default the word is taken directly from the memory
// return; when the tag comparison was enabled for this access, the word of the
// hitting way is returned instead.
always_comb begin
  dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:FETCH_WIDTH];
  dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
  if (cmp_en_q) begin
    dreq_o.data = cl_sel[hit_idx];
    dreq_o.user = cl_user[hit_idx];
  end
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// memory arrays and regs
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// raw tag ram read data per way: {valid bit, tag}
logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata[ICACHE_SET_ASSOC-1:0];

// One tag ram and one data ram per way.
for (genvar way = 0; way < ICACHE_SET_ASSOC; way++) begin : gen_sram
  // Tag RAM, each word holds the tag plus the valid bit
  sram #(
      .DATA_WIDTH(ICACHE_TAG_WIDTH + 1),
      .NUM_WORDS (ICACHE_NUM_WORDS)
  ) tag_sram (
      .clk_i  (clk_i),
      .rst_ni (rst_ni),
      .req_i  (vld_req[way]),
      .we_i   (vld_we),
      .addr_i (vld_addr),
      .be_i   ('1),
      .wuser_i('0),
      // we can always use the saved tag here since it takes a couple of
      // cycles until we write to the cache upon a miss
      .wdata_i({vld_wdata[way], cl_tag_q}),
      .ruser_o(),
      .rdata_o(cl_tag_valid_rdata[way])
  );

  // split the read word into valid bit (msb) and tag
  assign vld_rdata[way]    = cl_tag_valid_rdata[way][ICACHE_TAG_WIDTH];
  assign cl_tag_rdata[way] = cl_tag_valid_rdata[way][ICACHE_TAG_WIDTH-1:0];

  // Data RAM, written with the line (and user bits) returned from memory
  sram #(
      .USER_WIDTH(ICACHE_USER_LINE_WIDTH),
      .DATA_WIDTH(ICACHE_LINE_WIDTH),
      .USER_EN   (ariane_pkg::FETCH_USER_EN),
      .NUM_WORDS (ICACHE_NUM_WORDS)
  ) data_sram (
      .clk_i  (clk_i),
      .rst_ni (rst_ni),
      .req_i  (cl_req[way]),
      .we_i   (cl_we),
      .addr_i (cl_index),
      .be_i   ('1),
      .wuser_i(mem_rtrn_i.user),
      .wdata_i(mem_rtrn_i.data),
      .ruser_o(cl_ruser[way]),
      .rdata_o(cl_rdata[way])
  );
end
|
||||
|
||||
|
||||
// State registers of the icache controller. After an asynchronous reset the
// FSM starts in FLUSH with the cache disabled, so the valid bits are cleared
// before the first request is served.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
  if (!rst_ni) begin
    // control
    state_q       <= FLUSH;
    cache_en_q    <= '0;
    flush_q       <= '0;
    flush_cnt_q   <= '0;
    cmp_en_q      <= '0;
    inv_q         <= '0;
    repl_way_oh_q <= '0;
    // request address
    vaddr_q       <= '0;
    cl_tag_q      <= '0;
    cl_offset_q   <= '0;
  end else begin
    // control
    state_q       <= state_d;
    cache_en_q    <= cache_en_d;
    flush_q       <= flush_d;
    flush_cnt_q   <= flush_cnt_d;
    cmp_en_q      <= cmp_en_d;
    inv_q         <= inv_d;
    repl_way_oh_q <= repl_way_oh_d;
    // request address
    vaddr_q       <= vaddr_d;
    cl_tag_q      <= cl_tag_d;
    cl_offset_q   <= cl_offset_d;
  end
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// assertions
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
//pragma translate_off
`ifndef VERILATOR
// A cache line write must not coincide with an invalidation; the condition is
// checked in both directions.
repl_inval0 :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");

repl_inval1 :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren)
else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");

// The state register must always hold one of the defined states.
invalid_state :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS}))
else $fatal(1, "[l1 icache] fsm reached an invalid state");

// After a readout that does not coincide with an invalidation, at most one way
// may hit in the following cycle (if the tag comparison is enabled).
hot1 :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(cl_hit))
else $fatal(1, "[l1 icache] cl_hit signal must be hot1");

// Verification-only shadow copy of the valid bits and tags written to the tag rams.
logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test;

// Track every write to the tag rams in the mirror arrays.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
  if (!rst_ni) begin
    vld_mirror <= '{default: '0};
    tag_mirror <= '{default: '0};
  end else begin
    for (int way = 0; way < ICACHE_SET_ASSOC; way++) begin
      if (vld_req[way] & vld_we) begin
        vld_mirror[vld_addr][way] <= vld_wdata[way];
        tag_mirror[vld_addr][way] <= cl_tag_q;
      end
    end
  end
end

// Per way: set if a valid line with the tag being written already exists in
// the addressed set while a valid bit is being written.
for (genvar way = 0; way < ICACHE_SET_ASSOC; way++) begin : gen_tag_dupl
  assign tag_write_duplicate_test[way] =
      (tag_mirror[vld_addr][way] == cl_tag_q) & vld_mirror[vld_addr][way] & (|vld_wdata);
end

tag_write_duplicate :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
else $fatal(1, "[l1 icache] cannot allocate a CL that is already present in the cache");

// Parameterization check, evaluated once at the start of simulation.
initial begin
  assert (ICACHE_INDEX_WIDTH <= 12)
  else $fatal(1, "[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
|
||||
|
||||
endmodule // cva6_icache
|
||||
|
|
@ -0,0 +1,202 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Nils Wistoff <nwistoff@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 07.09.2020
|
||||
// Description: wrapper module to connect the L1I$ to a 64bit AXI bus.
|
||||
//
|
||||
|
||||
// Wraps cva6_icache and translates its refill interface into AXI read
// transactions issued through axi_shim:
//  - a refill request is acknowledged towards the icache immediately and kept
//    pending locally until the AXI side grants it
//  - a cache miss fetches a full cache line as one burst, a non-cacheable
//    (bypassed) access fetches a single beat
//  - the burst beats are collected in a shift register and handed to the icache
//    as one ICACHE_IFILL_ACK when the last beat arrives
// The write channel of the shim is tied off.
module cva6_icache_axi_wrapper
  import ariane_pkg::*;
  import wt_cache_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg   = config_pkg::cva6_cfg_empty,
    parameter type                   axi_req_t = logic,
    parameter type                   axi_rsp_t = logic
) (
    input logic             clk_i,
    input logic             rst_ni,
    input riscv::priv_lvl_t priv_lvl_i,  // not used inside this wrapper

    input  logic         flush_i,  // flush the icache, flush and kill have to be asserted together
    input  logic         en_i,     // enable icache
    output logic         miss_o,   // to performance counter
    // address translation requests
    input  icache_areq_t areq_i,
    output icache_arsp_t areq_o,
    // data requests
    input  icache_dreq_t dreq_i,
    output icache_drsp_t dreq_o,
    // AXI refill port
    output axi_req_t     axi_req_o,
    input  axi_rsp_t     axi_resp_i
);

  // Number of AXI beats per cache line, taken from the wider of the two L1 line widths.
  localparam AxiNumWords = (ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH > DCACHE_LINE_WIDTH) +
                           (DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ;

  // icache <-> wrapper refill interface
  logic                                    icache_mem_rtrn_vld;
  icache_rtrn_t                            icache_mem_rtrn;
  logic                                    icache_mem_data_req;
  logic                                    icache_mem_data_ack;
  icache_req_t                             icache_mem_data;

  // wrapper <-> axi_shim read channel
  logic                                    axi_rd_req;
  logic                                    axi_rd_gnt;
  logic         [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr;
  logic         [ $clog2(AxiNumWords)-1:0] axi_rd_blen;
  logic         [                     2:0] axi_rd_size;
  logic         [  CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in;
  logic                                    axi_rd_rdy;
  logic                                    axi_rd_lock;
  logic                                    axi_rd_last;
  logic                                    axi_rd_valid;
  logic         [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
  logic         [  CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_out;
  logic                                    axi_rd_exokay;

  // pending request bookkeeping and burst buffer
  logic req_valid_d, req_valid_q;
  icache_req_t req_data_d, req_data_q;
  logic first_d, first_q;
  logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
      rd_shift_d, rd_shift_q;

  // Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but
  // required by AXI).
  assign req_valid_d = ~axi_rd_gnt & (icache_mem_data_req | req_valid_q);

  // Update read request information on a new request
  assign req_data_d = (icache_mem_data_req) ? icache_mem_data : req_data_q;

  // We have a new or pending read request
  assign axi_rd_req = icache_mem_data_req | req_valid_q;
  assign axi_rd_addr = CVA6Cfg.AxiAddrWidth'(req_data_d.paddr);

  // Fetch a full cache line on a cache miss, or a single word on a bypassed access.
  // The burst length is counted in beats of the AXI data width, which is also the
  // beat size requested below and the element width of the refill shift register,
  // so it must be derived from CVA6Cfg.AxiDataWidth rather than a fixed 64 bit.
  assign axi_rd_blen = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
  assign axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth / 8);  // Maximum
  assign axi_rd_id_in = req_data_d.tid;
  assign axi_rd_rdy = 1'b1;
  assign axi_rd_lock = 1'b0;

  // Immediately acknowledge read request. This is an implicit requirement for the icache.
  assign icache_mem_data_ack = icache_mem_data_req;

  // Return data as soon as last word arrives. The combinational shift value is
  // used so that the last beat is already part of the returned line.
  assign icache_mem_rtrn_vld = axi_rd_valid & axi_rd_last;
  assign icache_mem_rtrn.data = rd_shift_d;
  assign icache_mem_rtrn.tid = req_data_q.tid;
  assign icache_mem_rtrn.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
  assign icache_mem_rtrn.inv = '0;

  // -------
  // I-Cache
  // -------
  cva6_icache #(
      // use ID 0 for icache reads
      .CVA6Cfg(CVA6Cfg),
      .RdTxId (0)
  ) i_cva6_icache (
      .clk_i         (clk_i),
      .rst_ni        (rst_ni),
      .flush_i       (flush_i),
      .en_i          (en_i),
      .miss_o        (miss_o),
      .areq_i        (areq_i),
      .areq_o        (areq_o),
      .dreq_i        (dreq_i),
      .dreq_o        (dreq_o),
      .mem_rtrn_vld_i(icache_mem_rtrn_vld),
      .mem_rtrn_i    (icache_mem_rtrn),
      .mem_data_req_o(icache_mem_data_req),
      .mem_data_ack_i(icache_mem_data_ack),
      .mem_data_o    (icache_mem_data)
  );

  // --------
  // AXI shim
  // --------
  // Only the read channel is used; all write channel inputs are tied to zero.
  axi_shim #(
      .CVA6Cfg    (CVA6Cfg),
      .AxiNumWords(AxiNumWords),
      .axi_req_t  (axi_req_t),
      .axi_rsp_t  (axi_rsp_t)
  ) i_axi_shim (
      .clk_i      (clk_i),
      .rst_ni     (rst_ni),
      .rd_req_i   (axi_rd_req),
      .rd_gnt_o   (axi_rd_gnt),
      .rd_addr_i  (axi_rd_addr),
      .rd_blen_i  (axi_rd_blen),
      .rd_size_i  (axi_rd_size),
      .rd_id_i    (axi_rd_id_in),
      .rd_rdy_i   (axi_rd_rdy),
      .rd_lock_i  (axi_rd_lock),
      .rd_last_o  (axi_rd_last),
      .rd_valid_o (axi_rd_valid),
      .rd_data_o  (axi_rd_data),
      .rd_user_o  (),
      .rd_id_o    (axi_rd_id_out),
      .rd_exokay_o(axi_rd_exokay),
      .wr_req_i   ('0),
      .wr_gnt_o   (),
      .wr_addr_i  ('0),
      .wr_data_i  ('0),
      .wr_user_i  ('0),
      .wr_be_i    ('0),
      .wr_blen_i  ('0),
      .wr_size_i  ('0),
      .wr_id_i    ('0),
      .wr_lock_i  ('0),
      .wr_atop_i  ('0),
      .wr_rdy_i   ('0),
      .wr_valid_o (),
      .wr_id_o    (),
      .wr_exokay_o(),
      .axi_req_o  (axi_req_o),
      .axi_resp_i (axi_resp_i)
  );

  // Buffer burst data in shift register
  always_comb begin : p_axi_rtrn_shift
    first_d    = first_q;
    rd_shift_d = rd_shift_q;

    if (axi_rd_valid) begin
      // the beat following a last beat is the first beat of the next transaction
      first_d = axi_rd_last;
      if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
        rd_shift_d = axi_rd_data;
      end else begin
        // new beats enter at the top and move towards index 0
        rd_shift_d = {axi_rd_data, rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]};
      end

      // If this is a single word transaction, we need to make sure that word is placed at offset 0
      if (first_q) begin
        rd_shift_d[0] = axi_rd_data;
      end
    end
  end

  // Registers
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
    if (!rst_ni) begin
      req_valid_q <= 1'b0;
      req_data_q  <= '0;
      first_q     <= 1'b1;
      rd_shift_q  <= '0;
    end else begin
      req_valid_q <= req_valid_d;
      req_data_q  <= req_data_d;
      first_q     <= first_d;
      rd_shift_q  <= rd_shift_d;
    end
  end

endmodule  // cva6_icache_axi_wrapper
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : February, 2023
|
||||
* Description : HPDcache Types' Definition
|
||||
* History :
|
||||
*/
|
||||
`ifndef __HPDCACHE_TYPEDEF_SVH__
|
||||
`define __HPDCACHE_TYPEDEF_SVH__
|
||||
|
||||
// Declares a packed struct type called __name__ for a memory request, with the
// fields mem_req_addr (addr_t), mem_req_len, mem_req_size, mem_req_id (id_t),
// mem_req_command, mem_req_atomic and mem_req_cacheable.
// The macro body ends without a semicolon, so the invocation has to supply it.
`define HPDCACHE_TYPEDEF_MEM_REQ_T(__name__, addr_t, id_t) \
|
||||
typedef struct packed { \
|
||||
addr_t mem_req_addr; \
|
||||
hpdcache_pkg::hpdcache_mem_len_t mem_req_len; \
|
||||
hpdcache_pkg::hpdcache_mem_size_t mem_req_size; \
|
||||
id_t mem_req_id; \
|
||||
hpdcache_pkg::hpdcache_mem_command_e mem_req_command; \
|
||||
hpdcache_pkg::hpdcache_mem_atomic_e mem_req_atomic; \
|
||||
logic mem_req_cacheable; \
|
||||
} __name__
|
||||
|
||||
// Declares a packed struct type called __name__ for a memory read response,
// with the fields mem_resp_r_error, mem_resp_r_id (id_t), mem_resp_r_data
// (data_t) and mem_resp_r_last.
// The macro body ends without a semicolon, so the invocation has to supply it.
`define HPDCACHE_TYPEDEF_MEM_RESP_R_T(__name__, id_t, data_t) \
|
||||
typedef struct packed { \
|
||||
hpdcache_pkg::hpdcache_mem_error_e mem_resp_r_error; \
|
||||
id_t mem_resp_r_id; \
|
||||
data_t mem_resp_r_data; \
|
||||
logic mem_resp_r_last; \
|
||||
} __name__
|
||||
|
||||
// Declares a packed struct type called __name__ for the write data of a memory
// request, with the fields mem_req_w_data (data_t), mem_req_w_be (be_t) and
// mem_req_w_last.
// The macro body ends without a semicolon, so the invocation has to supply it.
`define HPDCACHE_TYPEDEF_MEM_REQ_W_T(__name__, data_t, be_t) \
|
||||
typedef struct packed { \
|
||||
data_t mem_req_w_data; \
|
||||
be_t mem_req_w_be; \
|
||||
logic mem_req_w_last; \
|
||||
} __name__
|
||||
|
||||
// Declares a packed struct type called __name__ for a memory write response,
// with the fields mem_resp_w_is_atomic, mem_resp_w_error and mem_resp_w_id
// (id_t).
// The macro body ends without a semicolon, so the invocation has to supply it.
`define HPDCACHE_TYPEDEF_MEM_RESP_W_T(__name__, id_t) \
|
||||
typedef struct packed { \
|
||||
logic mem_resp_w_is_atomic; \
|
||||
hpdcache_pkg::hpdcache_mem_error_e mem_resp_w_error; \
|
||||
id_t mem_resp_w_id; \
|
||||
} __name__
|
||||
|
||||
`endif
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : November 22, 2022
|
||||
* Description : Refill data downsize
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_data_downsize
//  {{{
import hpdcache_pkg::*;
//  Parameters
//  {{{
#(
    parameter int WR_WIDTH = 0,  // width of a written entry, in bits
    parameter int RD_WIDTH = 0,  // width of a read word, in bits
    parameter int DEPTH    = 0,  // number of buffered entries

    localparam type wdata_t = logic [WR_WIDTH-1:0],
    localparam type rdata_t = logic [RD_WIDTH-1:0]
)
//  }}}
//  Ports
//  {{{
(
    input  logic   clk_i,
    input  logic   rst_ni,

    input  logic   w_i,      // write request
    output logic   wok_o,    // buffer is not full, a write is accepted
    input  wdata_t wdata_i,

    input  logic   r_i,      // read request
    output logic   rok_o,    // buffer is not empty, rdata_o holds a word
    output rdata_t rdata_o
);
//  }}}
//  Architecture
//  {{{
    //  Local definitions
    //  {{{
    //  Each written entry is read out as RD_WORDS words of RD_WIDTH bits.
    localparam int RD_WORDS      = WR_WIDTH/RD_WIDTH;
    localparam int PTR_WIDTH     = $clog2(DEPTH);
    localparam int WORDCNT_WIDTH = $clog2(RD_WORDS);

    typedef logic [PTR_WIDTH-1:0]     bufptr_t;
    typedef logic [WORDCNT_WIDTH-1:0] wordptr_t;
    typedef logic [PTR_WIDTH:0]       occupancy_t;
    //  }}}

    //  Internal registers and signals
    //  {{{
    rdata_t     [DEPTH-1:0][RD_WORDS-1:0] buf_q;             // entry storage
    bufptr_t                              wrptr_q, wrptr_d;  // entry being written next
    bufptr_t                              rdptr_q, rdptr_d;  // entry being read
    occupancy_t                           used_q, used_d;    // number of occupied entries
    wordptr_t   [DEPTH-1:0]               words_q, words_d;  // words still left after the current one
    logic                                 words_set;         // an entry is written this cycle
    logic                                 full, empty;
    //  }}}

    //  Control-Path
    //  {{{
    assign full  = (hpdcache_uint'(used_q) == DEPTH);
    assign empty = (used_q == 0);
    assign wok_o = ~full;
    assign rok_o = ~empty;

    always_comb
    begin : ctrl_comb
        automatic logic entry_pushed, entry_popped;

        rdptr_d      = rdptr_q;
        wrptr_d      = wrptr_q;
        words_d      = words_q;
        entry_pushed = 1'b0;
        entry_popped = 1'b0;
        words_set    = 1'b0;

        //  Accepted write: fill the entry at the write pointer, arm its word
        //  counter and advance the write pointer (wrapping at DEPTH-1)
        if (w_i && wok_o) begin
            entry_pushed     = 1'b1;
            words_set        = 1'b1;
            words_d[wrptr_q] = wordptr_t'(RD_WORDS - 1);
            wrptr_d          = (hpdcache_uint'(wrptr_q) == (DEPTH-1)) ? 0 : wrptr_q + 1;
        end

        //  Accepted read: move on to the next word of the entry, or release
        //  the entry and advance the read pointer when its last word is read
        if (r_i && rok_o) begin
            if (words_q[rdptr_q] == 0) begin
                entry_popped = 1'b1;
                rdptr_d      = (hpdcache_uint'(rdptr_q) == (DEPTH-1)) ? 0 : rdptr_q + 1;
            end else begin
                words_d[rdptr_q] = words_q[rdptr_q] - 1;
            end
        end

        //  Occupancy only changes when exactly one of push / pop happens
        case ({entry_pushed, entry_popped})
            2'b10  : used_d = used_q + 1;
            2'b01  : used_d = used_q - 1;
            default: used_d = used_q;
        endcase
    end

    always_ff @(posedge clk_i or negedge rst_ni)
    begin : ctrl_ff
        if (!rst_ni) begin
            used_q  <= '0;
            words_q <= '0;
            wrptr_q <= '0;
            rdptr_q <= '0;
        end else begin
            used_q  <= used_d;
            words_q <= words_d;
            wrptr_q <= wrptr_d;
            rdptr_q <= rdptr_d;
        end
    end
    //  }}}

    //  Data-Path
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : buf_ff
        if (!rst_ni) begin
            buf_q <= '0;
        end else if (words_set) begin
            buf_q[wrptr_q] <= wdata_i;
        end
    end

    //  Words are read starting from index 0 of the entry: the word counter
    //  starts at RD_WORDS-1 and counts down.
    assign rdata_o = buf_q[rdptr_q][RD_WORDS - hpdcache_uint'(words_q[rdptr_q]) - 1];
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    initial
    begin : initial_assertions
        assert (DEPTH > 0) else $error("DEPTH must be greater than 0");
        assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0");
        assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0");
        assert (RD_WIDTH < WR_WIDTH) else $error("RD_WIDTH must be less to WR_WIDTH");
        assert ((WR_WIDTH % RD_WIDTH) == 0) else $error("WR_WIDTH must be a multiple RD_WIDTH");
    end
    //  pragma translate_on
    //  }}}
//  }}}
endmodule
|
||||
// }}}
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : November 22, 2022
|
||||
* Description : Refill data upsize
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_data_upsize
//  {{{
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    //  Width in bits of a word on the write side
    parameter int WR_WIDTH = 0,
    //  Width in bits of a word on the read side
    parameter int RD_WIDTH = 0,
    //  Number of read-side entries in the buffer
    parameter int DEPTH = 0,

    localparam type wdata_t = logic [WR_WIDTH-1:0],
    localparam type rdata_t = logic [RD_WIDTH-1:0]
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic   clk_i,
    input  logic   rst_ni,

    //  Write side: w_i requests a write, wok_o tells if it is accepted,
    //  wlast_i closes the current entry even if it is not completely filled
    input  logic   w_i,
    input  logic   wlast_i,
    output logic   wok_o,
    input  wdata_t wdata_i,

    //  Read side: r_i requests a read, rok_o tells if an entry is available
    input  logic   r_i,
    output logic   rok_o,
    output rdata_t rdata_o
);
    //  }}}
    //  Architecture
    //  {{{
    //  Local definitions
    //  {{{
    //  Number of write-side words making up one read-side entry
    localparam int WR_WORDS      = RD_WIDTH/WR_WIDTH;
    localparam int PTR_WIDTH     = $clog2(DEPTH);
    localparam int WORDCNT_WIDTH = $clog2(WR_WORDS);

    typedef logic [PTR_WIDTH-1:0]     bufptr_t;
    typedef logic [WORDCNT_WIDTH-1:0] wordptr_t;
    typedef logic [PTR_WIDTH:0]       occupancy_t;
    //  }}}

    //  Internal registers and signals
    //  {{{
    wdata_t     [DEPTH-1:0][WR_WORDS-1:0] buf_q;
    bufptr_t                              wrptr_q, wrptr_d;
    bufptr_t                              rdptr_q, rdptr_d;
    occupancy_t                           used_q, used_d;
    wordptr_t   [DEPTH-1:0]               words_q, words_d;
    logic                                 full, empty;
    logic                                 shift;
    //  }}}

    //  Control-Path
    //  {{{
    assign full  = (hpdcache_uint'(used_q) == DEPTH);
    assign empty = (used_q == 0);
    assign wok_o = ~full;
    assign rok_o = ~empty;

    always_comb
    begin : ctrl_comb
        //  an entry has been closed by the write side / released by the read side
        automatic logic entry_closed, entry_released;
        //  advance / clear the word counter of the addressed entry
        automatic logic word_advance, word_clear;

        //  By default every register holds its value
        entry_closed   = 1'b0;
        entry_released = 1'b0;
        word_advance   = 1'b0;
        word_clear     = 1'b0;
        shift          = 1'b0;
        wrptr_d        = wrptr_q;
        rdptr_d        = rdptr_q;
        words_d        = words_q;

        //  Accepted write: store the word and, when the entry is complete (or
        //  the writer flags the last word), make it visible to the read side
        if (w_i && wok_o) begin
            shift        = 1'b1;
            word_advance = (hpdcache_uint'(words_q[wrptr_q]) < (WR_WORDS-1));
            if (hpdcache_uint'(words_q[wrptr_q]) == (WR_WORDS-1) || wlast_i) begin
                entry_closed = 1'b1;
                //  circular increment of the write pointer
                wrptr_d = (hpdcache_uint'(wrptr_q) == (DEPTH-1)) ? 0 : wrptr_q + 1;
            end
        end

        //  Accepted read: release the entry and rearm its word counter
        if (r_i && rok_o) begin
            entry_released = 1'b1;
            word_clear     = 1'b1;
            //  circular increment of the read pointer
            rdptr_d = (hpdcache_uint'(rdptr_q) == (DEPTH-1)) ? 0 : rdptr_q + 1;
        end

        //  Occupancy: +1 on close, -1 on release, unchanged when both or none
        used_d = used_q + occupancy_t'(entry_closed) - occupancy_t'(entry_released);

        //  The clear is applied after the increment
        if (word_advance) begin
            words_d[wrptr_q] = words_q[wrptr_q] + 1;
        end
        if (word_clear) begin
            words_d[rdptr_q] = 0;
        end
    end

    always_ff @(posedge clk_i or negedge rst_ni)
    begin : ctrl_ff
        if (!rst_ni) begin
            used_q  <= 0;
            wrptr_q <= 0;
            rdptr_q <= 0;
            words_q <= '0;
        end else begin
            used_q  <= used_d;
            wrptr_q <= wrptr_d;
            rdptr_q <= rdptr_d;
            words_q <= words_d;
        end
    end
    //  }}}

    //  Data-Path
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : buf_ff
        if (!rst_ni) begin
            buf_q <= '0;
        end else if (shift) begin
            //  the word counter of the entry selects the slot being written
            buf_q[wrptr_q][words_q[wrptr_q]] <= wdata_i;
        end
    end

    //  The entry under the read pointer is always exposed
    assign rdata_o = buf_q[rdptr_q];
    //  }}}

    //  Assertions
    //  {{{
    // pragma translate_off
    initial
    begin : initial_assertions
        assert (DEPTH > 0) else $error("DEPTH must be greater than 0");
        assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0");
        assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0");
        assert (WR_WIDTH < RD_WIDTH) else $error("WR_WIDTH must be less to RD_WIDTH");
        assert ((RD_WIDTH % WR_WIDTH) == 0) else $error("RD_WIDTH must be a multiple WR_WIDTH");
    end
    // pragma translate_on
    //  }}}
    //  }}}
endmodule
|
||||
// }}}
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Simple demultiplexer
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_demux
    //  Parameters
    //  {{{
#(
    //  Number of outputs
    parameter int unsigned NOUTPUT     = 0,

    //  Width in bits of the input (and of each output)
    parameter int unsigned DATA_WIDTH  = 0,

    //  Selector signal is one-hot encoded
    parameter bit          ONE_HOT_SEL = 0,

    //  Compute the width of the selection signal
    localparam int unsigned NOUTPUT_LOG2 = $clog2(NOUTPUT),
    localparam int unsigned SEL_WIDTH    = ONE_HOT_SEL ? NOUTPUT : NOUTPUT_LOG2,

    localparam type data_t = logic [DATA_WIDTH-1:0],
    localparam type sel_t  = logic [SEL_WIDTH-1:0]
)
    //  }}}

    //  Ports
    //  {{{
(
    //  Data to route
    input  data_t               data_i,
    //  Output selector (binary index, or one bit per output when ONE_HOT_SEL)
    input  sel_t                sel_i,
    //  Selected output carries data_i, every other output is driven to zero
    output data_t [NOUTPUT-1:0] data_o
);
    //  }}}

    //  The selector encoding is resolved at elaboration time, so that each
    //  configuration only elaborates the expression that matches the actual
    //  width of sel_i (in binary mode, sel_i[i] would index past SEL_WIDTH).
    generate
        //  Selector is one-hot encoded
        if (ONE_HOT_SEL == 1) begin : gen_onehot_sel
            always_comb
            begin : demux_comb
                for (int unsigned i = 0; i < NOUTPUT; i++) begin
                    data_o[i] = sel_i[i] ? data_i : '0;
                end
            end

        //  Selector is binary encoded
        end else begin : gen_binary_sel
            always_comb
            begin : demux_comb
                for (int unsigned i = 0; i < NOUTPUT; i++) begin
                    data_o[i] = (sel_t'(i) == sel_i) ? data_i : '0;
                end
            end
        end
    endgenerate
endmodule
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : FIFO buffer (using registers)
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_fifo_reg
    //  Parameters
    //  {{{
#(
    //  Number of entries of the FIFO
    parameter int unsigned FIFO_DEPTH  = 0,
    //  When set, a write may be forwarded combinationally to the read side
    parameter bit          FEEDTHROUGH = 1'b0,
    //  Type of the stored data
    parameter type         fifo_data_t = logic
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic       clk_i,
    input  logic       rst_ni,
    input  logic       w_i,
    output logic       wok_o,
    input  fifo_data_t wdata_i,
    input  logic       r_i,
    output logic       rok_o,
    output fifo_data_t rdata_o
);
    //  }}}

    /*
     *  Single-entry FIFO buffer -> synchronization buffer
     */
    if (FIFO_DEPTH == 1) begin : gen_sync_buffer
        hpdcache_sync_buffer #(
            .FEEDTHROUGH (FEEDTHROUGH),
            .data_t      (fifo_data_t)
        ) i_sync_buffer (
            .clk_i   (clk_i),
            .rst_ni  (rst_ni),
            .w_i     (w_i),
            .wok_o   (wok_o),
            .wdata_i (wdata_i),
            .r_i     (r_i),
            .rok_o   (rok_o),
            .rdata_o (rdata_o)
        );

    /*
     *  Multi-entry FIFO buffer
     */
    end else if (FIFO_DEPTH > 0) begin : gen_fifo
        //  Declaration of constants, types and functions
        //  {{{
        typedef logic unsigned [$clog2(FIFO_DEPTH)-1:0] fifo_addr_t;
        //  }}}

        //  Declaration of internal wires and registers
        //  {{{
        fifo_data_t [FIFO_DEPTH-1:0] fifo_mem_q;
        fifo_addr_t                  rptr_q, rptr_d;           //  read pointer
        fifo_addr_t                  wptr_q, wptr_d;           //  write pointer
        logic                        crossover_q, crossover_d; //  write pointer has wrapped
        logic                        rexec, wexec;
        logic                        rptr_max, wptr_max;
        logic                        match_ptr;
        logic                        empty, full;
        //  }}}

        //  Global control signals
        //  {{{
        //  Equal pointers mean either empty or full; the crossover flag
        //  disambiguates the two cases
        assign match_ptr = (wptr_q == rptr_q);
        assign empty     = match_ptr & ~crossover_q;
        assign full      = match_ptr &  crossover_q;

        assign rok_o = ~empty | (FEEDTHROUGH & w_i);
        assign wok_o = ~full  | (FEEDTHROUGH & r_i);

        //  A read only touches the memory when there is something stored
        assign rexec = r_i & ~empty;

        //  In feedthrough mode, a write to an empty FIFO that is read in the
        //  same cycle bypasses the memory, and a write to a full FIFO is
        //  accepted when an entry is read in the same cycle
        assign wexec = w_i & (FEEDTHROUGH ? ((empty & ~r_i) | (full & r_i) | (~full & ~empty))
                                          : ~full);
        //  }}}

        //  Control of read and write pointers
        //  {{{
        assign rptr_max = (rptr_q == fifo_addr_t'(FIFO_DEPTH-1));
        assign wptr_max = (wptr_q == fifo_addr_t'(FIFO_DEPTH-1));

        always_comb
        begin : fifo_ctrl_comb
            rptr_d      = rptr_q;
            wptr_d      = wptr_q;
            crossover_d = crossover_q;

            if (rexec) begin
                rptr_d = rptr_max ? 0 : rptr_q + 1;
                //  the read pointer wraps and catches up with the write pointer lap
                if (rptr_max) crossover_d = 1'b0;
            end

            if (wexec) begin
                wptr_d = wptr_max ? 0 : wptr_q + 1;
                //  evaluated last: a wrapping write has priority over a wrapping read
                if (wptr_max) crossover_d = 1'b1;
            end
        end
        //  }}}

        //  FIFO buffer memory management
        //  {{{
        always_ff @(posedge clk_i)
        begin
            if (wexec) begin
                fifo_mem_q[wptr_q] <= wdata_i;
            end
        end

        assign rdata_o = (FEEDTHROUGH && empty) ? wdata_i : fifo_mem_q[rptr_q];
        //  }}}

        //  Setting of internal state
        //  {{{
        always_ff @(posedge clk_i or negedge rst_ni)
        begin
            if (!rst_ni) begin
                crossover_q <= 1'b0;
                rptr_q      <= 0;
                wptr_q      <= 0;
            end else begin
                crossover_q <= crossover_d;
                rptr_q      <= rptr_d;
                wptr_q      <= wptr_d;
            end
        end
        //  }}}

        //  Assertions
        //  {{{
        // pragma translate_off
        rptr_ahead_wptr_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
                (!crossover_q && (rptr_q <= wptr_q)) ||
                ( crossover_q && (rptr_q >= wptr_q))) else
                $error("fifo: read pointer is ahead of the write pointer");
        // pragma translate_on
        //  }}}
    end
endmodule
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Fixed-Priority Arbiter
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_fxarb
    //  Parameters
    //  {{{
#(
    //  Number of requesters
    parameter int unsigned N = 0
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic         clk_i,
    input  logic         rst_ni,
    //  One request bit per requester (bit 0 has the highest priority)
    input  logic [N-1:0] req_i,
    //  Grant vector
    output logic [N-1:0] gnt_o,
    //  The granted request is consumed in the current cycle
    input  logic         ready_i
);
    //  }}}

    //  Declaration of internal wires and registers
    //  {{{
    logic [N-1:0] gnt_q, gnt;
    logic         wait_q;
    logic         pending;
    //  }}}

    //  Compute the grant vector
    //  {{{
    hpdcache_prio_1hot_encoder #(
        .N     (N)
    ) prio_msk_i (
        .val_i (req_i),
        .val_o (gnt)
    );
    //  }}}

    //  Compute the output grant vector
    //  {{{
    //  While waiting for ready_i, the grant saved when the wait started is kept
    assign gnt_o = wait_q ? gnt_q : gnt;
    //  }}}

    //  Setting of internal state
    //  {{{
    assign pending = |req_i;

    always_ff @(posedge clk_i or negedge rst_ni)
    begin
        if (!rst_ni) begin
            gnt_q  <= '0;
            wait_q <= 1'b0;
        end else begin
            //  enter (or stay in) the wait state until ready_i is asserted
            wait_q <= ~ready_i & (wait_q | pending);

            //  save the grant on the first cycle a request is not consumed
            if (!(ready_i || wait_q) && pending) begin
                gnt_q <= gnt;
            end
        end
    end
    //  }}}

    //  Assertions
    //  {{{
    // pragma translate_off
    gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
            $onehot0(gnt_o)) else $error("arbiter: granting more than one requester");
    // pragma translate_on
    //  }}}

endmodule
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Author(s) : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Simple multiplexor
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_mux
    //  Parameters
    //  {{{
#(
    //  Number of inputs
    parameter int unsigned NINPUT      = 0,

    //  Width in bits of each input
    parameter int unsigned DATA_WIDTH  = 0,

    //  Selector signal is one-hot encoded
    parameter bit          ONE_HOT_SEL = 0,

    //  Compute the width of the selection signal
    localparam int unsigned NINPUT_LOG2 = $clog2(NINPUT),
    localparam int unsigned SEL_WIDTH   = ONE_HOT_SEL ? NINPUT : NINPUT_LOG2,

    localparam type data_t = logic [DATA_WIDTH-1:0],
    localparam type sel_t  = logic [SEL_WIDTH-1:0]
)
    //  }}}

    //  Ports
    //  {{{
(
    input  data_t [NINPUT-1:0] data_i,
    input  sel_t               sel_i,
    output data_t              data_o
);
    //  }}}

    //  Both flavours build the output as the OR of all inputs, each one gated
    //  by its own select condition (AND-OR multiplexer)
    generate
        //  Selector is one-hot encoded
        if (ONE_HOT_SEL == 1) begin : gen_onehot_sel
            always_comb
            begin : data_out_mux_comb
                data_o = '0;
                for (int unsigned k = 0; k < NINPUT; k++) begin
                    data_o = data_o | (sel_i[k] ? data_i[k] : '0);
                end
            end

        //  Selector is binary encoded
        end else begin : gen_binary_sel
            always_comb
            begin : data_out_mux_comb
                data_o = '0;
                for (int unsigned k = 0; k < NINPUT; k++) begin
                    data_o = data_o | ((k == int'(sel_i)) ? data_i[k] : '0);
                end
            end
        end
    endgenerate
endmodule
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Priority One-hot Encoder
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_prio_1hot_encoder
    //  Parameters
#(
    //  Width in bits of the input and output vectors
    parameter int unsigned N = 0
)
    //  Ports
(
    //  Input vector (bit 0 has the highest priority)
    input  logic [N-1:0] val_i,
    //  Only the lowest set bit of val_i is kept, all zeros if none is set
    output logic [N-1:0] val_o
);

    //  lower_set[i] is high when at least one bit of val_i below i is set
    logic [N-1:0] lower_set;

    generate
        assign lower_set[0] = 1'b0;
        for (genvar k = 1; k < int'(N); k++) begin : prio_gen
            assign lower_set[k] = lower_set[k-1] | val_i[k-1];
        end
    endgenerate

    //  A bit wins when it is set and no lower bit is set
    assign val_o = val_i & ~lower_set;
endmodule
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : March, 2020
|
||||
* Description : 1RW register bank with write byte enable
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_regbank_wbyteenable_1rw
#(
    //  Width in bits of the address
    parameter int unsigned ADDR_SIZE = 0,
    //  Width in bits of a word
    parameter int unsigned DATA_SIZE = 0,
    //  Number of words
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                   clk,
    input  logic                   rst_n,       //  not used by this model
    input  logic                   cs,          //  chip select
    input  logic                   we,          //  write enable
    input  logic [ADDR_SIZE-1:0]   addr,
    input  logic [DATA_SIZE-1:0]   wdata,
    input  logic [DATA_SIZE/8-1:0] wbyteenable, //  one enable bit per byte of wdata
    output logic [DATA_SIZE-1:0]   rdata
);

    /*
     *  Internal memory array declaration
     */
    typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
    mem_t mem;

    /*
     *  Process to update or read the memory array
     *
     *  Nothing happens unless cs is set. rdata is registered on every selected
     *  access, including writes; as it uses a non-blocking read of the array,
     *  a write access returns the word as it was before the write.
     */
    always_ff @(posedge clk)
    begin : mem_update_ff
        if (cs) begin
            rdata <= mem[addr];
        end

        if (cs && we) begin
            //  only the enabled bytes of the addressed word are overwritten
            for (int unsigned b = 0; b < DATA_SIZE/8; b++) begin
                if (wbyteenable[b]) begin
                    mem[addr][b*8 +: 8] <= wdata[b*8 +: 8];
                end
            end
        end
    end : mem_update_ff
endmodule : hpdcache_regbank_wbyteenable_1rw
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : March, 2020
|
||||
* Description : 1RW register bank with write bit mask
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_regbank_wmask_1rw
#(
    //  Width in bits of the address
    parameter int unsigned ADDR_SIZE = 0,
    //  Width in bits of a word
    parameter int unsigned DATA_SIZE = 0,
    //  Number of words
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                 clk,
    input  logic                 rst_n, //  not used by this model
    input  logic                 cs,    //  chip select
    input  logic                 we,    //  write enable
    input  logic [ADDR_SIZE-1:0] addr,
    input  logic [DATA_SIZE-1:0] wdata,
    input  logic [DATA_SIZE-1:0] wmask, //  bits set select the bits of wdata to write
    output logic [DATA_SIZE-1:0] rdata
);

    /*
     *  Internal memory array declaration
     */
    typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
    mem_t mem;

    /*
     *  Process to update or read the memory array
     *
     *  Nothing happens unless cs is set. rdata is registered on every selected
     *  access, including writes; as it uses a non-blocking read of the array,
     *  a write access returns the word as it was before the write.
     */
    always_ff @(posedge clk)
    begin : mem_update_ff
        if (cs) begin
            rdata <= mem[addr];
        end

        if (cs && we) begin
            //  masked bits come from wdata, the others keep their stored value
            mem[addr] <= (wdata & wmask) | (mem[addr] & ~wmask);
        end
    end : mem_update_ff
endmodule : hpdcache_regbank_wmask_1rw
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/**
|
||||
* Author(s) : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Round-Robin Arbiter
|
||||
* Based on design from
|
||||
* http://www.rtlery.com/articles/how-design-round-robin-arbiter
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_rrarb
    //  Parameters
    //  {{{
#(
    //  Number of requesters
    parameter int unsigned N = 0
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic         clk_i,
    input  logic         rst_ni,
    //  One request bit per requester
    input  logic [N-1:0] req_i,
    //  Grant vector
    output logic [N-1:0] gnt_o,
    //  The granted request is consumed in the current cycle
    input  logic         ready_i
);
    //  }}}

    //  Declaration of internal wires and registers
    //  {{{
    logic [N-1:0] gnt_q, gnt;
    logic [N-1:0] nxt;
    logic [N-1:0] mask;
    logic [N-1:0] req_msk;
    logic [N-1:0] gnt_msk, gnt_nomsk;
    logic         wait_q;
    logic         pending;
    //  }}}

    //  Elaboration-time assertions
    //  {{{
    // pragma translate_off
    generate
        if (N == 0) $error("N must be greater than 0");
    endgenerate
    // pragma translate_on
    //  }}}

    //  Compute the thermometer mask vector
    //  {{{
    generate
        //  nxt is the last grant rotated by one position: it points to the
        //  requester following the last granted one
        if (N > 1) begin : gen_nxt_gt_1
            assign nxt = {gnt_q[N-2:0], gnt_q[N-1]};
        end else begin : gen_nxt_1
            assign nxt = gnt_q[0];
        end

        //  mask[i] is set for every position at or above the one set in nxt
        for (genvar gen_i = 0; gen_i < int'(N); gen_i++) begin : gen_mask
            if (gen_i == 0) begin : gen_first
                assign mask[gen_i] = nxt[gen_i];
            end else begin : gen_others
                assign mask[gen_i] = mask[gen_i-1] | nxt[gen_i];
            end
        end
    endgenerate
    //  }}}

    //  Compute the grant vector
    //  {{{
    assign req_msk = req_i & mask;

    hpdcache_prio_1hot_encoder #(
        .N     (N)
    ) prio_msk_i (
        .val_i (req_msk),
        .val_o (gnt_msk)
    );

    hpdcache_prio_1hot_encoder #(
        .N     (N)
    ) prio_nomsk_i (
        .val_i (req_i),
        .val_o (gnt_nomsk)
    );

    //  Requests under the mask are served first; when there is none, the
    //  search restarts from requester 0
    assign gnt = (|gnt_msk) ? gnt_msk : gnt_nomsk;
    //  }}}

    //  Compute the output grant vector
    //  {{{
    //  While waiting for ready_i, the registered grant is kept
    assign gnt_o = wait_q ? gnt_q : gnt;
    //  }}}

    //  Setting of internal state
    //  {{{
    assign pending = |req_i;

    always_ff @(posedge clk_i or negedge rst_ni)
    begin
        if (!rst_ni) begin
            //  with the top bit set, nxt selects bit 0 and the mask is all
            //  ones: requester 0 has the highest priority after reset
            gnt_q  <= {1'b1, {(N-1){1'b0}}};
            wait_q <= 1'b0;
        end else begin
            //  enter (or stay in) the wait state until ready_i is asserted
            wait_q <= ~ready_i & (wait_q | pending);

            //  record the new grant, unless a previous one is still waiting
            if (pending && !wait_q) begin
                gnt_q <= gnt;
            end
        end
    end
    //  }}}

    //  Assertions
    //  {{{
    // pragma translate_off
    gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
            $onehot0(gnt)) else $error("arbiter: granting more than one requester");
    gnt_q_exactly_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
            $onehot(gnt_q)) else $error("arbiter: grant state is not one-hot");
    // pragma translate_on
    //  }}}

endmodule
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : March, 2020
|
||||
* Description : Wrapper for Behavioral SRAM macros
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_sram
#(
    //  Width in bits of the address
    parameter int unsigned ADDR_SIZE = 0,
    //  Width in bits of a word
    parameter int unsigned DATA_SIZE = 0,
    //  Number of words
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                 clk,
    input  logic                 rst_n,
    input  logic                 cs,
    input  logic                 we,
    input  logic [ADDR_SIZE-1:0] addr,
    input  logic [DATA_SIZE-1:0] wdata,
    output logic [DATA_SIZE-1:0] rdata
);

    //  Pure wrapper: parameters and ports are forwarded unchanged to the
    //  hpdcache_sram_1rw instance
    hpdcache_sram_1rw #(
        .ADDR_SIZE (ADDR_SIZE),
        .DATA_SIZE (DATA_SIZE),
        .DEPTH     (DEPTH)
    ) ram_i (
        .clk   (clk),
        .rst_n (rst_n),
        .cs    (cs),
        .we    (we),
        .addr  (addr),
        .wdata (wdata),
        .rdata (rdata)
    );

endmodule : hpdcache_sram
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : March, 2020
|
||||
* Description : Wrapper for 1RW SRAM macros implementing a write byte enable
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_sram_wbyteenable
#(
    //  Width in bits of the address
    parameter int unsigned ADDR_SIZE = 0,
    //  Width in bits of a word
    parameter int unsigned DATA_SIZE = 0,
    //  Number of words
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                   clk,
    input  logic                   rst_n,
    input  logic                   cs,
    input  logic                   we,
    input  logic [ADDR_SIZE-1:0]   addr,
    input  logic [DATA_SIZE-1:0]   wdata,
    input  logic [DATA_SIZE/8-1:0] wbyteenable, //  one bit per byte of wdata
    output logic [DATA_SIZE-1:0]   rdata
);

    //  Pure wrapper: parameters and ports are forwarded unchanged to the
    //  hpdcache_sram_wbyteenable_1rw instance
    hpdcache_sram_wbyteenable_1rw #(
        .ADDR_SIZE (ADDR_SIZE),
        .DATA_SIZE (DATA_SIZE),
        .DEPTH     (DEPTH)
    ) ram_i (
        .clk         (clk),
        .rst_n       (rst_n),
        .cs          (cs),
        .we          (we),
        .addr        (addr),
        .wdata       (wdata),
        .wbyteenable (wbyteenable),
        .rdata       (rdata)
    );

endmodule : hpdcache_sram_wbyteenable
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : March, 2020
|
||||
* Description : Wrapper for 1RW SRAM macros implementing write bit mask
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_sram_wmask
#(
    //  Width in bits of the address
    parameter int unsigned ADDR_SIZE = 0,
    //  Width in bits of a word
    parameter int unsigned DATA_SIZE = 0,
    //  Number of words
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                 clk,
    input  logic                 rst_n,
    input  logic                 cs,
    input  logic                 we,
    input  logic [ADDR_SIZE-1:0] addr,
    input  logic [DATA_SIZE-1:0] wdata,
    input  logic [DATA_SIZE-1:0] wmask, //  one bit per bit of wdata
    output logic [DATA_SIZE-1:0] rdata
);

    //  Pure wrapper: parameters and ports are forwarded unchanged to the
    //  hpdcache_sram_wmask_1rw instance
    hpdcache_sram_wmask_1rw #(
        .ADDR_SIZE (ADDR_SIZE),
        .DATA_SIZE (DATA_SIZE),
        .DEPTH     (DEPTH)
    ) ram_i (
        .clk   (clk),
        .rst_n (rst_n),
        .cs    (cs),
        .we    (we),
        .addr  (addr),
        .wdata (wdata),
        .wmask (wmask),
        .rdata (rdata)
    );

endmodule : hpdcache_sram_wmask
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : October, 2023
|
||||
* Description : Synchronization buffer
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_sync_buffer
    //  Parameters
    //  {{{
#(
    //  When set, a write may be forwarded combinationally to the read side
    parameter bit  FEEDTHROUGH = 1'b0,
    //  Type of the stored data
    parameter type data_t      = logic
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic  clk_i,
    input  logic  rst_ni,
    input  logic  w_i,
    output logic  wok_o,
    input  data_t wdata_i,
    input  logic  r_i,
    output logic  rok_o,
    output data_t rdata_o
);
    //  }}}

    //  Declaration of internal wires and registers
    //  {{{
    data_t buf_q;
    logic  buf_we;
    logic  valid_q, valid_d;
    //  }}}

    //  Global control signals
    //  {{{
    assign rok_o = valid_q  | (FEEDTHROUGH & w_i);
    assign wok_o = ~valid_q | (FEEDTHROUGH & r_i);

    //  Without feedthrough, the buffer is written when it is empty.
    //  With feedthrough, it is written when it is empty and not read in the
    //  same cycle (otherwise the data bypasses the buffer), or when it is
    //  full and read in the same cycle.
    assign buf_we = w_i & (FEEDTHROUGH ? (valid_q ~^ r_i) : ~valid_q);
    //  }}}

    //  Control of buffer
    //  {{{
    //  The buffer holds valid data after a write, or until it is read
    assign valid_d = buf_we | (valid_q & ~r_i);
    //  }}}

    //  FIFO buffer memory management
    //  {{{
    always_ff @(posedge clk_i)
    begin
        if (buf_we) begin
            buf_q <= wdata_i;
        end
    end

    assign rdata_o = (FEEDTHROUGH && !valid_q) ? wdata_i : buf_q;
    //  }}}

    //  Setting of internal state
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin
        if (!rst_ni) valid_q <= 1'b0;
        else         valid_q <= valid_d;
    end
    //  }}}
endmodule
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : March, 2020
|
||||
* Description : SRAM behavioral model
|
||||
* History :
|
||||
*/
|
||||
//  Behavioral model of a single-port (1RW) synchronous SRAM.
//
//  On a rising clock edge with cs asserted, rdata is loaded with the word
//  currently stored at addr; if we is also asserted, wdata is stored at addr.
//  Both updates are nonblocking, so a write cycle returns the previously
//  stored word on rdata. rst_n is not referenced by this model.
module hpdcache_sram_1rw
#(
    parameter int unsigned ADDR_SIZE = 0,
    parameter int unsigned DATA_SIZE = 0,
    parameter int unsigned DEPTH = 2**ADDR_SIZE
)
(
    input  logic                  clk,
    input  logic                  rst_n,
    input  logic                  cs,
    input  logic                  we,
    input  logic [ADDR_SIZE-1:0]  addr,
    input  logic [DATA_SIZE-1:0]  wdata,
    output logic [DATA_SIZE-1:0]  rdata
);

    //  Storage array: DEPTH words of DATA_SIZE bits
    logic [DATA_SIZE-1:0] mem [DEPTH];

    //  Clocked read / write access
    always_ff @(posedge clk)
    begin : mem_update_ff
        if (cs) begin
            //  Samples the old content of the addressed word
            rdata <= mem[addr];
            if (we) begin
                mem[addr] <= wdata;
            end
        end
    end : mem_update_ff
endmodule : hpdcache_sram_1rw
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : March, 2020
|
||||
* Description : Behavioral model of a 1RW SRAM with write byte enable
|
||||
* History :
|
||||
*/
|
||||
//  Behavioral model of a single-port (1RW) synchronous SRAM with one write
//  enable bit per byte.
//
//  On a rising clock edge with cs asserted, rdata is loaded with the word
//  currently stored at addr. If we is also asserted, every byte lane whose
//  wbyteenable bit is set is overwritten with the matching byte of wdata;
//  the other lanes keep their content. Updates are nonblocking, so a write
//  cycle returns the previously stored word. rst_n is not referenced.
module hpdcache_sram_wbyteenable_1rw
#(
    parameter int unsigned ADDR_SIZE = 0,
    parameter int unsigned DATA_SIZE = 0,
    parameter int unsigned DEPTH = 2**ADDR_SIZE
)
(
    input  logic                   clk,
    input  logic                   rst_n,
    input  logic                   cs,
    input  logic                   we,
    input  logic [ADDR_SIZE-1:0]   addr,
    input  logic [DATA_SIZE-1:0]   wdata,
    input  logic [DATA_SIZE/8-1:0] wbyteenable,
    output logic [DATA_SIZE-1:0]   rdata
);

    //  Storage array: DEPTH words of DATA_SIZE bits
    logic [DATA_SIZE-1:0] mem [DEPTH];

    //  Clocked read / byte-wise write access
    always_ff @(posedge clk)
    begin : mem_update_ff
        if (cs) begin
            //  Samples the old content of the addressed word
            rdata <= mem[addr];
            if (we) begin
                for (int lane = 0; lane < DATA_SIZE/8; lane++) begin
                    if (wbyteenable[lane]) begin
                        mem[addr][lane*8 +: 8] <= wdata[lane*8 +: 8];
                    end
                end
            end
        end
    end : mem_update_ff
endmodule : hpdcache_sram_wbyteenable_1rw
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : March, 2020
|
||||
* Description : Behavioral model of a 1RW SRAM with write bit mask
|
||||
* History :
|
||||
*/
|
||||
//  Behavioral model of a single-port (1RW) synchronous SRAM with a per-bit
//  write mask.
//
//  On a rising clock edge with cs asserted, rdata is loaded with the word
//  currently stored at addr. If we is also asserted, the stored word is
//  replaced by a merge of wdata (where wmask is 1) and the old content
//  (where wmask is 0). Updates are nonblocking, so a write cycle returns
//  the previously stored word. rst_n is not referenced.
module hpdcache_sram_wmask_1rw
#(
    parameter int unsigned ADDR_SIZE = 0,
    parameter int unsigned DATA_SIZE = 0,
    parameter int unsigned DEPTH = 2**ADDR_SIZE
)
(
    input  logic                  clk,
    input  logic                  rst_n,
    input  logic                  cs,
    input  logic                  we,
    input  logic [ADDR_SIZE-1:0]  addr,
    input  logic [DATA_SIZE-1:0]  wdata,
    input  logic [DATA_SIZE-1:0]  wmask,
    output logic [DATA_SIZE-1:0]  rdata
);

    //  Storage array: DEPTH words of DATA_SIZE bits
    logic [DATA_SIZE-1:0] mem [DEPTH];

    //  Clocked read / masked write access
    always_ff @(posedge clk)
    begin : mem_update_ff
        if (cs) begin
            //  Samples the old content of the addressed word
            rdata <= mem[addr];
            if (we) begin
                //  Masked bits come from wdata, the others are preserved
                mem[addr] <= (wdata & wmask) | (mem[addr] & ~wmask);
            end
        end
    end : mem_update_ff
endmodule : hpdcache_sram_wmask_1rw
|
||||
|
|
@ -0,0 +1,658 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache top
|
||||
* History :
|
||||
*/
|
||||
// HPDcache top level. This module is a structural wrapper that instantiates
// and wires together:
//   - core_req_arbiter_i      (hpdcache_core_arbiter): connects the
//     NREQUESTERS-wide core request/response ports to the single arb_* request
//     and core_rsp* response signals used by the controller
//   - hpdcache_ctrl_i         (hpdcache_ctrl)
//   - hpdcache_wbuf_i         (hpdcache_wbuf_wrapper)
//   - hpdcache_miss_handler_i (hpdcache_miss_handler)
//   - hpdcache_uc_i           (hpdcache_uncached)
//   - hpdcache_cmo_i          (hpdcache_cmo)
// The hpdcache_mem_* type parameters and the HPDcacheMem* width parameters are
// forwarded to the sub-blocks that drive the memory interfaces.
module hpdcache
|
||||
import hpdcache_pkg::*;
|
||||
// Parameters
|
||||
// {{{
|
||||
#(
|
||||
parameter int NREQUESTERS = 1,
|
||||
parameter int HPDcacheMemIdWidth = 8,
|
||||
parameter int HPDcacheMemDataWidth = 512,
|
||||
parameter type hpdcache_mem_req_t = logic,
|
||||
parameter type hpdcache_mem_req_w_t = logic,
|
||||
parameter type hpdcache_mem_resp_r_t = logic,
|
||||
parameter type hpdcache_mem_resp_w_t = logic
|
||||
)
|
||||
// }}}
|
||||
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
// Clock and reset signals
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// Force the write buffer to send all pending writes
|
||||
input logic wbuf_flush_i,
|
||||
|
||||
// Core request interface
|
||||
// 1st cycle
|
||||
input logic core_req_valid_i [NREQUESTERS-1:0],
|
||||
output logic core_req_ready_o [NREQUESTERS-1:0],
|
||||
input hpdcache_req_t core_req_i [NREQUESTERS-1:0],
|
||||
// 2nd cycle
|
||||
input logic core_req_abort_i [NREQUESTERS-1:0],
|
||||
input hpdcache_tag_t core_req_tag_i [NREQUESTERS-1:0],
|
||||
input hpdcache_pma_t core_req_pma_i [NREQUESTERS-1:0],
|
||||
|
||||
// Core response interface
|
||||
output logic core_rsp_valid_o [NREQUESTERS-1:0],
|
||||
output hpdcache_rsp_t core_rsp_o [NREQUESTERS-1:0],
|
||||
|
||||
// Miss read interface
|
||||
input logic mem_req_miss_read_ready_i,
|
||||
output logic mem_req_miss_read_valid_o,
|
||||
output hpdcache_mem_req_t mem_req_miss_read_o,
|
||||
|
||||
output logic mem_resp_miss_read_ready_o,
|
||||
input logic mem_resp_miss_read_valid_i,
|
||||
input hpdcache_mem_resp_r_t mem_resp_miss_read_i,
|
||||
|
||||
// Write-buffer write interface
|
||||
input logic mem_req_wbuf_write_ready_i,
|
||||
output logic mem_req_wbuf_write_valid_o,
|
||||
output hpdcache_mem_req_t mem_req_wbuf_write_o,
|
||||
|
||||
input logic mem_req_wbuf_write_data_ready_i,
|
||||
output logic mem_req_wbuf_write_data_valid_o,
|
||||
output hpdcache_mem_req_w_t mem_req_wbuf_write_data_o,
|
||||
|
||||
output logic mem_resp_wbuf_write_ready_o,
|
||||
input logic mem_resp_wbuf_write_valid_i,
|
||||
input hpdcache_mem_resp_w_t mem_resp_wbuf_write_i,
|
||||
|
||||
// Uncached read interface
|
||||
input logic mem_req_uc_read_ready_i,
|
||||
output logic mem_req_uc_read_valid_o,
|
||||
output hpdcache_mem_req_t mem_req_uc_read_o,
|
||||
|
||||
output logic mem_resp_uc_read_ready_o,
|
||||
input logic mem_resp_uc_read_valid_i,
|
||||
input hpdcache_mem_resp_r_t mem_resp_uc_read_i,
|
||||
|
||||
// Uncached write interface
|
||||
input logic mem_req_uc_write_ready_i,
|
||||
output logic mem_req_uc_write_valid_o,
|
||||
output hpdcache_mem_req_t mem_req_uc_write_o,
|
||||
|
||||
input logic mem_req_uc_write_data_ready_i,
|
||||
output logic mem_req_uc_write_data_valid_o,
|
||||
output hpdcache_mem_req_w_t mem_req_uc_write_data_o,
|
||||
|
||||
output logic mem_resp_uc_write_ready_o,
|
||||
input logic mem_resp_uc_write_valid_i,
|
||||
input hpdcache_mem_resp_w_t mem_resp_uc_write_i,
|
||||
|
||||
// Performance events
|
||||
output logic evt_cache_write_miss_o,
|
||||
output logic evt_cache_read_miss_o,
|
||||
output logic evt_uncached_req_o,
|
||||
output logic evt_cmo_req_o,
|
||||
output logic evt_write_req_o,
|
||||
output logic evt_read_req_o,
|
||||
output logic evt_prefetch_req_o,
|
||||
output logic evt_req_on_hold_o,
|
||||
output logic evt_rtab_rollback_o,
|
||||
output logic evt_stall_refill_o,
|
||||
output logic evt_stall_o,
|
||||
|
||||
// Status interface
|
||||
output logic wbuf_empty_o,
|
||||
|
||||
// Configuration interface
|
||||
input logic cfg_enable_i,
|
||||
input wbuf_timecnt_t cfg_wbuf_threshold_i,
|
||||
input logic cfg_wbuf_reset_timecnt_on_write_i,
|
||||
input logic cfg_wbuf_sequential_waw_i,
|
||||
input logic cfg_wbuf_inhibit_write_coalescing_i,
|
||||
input logic cfg_prefetch_updt_plru_i,
|
||||
input logic cfg_error_on_cacheable_amo_i,
|
||||
input logic cfg_rtab_single_entry_i
|
||||
);
|
||||
|
||||
// }}}
|
||||
|
||||
// Declaration of internal signals
|
||||
// {{{
|
||||
logic refill_req_valid;
|
||||
logic refill_req_ready;
|
||||
logic refill_busy;
|
||||
logic refill_updt_plru;
|
||||
hpdcache_set_t refill_set;
|
||||
hpdcache_dir_entry_t refill_dir_entry;
|
||||
hpdcache_way_vector_t refill_read_victim_way;
|
||||
hpdcache_way_vector_t refill_write_victim_way;
|
||||
logic refill_write_dir;
|
||||
logic refill_write_data;
|
||||
hpdcache_word_t refill_word;
|
||||
hpdcache_refill_data_t refill_data;
|
||||
logic refill_core_rsp_valid;
|
||||
hpdcache_rsp_t refill_core_rsp;
|
||||
hpdcache_nline_t refill_nline;
|
||||
logic refill_updt_rtab;
|
||||
|
||||
logic miss_mshr_empty;
|
||||
logic miss_mshr_check;
|
||||
mshr_set_t miss_mshr_check_set;
|
||||
mshr_tag_t miss_mshr_check_tag;
|
||||
logic miss_mshr_hit;
|
||||
logic miss_mshr_alloc_cs;
|
||||
logic miss_mshr_alloc;
|
||||
logic miss_mshr_alloc_ready;
|
||||
logic miss_mshr_alloc_full;
|
||||
hpdcache_nline_t miss_mshr_alloc_nline;
|
||||
hpdcache_req_tid_t miss_mshr_alloc_tid;
|
||||
hpdcache_req_sid_t miss_mshr_alloc_sid;
|
||||
hpdcache_word_t miss_mshr_alloc_word;
|
||||
logic miss_mshr_alloc_need_rsp;
|
||||
logic miss_mshr_alloc_is_prefetch;
|
||||
|
||||
logic wbuf_flush_all;
|
||||
logic wbuf_write;
|
||||
logic wbuf_write_ready;
|
||||
wbuf_addr_t wbuf_write_addr;
|
||||
wbuf_data_t wbuf_write_data;
|
||||
wbuf_be_t wbuf_write_be;
|
||||
logic wbuf_write_uncacheable;
|
||||
logic wbuf_read_hit;
|
||||
logic wbuf_read_flush_hit;
|
||||
hpdcache_req_addr_t wbuf_rtab_addr;
|
||||
logic wbuf_rtab_is_read;
|
||||
logic wbuf_rtab_hit_open;
|
||||
logic wbuf_rtab_hit_pend;
|
||||
logic wbuf_rtab_hit_sent;
|
||||
logic wbuf_rtab_not_ready;
|
||||
|
||||
logic uc_ready;
|
||||
logic uc_req_valid;
|
||||
hpdcache_uc_op_t uc_req_op;
|
||||
hpdcache_req_addr_t uc_req_addr;
|
||||
hpdcache_req_size_t uc_req_size;
|
||||
hpdcache_req_data_t uc_req_data;
|
||||
hpdcache_req_be_t uc_req_be;
|
||||
logic uc_req_uncacheable;
|
||||
hpdcache_req_sid_t uc_req_sid;
|
||||
hpdcache_req_tid_t uc_req_tid;
|
||||
logic uc_req_need_rsp;
|
||||
logic uc_wbuf_flush_all;
|
||||
logic uc_dir_amo_match;
|
||||
hpdcache_set_t uc_dir_amo_match_set;
|
||||
hpdcache_tag_t uc_dir_amo_match_tag;
|
||||
logic uc_dir_amo_update_plru;
|
||||
hpdcache_way_vector_t uc_dir_amo_hit_way;
|
||||
logic uc_data_amo_write;
|
||||
logic uc_data_amo_write_enable;
|
||||
hpdcache_set_t uc_data_amo_write_set;
|
||||
hpdcache_req_size_t uc_data_amo_write_size;
|
||||
hpdcache_word_t uc_data_amo_write_word;
|
||||
logic [63:0] uc_data_amo_write_data;
|
||||
logic [7:0] uc_data_amo_write_be;
|
||||
logic uc_lrsc_snoop;
|
||||
hpdcache_req_addr_t uc_lrsc_snoop_addr;
|
||||
hpdcache_req_size_t uc_lrsc_snoop_size;
|
||||
logic uc_core_rsp_ready;
|
||||
logic uc_core_rsp_valid;
|
||||
hpdcache_rsp_t uc_core_rsp;
|
||||
|
||||
logic cmo_req_valid;
|
||||
logic cmo_ready;
|
||||
hpdcache_cmoh_op_t cmo_req_op;
|
||||
hpdcache_req_addr_t cmo_req_addr;
|
||||
hpdcache_req_data_t cmo_req_wdata;
|
||||
logic cmo_wbuf_flush_all;
|
||||
logic cmo_dir_check;
|
||||
hpdcache_set_t cmo_dir_check_set;
|
||||
hpdcache_tag_t cmo_dir_check_tag;
|
||||
hpdcache_way_vector_t cmo_dir_check_hit_way;
|
||||
logic cmo_dir_inval;
|
||||
hpdcache_set_t cmo_dir_inval_set;
|
||||
hpdcache_way_vector_t cmo_dir_inval_way;
|
||||
|
||||
logic rtab_empty;
|
||||
logic ctrl_empty;
|
||||
|
||||
logic core_rsp_valid;
|
||||
hpdcache_rsp_t core_rsp;
|
||||
|
||||
logic arb_req_valid;
|
||||
logic arb_req_ready;
|
||||
hpdcache_req_t arb_req;
|
||||
logic arb_abort;
|
||||
hpdcache_tag_t arb_tag;
|
||||
hpdcache_pma_t arb_pma;
|
||||
|
||||
// Memory-interface IDs passed to hpdcache_uc_i (mem_read_id_i / mem_write_id_i).
// Both constants are all ones, i.e. the read and write IDs have the same value.
localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_READ_ID = {HPDcacheMemIdWidth{1'b1}};
|
||||
localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_WRITE_ID = {HPDcacheMemIdWidth{1'b1}};
|
||||
// }}}
|
||||
|
||||
// Requesters arbiter
|
||||
// {{{
|
||||
hpdcache_core_arbiter #(
|
||||
.NREQUESTERS (NREQUESTERS)
|
||||
) core_req_arbiter_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.core_req_valid_i,
|
||||
.core_req_ready_o,
|
||||
.core_req_i,
|
||||
.core_req_abort_i,
|
||||
.core_req_tag_i,
|
||||
.core_req_pma_i,
|
||||
|
||||
.core_rsp_valid_i (core_rsp_valid),
|
||||
.core_rsp_i (core_rsp),
|
||||
.core_rsp_valid_o,
|
||||
.core_rsp_o,
|
||||
|
||||
.arb_req_valid_o (arb_req_valid),
|
||||
.arb_req_ready_i (arb_req_ready),
|
||||
.arb_req_o (arb_req),
|
||||
.arb_abort_o (arb_abort),
|
||||
.arb_tag_o (arb_tag),
|
||||
.arb_pma_o (arb_pma)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// HPDcache controller
|
||||
// {{{
|
||||
hpdcache_ctrl hpdcache_ctrl_i(
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.core_req_valid_i (arb_req_valid),
|
||||
.core_req_ready_o (arb_req_ready),
|
||||
.core_req_i (arb_req),
|
||||
.core_req_abort_i (arb_abort),
|
||||
.core_req_tag_i (arb_tag),
|
||||
.core_req_pma_i (arb_pma),
|
||||
|
||||
.core_rsp_valid_o (core_rsp_valid),
|
||||
.core_rsp_o (core_rsp),
|
||||
|
||||
.wbuf_flush_i,
|
||||
|
||||
.cachedir_hit_o (/* unused */),
|
||||
|
||||
.miss_mshr_check_o (miss_mshr_check),
|
||||
.miss_mshr_check_set_o (miss_mshr_check_set),
|
||||
.miss_mshr_check_tag_o (miss_mshr_check_tag),
|
||||
.miss_mshr_alloc_o (miss_mshr_alloc),
|
||||
.miss_mshr_alloc_cs_o (miss_mshr_alloc_cs),
|
||||
.miss_mshr_alloc_ready_i (miss_mshr_alloc_ready),
|
||||
.miss_mshr_alloc_full_i (miss_mshr_alloc_full),
|
||||
.miss_mshr_alloc_nline_o (miss_mshr_alloc_nline),
|
||||
.miss_mshr_alloc_tid_o (miss_mshr_alloc_tid),
|
||||
.miss_mshr_alloc_sid_o (miss_mshr_alloc_sid),
|
||||
.miss_mshr_alloc_word_o (miss_mshr_alloc_word),
|
||||
.miss_mshr_alloc_need_rsp_o (miss_mshr_alloc_need_rsp),
|
||||
.miss_mshr_alloc_is_prefetch_o (miss_mshr_alloc_is_prefetch),
|
||||
.miss_mshr_hit_i (miss_mshr_hit),
|
||||
|
||||
.refill_req_valid_i (refill_req_valid),
|
||||
.refill_req_ready_o (refill_req_ready),
|
||||
.refill_busy_i (refill_busy),
|
||||
.refill_updt_plru_i (refill_updt_plru),
|
||||
.refill_set_i (refill_set),
|
||||
.refill_dir_entry_i (refill_dir_entry),
|
||||
.refill_victim_way_o (refill_read_victim_way),
|
||||
.refill_victim_way_i (refill_write_victim_way),
|
||||
.refill_write_dir_i (refill_write_dir),
|
||||
.refill_write_data_i (refill_write_data),
|
||||
.refill_word_i (refill_word),
|
||||
.refill_data_i (refill_data),
|
||||
.refill_core_rsp_valid_i (refill_core_rsp_valid),
|
||||
.refill_core_rsp_i (refill_core_rsp),
|
||||
.refill_nline_i (refill_nline),
|
||||
.refill_updt_rtab_i (refill_updt_rtab),
|
||||
|
||||
.wbuf_empty_i (wbuf_empty_o),
|
||||
.wbuf_flush_all_o (wbuf_flush_all),
|
||||
.wbuf_write_o (wbuf_write),
|
||||
.wbuf_write_ready_i (wbuf_write_ready),
|
||||
.wbuf_write_addr_o (wbuf_write_addr),
|
||||
.wbuf_write_data_o (wbuf_write_data),
|
||||
.wbuf_write_be_o (wbuf_write_be),
|
||||
.wbuf_write_uncacheable_o (wbuf_write_uncacheable),
|
||||
.wbuf_read_hit_i (wbuf_read_hit),
|
||||
.wbuf_read_flush_hit_o (wbuf_read_flush_hit),
|
||||
.wbuf_rtab_addr_o (wbuf_rtab_addr),
|
||||
.wbuf_rtab_is_read_o (wbuf_rtab_is_read),
|
||||
.wbuf_rtab_hit_open_i (wbuf_rtab_hit_open),
|
||||
.wbuf_rtab_hit_pend_i (wbuf_rtab_hit_pend),
|
||||
.wbuf_rtab_hit_sent_i (wbuf_rtab_hit_sent),
|
||||
.wbuf_rtab_not_ready_i (wbuf_rtab_not_ready),
|
||||
|
||||
.uc_busy_i (~uc_ready),
|
||||
.uc_lrsc_snoop_o (uc_lrsc_snoop),
|
||||
.uc_lrsc_snoop_addr_o (uc_lrsc_snoop_addr),
|
||||
.uc_lrsc_snoop_size_o (uc_lrsc_snoop_size),
|
||||
.uc_req_valid_o (uc_req_valid),
|
||||
.uc_req_op_o (uc_req_op),
|
||||
.uc_req_addr_o (uc_req_addr),
|
||||
.uc_req_size_o (uc_req_size),
|
||||
.uc_req_data_o (uc_req_data),
|
||||
.uc_req_be_o (uc_req_be),
|
||||
.uc_req_uc_o (uc_req_uncacheable),
|
||||
.uc_req_sid_o (uc_req_sid),
|
||||
.uc_req_tid_o (uc_req_tid),
|
||||
.uc_req_need_rsp_o (uc_req_need_rsp),
|
||||
.uc_wbuf_flush_all_i (uc_wbuf_flush_all),
|
||||
.uc_dir_amo_match_i (uc_dir_amo_match),
|
||||
.uc_dir_amo_match_set_i (uc_dir_amo_match_set),
|
||||
.uc_dir_amo_match_tag_i (uc_dir_amo_match_tag),
|
||||
.uc_dir_amo_update_plru_i (uc_dir_amo_update_plru),
|
||||
.uc_dir_amo_hit_way_o (uc_dir_amo_hit_way),
|
||||
.uc_data_amo_write_i (uc_data_amo_write),
|
||||
.uc_data_amo_write_enable_i (uc_data_amo_write_enable),
|
||||
.uc_data_amo_write_set_i (uc_data_amo_write_set),
|
||||
.uc_data_amo_write_size_i (uc_data_amo_write_size),
|
||||
.uc_data_amo_write_word_i (uc_data_amo_write_word),
|
||||
.uc_data_amo_write_data_i (uc_data_amo_write_data),
|
||||
.uc_data_amo_write_be_i (uc_data_amo_write_be),
|
||||
.uc_core_rsp_ready_o (uc_core_rsp_ready),
|
||||
.uc_core_rsp_valid_i (uc_core_rsp_valid),
|
||||
.uc_core_rsp_i (uc_core_rsp),
|
||||
|
||||
.cmo_busy_i (~cmo_ready),
|
||||
.cmo_req_valid_o (cmo_req_valid),
|
||||
.cmo_req_op_o (cmo_req_op),
|
||||
.cmo_req_addr_o (cmo_req_addr),
|
||||
.cmo_req_wdata_o (cmo_req_wdata),
|
||||
.cmo_wbuf_flush_all_i (cmo_wbuf_flush_all),
|
||||
.cmo_dir_check_i (cmo_dir_check),
|
||||
.cmo_dir_check_set_i (cmo_dir_check_set),
|
||||
.cmo_dir_check_tag_i (cmo_dir_check_tag),
|
||||
.cmo_dir_check_hit_way_o (cmo_dir_check_hit_way),
|
||||
.cmo_dir_inval_i (cmo_dir_inval),
|
||||
.cmo_dir_inval_set_i (cmo_dir_inval_set),
|
||||
.cmo_dir_inval_way_i (cmo_dir_inval_way),
|
||||
|
||||
.rtab_empty_o (rtab_empty),
|
||||
.ctrl_empty_o (ctrl_empty),
|
||||
|
||||
.cfg_enable_i,
|
||||
.cfg_rtab_single_entry_i,
|
||||
|
||||
.evt_cache_write_miss_o,
|
||||
.evt_cache_read_miss_o,
|
||||
.evt_uncached_req_o,
|
||||
.evt_cmo_req_o,
|
||||
.evt_write_req_o,
|
||||
.evt_read_req_o,
|
||||
.evt_prefetch_req_o,
|
||||
.evt_req_on_hold_o,
|
||||
.evt_rtab_rollback_o,
|
||||
.evt_stall_refill_o,
|
||||
.evt_stall_o
|
||||
);
|
||||
// }}}
|
||||
|
||||
// HPDcache write-buffer
|
||||
// {{{
|
||||
hpdcache_wbuf_wrapper #(
|
||||
.HPDcacheMemIdWidth (HPDcacheMemIdWidth),
|
||||
.HPDcacheMemDataWidth (HPDcacheMemDataWidth),
|
||||
.hpdcache_mem_req_t (hpdcache_mem_req_t),
|
||||
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
|
||||
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t)
|
||||
) hpdcache_wbuf_i(
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.empty_o (wbuf_empty_o),
|
||||
.full_o (/* unused */),
|
||||
.flush_all_i (wbuf_flush_all),
|
||||
|
||||
.cfg_threshold_i (cfg_wbuf_threshold_i),
|
||||
.cfg_reset_timecnt_on_write_i (cfg_wbuf_reset_timecnt_on_write_i),
|
||||
.cfg_sequential_waw_i (cfg_wbuf_sequential_waw_i),
|
||||
.cfg_inhibit_write_coalescing_i (cfg_wbuf_inhibit_write_coalescing_i),
|
||||
|
||||
.write_i (wbuf_write),
|
||||
.write_ready_o (wbuf_write_ready),
|
||||
.write_addr_i (wbuf_write_addr),
|
||||
.write_data_i (wbuf_write_data),
|
||||
.write_be_i (wbuf_write_be),
|
||||
.write_uc_i (wbuf_write_uncacheable),
|
||||
|
||||
// The read lookup is driven by the same address signal as the write port
.read_addr_i (wbuf_write_addr),
|
||||
.read_hit_o (wbuf_read_hit),
|
||||
.read_flush_hit_i (wbuf_read_flush_hit),
|
||||
|
||||
.replay_addr_i (wbuf_rtab_addr),
|
||||
.replay_is_read_i (wbuf_rtab_is_read),
|
||||
.replay_open_hit_o (wbuf_rtab_hit_open),
|
||||
.replay_pend_hit_o (wbuf_rtab_hit_pend),
|
||||
.replay_sent_hit_o (wbuf_rtab_hit_sent),
|
||||
.replay_not_ready_o (wbuf_rtab_not_ready),
|
||||
|
||||
.mem_req_write_ready_i (mem_req_wbuf_write_ready_i),
|
||||
.mem_req_write_valid_o (mem_req_wbuf_write_valid_o),
|
||||
.mem_req_write_o (mem_req_wbuf_write_o),
|
||||
|
||||
.mem_req_write_data_ready_i (mem_req_wbuf_write_data_ready_i),
|
||||
.mem_req_write_data_valid_o (mem_req_wbuf_write_data_valid_o),
|
||||
.mem_req_write_data_o (mem_req_wbuf_write_data_o),
|
||||
|
||||
.mem_resp_write_ready_o (mem_resp_wbuf_write_ready_o),
|
||||
.mem_resp_write_valid_i (mem_resp_wbuf_write_valid_i),
|
||||
.mem_resp_write_i (mem_resp_wbuf_write_i)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Miss handler
|
||||
// {{{
|
||||
hpdcache_miss_handler #(
|
||||
.HPDcacheMemIdWidth (HPDcacheMemIdWidth),
|
||||
.HPDcacheMemDataWidth (HPDcacheMemDataWidth),
|
||||
.hpdcache_mem_req_t (hpdcache_mem_req_t),
|
||||
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t)
|
||||
) hpdcache_miss_handler_i(
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.mshr_empty_o (miss_mshr_empty),
|
||||
.mshr_full_o (/* unused */),
|
||||
|
||||
.cfg_prefetch_updt_plru_i,
|
||||
|
||||
.mshr_check_i (miss_mshr_check),
|
||||
.mshr_check_set_i (miss_mshr_check_set),
|
||||
.mshr_check_tag_i (miss_mshr_check_tag),
|
||||
.mshr_check_hit_o (miss_mshr_hit),
|
||||
|
||||
.mshr_alloc_ready_o (miss_mshr_alloc_ready),
|
||||
.mshr_alloc_i (miss_mshr_alloc),
|
||||
.mshr_alloc_cs_i (miss_mshr_alloc_cs),
|
||||
.mshr_alloc_full_o (miss_mshr_alloc_full),
|
||||
.mshr_alloc_nline_i (miss_mshr_alloc_nline),
|
||||
.mshr_alloc_tid_i (miss_mshr_alloc_tid),
|
||||
.mshr_alloc_sid_i (miss_mshr_alloc_sid),
|
||||
.mshr_alloc_word_i (miss_mshr_alloc_word),
|
||||
.mshr_alloc_need_rsp_i (miss_mshr_alloc_need_rsp),
|
||||
.mshr_alloc_is_prefetch_i (miss_mshr_alloc_is_prefetch),
|
||||
|
||||
.refill_req_ready_i (refill_req_ready),
|
||||
.refill_req_valid_o (refill_req_valid),
|
||||
.refill_busy_o (refill_busy),
|
||||
.refill_updt_plru_o (refill_updt_plru),
|
||||
.refill_set_o (refill_set),
|
||||
.refill_dir_entry_o (refill_dir_entry),
|
||||
.refill_victim_way_i (refill_read_victim_way),
|
||||
.refill_write_dir_o (refill_write_dir),
|
||||
.refill_write_data_o (refill_write_data),
|
||||
.refill_victim_way_o (refill_write_victim_way),
|
||||
.refill_data_o (refill_data),
|
||||
.refill_word_o (refill_word),
|
||||
.refill_nline_o (refill_nline),
|
||||
.refill_updt_rtab_o (refill_updt_rtab),
|
||||
|
||||
.refill_core_rsp_valid_o (refill_core_rsp_valid),
|
||||
.refill_core_rsp_o (refill_core_rsp),
|
||||
|
||||
.mem_req_ready_i (mem_req_miss_read_ready_i),
|
||||
.mem_req_valid_o (mem_req_miss_read_valid_o),
|
||||
.mem_req_o (mem_req_miss_read_o),
|
||||
|
||||
.mem_resp_ready_o (mem_resp_miss_read_ready_o),
|
||||
.mem_resp_valid_i (mem_resp_miss_read_valid_i),
|
||||
.mem_resp_i (mem_resp_miss_read_i)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Uncacheable request handler
|
||||
// {{{
|
||||
hpdcache_uncached #(
|
||||
.HPDcacheMemIdWidth (HPDcacheMemIdWidth),
|
||||
.HPDcacheMemDataWidth (HPDcacheMemDataWidth),
|
||||
.hpdcache_mem_req_t (hpdcache_mem_req_t),
|
||||
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
|
||||
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t),
|
||||
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t)
|
||||
) hpdcache_uc_i(
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.wbuf_empty_i (wbuf_empty_o),
|
||||
.mshr_empty_i (miss_mshr_empty),
|
||||
.rtab_empty_i (rtab_empty),
|
||||
.ctrl_empty_i (ctrl_empty),
|
||||
|
||||
.req_valid_i (uc_req_valid),
|
||||
.req_ready_o (uc_ready),
|
||||
.req_op_i (uc_req_op),
|
||||
.req_addr_i (uc_req_addr),
|
||||
.req_size_i (uc_req_size),
|
||||
.req_data_i (uc_req_data),
|
||||
.req_be_i (uc_req_be),
|
||||
.req_uc_i (uc_req_uncacheable),
|
||||
.req_sid_i (uc_req_sid),
|
||||
.req_tid_i (uc_req_tid),
|
||||
.req_need_rsp_i (uc_req_need_rsp),
|
||||
|
||||
.wbuf_flush_all_o (uc_wbuf_flush_all),
|
||||
|
||||
.dir_amo_match_o (uc_dir_amo_match),
|
||||
.dir_amo_match_set_o (uc_dir_amo_match_set),
|
||||
.dir_amo_match_tag_o (uc_dir_amo_match_tag),
|
||||
.dir_amo_update_plru_o (uc_dir_amo_update_plru),
|
||||
.dir_amo_hit_way_i (uc_dir_amo_hit_way),
|
||||
|
||||
.data_amo_write_o (uc_data_amo_write),
|
||||
.data_amo_write_enable_o (uc_data_amo_write_enable),
|
||||
.data_amo_write_set_o (uc_data_amo_write_set),
|
||||
.data_amo_write_size_o (uc_data_amo_write_size),
|
||||
.data_amo_write_word_o (uc_data_amo_write_word),
|
||||
.data_amo_write_data_o (uc_data_amo_write_data),
|
||||
.data_amo_write_be_o (uc_data_amo_write_be),
|
||||
|
||||
.lrsc_snoop_i (uc_lrsc_snoop),
|
||||
.lrsc_snoop_addr_i (uc_lrsc_snoop_addr),
|
||||
.lrsc_snoop_size_i (uc_lrsc_snoop_size),
|
||||
|
||||
.core_rsp_ready_i (uc_core_rsp_ready),
|
||||
.core_rsp_valid_o (uc_core_rsp_valid),
|
||||
.core_rsp_o (uc_core_rsp),
|
||||
|
||||
.mem_read_id_i (HPDCACHE_UC_READ_ID),
|
||||
.mem_write_id_i (HPDCACHE_UC_WRITE_ID),
|
||||
|
||||
.mem_req_read_ready_i (mem_req_uc_read_ready_i),
|
||||
.mem_req_read_valid_o (mem_req_uc_read_valid_o),
|
||||
.mem_req_read_o (mem_req_uc_read_o),
|
||||
|
||||
.mem_resp_read_ready_o (mem_resp_uc_read_ready_o),
|
||||
.mem_resp_read_valid_i (mem_resp_uc_read_valid_i),
|
||||
.mem_resp_read_i (mem_resp_uc_read_i),
|
||||
|
||||
.mem_req_write_ready_i (mem_req_uc_write_ready_i),
|
||||
.mem_req_write_valid_o (mem_req_uc_write_valid_o),
|
||||
.mem_req_write_o (mem_req_uc_write_o),
|
||||
|
||||
.mem_req_write_data_ready_i (mem_req_uc_write_data_ready_i),
|
||||
.mem_req_write_data_valid_o (mem_req_uc_write_data_valid_o),
|
||||
.mem_req_write_data_o (mem_req_uc_write_data_o),
|
||||
|
||||
.mem_resp_write_ready_o (mem_resp_uc_write_ready_o),
|
||||
.mem_resp_write_valid_i (mem_resp_uc_write_valid_i),
|
||||
.mem_resp_write_i (mem_resp_uc_write_i),
|
||||
|
||||
.cfg_error_on_cacheable_amo_i
|
||||
);
// }}}
|
||||
|
||||
// CMO Request Handler
|
||||
// {{{
|
||||
hpdcache_cmo hpdcache_cmo_i(
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.wbuf_empty_i (wbuf_empty_o),
|
||||
.mshr_empty_i (miss_mshr_empty),
|
||||
.rtab_empty_i (rtab_empty),
|
||||
.ctrl_empty_i (ctrl_empty),
|
||||
|
||||
.req_valid_i (cmo_req_valid),
|
||||
.req_ready_o (cmo_ready),
|
||||
.req_op_i (cmo_req_op),
|
||||
.req_addr_i (cmo_req_addr),
|
||||
.req_wdata_i (cmo_req_wdata),
|
||||
|
||||
.wbuf_flush_all_o (cmo_wbuf_flush_all),
|
||||
|
||||
.dir_check_o (cmo_dir_check),
|
||||
.dir_check_set_o (cmo_dir_check_set),
|
||||
.dir_check_tag_o (cmo_dir_check_tag),
|
||||
.dir_check_hit_way_i (cmo_dir_check_hit_way),
|
||||
|
||||
.dir_inval_o (cmo_dir_inval),
|
||||
.dir_inval_set_o (cmo_dir_inval_set),
|
||||
.dir_inval_way_o (cmo_dir_inval_way)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// {{{
|
||||
// pragma translate_off
|
||||
// Parameter consistency checks, placed in an initial block so they are
// evaluated once at the start of simulation ("l.e." = less than or equal).
initial begin
|
||||
req_access_width_assert:
|
||||
assert (HPDCACHE_REQ_WORDS <= HPDCACHE_ACCESS_WORDS) else
|
||||
$error("req data width shall be l.e. to cache access width");
|
||||
refill_access_width_assert:
|
||||
assert (HPDCACHE_CL_WORDS >= HPDCACHE_ACCESS_WORDS) else
|
||||
$error("cache access width shall be l.e. to cache-line width");
|
||||
miss_mem_id_width_assert:
|
||||
assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_WAY_WIDTH + HPDCACHE_MSHR_SET_WIDTH)) else
|
||||
$error("insufficient ID bits on the mem interface to transport misses");
|
||||
wbuf_mem_id_width_assert:
|
||||
assert (HPDcacheMemIdWidth >= HPDCACHE_WBUF_DIR_PTR_WIDTH) else
|
||||
$error("insufficient ID bits on the mem interface to transport writes");
|
||||
|
||||
end
|
||||
// pragma translate_on
|
||||
// }}}
|
||||
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : May, 2021
|
||||
* Description : HPDcache AMO computing unit
|
||||
* History :
|
||||
*/
|
||||
//  Combinational AMO computing unit.
//
//  Produces result_o from the loaded value (ld_data_i) and the store operand
//  (st_data_i) according to the is_amo_* flag set in op_i:
//    lr          -> ld_data_i
//    sc, swap    -> st_data_i
//    add         -> ld_data_i + st_data_i (64-bit wrap-around)
//    and/or/xor  -> bitwise combination of both operands
//    max / min   -> signed comparison of the operands
//    maxu / minu -> unsigned comparison of the operands
//  When no listed flag is set, result_o is zero.
module hpdcache_amo
import hpdcache_pkg::*;
    //  Ports
    //  {{{
(
    input  logic [63:0]     ld_data_i,
    input  logic [63:0]     st_data_i,
    input  hpdcache_uc_op_t op_i,
    output logic [63:0]     result_o
);
    //  }}}

    //  Shared arithmetic
    //  {{{
    logic        ld_gt_st_signed;    //  ld > st, two's complement view
    logic        ld_gt_st_unsigned;  //  ld > st, unsigned view
    logic [63:0] add_result;

    assign ld_gt_st_signed   = ($signed(ld_data_i) > $signed(st_data_i));
    assign ld_gt_st_unsigned = (ld_data_i > st_data_i);
    assign add_result        = ld_data_i + st_data_i;
    //  }}}

    //  Result selection
    //  {{{
    //  The items are evaluated against the constant 1'b1, so the item whose
    //  flag is set is the one that drives result_o.
    always_comb
    begin : amo_compute_comb
        unique case (1'b1)
            op_i.is_amo_lr   : result_o = ld_data_i;
            op_i.is_amo_sc   : result_o = st_data_i;
            op_i.is_amo_swap : result_o = st_data_i;
            op_i.is_amo_add  : result_o = add_result;
            op_i.is_amo_and  : result_o = ld_data_i & st_data_i;
            op_i.is_amo_or   : result_o = ld_data_i | st_data_i;
            op_i.is_amo_xor  : result_o = ld_data_i ^ st_data_i;
            op_i.is_amo_max  : result_o = ld_gt_st_signed   ? ld_data_i : st_data_i;
            op_i.is_amo_maxu : result_o = ld_gt_st_unsigned ? ld_data_i : st_data_i;
            op_i.is_amo_min  : result_o = ld_gt_st_signed   ? st_data_i : ld_data_i;
            op_i.is_amo_minu : result_o = ld_gt_st_unsigned ? st_data_i : ld_data_i;
            default          : result_o = '0;
        endcase
    end
    //  }}}
endmodule
|
||||
|
|
@ -0,0 +1,250 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : July, 2021
|
||||
* Description : HPDcache Cache-Management-Operation Handler
|
||||
* History :
|
||||
*/
|
||||
//  HPDcache Cache-Management-Operation (CMO) handler.
//
//  A request is accepted only while the FSM is idle (req_ready_o). Two
//  families of operations are handled:
//
//    - fence: asks the write buffer to flush (wbuf_flush_all_o, asserted only
//      while the replay table is empty) and stays busy until both the write
//      buffer and the replay table report empty.
//
//    - invalidation (by cache line, by set, or of the whole cache): the
//      request is captured, the handler waits for the MSHR, the replay table
//      and the controller pipeline to be empty, then it drives the cache
//      directory invalidation interface.
module hpdcache_cmo
import hpdcache_pkg::*;
    //  Ports
    //  {{{
(
    input  logic                  clk_i,
    input  logic                  rst_ni,

    //  Global control signals
    //  {{{
    input  logic                  wbuf_empty_i,
    input  logic                  mshr_empty_i,
    input  logic                  rtab_empty_i,
    input  logic                  ctrl_empty_i,
    //  }}}

    //  Request interface
    //  {{{
    input  logic                  req_valid_i,
    output logic                  req_ready_o,
    input  hpdcache_cmoh_op_t     req_op_i,
    input  hpdcache_req_addr_t    req_addr_i,
    input  hpdcache_req_data_t    req_wdata_i,
    //  }}}

    //  Write Buffer Interface
    //  {{{
    output logic                  wbuf_flush_all_o,
    //  }}}

    //  Cache Directory Interface
    //  {{{
    output logic                  dir_check_o,
    output hpdcache_set_t         dir_check_set_o,
    output hpdcache_tag_t         dir_check_tag_o,
    input  hpdcache_way_vector_t  dir_check_hit_way_i,

    output logic                  dir_inval_o,
    output hpdcache_set_t         dir_inval_set_o,
    output hpdcache_way_vector_t  dir_inval_way_o
    //  }}}
);
    //  }}}

    //  Definition of constants and types
    //  {{{
    typedef enum {
        CMOH_IDLE,
        CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY,
        CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY,
        CMOH_INVAL_CHECK_NLINE,
        CMOH_INVAL_SET
    } hpdcache_cmoh_fsm_t;
    //  }}}

    //  Internal signals and registers
    //  {{{
    //  FSM state
    hpdcache_cmoh_fsm_t    cmoh_fsm_q, cmoh_fsm_d;

    //  Captured parameters of the invalidation being processed
    hpdcache_cmoh_op_t     cmoh_op_q, cmoh_op_d;
    hpdcache_req_addr_t    cmoh_addr_q, cmoh_addr_d;
    hpdcache_way_vector_t  cmoh_way_q, cmoh_way_d;

    //  Set counter used to walk the directory when invalidating everything
    hpdcache_set_t         cmoh_set_cnt_q, cmoh_set_cnt_d;

    //  Fields extracted from the captured address
    hpdcache_nline_t       cmoh_nline_q;
    hpdcache_tag_t         cmoh_tag_q;
    hpdcache_set_t         cmoh_set_q;

    //  Word of the write data carrying the parameters of the operation
    hpdcache_data_word_t   cmoh_wdata;

    //  Drain conditions
    logic                  fence_drained;
    logic                  inval_quiesced;
    //  }}}

    //  Address decoding and static outputs
    //  {{{
    assign cmoh_nline_q = cmoh_addr_q[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH];
    assign cmoh_set_q   = cmoh_nline_q[0 +: HPDCACHE_SET_WIDTH];
    assign cmoh_tag_q   = cmoh_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];

    //  The directory lookup always targets the captured address
    assign dir_check_set_o = cmoh_set_q;
    assign dir_check_tag_o = cmoh_tag_q;

    //  New requests are accepted only when no operation is in progress
    assign req_ready_o = (cmoh_fsm_q == CMOH_IDLE);

    //  Only the least significant word of the write data contains parameters
    //  for the CMO handler
    assign cmoh_wdata = req_wdata_i[0];

    //  A fence completes once the write buffer and the replay table are empty
    assign fence_drained = wbuf_empty_i && rtab_empty_i;

    //  An invalidation starts once the MSHR, the replay table and the
    //  controller pipeline are empty
    assign inval_quiesced = mshr_empty_i && rtab_empty_i && ctrl_empty_i;
    //  }}}

    //  CMO request handler FSM
    //  {{{
    always_comb
    begin : cmoh_fsm_comb
        //  By default: hold the state and the registers, keep outputs idle
        cmoh_fsm_d       = cmoh_fsm_q;
        cmoh_op_d        = cmoh_op_q;
        cmoh_addr_d      = cmoh_addr_q;
        cmoh_way_d       = cmoh_way_q;
        cmoh_set_cnt_d   = cmoh_set_cnt_q;

        wbuf_flush_all_o = 1'b0;

        dir_check_o      = 1'b0;

        dir_inval_o      = 1'b0;
        dir_inval_set_o  = cmoh_set_q;
        dir_inval_way_o  = '0;

        case (cmoh_fsm_q)
            //  Wait for a request and dispatch it
            CMOH_IDLE: begin
                if (req_valid_i) begin
                    unique case (1'b1)
                        req_op_i.is_fence: begin
                            //  Ask the write buffer to send all its open
                            //  entries (only when the replay table is empty)
                            wbuf_flush_all_o = rtab_empty_i;

                            //  Remain busy unless everything is already
                            //  drained
                            if (!fence_drained) begin
                                cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY;
                            end
                        end

                        req_op_i.is_inval_by_nline,
                        req_op_i.is_inval_by_set,
                        req_op_i.is_inval_all: begin
                            //  Capture the parameters of the invalidation
                            cmoh_op_d      = req_op_i;
                            cmoh_addr_d    = req_addr_i;
                            cmoh_way_d     = cmoh_wdata[0 +: HPDCACHE_WAYS];
                            cmoh_set_cnt_d = 0;

                            //  Start right away when the cache is quiescent,
                            //  otherwise wait for it
                            cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY;
                            if (inval_quiesced) begin
                                cmoh_fsm_d = CMOH_INVAL_SET;
                                if (req_op_i.is_inval_by_nline) begin
                                    cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE;
                                end
                            end
                        end

                        default: begin
                            // pragma translate_off
                            $error("cmo handler: unexpected operation");
                            // pragma translate_on
                        end
                    endcase
                end
            end

            //  Fence: keep requesting the flush until everything is drained
            CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY: begin
                wbuf_flush_all_o = rtab_empty_i;

                if (fence_drained) begin
                    cmoh_fsm_d = CMOH_IDLE;
                end
            end

            //  Invalidation: wait for the cache to be quiescent
            CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY: begin
                if (inval_quiesced) begin
                    cmoh_fsm_d = CMOH_INVAL_SET;
                    if (cmoh_op_q.is_inval_by_nline) begin
                        cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE;
                    end
                end
            end

            //  Invalidation by cache line: look up the line in the directory.
            //  The hit vector is consumed in the following state.
            CMOH_INVAL_CHECK_NLINE: begin
                dir_check_o = 1'b1;
                cmoh_fsm_d  = CMOH_INVAL_SET;
            end

            //  Drive the invalidation on the directory
            CMOH_INVAL_SET: begin
                case (1'b1)
                    //  Invalidate the matching way, if any
                    cmoh_op_q.is_inval_by_nline: begin
                        dir_inval_o     = |dir_check_hit_way_i;
                        dir_inval_way_o = dir_check_hit_way_i;
                        cmoh_fsm_d      = CMOH_IDLE;
                    end

                    //  Invalidate all the ways of one set per cycle, until
                    //  the last set has been processed
                    cmoh_op_q.is_inval_all: begin
                        dir_inval_o     = 1'b1;
                        dir_inval_way_o = {HPDCACHE_WAYS{1'b1}};
                        dir_inval_set_o = cmoh_set_cnt_q;
                        cmoh_set_cnt_d  = cmoh_set_cnt_q + 1;
                        if (cmoh_set_cnt_q == hpdcache_set_t'(HPDCACHE_SETS - 1)) begin
                            cmoh_fsm_d = CMOH_IDLE;
                        end
                    end

                    //  Invalidate the requested ways of the addressed set
                    cmoh_op_q.is_inval_by_set: begin
                        dir_inval_o     = 1'b1;
                        dir_inval_way_o = cmoh_way_q;
                        cmoh_fsm_d      = CMOH_IDLE;
                    end
                endcase
            end
        endcase
    end
    //  }}}

    //  CMO request handler set state
    //  {{{
    //  The FSM state is the only register with a reset value
    always_ff @(posedge clk_i or negedge rst_ni)
    begin
        if (!rst_ni) begin
            cmoh_fsm_q <= CMOH_IDLE;
        end else begin
            cmoh_fsm_q <= cmoh_fsm_d;
        end
    end

    //  Payload registers (no reset)
    always_ff @(posedge clk_i)
    begin
        cmoh_op_q      <= cmoh_op_d;
        cmoh_addr_q    <= cmoh_addr_d;
        cmoh_way_q     <= cmoh_way_d;
        cmoh_set_cnt_q <= cmoh_set_cnt_d;
    end
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    //  A valid request shall select exactly one operation
    assert property (@(posedge clk_i) disable iff (!rst_ni)
            req_valid_i -> $onehot(req_op_i)) else
                    $error("cmo_handler: more than one operation type requested");

    //  A request shall only be presented while the handler is idle
    assert property (@(posedge clk_i) disable iff (!rst_ni)
            req_valid_i -> (cmoh_fsm_q == CMOH_IDLE)) else
                    $error("cmo_handler: new request received while busy");
    //  pragma translate_on
    //  }}}

endmodule
|
||||
|
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : September, 2023
|
||||
* Description : HPDcache request arbiter
|
||||
* History :
|
||||
*/
|
||||
//  HPDcache request arbiter.
//
//  Selects one request among NREQUESTERS request ports and forwards it on the
//  arb_* interface. The request itself is selected with the current grant
//  vector, while the abort, tag and PMA inputs are selected with the grant
//  vector registered on the previous clock edge (they are labelled as
//  "2nd cycle" inputs in the port list).
//
//  Responses are broadcast to all the requesters; the valid is asserted only
//  for the requester whose index matches the sid field of the response.
module hpdcache_core_arbiter
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    //  Number of request ports
    parameter int NREQUESTERS = 1
)
    //  }}}

    //  Ports
    //  {{{
(
    //      Clock and reset signals
    input  logic                  clk_i,
    input  logic                  rst_ni,

    //      Core request interface
    //         1st cycle
    input  logic                  core_req_valid_i [NREQUESTERS-1:0],
    output logic                  core_req_ready_o [NREQUESTERS-1:0],
    input  hpdcache_req_t         core_req_i       [NREQUESTERS-1:0],
    //         2nd cycle
    input  logic                  core_req_abort_i [NREQUESTERS-1:0],
    input  hpdcache_tag_t         core_req_tag_i   [NREQUESTERS-1:0],
    input  hpdcache_pma_t         core_req_pma_i   [NREQUESTERS-1:0],

    //      Core response interface
    input  logic                  core_rsp_valid_i,
    input  hpdcache_rsp_t         core_rsp_i,
    output logic                  core_rsp_valid_o [NREQUESTERS-1:0],
    output hpdcache_rsp_t         core_rsp_o       [NREQUESTERS-1:0],

    //      Granted request
    output logic                  arb_req_valid_o,
    input  logic                  arb_req_ready_i,
    output hpdcache_req_t         arb_req_o,
    output logic                  arb_abort_o,
    output hpdcache_tag_t         arb_tag_o,
    output hpdcache_pma_t         arb_pma_o
);
    //  }}}

    //  Declaration of internal signals
    //  {{{
    //  Packed copies of the (unpacked) request ports
    logic          [NREQUESTERS-1:0] core_req_valid;
    hpdcache_req_t [NREQUESTERS-1:0] core_req;
    logic          [NREQUESTERS-1:0] core_req_abort;
    hpdcache_tag_t [NREQUESTERS-1:0] core_req_tag;
    hpdcache_pma_t [NREQUESTERS-1:0] core_req_pma;

    //  Grant vector: current (_d) and registered (_q)
    logic          [NREQUESTERS-1:0] arb_req_gnt_d;
    logic          [NREQUESTERS-1:0] arb_req_gnt_q;
    //  }}}

    //  Requesters arbiter
    //  {{{
    //      Pack request ports
    for (genvar gen_i = 0; gen_i < int'(NREQUESTERS); gen_i++) begin : gen_core_req
        //  1st cycle signals
        assign core_req_valid[gen_i] = core_req_valid_i[gen_i];
        assign core_req[gen_i]       = core_req_i[gen_i];

        //  2nd cycle signals
        assign core_req_abort[gen_i] = core_req_abort_i[gen_i];
        assign core_req_tag[gen_i]   = core_req_tag_i[gen_i];
        assign core_req_pma[gen_i]   = core_req_pma_i[gen_i];

        //  A requester is ready when it is granted and the target accepts
        //  the request
        assign core_req_ready_o[gen_i] = arb_req_gnt_d[gen_i] & arb_req_ready_i;
    end

    //      Arbiter
    hpdcache_fxarb #(
        .N       (NREQUESTERS)
    ) req_arbiter_i (
        .clk_i   (clk_i),
        .rst_ni  (rst_ni),
        .req_i   (core_req_valid),
        .gnt_o   (arb_req_gnt_d),
        .ready_i (arb_req_ready_i)
    );

    //      A request is forwarded as soon as one requester is granted
    assign arb_req_valid_o = |arb_req_gnt_d;

    //      Request multiplexor (selected with the current grant)
    hpdcache_mux #(
        .NINPUT      (NREQUESTERS),
        .DATA_WIDTH  ($bits(hpdcache_req_t)),
        .ONE_HOT_SEL (1'b1)
    ) core_req_mux_i (
        .data_i      (core_req),
        .sel_i       (arb_req_gnt_d),
        .data_o      (arb_req_o)
    );

    //      Save the grant signal for the tag in the next cycle
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : arb_req_gnt_ff
        if (!rst_ni) begin
            arb_req_gnt_q <= '0;
        end else begin
            arb_req_gnt_q <= arb_req_gnt_d;
        end
    end

    //      Request abort multiplexor (selected with the registered grant)
    hpdcache_mux #(
        .NINPUT      (NREQUESTERS),
        .DATA_WIDTH  (1),
        .ONE_HOT_SEL (1'b1)
    ) core_req_abort_mux_i (
        .data_i      (core_req_abort),
        .sel_i       (arb_req_gnt_q),
        .data_o      (arb_abort_o)
    );

    //      Tag multiplexor (selected with the registered grant)
    hpdcache_mux #(
        .NINPUT      (NREQUESTERS),
        .DATA_WIDTH  ($bits(hpdcache_tag_t)),
        .ONE_HOT_SEL (1'b1)
    ) core_req_tag_mux_i (
        .data_i      (core_req_tag),
        .sel_i       (arb_req_gnt_q),
        .data_o      (arb_tag_o)
    );

    //      PMA multiplexor (selected with the registered grant)
    hpdcache_mux #(
        .NINPUT      (NREQUESTERS),
        .DATA_WIDTH  ($bits(hpdcache_pma_t)),
        .ONE_HOT_SEL (1'b1)
    ) core_req_pma_mux_i (
        .data_i      (core_req_pma),
        .sel_i       (arb_req_gnt_q),
        .data_o      (arb_pma_o)
    );
    //  }}}

    //  Response demultiplexor
    //  {{{
    always_comb
    begin : resp_demux
        for (int unsigned i = 0; i < NREQUESTERS; i++) begin
            //  The response payload is broadcast to every requester...
            core_rsp_o[i]       = core_rsp_i;

            //  ...but only the one designated by the sid field sees it valid
            core_rsp_valid_o[i] = (i == int'(core_rsp_i.sid)) && core_rsp_valid_i;
        end
    end
    //  }}}
endmodule
|
||||
|
|
@ -0,0 +1,760 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache controller
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_ctrl
|
||||
// Package imports
|
||||
// {{{
|
||||
import hpdcache_pkg::*;
|
||||
// }}}
|
||||
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// Core request interface
|
||||
input logic core_req_valid_i,
|
||||
output logic core_req_ready_o,
|
||||
input hpdcache_req_t core_req_i,
|
||||
input logic core_req_abort_i,
|
||||
input hpdcache_tag_t core_req_tag_i,
|
||||
input hpdcache_pma_t core_req_pma_i,
|
||||
|
||||
// Core response interface
|
||||
output logic core_rsp_valid_o,
|
||||
output hpdcache_rsp_t core_rsp_o,
|
||||
|
||||
// Force the write buffer to send all pending writes
|
||||
input logic wbuf_flush_i,
|
||||
|
||||
// Global control signals
|
||||
output logic cachedir_hit_o,
|
||||
|
||||
// Miss handler interface
|
||||
output logic miss_mshr_check_o,
|
||||
output mshr_set_t miss_mshr_check_set_o,
|
||||
output mshr_tag_t miss_mshr_check_tag_o,
|
||||
output logic miss_mshr_alloc_o,
|
||||
output logic miss_mshr_alloc_cs_o,
|
||||
input logic miss_mshr_alloc_ready_i,
|
||||
input logic miss_mshr_alloc_full_i,
|
||||
output hpdcache_nline_t miss_mshr_alloc_nline_o,
|
||||
output hpdcache_req_tid_t miss_mshr_alloc_tid_o,
|
||||
output hpdcache_req_sid_t miss_mshr_alloc_sid_o,
|
||||
output hpdcache_word_t miss_mshr_alloc_word_o,
|
||||
output logic miss_mshr_alloc_need_rsp_o,
|
||||
output logic miss_mshr_alloc_is_prefetch_o,
|
||||
input logic miss_mshr_hit_i,
|
||||
|
||||
// Refill interface
|
||||
input logic refill_req_valid_i,
|
||||
output logic refill_req_ready_o,
|
||||
input logic refill_busy_i,
|
||||
input logic refill_updt_plru_i,
|
||||
input hpdcache_set_t refill_set_i,
|
||||
input hpdcache_dir_entry_t refill_dir_entry_i,
|
||||
output hpdcache_way_vector_t refill_victim_way_o,
|
||||
input hpdcache_way_vector_t refill_victim_way_i,
|
||||
input logic refill_write_dir_i,
|
||||
input logic refill_write_data_i,
|
||||
input hpdcache_word_t refill_word_i,
|
||||
input hpdcache_refill_data_t refill_data_i,
|
||||
input logic refill_core_rsp_valid_i,
|
||||
input hpdcache_rsp_t refill_core_rsp_i,
|
||||
input hpdcache_nline_t refill_nline_i,
|
||||
input logic refill_updt_rtab_i,
|
||||
|
||||
// Write buffer interface
|
||||
input logic wbuf_empty_i,
|
||||
output logic wbuf_flush_all_o,
|
||||
output logic wbuf_write_o,
|
||||
input logic wbuf_write_ready_i,
|
||||
output wbuf_addr_t wbuf_write_addr_o,
|
||||
output wbuf_data_t wbuf_write_data_o,
|
||||
output wbuf_be_t wbuf_write_be_o,
|
||||
output logic wbuf_write_uncacheable_o,
|
||||
input logic wbuf_read_hit_i,
|
||||
output logic wbuf_read_flush_hit_o,
|
||||
output hpdcache_req_addr_t wbuf_rtab_addr_o,
|
||||
output logic wbuf_rtab_is_read_o,
|
||||
input logic wbuf_rtab_hit_open_i,
|
||||
input logic wbuf_rtab_hit_pend_i,
|
||||
input logic wbuf_rtab_hit_sent_i,
|
||||
input logic wbuf_rtab_not_ready_i,
|
||||
|
||||
// Uncacheable request handler
|
||||
input logic uc_busy_i,
|
||||
output logic uc_lrsc_snoop_o,
|
||||
output hpdcache_req_addr_t uc_lrsc_snoop_addr_o,
|
||||
output hpdcache_req_size_t uc_lrsc_snoop_size_o,
|
||||
output logic uc_req_valid_o,
|
||||
output hpdcache_uc_op_t uc_req_op_o,
|
||||
output hpdcache_req_addr_t uc_req_addr_o,
|
||||
output hpdcache_req_size_t uc_req_size_o,
|
||||
output hpdcache_req_data_t uc_req_data_o,
|
||||
output hpdcache_req_be_t uc_req_be_o,
|
||||
output logic uc_req_uc_o,
|
||||
output hpdcache_req_sid_t uc_req_sid_o,
|
||||
output hpdcache_req_tid_t uc_req_tid_o,
|
||||
output logic uc_req_need_rsp_o,
|
||||
input logic uc_wbuf_flush_all_i,
|
||||
input logic uc_dir_amo_match_i,
|
||||
input hpdcache_set_t uc_dir_amo_match_set_i,
|
||||
input hpdcache_tag_t uc_dir_amo_match_tag_i,
|
||||
input logic uc_dir_amo_update_plru_i,
|
||||
output hpdcache_way_vector_t uc_dir_amo_hit_way_o,
|
||||
input logic uc_data_amo_write_i,
|
||||
input logic uc_data_amo_write_enable_i,
|
||||
input hpdcache_set_t uc_data_amo_write_set_i,
|
||||
input hpdcache_req_size_t uc_data_amo_write_size_i,
|
||||
input hpdcache_word_t uc_data_amo_write_word_i,
|
||||
input logic [63:0] uc_data_amo_write_data_i,
|
||||
input logic [7:0] uc_data_amo_write_be_i,
|
||||
output logic uc_core_rsp_ready_o,
|
||||
input logic uc_core_rsp_valid_i,
|
||||
input hpdcache_rsp_t uc_core_rsp_i,
|
||||
|
||||
// Cache Management Operation (CMO)
|
||||
input logic cmo_busy_i,
|
||||
output logic cmo_req_valid_o,
|
||||
output hpdcache_cmoh_op_t cmo_req_op_o,
|
||||
output hpdcache_req_addr_t cmo_req_addr_o,
|
||||
output hpdcache_req_data_t cmo_req_wdata_o,
|
||||
input logic cmo_wbuf_flush_all_i,
|
||||
input logic cmo_dir_check_i,
|
||||
input hpdcache_set_t cmo_dir_check_set_i,
|
||||
input hpdcache_tag_t cmo_dir_check_tag_i,
|
||||
output hpdcache_way_vector_t cmo_dir_check_hit_way_o,
|
||||
input logic cmo_dir_inval_i,
|
||||
input hpdcache_set_t cmo_dir_inval_set_i,
|
||||
input hpdcache_way_vector_t cmo_dir_inval_way_i,
|
||||
|
||||
output logic rtab_empty_o,
|
||||
output logic ctrl_empty_o,
|
||||
|
||||
// Configuration signals
|
||||
input logic cfg_enable_i,
|
||||
input logic cfg_rtab_single_entry_i,
|
||||
|
||||
// Performance events
|
||||
output logic evt_cache_write_miss_o,
|
||||
output logic evt_cache_read_miss_o,
|
||||
output logic evt_uncached_req_o,
|
||||
output logic evt_cmo_req_o,
|
||||
output logic evt_write_req_o,
|
||||
output logic evt_read_req_o,
|
||||
output logic evt_prefetch_req_o,
|
||||
output logic evt_req_on_hold_o,
|
||||
output logic evt_rtab_rollback_o,
|
||||
output logic evt_stall_refill_o,
|
||||
output logic evt_stall_o
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Definition of internal registers
|
||||
// {{{
|
||||
logic st1_req_valid_q, st1_req_valid_d;
|
||||
hpdcache_req_t st1_req_q;
|
||||
logic st1_req_rtab_q;
|
||||
rtab_ptr_t st1_rtab_pop_try_ptr_q;
|
||||
|
||||
logic st2_req_valid_q, st2_req_valid_d;
|
||||
logic st2_req_is_prefetch_q, st2_req_is_prefetch_d;
|
||||
logic st2_req_need_rsp_q;
|
||||
hpdcache_req_addr_t st2_req_addr_q;
|
||||
hpdcache_req_sid_t st2_req_sid_q;
|
||||
hpdcache_req_tid_t st2_req_tid_q;
|
||||
// }}}
|
||||
|
||||
// Definition of internal signals
|
||||
// {{{
|
||||
logic [1:0] st0_arb_req;
|
||||
logic [1:0] st0_arb_req_grant;
|
||||
logic st0_arb_ready;
|
||||
|
||||
logic st0_req_ready;
|
||||
|
||||
logic st0_req_valid;
|
||||
hpdcache_req_t st0_req;
|
||||
logic st0_req_is_uncacheable;
|
||||
logic st0_req_is_load;
|
||||
logic st0_req_is_store;
|
||||
logic st0_req_is_amo;
|
||||
logic st0_req_is_cmo_fence;
|
||||
logic st0_req_is_cmo_inval;
|
||||
logic st0_req_is_cmo_prefetch;
|
||||
logic st0_req_cachedir_read;
|
||||
logic st0_req_cachedata_read;
|
||||
hpdcache_set_t st0_req_set;
|
||||
hpdcache_word_t st0_req_word;
|
||||
logic st0_rtab_pop_try_valid;
|
||||
logic st0_rtab_pop_try_ready;
|
||||
hpdcache_req_t st0_rtab_pop_try_req;
|
||||
logic st0_rtab_pop_try_sel;
|
||||
rtab_ptr_t st0_rtab_pop_try_ptr;
|
||||
|
||||
logic st1_rsp_valid;
|
||||
logic st1_rsp_aborted;
|
||||
hpdcache_req_t st1_req;
|
||||
logic st1_req_abort;
|
||||
logic st1_req_cachedata_write;
|
||||
logic st1_req_cachedata_write_enable;
|
||||
hpdcache_pma_t st1_req_pma;
|
||||
hpdcache_tag_t st1_req_tag;
|
||||
hpdcache_set_t st1_req_set;
|
||||
hpdcache_word_t st1_req_word;
|
||||
hpdcache_nline_t st1_req_nline;
|
||||
hpdcache_req_addr_t st1_req_addr;
|
||||
logic st1_req_updt_lru;
|
||||
logic st1_req_is_uncacheable;
|
||||
logic st1_req_is_load;
|
||||
logic st1_req_is_store;
|
||||
logic st1_req_is_amo;
|
||||
logic st1_req_is_amo_lr;
|
||||
logic st1_req_is_amo_sc;
|
||||
logic st1_req_is_amo_swap;
|
||||
logic st1_req_is_amo_add;
|
||||
logic st1_req_is_amo_and;
|
||||
logic st1_req_is_amo_or;
|
||||
logic st1_req_is_amo_xor;
|
||||
logic st1_req_is_amo_max;
|
||||
logic st1_req_is_amo_maxu;
|
||||
logic st1_req_is_amo_min;
|
||||
logic st1_req_is_amo_minu;
|
||||
logic st1_req_is_cmo_inval;
|
||||
logic st1_req_is_cmo_fence;
|
||||
logic st1_req_is_cmo_prefetch;
|
||||
hpdcache_way_vector_t st1_dir_hit;
|
||||
hpdcache_req_data_t st1_read_data;
|
||||
logic st1_rtab_alloc;
|
||||
logic st1_rtab_alloc_and_link;
|
||||
logic st1_rtab_pop_try_commit;
|
||||
logic st1_rtab_pop_try_rback;
|
||||
logic st1_rtab_mshr_hit;
|
||||
logic st1_rtab_mshr_full;
|
||||
logic st1_rtab_mshr_ready;
|
||||
logic st1_rtab_wbuf_hit;
|
||||
logic st1_rtab_wbuf_not_ready;
|
||||
logic st1_rtab_check;
|
||||
logic st1_rtab_check_hit;
|
||||
|
||||
logic st2_req_we;
|
||||
hpdcache_word_t st2_req_word;
|
||||
|
||||
logic rtab_full;
|
||||
|
||||
logic hpdcache_init_ready;
|
||||
// }}}
|
||||
|
||||
// Decoding of the request
|
||||
// {{{
|
||||
// Select between a request from the replay table and a new core request
|
||||
assign st0_req_valid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_valid
|
||||
: core_req_valid_i,
|
||||
st0_req.addr_offset = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_offset
|
||||
: core_req_i.addr_offset,
|
||||
st0_req.addr_tag = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_tag
|
||||
: core_req_i.addr_tag,
|
||||
st0_req.wdata = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.wdata
|
||||
: core_req_i.wdata,
|
||||
st0_req.op = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.op
|
||||
: core_req_i.op,
|
||||
st0_req.be = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.be
|
||||
: core_req_i.be,
|
||||
st0_req.size = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.size
|
||||
: core_req_i.size,
|
||||
st0_req.sid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.sid
|
||||
: core_req_i.sid,
|
||||
st0_req.tid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.tid
|
||||
: core_req_i.tid,
|
||||
st0_req.need_rsp = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.need_rsp
|
||||
: core_req_i.need_rsp,
|
||||
st0_req.phys_indexed = st0_rtab_pop_try_sel ? 1'b1
|
||||
: core_req_i.phys_indexed,
|
||||
st0_req.pma = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.pma
|
||||
: core_req_i.pma;
|
||||
|
||||
// Decode operation in stage 0
|
||||
assign st0_req_is_uncacheable = ~cfg_enable_i | ( st0_req.phys_indexed
|
||||
& st0_req.pma.uncacheable),
|
||||
st0_req_is_load = is_load(st0_req.op),
|
||||
st0_req_is_store = is_store(st0_req.op),
|
||||
st0_req_is_amo = is_amo(st0_req.op),
|
||||
st0_req_is_cmo_fence = is_cmo_fence(st0_req.op, st0_req.size),
|
||||
st0_req_is_cmo_inval = is_cmo_inval(st0_req.op, st0_req.size),
|
||||
st0_req_is_cmo_prefetch = is_cmo_prefetch(st0_req.op, st0_req.size);
|
||||
|
||||
// Decode operation in stage 1
|
||||
|
||||
// In case of replay or physically-indexed cache, the tag and PMA come
|
||||
// from stage 0. Otherwise, this information comes directly from the
|
||||
// requester in stage 1
|
||||
assign st1_req_tag = st1_req_q.phys_indexed ? st1_req_q.addr_tag : core_req_tag_i,
|
||||
st1_req_pma = st1_req_q.phys_indexed ? st1_req_q.pma : core_req_pma_i;
|
||||
|
||||
assign st1_req.addr_offset = st1_req_q.addr_offset,
|
||||
st1_req.addr_tag = st1_req_rtab_q ? st1_req_q.addr_tag : st1_req_tag,
|
||||
st1_req.wdata = st1_req_q.wdata,
|
||||
st1_req.op = st1_req_q.op,
|
||||
st1_req.be = st1_req_q.be,
|
||||
st1_req.size = st1_req_q.size,
|
||||
st1_req.sid = st1_req_q.sid,
|
||||
st1_req.tid = st1_req_q.tid,
|
||||
st1_req.need_rsp = st1_req_q.need_rsp,
|
||||
st1_req.phys_indexed = st1_req_q.phys_indexed,
|
||||
st1_req.pma = st1_req_rtab_q ? st1_req_q.pma : st1_req_pma;
|
||||
|
||||
// A requester can ask to abort a request it initiated on the
|
||||
// previous cycle (stage 0). Useful in case of TLB miss for example
|
||||
assign st1_req_abort = core_req_abort_i & ~st1_req.phys_indexed;
|
||||
|
||||
assign st1_req_is_uncacheable = ~cfg_enable_i | st1_req.pma.uncacheable,
|
||||
st1_req_is_load = is_load(st1_req.op),
|
||||
st1_req_is_store = is_store(st1_req.op),
|
||||
st1_req_is_amo = is_amo(st1_req.op),
|
||||
st1_req_is_amo_lr = is_amo_lr(st1_req.op),
|
||||
st1_req_is_amo_sc = is_amo_sc(st1_req.op),
|
||||
st1_req_is_amo_swap = is_amo_swap(st1_req.op),
|
||||
st1_req_is_amo_add = is_amo_add(st1_req.op),
|
||||
st1_req_is_amo_and = is_amo_and(st1_req.op),
|
||||
st1_req_is_amo_or = is_amo_or(st1_req.op),
|
||||
st1_req_is_amo_xor = is_amo_xor(st1_req.op),
|
||||
st1_req_is_amo_max = is_amo_max(st1_req.op),
|
||||
st1_req_is_amo_maxu = is_amo_maxu(st1_req.op),
|
||||
st1_req_is_amo_min = is_amo_min(st1_req.op),
|
||||
st1_req_is_amo_minu = is_amo_minu(st1_req.op),
|
||||
st1_req_is_cmo_inval = is_cmo_inval(st1_req.op, st1_req.size),
|
||||
st1_req_is_cmo_fence = is_cmo_fence(st1_req.op, st1_req.size),
|
||||
st1_req_is_cmo_prefetch = is_cmo_prefetch(st1_req.op, st1_req.size);
|
||||
// }}}
|
||||
|
||||
// Refill arbiter: it arbitrates between normal requests (from the core,
|
||||
// coprocessor, prefetch) and refill requests (from the miss handler).
|
||||
//
|
||||
// TODO This arbiter could be replaced by a weighted-round-robin arbiter.
|
||||
// This way we could distribute the bandwidth asymmetrically to the core
|
||||
// and the refill interfaces.
|
||||
// {{{
|
||||
hpdcache_rrarb #(.N(2)) st0_arb_i
|
||||
(
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.req_i (st0_arb_req),
|
||||
.gnt_o (st0_arb_req_grant),
|
||||
.ready_i (st0_arb_ready)
|
||||
);
|
||||
|
||||
// The arbiter can cycle the priority token when:
|
||||
// - The granted request is consumed (req_grant & req_valid & req_ready)
|
||||
// - The granted request is aborted (req_grant & ~req_valid)
|
||||
assign st0_arb_ready = ((st0_arb_req_grant[0] & st0_req_valid & st0_req_ready ) |
|
||||
(st0_arb_req_grant[1] & refill_req_valid_i & refill_req_ready_o) |
|
||||
(st0_arb_req_grant[0] & ~st0_req_valid ) |
|
||||
(st0_arb_req_grant[1] & ~refill_req_valid_i));
|
||||
|
||||
assign st0_arb_req[0] = st0_req_valid,
|
||||
st0_arb_req[1] = refill_req_valid_i;
|
||||
|
||||
assign core_req_ready_o = st0_req_ready & ~st0_rtab_pop_try_sel,
|
||||
st0_rtab_pop_try_ready = st0_req_ready & st0_rtab_pop_try_sel;
|
||||
|
||||
// Trigger an event signal when the pipeline is stalled (new request is not consumed)
|
||||
assign evt_stall_o = core_req_valid_i & ~core_req_ready_o;
|
||||
// }}}
|
||||
|
||||
// Cache controller protocol engine
|
||||
// {{{
|
||||
hpdcache_ctrl_pe hpdcache_ctrl_pe_i(
|
||||
.arb_st0_req_valid_i (st0_req_valid & st0_arb_req_grant[0]),
|
||||
.arb_st0_req_ready_o (st0_req_ready),
|
||||
.arb_refill_valid_i (refill_req_valid_i & st0_arb_req_grant[1]),
|
||||
.arb_refill_ready_o (refill_req_ready_o),
|
||||
.st0_req_is_uncacheable_i (st0_req_is_uncacheable),
|
||||
.st0_req_need_rsp_i (st0_req.need_rsp),
|
||||
.st0_req_is_load_i (st0_req_is_load),
|
||||
.st0_req_is_store_i (st0_req_is_store),
|
||||
.st0_req_is_amo_i (st0_req_is_amo),
|
||||
.st0_req_is_cmo_fence_i (st0_req_is_cmo_fence),
|
||||
.st0_req_is_cmo_inval_i (st0_req_is_cmo_inval),
|
||||
.st0_req_is_cmo_prefetch_i (st0_req_is_cmo_prefetch),
|
||||
.st0_req_mshr_check_o (miss_mshr_check_o),
|
||||
.st0_req_cachedir_read_o (st0_req_cachedir_read),
|
||||
.st0_req_cachedata_read_o (st0_req_cachedata_read),
|
||||
|
||||
.st1_req_valid_i (st1_req_valid_q),
|
||||
.st1_req_abort_i (st1_req_abort),
|
||||
.st1_req_rtab_i (st1_req_rtab_q),
|
||||
.st1_req_is_uncacheable_i (st1_req_is_uncacheable),
|
||||
.st1_req_need_rsp_i (st1_req.need_rsp),
|
||||
.st1_req_is_load_i (st1_req_is_load),
|
||||
.st1_req_is_store_i (st1_req_is_store),
|
||||
.st1_req_is_amo_i (st1_req_is_amo),
|
||||
.st1_req_is_cmo_inval_i (st1_req_is_cmo_inval),
|
||||
.st1_req_is_cmo_fence_i (st1_req_is_cmo_fence),
|
||||
.st1_req_is_cmo_prefetch_i (st1_req_is_cmo_prefetch),
|
||||
.st1_req_valid_o (st1_req_valid_d),
|
||||
.st1_rsp_valid_o (st1_rsp_valid),
|
||||
.st1_rsp_aborted_o (st1_rsp_aborted),
|
||||
.st1_req_cachedir_updt_lru_o (st1_req_updt_lru),
|
||||
.st1_req_cachedata_write_o (st1_req_cachedata_write),
|
||||
.st1_req_cachedata_write_enable_o (st1_req_cachedata_write_enable),
|
||||
|
||||
.st2_req_valid_i (st2_req_valid_q),
|
||||
.st2_req_is_prefetch_i (st2_req_is_prefetch_q),
|
||||
.st2_req_valid_o (st2_req_valid_d),
|
||||
.st2_req_we_o (st2_req_we),
|
||||
.st2_req_is_prefetch_o (st2_req_is_prefetch_d),
|
||||
.st2_req_mshr_alloc_o (miss_mshr_alloc_o),
|
||||
.st2_req_mshr_alloc_cs_o (miss_mshr_alloc_cs_o),
|
||||
|
||||
.rtab_full_i (rtab_full),
|
||||
.rtab_req_valid_i (st0_rtab_pop_try_valid),
|
||||
.rtab_sel_o (st0_rtab_pop_try_sel),
|
||||
.rtab_check_o (st1_rtab_check),
|
||||
.rtab_check_hit_i (st1_rtab_check_hit),
|
||||
.st1_rtab_alloc_o (st1_rtab_alloc),
|
||||
.st1_rtab_alloc_and_link_o (st1_rtab_alloc_and_link),
|
||||
.st1_rtab_commit_o (st1_rtab_pop_try_commit),
|
||||
.st1_rtab_rback_o (st1_rtab_pop_try_rback),
|
||||
.st1_rtab_mshr_hit_o (st1_rtab_mshr_hit),
|
||||
.st1_rtab_mshr_full_o (st1_rtab_mshr_full),
|
||||
.st1_rtab_mshr_ready_o (st1_rtab_mshr_ready),
|
||||
.st1_rtab_wbuf_hit_o (st1_rtab_wbuf_hit),
|
||||
.st1_rtab_wbuf_not_ready_o (st1_rtab_wbuf_not_ready),
|
||||
|
||||
.cachedir_hit_i (cachedir_hit_o),
|
||||
.cachedir_init_ready_i (hpdcache_init_ready),
|
||||
|
||||
.mshr_alloc_ready_i (miss_mshr_alloc_ready_i),
|
||||
.mshr_hit_i (miss_mshr_hit_i),
|
||||
.mshr_full_i (miss_mshr_alloc_full_i),
|
||||
|
||||
.refill_busy_i,
|
||||
.refill_core_rsp_valid_i,
|
||||
|
||||
.wbuf_write_valid_o (wbuf_write_o),
|
||||
.wbuf_write_ready_i,
|
||||
.wbuf_read_hit_i,
|
||||
.wbuf_write_uncacheable_o,
|
||||
.wbuf_read_flush_hit_o,
|
||||
|
||||
.uc_busy_i,
|
||||
.uc_req_valid_o,
|
||||
.uc_core_rsp_ready_o,
|
||||
|
||||
.cmo_busy_i,
|
||||
.cmo_req_valid_o,
|
||||
|
||||
.evt_cache_write_miss_o,
|
||||
.evt_cache_read_miss_o,
|
||||
.evt_uncached_req_o,
|
||||
.evt_cmo_req_o,
|
||||
.evt_write_req_o,
|
||||
.evt_read_req_o,
|
||||
.evt_prefetch_req_o,
|
||||
.evt_req_on_hold_o,
|
||||
.evt_rtab_rollback_o,
|
||||
.evt_stall_refill_o
|
||||
);
|
||||
|
||||
assign ctrl_empty_o = ~(st1_req_valid_q | st2_req_valid_q);
|
||||
// }}}
|
||||
|
||||
// Replay table
|
||||
// {{{
|
||||
// Replay table instance. Entries are of type hpdcache_req_t. All signals
// prefixed with st0_/st1_ are driven or consumed by the controller pipeline;
// the wbuf_rtab_*, miss_* and refill_* signals are forwarded from the ports of
// the enclosing module.
hpdcache_rtab #(
    .rtab_entry_t               (hpdcache_req_t)
) hpdcache_rtab_i (
    .clk_i                      (clk_i),
    .rst_ni                     (rst_ni),

    // Occupancy status
    .empty_o                    (rtab_empty_o),
    .full_o                     (rtab_full),

    // Check of the stage 1 request (by cache line number) against the table
    .check_i                    (st1_rtab_check),
    .check_nline_i              (st1_req_nline),
    .check_hit_o                (st1_rtab_check_hit),

    // Allocation of the stage 1 request, together with the dependency flags
    .alloc_i                    (st1_rtab_alloc),
    .alloc_and_link_i           (st1_rtab_alloc_and_link),
    .alloc_req_i                (st1_req),
    .alloc_mshr_hit_i           (st1_rtab_mshr_hit),
    .alloc_mshr_full_i          (st1_rtab_mshr_full),
    .alloc_mshr_ready_i         (st1_rtab_mshr_ready),
    .alloc_wbuf_hit_i           (st1_rtab_wbuf_hit),
    .alloc_wbuf_not_ready_i     (st1_rtab_wbuf_not_ready),

    // Tentative pop of a request towards stage 0
    .pop_try_valid_o            (st0_rtab_pop_try_valid),
    .pop_try_i                  (st0_rtab_pop_try_ready),
    .pop_try_req_o              (st0_rtab_pop_try_req),
    .pop_try_ptr_o              (st0_rtab_pop_try_ptr),

    // Commit of a popped request (pointer registered in stage 1)
    .pop_commit_i               (st1_rtab_pop_try_commit),
    .pop_commit_ptr_i           (st1_rtab_pop_try_ptr_q),

    // Rollback of a popped request. It uses the same registered pointer and
    // the same dependency flags as the allocation interface above.
    .pop_rback_i                (st1_rtab_pop_try_rback),
    .pop_rback_ptr_i            (st1_rtab_pop_try_ptr_q),
    .pop_rback_mshr_hit_i       (st1_rtab_mshr_hit),
    .pop_rback_mshr_full_i      (st1_rtab_mshr_full),
    .pop_rback_mshr_ready_i     (st1_rtab_mshr_ready),
    .pop_rback_wbuf_hit_i       (st1_rtab_wbuf_hit),
    .pop_rback_wbuf_not_ready_i (st1_rtab_wbuf_not_ready),

    // Write buffer lookup interface
    .wbuf_addr_o                (wbuf_rtab_addr_o),
    .wbuf_is_read_o             (wbuf_rtab_is_read_o),
    .wbuf_hit_open_i            (wbuf_rtab_hit_open_i),
    .wbuf_hit_pend_i            (wbuf_rtab_hit_pend_i),
    .wbuf_hit_sent_i            (wbuf_rtab_hit_sent_i),
    .wbuf_not_ready_i           (wbuf_rtab_not_ready_i),

    // Miss handler readiness
    .miss_ready_i               (miss_mshr_alloc_ready_i),

    // Refill notification
    .refill_i                   (refill_updt_rtab_i),
    .refill_nline_i             (refill_nline_i),

    // Configuration
    .cfg_single_entry_i         (cfg_rtab_single_entry_i)
);
|
||||
// }}}
|
||||
|
||||
// Pipeline stage 1 registers
|
||||
// {{{
|
||||
// Stage 1 request payload register. It has no reset: the payload is captured
// from stage 0 only in cycles where st0_req_ready is asserted, and keeps its
// previous value otherwise.
always_ff @(posedge clk_i)
begin : st1_req_payload_ff
    if (st0_req_ready) st1_req_q <= st0_req;
end
|
||||
|
||||
// Stage 1 control registers (asynchronous, active-low reset).
//  - st1_req_valid_q follows st1_req_valid_d on every clock edge.
//  - st1_req_rtab_q samples st0_rtab_pop_try_sel when stage 0 is ready.
//  - st1_rtab_pop_try_ptr_q samples the replay table pointer only when stage 0
//    is ready AND st0_rtab_pop_try_sel is set.
always_ff @(posedge clk_i or negedge rst_ni)
begin : st1_req_valid_ff
    if (!rst_ni) begin
        st1_req_valid_q        <= 1'b0;
        st1_req_rtab_q         <= 1'b0;
        st1_rtab_pop_try_ptr_q <= '0;
    end else begin
        st1_req_valid_q <= st1_req_valid_d;

        if (st0_req_ready) st1_req_rtab_q <= st0_rtab_pop_try_sel;

        if (st0_req_ready && st0_rtab_pop_try_sel) begin
            st1_rtab_pop_try_ptr_q <= st0_rtab_pop_try_ptr;
        end
    end
end
|
||||
// }}}
|
||||
|
||||
// Pipeline stage 2 registers
|
||||
// {{{
|
||||
// Stage 2 request payload registers. They have no reset and are only written
// when st2_req_we is asserted; the captured fields (address, transaction and
// source identifiers, need_rsp flag) all come from the stage 1 request.
always_ff @(posedge clk_i)
begin : st2_req_payload_ff
    if (st2_req_we) begin
        st2_req_addr_q     <= st1_req_addr;
        st2_req_tid_q      <= st1_req.tid;
        st2_req_sid_q      <= st1_req.sid;
        st2_req_need_rsp_q <= st1_req.need_rsp;
    end
end
|
||||
|
||||
// Stage 2 control registers (asynchronous, active-low reset). Both the valid
// flag and the prefetch flag are cleared on reset and otherwise follow their
// next-state (_d) values on every clock edge.
always_ff @(posedge clk_i or negedge rst_ni)
begin : st2_req_valid_ff
    if (!rst_ni) begin
        {st2_req_valid_q, st2_req_is_prefetch_q} <= '0;
    end else begin
        {st2_req_valid_q, st2_req_is_prefetch_q} <=
                {st2_req_valid_d, st2_req_is_prefetch_d};
    end
end
|
||||
// }}}
|
||||
|
||||
// Controller for the HPDcache directory and data memory arrays
|
||||
// {{{
|
||||
// Stage 0: set and word extracted from the request address offset
assign st0_req_set   = hpdcache_get_req_offset_set(st0_req.addr_offset);
assign st0_req_word  = hpdcache_get_req_offset_word(st0_req.addr_offset);

// Stage 1: set and word from the offset; the full address is rebuilt by
// concatenating the tag and the offset, and the line number derives from it
assign st1_req_set   = hpdcache_get_req_offset_set(st1_req.addr_offset);
assign st1_req_word  = hpdcache_get_req_offset_word(st1_req.addr_offset);
assign st1_req_addr  = {st1_req.addr_tag, st1_req.addr_offset};
assign st1_req_nline = hpdcache_get_req_addr_nline(st1_req_addr);

// Stage 2: word extracted from the registered full address
assign st2_req_word  = hpdcache_get_req_addr_word(st2_req_addr_q);
|
||||
|
||||
// Controller of the cache directory and data memory arrays. It is shared by
// the request pipeline (st0_/st1_ signals), the uncacheable/AMO handler
// (uc_*), the refill path (refill_*) and the CMO handler (cmo_*).
hpdcache_memctrl hpdcache_memctrl_i (
    .clk_i                   (clk_i),
    .rst_ni                  (rst_ni),

    .ready_o                 (hpdcache_init_ready),

    // Directory lookup for pipeline requests.
    // NOTE: the set comes from the stage 0 request whereas the tag comes from
    // the stage 1 request (presumably the tag is compared one cycle after the
    // directory read - confirm against hpdcache_memctrl).
    .dir_match_i             (st0_req_cachedir_read),
    .dir_match_set_i         (st0_req_set),
    .dir_match_tag_i         (st1_req.addr_tag),
    .dir_update_lru_i        (st1_req_updt_lru),
    .dir_hit_way_o           (st1_dir_hit),

    // Directory lookup for AMOs
    .dir_amo_match_i         (uc_dir_amo_match_i),
    .dir_amo_match_set_i     (uc_dir_amo_match_set_i),
    .dir_amo_match_tag_i     (uc_dir_amo_match_tag_i),
    .dir_amo_update_plru_i   (uc_dir_amo_update_plru_i),
    .dir_amo_hit_way_o       (uc_dir_amo_hit_way_o),

    // Directory update on refill
    .dir_refill_i            (refill_write_dir_i),
    .dir_refill_set_i        (refill_set_i),
    .dir_refill_entry_i      (refill_dir_entry_i),
    .dir_refill_updt_plru_i  (refill_updt_plru_i),
    .dir_victim_way_o        (refill_victim_way_o),

    // Directory check for CMOs
    .dir_cmo_check_i         (cmo_dir_check_i),
    .dir_cmo_check_set_i     (cmo_dir_check_set_i),
    .dir_cmo_check_tag_i     (cmo_dir_check_tag_i),
    .dir_cmo_check_hit_way_o (cmo_dir_check_hit_way_o),

    // Directory invalidation for CMOs
    .dir_cmo_inval_i         (cmo_dir_inval_i),
    .dir_cmo_inval_set_i     (cmo_dir_inval_set_i),
    .dir_cmo_inval_way_i     (cmo_dir_inval_way_i),

    // Data array read (issued from stage 0, data available as st1_read_data)
    .data_req_read_i         (st0_req_cachedata_read),
    .data_req_read_set_i     (st0_req_set),
    .data_req_read_size_i    (st0_req.size),
    .data_req_read_word_i    (st0_req_word),
    .data_req_read_data_o    (st1_read_data),

    // Data array write (issued from stage 1)
    .data_req_write_i        (st1_req_cachedata_write),
    .data_req_write_enable_i (st1_req_cachedata_write_enable),
    .data_req_write_set_i    (st1_req_set),
    .data_req_write_size_i   (st1_req.size),
    .data_req_write_word_i   (st1_req_word),
    .data_req_write_data_i   (st1_req.wdata),
    .data_req_write_be_i     (st1_req.be),

    // Data array write for AMOs
    .data_amo_write_i        (uc_data_amo_write_i),
    .data_amo_write_enable_i (uc_data_amo_write_enable_i),
    .data_amo_write_set_i    (uc_data_amo_write_set_i),
    .data_amo_write_size_i   (uc_data_amo_write_size_i),
    .data_amo_write_word_i   (uc_data_amo_write_word_i),
    .data_amo_write_data_i   (uc_data_amo_write_data_i),
    .data_amo_write_be_i     (uc_data_amo_write_be_i),

    // Data array write on refill
    .data_refill_i           (refill_write_data_i),
    .data_refill_way_i       (refill_victim_way_i),
    .data_refill_set_i       (refill_set_i),
    .data_refill_word_i      (refill_word_i),
    .data_refill_data_i      (refill_data_i)
);
|
||||
|
||||
// Directory hit flag: reduction-OR of the hit vector (st1_dir_hit) returned by
// the memory controller, i.e. asserted when at least one of its bits is set.
assign cachedir_hit_o = |st1_dir_hit;
|
||||
// }}}
|
||||
|
||||
// Write buffer outputs
|
||||
// {{{
|
||||
// Write payload: taken directly from the stage 1 request
assign wbuf_write_addr_o = st1_req_addr;
assign wbuf_write_data_o = st1_req.wdata;
assign wbuf_write_be_o   = st1_req.be;

// A complete flush of the write buffer can be requested by the CMO handler,
// by the uncacheable handler or through the wbuf_flush_i input
assign wbuf_flush_all_o  = cmo_wbuf_flush_all_i
                         | uc_wbuf_flush_all_i
                         | wbuf_flush_i;
|
||||
// }}}
|
||||
|
||||
// Miss handler outputs
|
||||
// {{{
|
||||
// MSHR check: the set is sliced from the address offset of the stage 0
// request, while the tag is sliced from the line number of the stage 1
// request
assign miss_mshr_check_set_o =
        st0_req.addr_offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_MSHR_SET_WIDTH];
assign miss_mshr_check_tag_o =
        st1_req_nline[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH];

// MSHR allocation: every field comes from the stage 2 registers
assign miss_mshr_alloc_nline_o       = hpdcache_get_req_addr_nline(st2_req_addr_q);
assign miss_mshr_alloc_word_o        = st2_req_word;
assign miss_mshr_alloc_tid_o         = st2_req_tid_q;
assign miss_mshr_alloc_sid_o         = st2_req_sid_q;
assign miss_mshr_alloc_need_rsp_o    = st2_req_need_rsp_q;
assign miss_mshr_alloc_is_prefetch_o = st2_req_is_prefetch_q;
|
||||
// }}}
|
||||
|
||||
// Uncacheable request handler outputs
|
||||
// {{{
|
||||
// LR/SC snoop: raised for every valid store in stage 1, with its address and
// size
assign uc_lrsc_snoop_o      = st1_req_valid_q & st1_req_is_store;
assign uc_lrsc_snoop_addr_o = st1_req_addr;
assign uc_lrsc_snoop_size_o = st1_req.size;

// Request payload: forwarded from the stage 1 request
assign uc_req_addr_o     = st1_req_addr;
assign uc_req_size_o     = st1_req.size;
assign uc_req_data_o     = st1_req.wdata;
assign uc_req_be_o       = st1_req.be;
assign uc_req_uc_o       = st1_req_is_uncacheable;
assign uc_req_sid_o      = st1_req.sid;
assign uc_req_tid_o      = st1_req.tid;
assign uc_req_need_rsp_o = st1_req.need_rsp;

// Decoded operation flags of the stage 1 request
assign uc_req_op_o.is_ld       = st1_req_is_load;
assign uc_req_op_o.is_st       = st1_req_is_store;
assign uc_req_op_o.is_amo_lr   = st1_req_is_amo_lr;
assign uc_req_op_o.is_amo_sc   = st1_req_is_amo_sc;
assign uc_req_op_o.is_amo_swap = st1_req_is_amo_swap;
assign uc_req_op_o.is_amo_add  = st1_req_is_amo_add;
assign uc_req_op_o.is_amo_and  = st1_req_is_amo_and;
assign uc_req_op_o.is_amo_or   = st1_req_is_amo_or;
assign uc_req_op_o.is_amo_xor  = st1_req_is_amo_xor;
assign uc_req_op_o.is_amo_max  = st1_req_is_amo_max;
assign uc_req_op_o.is_amo_maxu = st1_req_is_amo_maxu;
assign uc_req_op_o.is_amo_min  = st1_req_is_amo_min;
assign uc_req_op_o.is_amo_minu = st1_req_is_amo_minu;
|
||||
// }}}
|
||||
|
||||
// CMO request handler outputs
|
||||
// {{{
|
||||
// Request payload: forwarded from the stage 1 request
assign cmo_req_addr_o  = st1_req_addr;
assign cmo_req_wdata_o = st1_req.wdata;

// Operation decoding. For invalidations, the kind of invalidation (by line,
// by set or whole cache) is decoded from the size field of the request.
assign cmo_req_op_o.is_fence          = st1_req_is_cmo_fence;
assign cmo_req_op_o.is_inval_by_nline =
        st1_req_is_cmo_inval & is_cmo_inval_by_nline(st1_req.size);
assign cmo_req_op_o.is_inval_by_set   =
        st1_req_is_cmo_inval & is_cmo_inval_by_set(st1_req.size);
assign cmo_req_op_o.is_inval_all      =
        st1_req_is_cmo_inval & is_cmo_inval_all(st1_req.size);
|
||||
// }}}
|
||||
|
||||
// Control of the response to the core
|
||||
// {{{
|
||||
// A response is sent to the core when the refill path has one, when the
// uncacheable handler has one that is accepted, or when stage 1 produces one
assign core_rsp_valid_o = refill_core_rsp_valid_i
                        | (uc_core_rsp_valid_i & uc_core_rsp_ready_o)
                        | st1_rsp_valid;

// Response payload selection, in decreasing priority:
//   1. refill response, 2. uncacheable handler response, 3. stage 1 response
assign core_rsp_o.rdata = refill_core_rsp_valid_i ? refill_core_rsp_i.rdata :
                          uc_core_rsp_valid_i     ? uc_core_rsp_i.rdata     :
                                                    st1_read_data;

assign core_rsp_o.sid   = refill_core_rsp_valid_i ? refill_core_rsp_i.sid :
                          uc_core_rsp_valid_i     ? uc_core_rsp_i.sid     :
                                                    st1_req.sid;

assign core_rsp_o.tid   = refill_core_rsp_valid_i ? refill_core_rsp_i.tid :
                          uc_core_rsp_valid_i     ? uc_core_rsp_i.tid     :
                                                    st1_req.tid;

// Responses originating from stage 1 never report an error (see FIXME)
assign core_rsp_o.error = refill_core_rsp_valid_i ? refill_core_rsp_i.error :
                          uc_core_rsp_valid_i     ? uc_core_rsp_i.error     :
                                                    /* FIXME */ 1'b0;

// The aborted flag is not multiplexed: it always reflects stage 1
assign core_rsp_o.aborted = st1_rsp_aborted;
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// pragma translate_off
|
||||
// {{{
|
||||
// At most one of the three request sources (core, replay table, refill) may
// be granted in any given cycle. The check is disabled while in reset.
assert property (
    @(posedge clk_i) disable iff (!rst_ni)
        $onehot0({core_req_ready_o, st0_rtab_pop_try_ready, refill_req_ready_o})
) else $error("ctrl: only one request can be served per cycle");
|
||||
// }}}
|
||||
// pragma translate_on
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,620 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache Control Protocol Engine
|
||||
* History :
|
||||
*/
|
||||
// HPDcache control protocol engine.
//
// Purely combinational module: from the status of the three pipeline stages
// and of the surrounding blocks (replay table, cache directory, MSHR, refill,
// write buffer, uncacheable and CMO handlers) it computes the control strobes
// of the cache pipeline as well as the performance event pulses.
module hpdcache_ctrl_pe
    // Ports
    // {{{
(
    // Request/refill arbiter handshake
    // {{{
    input  logic arb_st0_req_valid_i,
    output logic arb_st0_req_ready_o,
    input  logic arb_refill_valid_i,
    output logic arb_refill_ready_o,
    // }}}

    // Pipeline stage 0
    // {{{
    input  logic st0_req_is_uncacheable_i,
    input  logic st0_req_need_rsp_i,
    input  logic st0_req_is_load_i,
    input  logic st0_req_is_store_i,
    input  logic st0_req_is_amo_i,
    input  logic st0_req_is_cmo_fence_i,
    input  logic st0_req_is_cmo_inval_i,
    input  logic st0_req_is_cmo_prefetch_i,
    output logic st0_req_mshr_check_o,
    output logic st0_req_cachedir_read_o,
    output logic st0_req_cachedata_read_o,
    // }}}

    // Pipeline stage 1
    // {{{
    input  logic st1_req_valid_i,
    input  logic st1_req_abort_i,
    input  logic st1_req_rtab_i,
    input  logic st1_req_is_uncacheable_i,
    input  logic st1_req_need_rsp_i,
    input  logic st1_req_is_load_i,
    input  logic st1_req_is_store_i,
    input  logic st1_req_is_amo_i,
    input  logic st1_req_is_cmo_inval_i,
    input  logic st1_req_is_cmo_fence_i,
    input  logic st1_req_is_cmo_prefetch_i,
    output logic st1_req_valid_o,
    output logic st1_rsp_valid_o,
    output logic st1_rsp_aborted_o,
    output logic st1_req_cachedir_updt_lru_o,
    output logic st1_req_cachedata_write_o,
    output logic st1_req_cachedata_write_enable_o,
    // }}}

    // Pipeline stage 2
    // {{{
    input  logic st2_req_valid_i,
    input  logic st2_req_is_prefetch_i,
    output logic st2_req_valid_o,
    output logic st2_req_we_o,
    output logic st2_req_is_prefetch_o,
    output logic st2_req_mshr_alloc_o,
    output logic st2_req_mshr_alloc_cs_o,
    // }}}

    // Replay table
    // {{{
    input  logic rtab_full_i,
    input  logic rtab_req_valid_i,
    output logic rtab_sel_o,
    output logic rtab_check_o,
    input  logic rtab_check_hit_i,
    output logic st1_rtab_alloc_o,
    output logic st1_rtab_alloc_and_link_o,
    output logic st1_rtab_commit_o,
    output logic st1_rtab_rback_o,
    output logic st1_rtab_mshr_hit_o,
    output logic st1_rtab_mshr_full_o,
    output logic st1_rtab_mshr_ready_o,
    output logic st1_rtab_wbuf_hit_o,
    output logic st1_rtab_wbuf_not_ready_o,
    // }}}

    // Cache directory
    // {{{
    input  logic cachedir_hit_i,
    input  logic cachedir_init_ready_i,
    // }}}

    // Miss Status Holding Register (MSHR)
    // {{{
    input  logic mshr_alloc_ready_i,
    input  logic mshr_hit_i,
    input  logic mshr_full_i,
    // }}}

    // Refill interface
    // {{{
    input  logic refill_busy_i,
    input  logic refill_core_rsp_valid_i,
    // }}}

    // Write buffer
    // {{{
    input  logic wbuf_write_ready_i,
    input  logic wbuf_read_hit_i,
    output logic wbuf_write_valid_o,
    output logic wbuf_write_uncacheable_o,
    output logic wbuf_read_flush_hit_o,
    // }}}

    // Uncacheable request handler
    // {{{
    input  logic uc_busy_i,
    output logic uc_req_valid_o,
    output logic uc_core_rsp_ready_o,
    // }}}

    // Cache Management Operation (CMO) handler
    // {{{
    input  logic cmo_busy_i,
    output logic cmo_req_valid_o,
    // }}}

    // Performance events
    // {{{
    output logic evt_cache_write_miss_o,
    output logic evt_cache_read_miss_o,
    output logic evt_uncached_req_o,
    output logic evt_cmo_req_o,
    output logic evt_write_req_o,
    output logic evt_read_req_o,
    output logic evt_prefetch_req_o,
    output logic evt_req_on_hold_o,
    output logic evt_rtab_rollback_o,
    output logic evt_stall_refill_o
    // }}}
);
    // }}}

    // Internal signals
    // {{{
    logic st1_fence;
    logic st1_rtab_alloc, st1_rtab_alloc_and_link;
    // }}}

    // Global control signals
    // {{{

    // A stage 1 request is handled as a "fence" when it must be executed in
    // program order regardless of its address: all memory operations received
    // before it must be finished before it executes, and all operations
    // received after it must wait for its completion. This is the case for
    // uncacheable requests, CMO fences, CMO invalidations and AMOs.
    assign st1_fence = |{st1_req_is_uncacheable_i,
                         st1_req_is_cmo_fence_i,
                         st1_req_is_cmo_inval_i,
                         st1_req_is_amo_i};
    // }}}

    // Arbitration of responses to the core
    // {{{
    // The uncacheable handler may respond only when the refill path is not
    // responding in the same cycle.
    assign uc_core_rsp_ready_o = ~refill_core_rsp_valid_i;
    // }}}

    // Arbitration between the core request and the replay request
    // {{{
    // The replay request is selected when:
    //  - the replay table is full;
    //  - the replay table has a ready request (all dependencies resolved);
    //  - a fence-like request is in stage 1, or the CMO or uncacheable/AMO
    //    handler is busy.
    //
    // IMPORTANT: when the replay table is full, the cache cannot accept new
    // core requests because this could introduce a deadlock. If the core
    // request had to be put on hold while there is no room in the replay
    // table, the pipeline would need to stall. With the pipeline stalled, the
    // dependencies of the on-hold requests could not be resolved, and the
    // system would be locked.
    assign rtab_sel_o = |{rtab_full_i,
                          rtab_req_valid_i,
                          st1_req_valid_i & st1_fence,
                          cmo_busy_i,
                          uc_busy_i};
    // }}}

    // Replay logic
    // {{{
    // A plain allocation request is turned into a new allocation when the
    // stage 1 request comes from the core, and into a rollback when it comes
    // from the replay table itself.
    assign st1_rtab_alloc_o          = st1_rtab_alloc & ~st1_req_rtab_i;
    assign st1_rtab_rback_o          = st1_rtab_alloc &  st1_req_rtab_i;
    assign st1_rtab_alloc_and_link_o = st1_rtab_alloc_and_link;

    // Performance events
    assign evt_req_on_hold_o   = st1_rtab_alloc | st1_rtab_alloc_and_link;
    assign evt_rtab_rollback_o = st1_rtab_rback_o;
    // }}}

    // Data-cache control lines
    // {{{
    always_comb
    begin : hpdcache_ctrl_comb
        // NOP requests: when set, no request is consumed from stage 0 in
        // this cycle
        automatic logic nop_any, nop_st1, nop_st2;

        // Default values
        // {{{
        nop_any = 1'b0;
        nop_st1 = 1'b0;
        nop_st2 = 1'b0;

        arb_st0_req_ready_o = 1'b0;
        arb_refill_ready_o  = 1'b0;

        st0_req_mshr_check_o     = 1'b0;
        st0_req_cachedir_read_o  = 1'b0;
        st0_req_cachedata_read_o = 1'b0;

        // By default, the stage 1 and stage 2 valid flags hold their value
        st1_req_valid_o                  = st1_req_valid_i;
        st1_rsp_valid_o                  = 1'b0;
        st1_rsp_aborted_o                = 1'b0;
        st1_req_cachedir_updt_lru_o      = 1'b0;
        st1_req_cachedata_write_o        = 1'b0;
        st1_req_cachedata_write_enable_o = 1'b0;

        st2_req_valid_o         = st2_req_valid_i;
        st2_req_we_o            = 1'b0;
        st2_req_is_prefetch_o   = 1'b0;
        st2_req_mshr_alloc_o    = 1'b0;
        st2_req_mshr_alloc_cs_o = 1'b0;

        rtab_check_o              = 1'b0;
        st1_rtab_alloc            = 1'b0;
        st1_rtab_alloc_and_link   = 1'b0;
        st1_rtab_commit_o         = 1'b0;
        st1_rtab_mshr_hit_o       = 1'b0;
        st1_rtab_mshr_full_o      = 1'b0;
        st1_rtab_mshr_ready_o     = 1'b0;
        st1_rtab_wbuf_hit_o       = 1'b0;
        st1_rtab_wbuf_not_ready_o = 1'b0;

        wbuf_write_valid_o       = 1'b0;
        wbuf_write_uncacheable_o = 1'b0; // unused
        wbuf_read_flush_hit_o    = 1'b0;

        uc_req_valid_o  = 1'b0;
        cmo_req_valid_o = 1'b0;

        evt_cache_write_miss_o = 1'b0;
        evt_cache_read_miss_o  = 1'b0;
        evt_uncached_req_o     = 1'b0;
        evt_cmo_req_o          = 1'b0;
        evt_write_req_o        = 1'b0;
        evt_read_req_o         = 1'b0;
        evt_prefetch_req_o     = 1'b0;
        evt_stall_refill_o     = 1'b0;
        // }}}

        // Cache RAMs are still being initialized: keep the default values
        // {{{
        if (!cachedir_init_ready_i) begin
        end
        // }}}

        // Refill in progress: the miss handler has the control of the cache
        // {{{
        else if (refill_busy_i) begin
            // Performance event: a pending request is stalled by the refill
            evt_stall_refill_o = arb_st0_req_valid_i;
        end
        // }}}

        // Normal pipeline operation
        // {{{
        else begin
            // Stage 2: a request is pending
            // {{{
            if (st2_req_valid_i) begin
                // The request leaves stage 2 in this cycle
                st2_req_valid_o = 1'b0;

                // An entry is allocated in the MSHR
                st2_req_mshr_alloc_cs_o = 1'b1;
                st2_req_mshr_alloc_o    = 1'b1;

                // Insert a NOP to prevent a hazard on the MSHR
                nop_st2 = 1'b1;

                // Performance events
                evt_cache_read_miss_o = ~st2_req_is_prefetch_i;
                evt_read_req_o        = ~st2_req_is_prefetch_i;
                evt_prefetch_req_o    =  st2_req_is_prefetch_i;
            end
            // }}}

            // Stage 1: a request is pending
            // {{{
            if (st1_req_valid_i) begin
                // Look for a conflict between the stage 1 request and the
                // requests held in the replay table. Replayed requests and
                // fence-like requests are not checked.
                rtab_check_o = ~st1_req_rtab_i & ~st1_fence;

                // Aborted request (not coming from the replay table): respond
                // to the core when a response is needed, with the aborted
                // flag set
                if (st1_req_abort_i && !st1_req_rtab_i) begin
                    st1_rsp_valid_o   = st1_req_need_rsp_i;
                    st1_rsp_aborted_o = 1'b1;
                end

                // Conflict with an on-hold request: allocate a new entry in
                // the replay table and link it
                else if (rtab_check_o && rtab_check_hit_i) begin
                    st1_rtab_alloc_and_link = 1'b1;

                    // Do not consume a request in this cycle in stage 0
                    nop_st1 = 1'b1;
                end

                // CMO fence or invalidate
                // {{{
                else if (st1_req_is_cmo_fence_i || st1_req_is_cmo_inval_i) begin
                    cmo_req_valid_o = 1'b1;
                    nop_st1         = 1'b1;

                    // Performance event
                    evt_cmo_req_o = 1'b1;
                end
                // }}}

                // Uncacheable load, store or AMO request
                // {{{
                else if (st1_req_is_uncacheable_i) begin
                    uc_req_valid_o = 1'b1;
                    nop_st1        = 1'b1;

                    // Performance event
                    evt_uncached_req_o = 1'b1;
                end
                // }}}

                // Cacheable request
                // {{{
                else begin
                    // Cacheable AMO: forwarded to the uncacheable handler
                    // {{{
                    if (st1_req_is_amo_i) begin
                        uc_req_valid_o = 1'b1;
                        nop_st1        = 1'b1;

                        // Performance event
                        evt_uncached_req_o = 1'b1;
                    end
                    // }}}

                    // Cacheable load or prefetch
                    // {{{
                    if (st1_req_is_load_i || st1_req_is_cmo_prefetch_i) begin
                        // Cache miss
                        // {{{
                        if (!cachedir_hit_i) begin
                            // If there is a match in the write buffer, send
                            // that entry right away
                            wbuf_read_flush_hit_o = 1'b1;

                            // Do not consume a request in this cycle in
                            // stage 0
                            nop_st1 = 1'b1;

                            // A miss is already pending on the same line:
                            // put the request in the replay table
                            if (mshr_hit_i) begin
                                st1_rtab_alloc      = 1'b1;
                                st1_rtab_mshr_hit_o = 1'b1;
                            end

                            // No slot is available in the MSHR: put the
                            // request in the replay table
                            else if (mshr_full_i) begin
                                st1_rtab_alloc       = 1'b1;
                                st1_rtab_mshr_full_o = 1'b1;
                            end

                            // Hit on an open entry of the write buffer: put
                            // the request in the replay table until the
                            // entry is acknowledged
                            else if (wbuf_read_hit_i) begin
                                st1_rtab_alloc      = 1'b1;
                                st1_rtab_wbuf_hit_o = 1'b1;
                            end

                            // The miss handler is not ready to send a new
                            // miss request: put the request on hold. This
                            // prevents a deadlock between the read request
                            // channel and the read response channel.
                            //
                            // Targets may stall the request channel when they
                            // are not able to send a response (responses have
                            // priority). The request is therefore put on hold
                            // so that a possible refill read response can
                            // complete.
                            else if (!mshr_alloc_ready_i) begin
                                st1_rtab_alloc        = 1'b1;
                                st1_rtab_mshr_ready_o = 1'b1;
                            end

                            // Otherwise, forward the request to stage 2,
                            // where the MSHR entry is allocated and the
                            // refill request is sent
                            else begin
                                // A replayed request frees its replay table
                                // entry
                                st1_rtab_commit_o = st1_req_rtab_i;

                                st2_req_valid_o       = 1'b1;
                                st2_req_we_o          = 1'b1;
                                st2_req_is_prefetch_o = st1_req_is_cmo_prefetch_i;
                            end
                        end
                        // }}}

                        // Cache hit
                        // {{{
                        else begin
                            // A replayed request frees its replay table entry
                            st1_rtab_commit_o = st1_req_rtab_i;

                            // Insert a NOP when replaying a request while no
                            // other request is available from the replay
                            // table
                            nop_st1 = st1_req_rtab_i & ~rtab_sel_o;

                            // Update the PLRU bit of the accessed set (loads
                            // only)
                            st1_req_cachedir_updt_lru_o = st1_req_is_load_i;

                            // Respond to the core (if needed)
                            st1_rsp_valid_o = st1_req_need_rsp_i;

                            // Performance events
                            evt_read_req_o     = ~st1_req_is_cmo_prefetch_i;
                            evt_prefetch_req_o =  st1_req_is_cmo_prefetch_i;
                        end
                        // }}}
                    end
                    // }}}

                    // Cacheable store
                    // {{{
                    if (st1_req_is_store_i) begin
                        // Write into the write buffer unless a miss is
                        // pending on the same line.
                        //
                        // The NoC that transports read and write transactions
                        // is assumed not to guarantee the ordering between
                        // those channels. Therefore, the cache must hold a
                        // write while a read is pending on the same address.
                        wbuf_write_valid_o = ~mshr_hit_i;

                        // Insert a NOP in the pipeline when:
                        //  - the stage 0 request is a load (structural hazard
                        //    on the cache data array);
                        //  - the store is a replayed request: the cache
                        //    cannot accept a core request in the next cycle,
                        //    but it can accept another request from the
                        //    replay table.
                        //
                        // IMPORTANT: the NOP of the first case could be
                        // removed if the controller checked the hit of this
                        // write. However, this would add a DIR_RAM ->
                        // DATA_RAM timing path.
                        nop_st1 = (arb_st0_req_valid_i & st0_req_is_load_i)
                                | (st1_req_rtab_i & ~rtab_sel_o);

                        // Select the data RAM for writing. The actual write
                        // depends on the hit signal of the cache directory.
                        //
                        // IMPORTANT: this causes unnecessary power
                        // consumption on write misses, but it removes timing
                        // paths between the cache directory RAM and the
                        // chip-select of the data RAM.
                        st1_req_cachedata_write_o = 1'b1;

                        // Cache miss
                        if (!cachedir_hit_i) begin
                            // A miss is pending on the same line: put the
                            // request in the replay table and do not consume
                            // a request in this cycle in stage 0
                            if (mshr_hit_i) begin
                                st1_rtab_alloc      = 1'b1;
                                st1_rtab_mshr_hit_o = 1'b1;
                                nop_st1             = 1'b1;
                            end

                            // No entry is available in the write buffer (or
                            // conflict with a pending entry): put the request
                            // in the replay table and do not consume a
                            // request in this cycle in stage 0
                            else if (!wbuf_write_ready_i) begin
                                st1_rtab_alloc            = 1'b1;
                                st1_rtab_wbuf_not_ready_o = 1'b1;
                                nop_st1                   = 1'b1;
                            end

                            // The store is performed in the write buffer only
                            else begin
                                // A replayed request frees its replay table
                                // entry
                                st1_rtab_commit_o = st1_req_rtab_i;

                                // Respond to the core (if needed)
                                st1_rsp_valid_o = st1_req_need_rsp_i;

                                // Performance events
                                evt_cache_write_miss_o = 1'b1;
                                evt_write_req_o        = 1'b1;
                            end
                        end

                        // Cache hit
                        else begin
                            // No entry is available in the write buffer (or
                            // conflict with a pending entry): put the request
                            // in the replay table and do not consume a
                            // request in this cycle in stage 0
                            if (!wbuf_write_ready_i) begin
                                st1_rtab_alloc            = 1'b1;
                                st1_rtab_wbuf_not_ready_o = 1'b1;
                                nop_st1                   = 1'b1;
                            end

                            // The store is performed both in the write buffer
                            // and in the cache
                            else begin
                                // A replayed request frees its replay table
                                // entry
                                st1_rtab_commit_o = st1_req_rtab_i;

                                // Respond to the core (if needed)
                                st1_rsp_valid_o = st1_req_need_rsp_i;

                                // Update the PLRU bit of the accessed set
                                st1_req_cachedir_updt_lru_o = 1'b1;

                                // Write into the data RAM
                                st1_req_cachedata_write_enable_o = 1'b1;

                                // Performance event
                                evt_write_req_o = 1'b1;
                            end
                        end
                    end
                    // }}}
                end
                // }}}
            end
            // }}}

            // New request
            // {{{
            nop_any = nop_st1 | nop_st2;

            // A stage 0 request is accepted when:
            //  - the request/refill arbiter grants the request;
            //  - no NOP is being inserted in the pipeline.
            arb_st0_req_ready_o = arb_st0_req_valid_i & ~nop_any;

            // A refill is accepted when:
            //  - the request/refill arbiter grants the refill;
            //  - the pipeline is empty.
            arb_refill_ready_o = arb_refill_valid_i
                               & ~(st1_req_valid_i | st2_req_valid_i);

            // The accepted stage 0 request moves forward to stage 1
            st1_req_valid_o = arb_st0_req_ready_o;

            // New cacheable request granted in stage 0
            // {{{
            // IMPORTANT: the RAMs are enabled regardless of whether the
            // request will have to be put on hold. This increases the power
            // consumption in those cases, but it removes the RAM-to-RAM
            // timing paths between the cache directory and the data array.
            if (arb_st0_req_valid_i && !st0_req_is_uncacheable_i) begin
                // Loads read the data array, except when a cacheable store
                // is valid in stage 1
                st0_req_cachedata_read_o =
                        st0_req_is_load_i &
                        ~(st1_req_valid_i &
                          st1_req_is_store_i &
                          ~st1_req_is_uncacheable_i);

                if (st0_req_is_load_i         ||
                    st0_req_is_cmo_prefetch_i ||
                    st0_req_is_store_i        ||
                    st0_req_is_amo_i)
                begin
                    st0_req_mshr_check_o    = 1'b1;

                    // AMOs do not read the cache directory here
                    st0_req_cachedir_read_o = ~st0_req_is_amo_i;
                end
            end
            // }}}
            // }}}
        end
        // }}} end of normal pipeline operation
    end
    // }}}
endmodule
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache Directory and Data Memory Arrays
|
||||
* History :
|
||||
*/
|
||||
//  hpdcache_memarray
//
//  Purely structural module: it instantiates the directory RAMs (one
//  hpdcache_sram per way) and the grid of data RAMs (one per Y cut and X
//  cut). All chip-select, write-enable, address and data signals come
//  straight from the ports; the only logic in here is the expansion of the
//  write byte-enables into a bit mask for RAMs that take a bit-wise mask.
module hpdcache_memarray
import hpdcache_pkg::*;
    //  Ports
    //  {{{
(
    input  logic                                    clk_i,
    input  logic                                    rst_ni,

    //  Directory RAMs: shared address, per-way select / write-enable / data
    input  hpdcache_dir_addr_t                      dir_addr_i,
    input  hpdcache_way_vector_t                    dir_cs_i,
    input  hpdcache_way_vector_t                    dir_we_i,
    input  hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_wentry_i,
    output hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_rentry_o,

    //  Data RAMs: every signal is indexed [Y cut][X cut]
    input  hpdcache_data_addr_t                     data_addr_i,
    input  hpdcache_data_enable_t                   data_cs_i,
    input  hpdcache_data_enable_t                   data_we_i,
    input  hpdcache_data_be_entry_t                 data_wbyteenable_i,
    input  hpdcache_data_entry_t                    data_wentry_i,
    output hpdcache_data_entry_t                    data_rentry_o
);
    //  }}}

    //  Memory arrays
    //  {{{
    generate
        genvar way_g, row_g, col_g;

        //  Directory: one RAM instance per way
        for (way_g = 0; way_g < int'(HPDCACHE_WAYS); way_g++) begin : dir_sram_gen
            hpdcache_sram #(
                .ADDR_SIZE (HPDCACHE_DIR_RAM_ADDR_WIDTH),
                .DATA_SIZE (HPDCACHE_DIR_RAM_WIDTH)
            ) dir_sram (
                .clk       (clk_i),
                .rst_n     (rst_ni),
                .addr      (dir_addr_i),
                .cs        (dir_cs_i[way_g]),
                .we        (dir_we_i[way_g]),
                .wdata     (dir_wentry_i[way_g]),
                .rdata     (dir_rentry_o[way_g])
            );
        end

        //  Data: one RAM instance per (Y cut, X cut) pair
        for (row_g = 0; row_g < int'(HPDCACHE_DATA_RAM_Y_CUTS); row_g++) begin : data_sram_row_gen
            for (col_g = 0; col_g < int'(HPDCACHE_DATA_RAM_X_CUTS); col_g++) begin : data_sram_col_gen
                if (HPDCACHE_DATA_RAM_WBYTEENABLE) begin : data_sram_wbyteenable_gen
                    //  RAM flavour that accepts the byte-enables directly
                    hpdcache_sram_wbyteenable #(
                        .ADDR_SIZE   (HPDCACHE_DATA_RAM_ADDR_WIDTH),
                        .DATA_SIZE   (HPDCACHE_DATA_RAM_WIDTH)
                    ) data_sram (
                        .clk         (clk_i),
                        .rst_n       (rst_ni),
                        .addr        (data_addr_i[row_g][col_g]),
                        .cs          (data_cs_i[row_g][col_g]),
                        .we          (data_we_i[row_g][col_g]),
                        .wbyteenable (data_wbyteenable_i[row_g][col_g]),
                        .wdata       (data_wentry_i[row_g][col_g]),
                        .rdata       (data_rentry_o[row_g][col_g])
                    );
                end else begin : data_sram_wmask_gen
                    //  RAM flavour that takes a bit-wise write mask
                    hpdcache_data_ram_data_t data_wmask;

                    //  replicate each byte-enable bit over the 8 bits of its byte
                    always_comb
                    begin : data_wmask_comb
                        for (int way = 0; way < HPDCACHE_DATA_WAYS_PER_RAM_WORD; way++) begin
                            for (int lane = 0; lane < HPDCACHE_WORD_WIDTH/8; lane++) begin
                                data_wmask[way][8*lane +: 8] =
                                        {8{data_wbyteenable_i[row_g][col_g][way][lane]}};
                            end
                        end
                    end

                    hpdcache_sram_wmask #(
                        .ADDR_SIZE (HPDCACHE_DATA_RAM_ADDR_WIDTH),
                        .DATA_SIZE (HPDCACHE_DATA_RAM_WIDTH)
                    ) data_sram (
                        .clk       (clk_i),
                        .rst_n     (rst_ni),
                        .addr      (data_addr_i[row_g][col_g]),
                        .cs        (data_cs_i[row_g][col_g]),
                        .we        (data_we_i[row_g][col_g]),
                        .wmask     (data_wmask),
                        .wdata     (data_wentry_i[row_g][col_g]),
                        .rdata     (data_rentry_o[row_g][col_g])
                    );
                end
            end
        end
    endgenerate
    //  }}}
endmodule
|
||||
|
|
@ -0,0 +1,656 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache Directory and Data Memory RAMs Controller
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_memctrl
|
||||
import hpdcache_pkg::*;
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
// Global clock and reset signals
|
||||
// {{{
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
// }}}
|
||||
|
||||
// Global control signals
|
||||
// {{{
|
||||
output logic ready_o,
|
||||
// }}}
|
||||
|
||||
// DIR array access interface
|
||||
// {{{
|
||||
input logic dir_match_i,
|
||||
input hpdcache_set_t dir_match_set_i,
|
||||
input hpdcache_tag_t dir_match_tag_i,
|
||||
input logic dir_update_lru_i,
|
||||
output hpdcache_way_vector_t dir_hit_way_o,
|
||||
|
||||
input logic dir_amo_match_i,
|
||||
input hpdcache_set_t dir_amo_match_set_i,
|
||||
input hpdcache_tag_t dir_amo_match_tag_i,
|
||||
input logic dir_amo_update_plru_i,
|
||||
output hpdcache_way_vector_t dir_amo_hit_way_o,
|
||||
|
||||
input logic dir_refill_i,
|
||||
input hpdcache_set_t dir_refill_set_i,
|
||||
input hpdcache_dir_entry_t dir_refill_entry_i,
|
||||
input logic dir_refill_updt_plru_i,
|
||||
output hpdcache_way_vector_t dir_victim_way_o,
|
||||
|
||||
input logic dir_cmo_check_i,
|
||||
input hpdcache_set_t dir_cmo_check_set_i,
|
||||
input hpdcache_tag_t dir_cmo_check_tag_i,
|
||||
output hpdcache_way_vector_t dir_cmo_check_hit_way_o,
|
||||
|
||||
input logic dir_cmo_inval_i,
|
||||
input hpdcache_set_t dir_cmo_inval_set_i,
|
||||
input hpdcache_way_vector_t dir_cmo_inval_way_i,
|
||||
|
||||
// }}}
|
||||
|
||||
// DATA array access interface
|
||||
// {{{
|
||||
input logic data_req_read_i,
|
||||
input hpdcache_set_t data_req_read_set_i,
|
||||
input hpdcache_req_size_t data_req_read_size_i,
|
||||
input hpdcache_word_t data_req_read_word_i,
|
||||
output hpdcache_req_data_t data_req_read_data_o,
|
||||
|
||||
input logic data_req_write_i,
|
||||
input logic data_req_write_enable_i,
|
||||
input hpdcache_set_t data_req_write_set_i,
|
||||
input hpdcache_req_size_t data_req_write_size_i,
|
||||
input hpdcache_word_t data_req_write_word_i,
|
||||
input hpdcache_req_data_t data_req_write_data_i,
|
||||
input hpdcache_req_be_t data_req_write_be_i,
|
||||
|
||||
input logic data_amo_write_i,
|
||||
input logic data_amo_write_enable_i,
|
||||
input hpdcache_set_t data_amo_write_set_i,
|
||||
input hpdcache_req_size_t data_amo_write_size_i,
|
||||
input hpdcache_word_t data_amo_write_word_i,
|
||||
input logic [63:0] data_amo_write_data_i,
|
||||
input logic [7:0] data_amo_write_be_i,
|
||||
|
||||
input logic data_refill_i,
|
||||
input hpdcache_way_vector_t data_refill_way_i,
|
||||
input hpdcache_set_t data_refill_set_i,
|
||||
input hpdcache_word_t data_refill_word_i,
|
||||
input hpdcache_refill_data_t data_refill_data_i
|
||||
// }}}
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Definition of constants
|
||||
// {{{
|
||||
localparam int unsigned HPDCACHE_ALL_CUTS = HPDCACHE_DATA_RAM_X_CUTS*HPDCACHE_DATA_RAM_Y_CUTS;
|
||||
localparam int unsigned HPDCACHE_DATA_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS;
|
||||
// }}}
|
||||
|
||||
// Definition of functions
|
||||
// {{{
|
||||
|
||||
// hpdcache_compute_data_ram_cs
|
||||
//
|
||||
// description: This function computes the chip-select signal for data
|
||||
// RAMs depending on the request size and the word offset
|
||||
//  hpdcache_compute_data_ram_cs
//
//  Builds the chip-select vector of one row of data RAMs for an access.
//
//  size_i : request size code. Codes 0 to 3 select 64 bits, 4 selects 128
//           bits, 5 selects 256 bits and every other code selects 512 bits.
//  word_i : word offset of the access; only its low bits are used, as the
//           index of the first word inside one RAM access.
//
//  Returns (selected bits / HPDCACHE_WORD_WIDTH) ones, shifted left by the
//  word index and cast to hpdcache_data_row_enable_t.
function automatic hpdcache_data_row_enable_t hpdcache_compute_data_ram_cs(
        input hpdcache_req_size_t size_i,
        input hpdcache_word_t     word_i);

    //  number of word_i bits used as the shift amount (at least 1 so that
    //  the part-select below stays legal when there is a single access word)
    localparam hpdcache_uint32 SHIFT_WIDTH =
            HPDCACHE_ACCESS_WORDS > 1 ? $clog2(HPDCACHE_ACCESS_WORDS) : 1;

    hpdcache_data_row_enable_t word_mask;
    hpdcache_uint32            shift;

    //  one enable bit per word covered by the request
    case (size_i)
        3'h5:    word_mask = hpdcache_data_row_enable_t'({256/HPDCACHE_WORD_WIDTH{1'b1}});
        3'h4:    word_mask = hpdcache_data_row_enable_t'({128/HPDCACHE_WORD_WIDTH{1'b1}});
        3'h0, 3'h1, 3'h2, 3'h3:
                 word_mask = hpdcache_data_row_enable_t'({ 64/HPDCACHE_WORD_WIDTH{1'b1}});
        default: word_mask = hpdcache_data_row_enable_t'({512/HPDCACHE_WORD_WIDTH{1'b1}});
    endcase

    //  no shift is applied when a RAM access holds a single word
    if (HPDCACHE_ACCESS_WORDS > 1) begin
        shift = hpdcache_uint'(word_i[0 +: SHIFT_WIDTH]);
    end else begin
        shift = 0;
    end

    return hpdcache_data_row_enable_t'(word_mask << shift);
endfunction
|
||||
|
||||
//  hpdcache_way_to_data_ram_row
//
//  Converts a way vector into the index of the data RAM row holding that
//  way: (index of the lowest set bit) / HPDCACHE_DATA_WAYS_PER_RAM_WORD.
//  Returns 0 when no bit of the vector is set.
function automatic hpdcache_data_ram_row_idx_t hpdcache_way_to_data_ram_row(
        input hpdcache_way_vector_t way);
    hpdcache_data_ram_row_idx_t row_idx;

    row_idx = 0;

    //  scan from the highest way down to way 0 so that the lowest set bit
    //  is the last one to overwrite the result
    for (hpdcache_uint n = HPDCACHE_WAYS; n > 0; n--) begin
        if (way[n - 1]) begin
            row_idx = hpdcache_data_ram_row_idx_t'((n - 1) / HPDCACHE_DATA_WAYS_PER_RAM_WORD);
        end
    end

    return row_idx;
endfunction
|
||||
|
||||
//  hpdcache_way_to_data_ram_word
//
//  Converts a way vector into the position of that way inside a data RAM
//  word: (index of the lowest set bit) % HPDCACHE_DATA_WAYS_PER_RAM_WORD.
//  Returns 0 when no bit of the vector is set.
function automatic hpdcache_data_ram_way_idx_t hpdcache_way_to_data_ram_word(
        input hpdcache_way_vector_t way);
    hpdcache_data_ram_way_idx_t way_idx;

    way_idx = 0;

    //  scan from the highest way down to way 0 so that the lowest set bit
    //  is the last one to overwrite the result
    for (hpdcache_uint n = HPDCACHE_WAYS; n > 0; n--) begin
        if (way[n - 1]) begin
            way_idx = hpdcache_data_ram_way_idx_t'((n - 1) % HPDCACHE_DATA_WAYS_PER_RAM_WORD);
        end
    end

    return way_idx;
endfunction
|
||||
|
||||
//  hpdcache_set_to_data_ram_addr
//
//  Computes the data RAM address of an access from its set and word offset:
//
//      set * (HPDCACHE_CL_WORDS / HPDCACHE_ACCESS_WORDS)
//          + (word / HPDCACHE_ACCESS_WORDS)
//
//  The sum is cast to hpdcache_data_ram_addr_t.
function automatic hpdcache_data_ram_addr_t hpdcache_set_to_data_ram_addr(
        input hpdcache_set_t  set,
        input hpdcache_word_t word);
    hpdcache_uint set_base;
    hpdcache_uint access_idx;

    //  first RAM address belonging to the set
    set_base   = hpdcache_uint'(set) * (HPDCACHE_CL_WORDS / HPDCACHE_ACCESS_WORDS);

    //  which access-wide chunk of the cacheline the word falls into
    access_idx = hpdcache_uint'(word) / HPDCACHE_ACCESS_WORDS;

    return hpdcache_data_ram_addr_t'(set_base + access_idx);
endfunction
|
||||
// }}}
|
||||
|
||||
// Definition of internal signals and registers
|
||||
// {{{
|
||||
genvar gen_i, gen_j, gen_k;
|
||||
|
||||
// Directory initialization signals and registers
|
||||
logic init_q, init_d;
|
||||
hpdcache_dir_addr_t init_set_q, init_set_d;
|
||||
hpdcache_way_vector_t init_dir_cs;
|
||||
hpdcache_way_vector_t init_dir_we;
|
||||
hpdcache_dir_entry_t init_dir_wentry;
|
||||
|
||||
// Directory valid bit vector (one bit per set and way)
|
||||
hpdcache_way_vector_t [HPDCACHE_SETS-1:0] dir_valid_q, dir_valid_d;
|
||||
hpdcache_set_t dir_req_set_q, dir_req_set_d;
|
||||
hpdcache_dir_addr_t dir_addr;
|
||||
hpdcache_way_vector_t dir_cs;
|
||||
hpdcache_way_vector_t dir_we;
|
||||
hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_wentry;
|
||||
hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_rentry;
|
||||
|
||||
hpdcache_data_addr_t data_addr;
|
||||
hpdcache_data_enable_t data_cs;
|
||||
hpdcache_data_enable_t data_we;
|
||||
hpdcache_data_be_entry_t data_wbyteenable;
|
||||
hpdcache_data_entry_t data_wentry;
|
||||
hpdcache_data_entry_t data_rentry;
|
||||
|
||||
logic data_write;
|
||||
logic data_write_enable;
|
||||
hpdcache_set_t data_write_set;
|
||||
hpdcache_req_size_t data_write_size;
|
||||
hpdcache_word_t data_write_word;
|
||||
hpdcache_refill_data_t data_write_data;
|
||||
hpdcache_refill_be_t data_write_be;
|
||||
|
||||
hpdcache_refill_data_t data_req_write_data;
|
||||
hpdcache_refill_be_t data_req_write_be;
|
||||
|
||||
hpdcache_refill_data_t data_amo_write_data;
|
||||
hpdcache_refill_be_t data_amo_write_be;
|
||||
|
||||
hpdcache_way_vector_t data_way;
|
||||
|
||||
hpdcache_data_ram_row_idx_t data_ram_row;
|
||||
hpdcache_data_ram_way_idx_t data_ram_word;
|
||||
|
||||
// }}}
|
||||
|
||||
// Init FSM
|
||||
// {{{
|
||||
//  Next-state logic of the directory initialization sequence.
//
//  While init_q is 0, all directory ways are selected for writing, the set
//  counter advances by one, and init_d goes high once the counter equals
//  HPDCACHE_SETS - 1. In any other case the state and the counter hold and
//  no way is selected. The entry written during the sequence has its tag
//  and reserved fields cleared.
always_comb
begin : init_comb
    //  entry written during initialization
    init_dir_wentry.tag      = '0;
    init_dir_wentry.reserved = '0;

    //  defaults: hold state and counter, no directory access
    init_d      = init_q;
    init_set_d  = init_set_q;
    init_dir_cs = '0;
    init_dir_we = '0;

    case (init_q)
        //  initialization in progress
        1'b0: begin
            init_dir_cs = '1;
            init_dir_we = '1;
            init_set_d  = init_set_q + 1;
            init_d      = (hpdcache_uint'(init_set_q) == (HPDCACHE_SETS - 1));
        end

        //  initialization done: the defaults above already hold everything
        default: begin
        end
    endcase
end
|
||||
|
||||
assign ready_o = init_q;
|
||||
|
||||
// State registers of the initialization sequence plus the directory valid
// bits. All three are cleared by the asynchronous active-low reset and
// otherwise load their next-state (_d) value on every rising clock edge.
always_ff @(posedge clk_i or negedge rst_ni)
|
||||
begin : init_ff
|
||||
if (!rst_ni) begin
|
||||
init_q <= 1'b0; // 0 = initialization not finished (ready_o is driven by init_q)
|
||||
init_set_q <= 0; // restart the set counter from set 0
|
||||
dir_valid_q <= '0; // no valid entry in any set/way after reset
|
||||
end else begin
|
||||
init_q <= init_d;
|
||||
init_set_q <= init_set_d;
|
||||
dir_valid_q <= dir_valid_d;
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Memory arrays
|
||||
// {{{
|
||||
hpdcache_memarray hpdcache_memarray_i(
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.dir_addr_i (dir_addr),
|
||||
.dir_cs_i (dir_cs),
|
||||
.dir_we_i (dir_we),
|
||||
.dir_wentry_i (dir_wentry),
|
||||
.dir_rentry_o (dir_rentry),
|
||||
|
||||
.data_addr_i (data_addr),
|
||||
.data_cs_i (data_cs),
|
||||
.data_we_i (data_we),
|
||||
.data_wbyteenable_i (data_wbyteenable),
|
||||
.data_wentry_i (data_wentry),
|
||||
.data_rentry_o (data_rentry)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Directory RAM request mux
|
||||
// {{{
|
||||
//  Selects which requester drives the directory RAM ports. Priority, from
//  highest to lowest: initialization, tag match, AMO tag match, refill
//  (directory update), CMO tag check. When none is active the directory is
//  not accessed.
always_comb
begin : dir_ctrl_comb
    //  Do nothing (defaults)
    dir_addr   = '0;
    dir_cs     = '0;
    dir_we     = '0;
    dir_wentry = '0;

    if (~init_q) begin
        //  Cache directory initialization
        dir_addr   = init_set_q;
        dir_cs     = init_dir_cs;
        dir_we     = init_dir_we;
        dir_wentry = {HPDCACHE_WAYS{init_dir_wentry}};
    end else if (dir_match_i) begin
        //  Cache directory match tag -> hit (read all ways)
        dir_addr   = dir_match_set_i;
        dir_cs     = '1;
    end else if (dir_amo_match_i) begin
        //  Cache directory AMO match tag -> hit (read all ways)
        dir_addr   = dir_amo_match_set_i;
        dir_cs     = '1;
    end else if (dir_refill_i) begin
        //  Cache directory update: write the refill entry into the victim way
        dir_addr   = dir_refill_set_i;
        dir_cs     = dir_victim_way_o;
        dir_we     = dir_victim_way_o;
        dir_wentry = {HPDCACHE_WAYS{dir_refill_entry_i}};
    end else if (dir_cmo_check_i) begin
        //  Cache directory CMO match tag (read all ways)
        dir_addr   = dir_cmo_check_set_i;
        dir_cs     = '1;
    end
end
|
||||
// }}}
|
||||
|
||||
// Directory valid logic
|
||||
// {{{
|
||||
//  Next value of the directory valid bits (one vector of ways per set).
//  A refill sets the bit of the victim way in the refilled set; a CMO
//  invalidation clears the selected ways of the targeted set; otherwise the
//  bits keep their current value.
always_comb
begin : dir_valid_comb
    //  start from the registered value, then update a single set
    dir_valid_d = dir_valid_q;

    unique case (1'b1)
        //  Refill the cache after a miss: mark the victim way valid
        dir_refill_i: begin
            dir_valid_d[dir_refill_set_i] |= dir_victim_way_o;
        end

        //  CMO invalidate a set: clear the requested ways
        dir_cmo_inval_i: begin
            dir_valid_d[dir_cmo_inval_set_i] &= ~dir_cmo_inval_way_i;
        end

        default: begin
            //  do nothing
        end
    endcase
end
|
||||
// }}}
|
||||
|
||||
// Directory hit logic
|
||||
// {{{
|
||||
assign dir_req_set_d = dir_match_i ? dir_match_set_i :
|
||||
dir_amo_match_i ? dir_amo_match_set_i :
|
||||
dir_cmo_check_i ? dir_cmo_check_set_i :
|
||||
dir_req_set_q ;
|
||||
|
||||
generate
|
||||
hpdcache_way_vector_t req_hit;
|
||||
hpdcache_way_vector_t amo_hit;
|
||||
hpdcache_way_vector_t cmo_hit;
|
||||
|
||||
for (gen_i = 0; gen_i < int'(HPDCACHE_WAYS); gen_i++)
|
||||
begin : dir_match_tag_gen
|
||||
assign req_hit[gen_i] = (dir_rentry[gen_i].tag == dir_match_tag_i),
|
||||
amo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_amo_match_tag_i),
|
||||
cmo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_cmo_check_tag_i);
|
||||
|
||||
assign dir_hit_way_o [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & req_hit[gen_i],
|
||||
dir_amo_hit_way_o [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & amo_hit[gen_i],
|
||||
dir_cmo_check_hit_way_o[gen_i] = dir_valid_q[dir_req_set_q][gen_i] & cmo_hit[gen_i];
|
||||
end
|
||||
endgenerate
|
||||
// }}}
|
||||
|
||||
// Directory victim select logic
|
||||
// {{{
|
||||
logic plru_updt;
|
||||
hpdcache_way_vector_t plru_updt_way;
|
||||
|
||||
assign plru_updt = dir_update_lru_i | dir_amo_update_plru_i,
|
||||
plru_updt_way = dir_update_lru_i ? dir_hit_way_o : dir_amo_hit_way_o;
|
||||
|
||||
hpdcache_plru #(
|
||||
.SETS (HPDCACHE_SETS),
|
||||
.WAYS (HPDCACHE_WAYS)
|
||||
) plru_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
|
||||
.updt_i (plru_updt),
|
||||
.updt_set_i (dir_req_set_q),
|
||||
.updt_way_i (plru_updt_way),
|
||||
|
||||
.repl_i (dir_refill_i),
|
||||
.repl_set_i (dir_refill_set_i),
|
||||
.repl_dir_valid_i (dir_valid_q[dir_refill_set_i]),
|
||||
.repl_updt_plru_i (dir_refill_updt_plru_i),
|
||||
|
||||
.victim_way_o (dir_victim_way_o)
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Data RAM request multiplexor
|
||||
// {{{
|
||||
|
||||
// Upsize the request interface to match the maximum access width of the data RAM
|
||||
generate
|
||||
if (HPDCACHE_DATA_REQ_RATIO > 1) begin : upsize_data_req_write_gen
|
||||
// demux request DATA
|
||||
assign data_req_write_data = {HPDCACHE_DATA_REQ_RATIO{data_req_write_data_i}};
|
||||
|
||||
// demux request BE
|
||||
hpdcache_demux #(
|
||||
.NOUTPUT (HPDCACHE_DATA_REQ_RATIO),
|
||||
.DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH/8),
|
||||
.ONE_HOT_SEL (1'b0)
|
||||
) data_req_write_be_demux_i (
|
||||
.data_i (data_req_write_be_i),
|
||||
.sel_i (data_req_write_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +:
|
||||
$clog2(HPDCACHE_DATA_REQ_RATIO)]),
|
||||
.data_o (data_req_write_be)
|
||||
);
|
||||
end else begin
|
||||
assign data_req_write_data = data_req_write_data_i,
|
||||
data_req_write_be = data_req_write_be_i;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// Upsize the AMO data interface to match the maximum access width of the data RAM
|
||||
generate
|
||||
localparam hpdcache_uint AMO_DATA_RATIO = HPDCACHE_DATA_RAM_ACCESS_WIDTH/64;
|
||||
localparam hpdcache_uint AMO_DATA_INDEX_WIDTH = $clog2(AMO_DATA_RATIO);
|
||||
|
||||
if (AMO_DATA_RATIO > 1) begin
|
||||
assign data_amo_write_data = {AMO_DATA_RATIO{data_amo_write_data_i}};
|
||||
|
||||
hpdcache_demux #(
|
||||
.NOUTPUT (AMO_DATA_RATIO),
|
||||
.DATA_WIDTH (8),
|
||||
.ONE_HOT_SEL (1'b0)
|
||||
) amo_be_demux_i (
|
||||
.data_i (data_amo_write_be_i),
|
||||
.sel_i (data_amo_write_word_i[0 +: AMO_DATA_INDEX_WIDTH]),
|
||||
.data_o (data_amo_write_be)
|
||||
);
|
||||
end else begin
|
||||
assign data_amo_write_data = data_amo_write_data_i,
|
||||
data_amo_write_be = data_amo_write_be_i;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// Multiplex between data write requests
|
||||
//  Selects the source of a data RAM write. Priority, from highest to
//  lowest: refill, core write request, AMO write. When none is active,
//  data_write is low and every field is zero.
always_comb
begin : data_write_comb
    //  defaults: no write
    data_write        = 1'b0;
    data_write_enable = 1'b0;
    data_write_set    = '0;
    data_write_size   = '0;
    data_write_word   = '0;
    data_write_data   = '0;
    data_write_be     = '0;

    if (data_refill_i) begin
        //  refill: always enabled, full RAM access width, all bytes written
        data_write        = 1'b1;
        data_write_enable = 1'b1;
        data_write_set    = data_refill_set_i;
        data_write_size   = hpdcache_req_size_t'($clog2(HPDCACHE_DATA_RAM_ACCESS_WIDTH/8));
        data_write_word   = data_refill_word_i;
        data_write_data   = data_refill_data_i;
        data_write_be     = '1;
    end else if (data_req_write_i) begin
        //  core write request (data and byte-enables already upsized)
        data_write        = 1'b1;
        data_write_enable = data_req_write_enable_i;
        data_write_set    = data_req_write_set_i;
        data_write_size   = data_req_write_size_i;
        data_write_word   = data_req_write_word_i;
        data_write_data   = data_req_write_data;
        data_write_be     = data_req_write_be;
    end else if (data_amo_write_i) begin
        //  AMO write (data and byte-enables already upsized)
        data_write        = 1'b1;
        data_write_enable = data_amo_write_enable_i;
        data_write_set    = data_amo_write_set_i;
        data_write_size   = data_amo_write_size_i;
        data_write_word   = data_amo_write_word_i;
        data_write_data   = data_amo_write_data;
        data_write_be     = data_amo_write_be;
    end
end
|
||||
|
||||
// Multiplex between read and write access on the data RAM
|
||||
assign data_way = data_refill_i ? data_refill_way_i :
|
||||
data_amo_write_i ? dir_amo_hit_way_o :
|
||||
dir_hit_way_o;
|
||||
|
||||
// Decode way index
|
||||
assign data_ram_word = hpdcache_way_to_data_ram_word(data_way),
|
||||
data_ram_row = hpdcache_way_to_data_ram_row(data_way);
|
||||
|
||||
//  Drives the data RAM ports. A read request has priority over a write;
//  when neither is active the data RAMs are not accessed.
//
//  NOTE(review): the defaults below stand in for the explicit zero
//  assignments of each branch. This assumes the loops cover every row of
//  data_cs / data_we / data_wbyteenable / data_wentry - confirm against the
//  type definitions in hpdcache_pkg.
always_comb
begin : data_ctrl_comb
    //  Do nothing (defaults)
    data_addr        = '0;
    data_cs          = '0;
    data_we          = '0;
    data_wbyteenable = '0;
    data_wentry      = '0;

    if (data_req_read_i) begin
        //  Select data read inputs: same address on every cut, same
        //  chip-select pattern on every row, nothing written
        data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_req_read_set_i,
                                                                     data_req_read_word_i)}};

        for (int unsigned cut_y = 0; cut_y < HPDCACHE_DATA_RAM_Y_CUTS; cut_y++) begin
            data_cs[cut_y] = hpdcache_compute_data_ram_cs(data_req_read_size_i,
                                                          data_req_read_word_i);
        end
    end else if (data_write) begin
        //  Select data write inputs
        data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_write_set,
                                                                     data_write_word)}};

        for (int unsigned cut_y = 0; cut_y < HPDCACHE_DATA_RAM_Y_CUTS; cut_y++) begin
            //  present the write data to every way slot of every cut
            for (int unsigned cut_x = 0; cut_x < HPDCACHE_DATA_RAM_X_CUTS; cut_x++) begin
                data_wentry[cut_y][cut_x] =
                        {HPDCACHE_DATA_WAYS_PER_RAM_WORD{data_write_data[cut_x]}};
            end

            data_cs[cut_y] = hpdcache_compute_data_ram_cs(data_write_size, data_write_word);

            //  only the row holding the target way is written, and only
            //  when the write is enabled
            if (cut_y == hpdcache_uint'(data_ram_row)) begin
                data_we[cut_y] = data_write_enable ? data_cs[cut_y] : '0;
            end

            //  Build the write mask: byte-enables go to the target way slot
            //  only, every other slot is masked out
            for (int unsigned acc_word = 0; acc_word < HPDCACHE_ACCESS_WORDS; acc_word++) begin
                for (int unsigned slot = 0; slot < HPDCACHE_DATA_WAYS_PER_RAM_WORD; slot++) begin
                    data_wbyteenable[cut_y][acc_word][slot] =
                            (slot == hpdcache_uint'(data_ram_word)) ? data_write_be[acc_word]
                                                                    : '0;
                end
            end
        end
    end
end
|
||||
// }}}
|
||||
|
||||
// Data RAM read data multiplexor
|
||||
// {{{
|
||||
generate
|
||||
hpdcache_req_data_t [HPDCACHE_DATA_REQ_RATIO-1:0][HPDCACHE_WAYS-1:0] data_read_words;
|
||||
hpdcache_req_data_t [HPDCACHE_WAYS-1:0] data_read_req_word;
|
||||
|
||||
// Organize the read data by words (all ways for the same word are contiguous)
|
||||
for (gen_i = 0; gen_i < int'(HPDCACHE_DATA_REQ_RATIO); gen_i++) begin
|
||||
for (gen_j = 0; gen_j < int'(HPDCACHE_WAYS); gen_j++) begin
|
||||
for (gen_k = 0; gen_k < int'(HPDCACHE_REQ_WORDS); gen_k++) begin
|
||||
assign data_read_words[gen_i][gen_j][gen_k] =
|
||||
data_rentry[(gen_j / HPDCACHE_DATA_WAYS_PER_RAM_WORD)]
|
||||
[(gen_i * HPDCACHE_REQ_WORDS ) + gen_k]
|
||||
[(gen_j % HPDCACHE_DATA_WAYS_PER_RAM_WORD)];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Mux the data according to the access word
|
||||
if (HPDCACHE_DATA_REQ_RATIO > 1) begin : req_width_lt_ram_width
|
||||
typedef logic [$clog2(HPDCACHE_DATA_REQ_RATIO)-1:0] data_req_word_t;
|
||||
data_req_word_t data_read_req_word_index_q;
|
||||
|
||||
hpdcache_mux #(
|
||||
.NINPUT (HPDCACHE_DATA_REQ_RATIO),
|
||||
.DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH*HPDCACHE_WAYS)
|
||||
) data_read_req_word_mux_i(
|
||||
.data_i (data_read_words),
|
||||
.sel_i (data_read_req_word_index_q),
|
||||
.data_o (data_read_req_word)
|
||||
);
|
||||
|
||||
always_ff @(posedge clk_i)
|
||||
begin : data_req_read_word_ff
|
||||
data_read_req_word_index_q <=
|
||||
data_req_read_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +:
|
||||
$clog2(HPDCACHE_DATA_REQ_RATIO)];
|
||||
end
|
||||
end
|
||||
|
||||
// Request data interface width is equal to the data RAM width
|
||||
else begin : req_width_eq_ram_width
|
||||
assign data_read_req_word = data_read_words;
|
||||
end
|
||||
|
||||
// Mux the data according to the hit way
|
||||
hpdcache_mux #(
|
||||
.NINPUT (HPDCACHE_WAYS),
|
||||
.DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH),
|
||||
.ONE_HOT_SEL (1'b1)
|
||||
) data_read_req_word_way_mux_i(
|
||||
.data_i (data_read_req_word),
|
||||
.sel_i (dir_hit_way_o),
|
||||
.data_o (data_req_read_data_o)
|
||||
);
|
||||
endgenerate
|
||||
|
||||
|
||||
// Delay the accessed set for checking the tag from the directory in the
|
||||
// next cycle (hit logic)
|
||||
//  Register holding the set of the last directory lookup (match, AMO match
//  or CMO check). It only loads when one of those lookups is requested and
//  has no reset.
always_ff @(posedge clk_i)
begin : req_read_ff
    if (|{dir_match_i, dir_amo_match_i, dir_cmo_check_i}) begin
        dir_req_set_q <= dir_req_set_d;
    end
end
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// {{{
|
||||
// pragma translate_off
|
||||
concurrent_dir_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
$onehot0({dir_match_i, dir_amo_match_i, dir_cmo_check_i, dir_refill_i})) else
|
||||
$error("hpdcache_memctrl: more than one process is accessing the cache directory");
|
||||
|
||||
concurrent_data_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
$onehot0({data_req_read_i, data_req_write_i, data_amo_write_i, data_refill_i})) else
|
||||
$error("hpdcache_memctrl: more than one process is accessing the cache data");
|
||||
// pragma translate_on
|
||||
// }}}
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,659 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache Miss Handler
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_miss_handler
|
||||
// {{{
|
||||
import hpdcache_pkg::*;
|
||||
// Parameters
|
||||
// {{{
|
||||
#(
|
||||
parameter int HPDcacheMemIdWidth = 8,
|
||||
parameter int HPDcacheMemDataWidth = 512,
|
||||
parameter type hpdcache_mem_req_t = logic,
|
||||
parameter type hpdcache_mem_resp_r_t = logic,
|
||||
localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
|
||||
)
|
||||
// }}}
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// Global control signals
|
||||
// {{{
|
||||
output logic mshr_empty_o,
|
||||
output logic mshr_full_o,
|
||||
// }}}
|
||||
|
||||
// Configuration signals
|
||||
// {{{
|
||||
input logic cfg_prefetch_updt_plru_i,
|
||||
// }}}
|
||||
|
||||
// CHECK interface
|
||||
// {{{
|
||||
input logic mshr_check_i,
|
||||
input mshr_set_t mshr_check_set_i,
|
||||
input mshr_tag_t mshr_check_tag_i,
|
||||
output logic mshr_check_hit_o,
|
||||
// }}}
|
||||
|
||||
// MISS interface
|
||||
// {{{
|
||||
// MISS request interface
|
||||
output logic mshr_alloc_ready_o,
|
||||
input logic mshr_alloc_i,
|
||||
input logic mshr_alloc_cs_i,
|
||||
input hpdcache_nline_t mshr_alloc_nline_i,
|
||||
output logic mshr_alloc_full_o,
|
||||
input hpdcache_req_tid_t mshr_alloc_tid_i,
|
||||
input hpdcache_req_sid_t mshr_alloc_sid_i,
|
||||
input hpdcache_word_t mshr_alloc_word_i,
|
||||
input logic mshr_alloc_need_rsp_i,
|
||||
input logic mshr_alloc_is_prefetch_i,
|
||||
|
||||
// REFILL MISS interface
|
||||
input logic refill_req_ready_i,
|
||||
output logic refill_req_valid_o,
|
||||
output logic refill_busy_o,
|
||||
output logic refill_updt_plru_o,
|
||||
output hpdcache_set_t refill_set_o,
|
||||
output hpdcache_dir_entry_t refill_dir_entry_o,
|
||||
input hpdcache_way_vector_t refill_victim_way_i,
|
||||
output logic refill_write_dir_o,
|
||||
output logic refill_write_data_o,
|
||||
output hpdcache_way_vector_t refill_victim_way_o,
|
||||
output hpdcache_refill_data_t refill_data_o,
|
||||
output hpdcache_word_t refill_word_o,
|
||||
output hpdcache_nline_t refill_nline_o,
|
||||
output logic refill_updt_rtab_o,
|
||||
|
||||
// REFILL core response interface
|
||||
output logic refill_core_rsp_valid_o,
|
||||
output hpdcache_rsp_t refill_core_rsp_o,
|
||||
// }}}
|
||||
|
||||
// MEMORY interface
|
||||
// {{{
|
||||
input logic mem_req_ready_i,
|
||||
output logic mem_req_valid_o,
|
||||
output hpdcache_mem_req_t mem_req_o,
|
||||
|
||||
output logic mem_resp_ready_o,
|
||||
input logic mem_resp_valid_i,
|
||||
input hpdcache_mem_resp_r_t mem_resp_i
|
||||
// }}}
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Declaration of constants and types
|
||||
// {{{
|
||||
localparam int unsigned REFILL_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS;
|
||||
|
||||
typedef enum logic {
|
||||
MISS_REQ_IDLE = 1'b0,
|
||||
MISS_REQ_SEND = 1'b1
|
||||
} miss_req_fsm_e;
|
||||
|
||||
typedef enum {
|
||||
REFILL_IDLE,
|
||||
REFILL_WRITE,
|
||||
REFILL_WRITE_DIR
|
||||
} refill_fsm_e;
|
||||
|
||||
typedef struct packed {
|
||||
hpdcache_mem_error_e r_error;
|
||||
hpdcache_mem_id_t r_id;
|
||||
} mem_resp_metadata_t;
|
||||
|
||||
//  Extracts the MSHR set from a memory transaction ID: the lowest
//  HPDCACHE_MSHR_SET_WIDTH bits of the ID.
function automatic mshr_set_t get_ack_mshr_set(hpdcache_mem_id_t id);
    mshr_set_t ack_set;

    ack_set = id[HPDCACHE_MSHR_SET_WIDTH-1:0];
    return ack_set;
endfunction
|
||||
|
||||
//  Extracts the MSHR way from a memory transaction ID: the
//  HPDCACHE_MSHR_WAY_WIDTH bits located right above the set field.
function automatic mshr_way_t get_ack_mshr_way(hpdcache_mem_id_t id);
    mshr_way_t ack_way;

    ack_way = id[HPDCACHE_MSHR_SET_WIDTH+HPDCACHE_MSHR_WAY_WIDTH-1:HPDCACHE_MSHR_SET_WIDTH];
    return ack_way;
endfunction
|
||||
// }}}
|
||||
|
||||
// Declaration of internal signals and registers
|
||||
// {{{
|
||||
miss_req_fsm_e miss_req_fsm_q, miss_req_fsm_d;
|
||||
mshr_way_t mshr_alloc_way_q, mshr_alloc_way_d;
|
||||
mshr_set_t mshr_alloc_set_q, mshr_alloc_set_d;
|
||||
mshr_tag_t mshr_alloc_tag_q, mshr_alloc_tag_d;
|
||||
|
||||
refill_fsm_e refill_fsm_q, refill_fsm_d;
|
||||
hpdcache_set_t refill_set_q;
|
||||
hpdcache_tag_t refill_tag_q;
|
||||
hpdcache_way_vector_t refill_way_q;
|
||||
hpdcache_req_sid_t refill_sid_q;
|
||||
hpdcache_req_tid_t refill_tid_q;
|
||||
hpdcache_word_t refill_cnt_q, refill_cnt_d;
|
||||
logic refill_need_rsp_q;
|
||||
logic refill_is_prefetch_q;
|
||||
hpdcache_word_t refill_core_rsp_word_q;
|
||||
logic refill_way_bypass;
|
||||
|
||||
mem_resp_metadata_t refill_fifo_resp_meta_wdata, refill_fifo_resp_meta_rdata;
|
||||
logic refill_fifo_resp_meta_w, refill_fifo_resp_meta_wok;
|
||||
logic refill_fifo_resp_meta_r, refill_fifo_resp_meta_rok;
|
||||
|
||||
logic refill_fifo_resp_data_w, refill_fifo_resp_data_wok;
|
||||
hpdcache_refill_data_t refill_fifo_resp_data_rdata;
|
||||
logic refill_fifo_resp_data_r;
|
||||
|
||||
logic refill_core_rsp_valid;
|
||||
hpdcache_req_data_t refill_core_rsp_rdata;
|
||||
hpdcache_req_sid_t refill_core_rsp_sid;
|
||||
hpdcache_req_tid_t refill_core_rsp_tid;
|
||||
logic refill_core_rsp_error;
|
||||
hpdcache_word_t refill_core_rsp_word;
|
||||
hpdcache_rsp_t refill_core_rsp;
|
||||
|
||||
logic refill_is_error;
|
||||
|
||||
logic mshr_alloc;
|
||||
logic mshr_alloc_cs;
|
||||
logic mshr_ack;
|
||||
logic mshr_ack_cs;
|
||||
mshr_set_t mshr_ack_set;
|
||||
mshr_way_t mshr_ack_way;
|
||||
hpdcache_nline_t mshr_ack_nline;
|
||||
hpdcache_req_sid_t mshr_ack_src_id;
|
||||
hpdcache_req_tid_t mshr_ack_req_id;
|
||||
hpdcache_word_t mshr_ack_word;
|
||||
logic mshr_ack_need_rsp;
|
||||
logic mshr_ack_is_prefetch;
|
||||
logic mshr_empty;
|
||||
// }}}
|
||||
|
||||
// Miss Request FSM
|
||||
// {{{
|
||||
//  Next-state and output logic of the miss request FSM.
//
//  MISS_REQ_IDLE : ready to accept an allocation; the MSHR allocation
//                  signals are forwarded and an allocation moves the FSM to
//                  MISS_REQ_SEND.
//  MISS_REQ_SEND : the memory request is presented until the memory
//                  interface is ready, then the FSM returns to
//                  MISS_REQ_IDLE.
always_comb
begin : miss_req_fsm_comb
    //  defaults: not ready, nothing allocated, nothing sent, state held
    mshr_alloc_ready_o = 1'b0;
    mshr_alloc         = 1'b0;
    mshr_alloc_cs      = 1'b0;
    mem_req_valid_o    = 1'b0;
    miss_req_fsm_d     = miss_req_fsm_q;

    case (miss_req_fsm_q)
        MISS_REQ_SEND: begin
            mem_req_valid_o = 1'b1;

            //  stay here (default) until the request is accepted
            if (mem_req_ready_i) begin
                miss_req_fsm_d = MISS_REQ_IDLE;
            end
        end

        MISS_REQ_IDLE: begin
            mshr_alloc_ready_o = 1'b1;
            mshr_alloc_cs      = mshr_alloc_cs_i;
            mshr_alloc         = mshr_alloc_i;

            //  stay here (default) until a miss is allocated
            if (mshr_alloc_i) begin
                miss_req_fsm_d = MISS_REQ_SEND;
            end
        end
    endcase
end
|
||||
|
||||
localparam hpdcache_uint REFILL_REQ_SIZE = $clog2(HPDcacheMemDataWidth/8);
|
||||
localparam hpdcache_uint REFILL_REQ_LEN = HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth;
|
||||
|
||||
assign mem_req_o.mem_req_addr = {mshr_alloc_tag_q, mshr_alloc_set_q, {HPDCACHE_OFFSET_WIDTH{1'b0}} },
|
||||
mem_req_o.mem_req_len = hpdcache_mem_len_t'(REFILL_REQ_LEN-1),
|
||||
mem_req_o.mem_req_size = hpdcache_mem_size_t'(REFILL_REQ_SIZE),
|
||||
mem_req_o.mem_req_id = hpdcache_mem_id_t'({mshr_alloc_way_q, mshr_alloc_set_q}),
|
||||
mem_req_o.mem_req_command = HPDCACHE_MEM_READ,
|
||||
mem_req_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD,
|
||||
mem_req_o.mem_req_cacheable = 1'b1;
|
||||
|
||||
//  Registers capturing the way, set and tag of the allocated MSHR entry.
//  They load together when mshr_alloc is high and have no reset.
always_ff @(posedge clk_i)
begin : miss_req_fsm_internal_ff
    if (mshr_alloc) begin
        {mshr_alloc_tag_q, mshr_alloc_set_q, mshr_alloc_way_q} <=
                {mshr_alloc_tag_d, mshr_alloc_set_d, mshr_alloc_way_d};
    end
end
|
||||
|
||||
// State register of the miss request FSM: forced to MISS_REQ_IDLE by the
// asynchronous active-low reset, otherwise it loads the next state on every
// rising clock edge.
always_ff @(posedge clk_i or negedge rst_ni)
|
||||
begin : miss_req_fsm_ff
|
||||
if (!rst_ni) begin
|
||||
miss_req_fsm_q <= MISS_REQ_IDLE; // reset state
|
||||
end else begin
|
||||
miss_req_fsm_q <= miss_req_fsm_d;
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Refill FSM
|
||||
// {{{
|
||||
|
||||
// Request permission from the refill arbiter when the refill FSM is idle and
// a memory response is pending in the metadata FIFO
assign refill_req_valid_o =
        (refill_fsm_q == REFILL_IDLE) ? refill_fifo_resp_meta_rok : 1'b0;

// Victim way: forwarded directly from the victim selection logic when the
// bypass is active, otherwise taken from the internal register
assign refill_victim_way_o =
        refill_way_bypass ? refill_victim_way_i : refill_way_q;
|
||||
|
||||
// Refill FSM (combinational part).
//
// REFILL_IDLE     : wait for a memory response and for the grant of the
//                   refill arbiter, then acknowledge (read) the MSHR entry.
// REFILL_WRITE    : write the response data into the cache, one chunk of
//                   HPDCACHE_ACCESS_WORDS words per cycle, and respond to the
//                   core when needed.
// REFILL_WRITE_DIR: write the directory one cycle after the data (only used
//                   when a cache line is written in a single chunk).
always_comb
begin : miss_resp_fsm_comb
    // Value of the word counter while the last chunk of the line is written
    automatic hpdcache_uint last_chunk_word;
    last_chunk_word = HPDCACHE_CL_WORDS - HPDCACHE_ACCESS_WORDS;

    // Default values
    refill_fsm_d            = refill_fsm_q;
    refill_cnt_d            = refill_cnt_q;

    mshr_ack                = 1'b0;
    mshr_ack_cs             = 1'b0;

    refill_fifo_resp_data_r = 1'b0;
    refill_fifo_resp_meta_r = 1'b0;

    refill_core_rsp_word    = 0;
    refill_core_rsp_error   = 1'b0;
    refill_core_rsp_tid     = '0;
    refill_core_rsp_sid     = '0;
    refill_core_rsp_valid   = 1'b0;

    refill_way_bypass       = 1'b0;
    refill_updt_rtab_o      = 1'b0;
    refill_write_data_o     = 1'b0;
    refill_write_dir_o      = 1'b0;
    refill_set_o            = '0;
    refill_updt_plru_o      = 1'b0;

    case (refill_fsm_q)
        // Wait for refill responses
        // {{{
        REFILL_IDLE: begin
            if (refill_fifo_resp_meta_rok) begin
                // The MSHR is selected as soon as a response is pending,
                // without waiting for the grant of the refill arbiter. This
                // avoids an additional timing path (at the cost of a possibly
                // slightly higher power consumption).
                mshr_ack_cs = 1'b1;

                // Start refilling once the permission is granted
                if (refill_req_ready_i) begin
                    // read the MSHR entry and reset its valid bit
                    mshr_ack     = 1'b1;

                    // restart the counter of refilled words
                    refill_cnt_d = 0;

                    refill_fsm_d = REFILL_WRITE;
                end
            end
        end
        // }}}

        // Write refill data into the cache
        // {{{
        REFILL_WRITE: begin
            automatic logic line_is_prefetch;

            if (refill_cnt_q == 0) begin
                // First chunk: the request information comes from the MSHR
                // read outputs, and the victim way is bypassed
                automatic hpdcache_uint rsp_chunk;
                rsp_chunk = hpdcache_uint'(mshr_ack_word)/HPDCACHE_ACCESS_WORDS;

                // Respond to the core (when needed) if the requested word
                // belongs to this chunk
                if (mshr_ack_need_rsp) begin
                    refill_core_rsp_valid = (rsp_chunk == 0);
                end

                refill_core_rsp_sid   = mshr_ack_src_id;
                refill_core_rsp_tid   = mshr_ack_req_id;
                refill_core_rsp_error = refill_is_error;
                refill_core_rsp_word  = hpdcache_word_t'(
                        hpdcache_uint'(mshr_ack_word)/HPDCACHE_REQ_WORDS);

                // Target of the write into the cache data array
                refill_set_o          = mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH];
                refill_way_bypass     = 1'b1;
                line_is_prefetch      = mshr_ack_is_prefetch;
            end else begin
                // Following chunks: the request information comes from the
                // internal registers
                automatic hpdcache_uint rsp_chunk;
                rsp_chunk = hpdcache_uint'(refill_core_rsp_word_q)/
                            HPDCACHE_ACCESS_WORDS;

                // Respond to the core (when needed) if the requested word
                // belongs to this chunk
                if (refill_need_rsp_q) begin
                    automatic hpdcache_uint cur_chunk;
                    cur_chunk = hpdcache_uint'(refill_cnt_q)/HPDCACHE_ACCESS_WORDS;
                    refill_core_rsp_valid = (rsp_chunk == cur_chunk);
                end

                refill_core_rsp_sid   = refill_sid_q;
                refill_core_rsp_tid   = refill_tid_q;
                refill_core_rsp_error = refill_is_error;
                refill_core_rsp_word  = hpdcache_word_t'(
                        hpdcache_uint'(refill_core_rsp_word_q)/HPDCACHE_REQ_WORDS);

                // Target of the write into the cache data array
                refill_set_o          = refill_set_q;
                refill_way_bypass     = 1'b0;
                line_is_prefetch      = refill_is_prefetch_q;
            end

            // Write the data into the cache data array (unless the response
            // carries an error)
            refill_write_data_o = ~refill_is_error;

            // Consume a chunk of data from the FIFO buffer in the memory interface
            refill_fifo_resp_data_r = 1'b1;

            // Advance the word counter by one chunk
            refill_cnt_d = refill_cnt_q + hpdcache_word_t'(HPDCACHE_ACCESS_WORDS);

            // Update the directory on the last chunk of data
            if (hpdcache_uint'(refill_cnt_q) == last_chunk_word) begin
                if (last_chunk_word == 0) begin
                    // Special case: if the cache-line data can be written in a single
                    // cycle, wait an additional cycle to write the directory. This
                    // prevents a RAM-to-RAM timing path between the MSHR and the DIR.
                    refill_fsm_d = REFILL_WRITE_DIR;
                end else begin
                    // Write the new entry in the cache directory
                    refill_write_dir_o = ~refill_is_error;

                    // Update the PLRU bits only when there is no error in the
                    // response AND either:
                    // - it is a read miss, OR
                    // - it is a prefetch and cfg_prefetch_updt_plru_i is set
                    refill_updt_plru_o = ~refill_is_error &
                                         (~line_is_prefetch | cfg_prefetch_updt_plru_i);

                    // Update dependency flags in the retry table
                    refill_updt_rtab_o = 1'b1;

                    // Consume the response from the network
                    refill_fifo_resp_meta_r = 1'b1;

                    refill_fsm_d = REFILL_IDLE;
                end
            end
        end
        // }}}

        // Write cache directory (this state is only visited when ACCESS_WORDS == CL_WORDS,
        // that is, when the entire cache-line can be written in a single cycle)
        // {{{
        REFILL_WRITE_DIR: begin
            automatic logic line_is_prefetch;
            line_is_prefetch = refill_is_prefetch_q;

            // Select the target set and way (from the internal registers)
            refill_set_o      = refill_set_q;
            refill_way_bypass = 1'b0;

            // Write the new entry in the cache directory
            refill_write_dir_o = ~refill_is_error;

            // Update the PLRU bits only when there is no error in the
            // response AND either:
            // - it is a read miss, OR
            // - it is a prefetch and cfg_prefetch_updt_plru_i is set
            refill_updt_plru_o = ~refill_is_error &
                                 (~line_is_prefetch | cfg_prefetch_updt_plru_i);

            // Update dependency flags in the retry table
            refill_updt_rtab_o = 1'b1;

            // Consume the response from the network
            refill_fifo_resp_meta_r = 1'b1;

            refill_fsm_d = REFILL_IDLE;
        end
        // }}}

        default: begin
            // pragma translate_off
            $error("Illegal state");
            // pragma translate_on
        end
    endcase
end
|
||||
|
||||
// Error flag of the memory response at the head of the metadata FIFO
assign refill_is_error = (refill_fifo_resp_meta_rdata.r_error == HPDCACHE_MEM_RESP_NOK);

// Status of the refill in progress
assign refill_busy_o  = (refill_fsm_q != REFILL_IDLE);
assign refill_nline_o = {refill_tag_q, refill_set_q};
assign refill_word_o  = refill_cnt_q;

// MSHR slot to acknowledge, extracted from the ID of the memory response
assign mshr_ack_set = get_ack_mshr_set(refill_fifo_resp_meta_rdata.r_id);
assign mshr_ack_way = get_ack_mshr_way(refill_fifo_resp_meta_rdata.r_id);

// New directory entry
assign refill_dir_entry_o.tag      = refill_tag_q;
assign refill_dir_entry_o.reserved = '0;

// Response sent to the core (never aborted)
assign refill_core_rsp.rdata   = refill_core_rsp_rdata;
assign refill_core_rsp.sid     = refill_core_rsp_sid;
assign refill_core_rsp.tid     = refill_core_rsp_tid;
assign refill_core_rsp.error   = refill_core_rsp_error;
assign refill_core_rsp.aborted = 1'b0;
|
||||
|
||||
// Single-entry buffer on the refill response path to the core. The read side
// is always enabled, and the write-ok output is left unconnected.
hpdcache_fifo_reg #(
    .fifo_data_t    (hpdcache_rsp_t),
    .FEEDTHROUGH    (HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH),
    .FIFO_DEPTH     (1)
) i_refill_core_rsp_buf(
    .clk_i          (clk_i),
    .rst_ni         (rst_ni),

    // write side
    .w_i            (refill_core_rsp_valid),
    .wdata_i        (refill_core_rsp),
    .wok_o          (/*unused*/),

    // read side
    .r_i            (1'b1),  // core shall always be ready to consume a response
    .rok_o          (refill_core_rsp_valid_o),
    .rdata_o        (refill_core_rsp_o)
);
|
||||
|
||||
// Selection of the data returned to the core from the refill data
generate
    // The refill data is wider than the core interface: select the requested
    // part using the least significant bits of the response word index
    if (REFILL_REQ_RATIO > 1) begin : core_rsp_data_mux_gen
        hpdcache_mux #(
            .DATA_WIDTH     (HPDCACHE_REQ_DATA_WIDTH),
            .NINPUT         (REFILL_REQ_RATIO)
        ) data_read_rsp_mux_i(
            .sel_i          (refill_core_rsp_word[0 +: $clog2(REFILL_REQ_RATIO)]),
            .data_i         (refill_data_o),
            .data_o         (refill_core_rsp_rdata)
        );
    end

    // The refill data has the same width as the core interface: no selection
    else begin
        assign refill_core_rsp_rdata = refill_data_o;
    end
endgenerate
|
||||
|
||||
/* Metadata (response ID and error flag) written into the metadata FIFO.
 *
 * FIXME: when a response has multiple chunks, the error bit is not
 *        necessarily set on all of them */
assign refill_fifo_resp_meta_wdata = '{
    r_id   : mem_resp_i.mem_resp_r_id,
    r_error: mem_resp_i.mem_resp_r_error
};
|
||||
|
||||
// Two-entry FIFO holding the metadata of the memory responses
hpdcache_fifo_reg #(
    .fifo_data_t    (mem_resp_metadata_t),
    .FIFO_DEPTH     (2)
) i_r_metadata_fifo (
    .clk_i          (clk_i),
    .rst_ni         (rst_ni),

    // write side (memory response)
    .w_i            (refill_fifo_resp_meta_w),
    .wdata_i        (refill_fifo_resp_meta_wdata),
    .wok_o          (refill_fifo_resp_meta_wok),

    // read side (refill FSM)
    .r_i            (refill_fifo_resp_meta_r),
    .rdata_o        (refill_fifo_resp_meta_rdata),
    .rok_o          (refill_fifo_resp_meta_rok)
);
|
||||
|
||||
// Buffer for the data of the memory responses. Its implementation depends on
// the relation between the width of the memory interface and the refill width.
generate
    // The memory interface is narrower than the refill width: upsize
    if (HPDcacheMemDataWidth < HPDCACHE_REFILL_DATA_WIDTH) begin
        hpdcache_data_upsize #(
            .WR_WIDTH   (HPDcacheMemDataWidth),
            .RD_WIDTH   (HPDCACHE_REFILL_DATA_WIDTH),
            .DEPTH      (2*(HPDCACHE_CL_WIDTH/HPDCACHE_REFILL_DATA_WIDTH))
        ) i_rdata_upsize (
            .clk_i      (clk_i),
            .rst_ni     (rst_ni),

            .w_i        (refill_fifo_resp_data_w),
            .wlast_i    (mem_resp_i.mem_resp_r_last),
            .wdata_i    (mem_resp_i.mem_resp_r_data),
            .wok_o      (refill_fifo_resp_data_wok),

            .r_i        (refill_fifo_resp_data_r),
            .rdata_o    (refill_fifo_resp_data_rdata),
            .rok_o      (/* unused */)
        );
    end

    // The memory interface is wider than the refill width: downsize
    else if (HPDcacheMemDataWidth > HPDCACHE_REFILL_DATA_WIDTH) begin
        hpdcache_data_downsize #(
            .WR_WIDTH   (HPDcacheMemDataWidth),
            .RD_WIDTH   (HPDCACHE_REFILL_DATA_WIDTH),
            .DEPTH      (2*(HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth))
        ) i_rdata_downsize (
            .clk_i      (clk_i),
            .rst_ni     (rst_ni),

            .w_i        (refill_fifo_resp_data_w),
            .wdata_i    (mem_resp_i.mem_resp_r_data),
            .wok_o      (refill_fifo_resp_data_wok),

            .r_i        (refill_fifo_resp_data_r),
            .rdata_o    (refill_fifo_resp_data_rdata),
            .rok_o      (/* unused */)
        );
    end

    // Both widths are equal: plain two-entry FIFO
    else begin
        hpdcache_fifo_reg #(
            .fifo_data_t    (hpdcache_refill_data_t),
            .FIFO_DEPTH     (2)
        ) i_rdata_fifo (
            .clk_i      (clk_i),
            .rst_ni     (rst_ni),

            .w_i        (refill_fifo_resp_data_w),
            .wdata_i    (mem_resp_i.mem_resp_r_data),
            .wok_o      (refill_fifo_resp_data_wok),

            .r_i        (refill_fifo_resp_data_r),
            .rdata_o    (refill_fifo_resp_data_rdata),
            .rok_o      (/* unused */)
        );
    end
endgenerate
|
||||
|
||||
// The refill data comes from the output of the response data buffer
assign refill_data_o = refill_fifo_resp_data_rdata;

// Handshake with the memory response channel.
//
// The metadata is written only with the last beat of a response. The last
// beat is accepted only if both the data buffer and the metadata FIFO can
// accept it; the other beats only need room in the data buffer.
assign mem_resp_ready_o =
        refill_fifo_resp_data_wok &
        (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last);

assign refill_fifo_resp_data_w =
        mem_resp_valid_i &
        (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last);

assign refill_fifo_resp_meta_w =
        mem_resp_valid_i &
        (refill_fifo_resp_data_wok & mem_resp_i.mem_resp_r_last);
|
||||
|
||||
// State register of the refill FSM (asynchronous, active-low reset)
always_ff @(posedge clk_i or negedge rst_ni)
begin : miss_resp_fsm_ff
    if (!rst_ni)
        refill_fsm_q <= REFILL_IDLE;
    else
        refill_fsm_q <= refill_fsm_d;
end
|
||||
|
||||
// Internal registers of the refill FSM (no reset)
always_ff @(posedge clk_i)
begin : miss_resp_fsm_internal_ff
    // The word counter is updated on every cycle
    refill_cnt_q <= refill_cnt_d;

    // While the first chunk is written, save the information read from the
    // MSHR and the selected victim way for the following cycles
    if ((refill_fsm_q == REFILL_WRITE) && (refill_cnt_q == 0)) begin
        refill_tag_q           <= mshr_ack_nline[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];
        refill_set_q           <= mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH];
        refill_way_q           <= refill_victim_way_i;

        refill_core_rsp_word_q <= mshr_ack_word;
        refill_need_rsp_q      <= mshr_ack_need_rsp;
        refill_is_prefetch_q   <= mshr_ack_is_prefetch;
        refill_tid_q           <= mshr_ack_req_id;
        refill_sid_q           <= mshr_ack_src_id;
    end
end
|
||||
// }}}
|
||||
|
||||
// Miss Status Holding Register component
|
||||
// {{{
|
||||
// MSHR instance: checked and allocated on misses (miss request FSM),
// acknowledged (read and invalidated) by the refill FSM
hpdcache_mshr hpdcache_mshr_i (
    .clk_i                  (clk_i),
    .rst_ni                 (rst_ni),

    // global status
    .empty_o                (mshr_empty),
    .full_o                 (mshr_full_o),

    // check interface
    .check_i                (mshr_check_i),
    .check_set_i            (mshr_check_set_i),
    .check_tag_i            (mshr_check_tag_i),
    .hit_o                  (mshr_check_hit_o),

    // allocation interface
    .alloc_i                (mshr_alloc),
    .alloc_cs_i             (mshr_alloc_cs),
    .alloc_nline_i          (mshr_alloc_nline_i),
    .alloc_req_id_i         (mshr_alloc_tid_i),
    .alloc_src_id_i         (mshr_alloc_sid_i),
    .alloc_word_i           (mshr_alloc_word_i),
    .alloc_need_rsp_i       (mshr_alloc_need_rsp_i),
    .alloc_is_prefetch_i    (mshr_alloc_is_prefetch_i),
    .alloc_full_o           (mshr_alloc_full_o),
    .alloc_set_o            (mshr_alloc_set_d),
    .alloc_tag_o            (mshr_alloc_tag_d),
    .alloc_way_o            (mshr_alloc_way_d),

    // acknowledge interface
    .ack_i                  (mshr_ack),
    .ack_cs_i               (mshr_ack_cs),
    .ack_set_i              (mshr_ack_set),
    .ack_way_i              (mshr_ack_way),
    .ack_req_id_o           (mshr_ack_req_id),
    .ack_src_id_o           (mshr_ack_src_id),
    .ack_nline_o            (mshr_ack_nline),
    .ack_word_o             (mshr_ack_word),
    .ack_need_rsp_o         (mshr_ack_need_rsp),
    .ack_is_prefetch_o      (mshr_ack_is_prefetch)
);
|
||||
|
||||
// Indicate to the cache controller that there is no pending miss, that is,
// the MSHR is empty and the refill FSM is not busy (the miss handler has
// finished processing the last miss response).
assign mshr_empty_o = ~refill_busy_o & mshr_empty;
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// {{{
|
||||
// pragma translate_off
|
||||
// The ID of the memory requests carries the MSHR way and the MSHR set, so the
// memory interface ID needs at least as many bits as both of them together
initial
    assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_WAY_WIDTH + HPDCACHE_MSHR_SET_WIDTH))
    else $error("miss_handler: not enough ID bits in the memory interface");
|
||||
// pragma translate_on
|
||||
// }}}
|
||||
|
||||
endmodule
|
||||
// }}}
|
||||
|
|
@ -0,0 +1,385 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache Miss Status Holding Register (MSHR)
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_mshr
|
||||
import hpdcache_pkg::*;
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
// Clock and reset signals
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// Global control signals
|
||||
output logic empty_o,
|
||||
output logic full_o,
|
||||
|
||||
// Check and allocation interface
|
||||
input logic check_i,
|
||||
input mshr_set_t check_set_i,
|
||||
input mshr_tag_t check_tag_i,
|
||||
output logic hit_o,
|
||||
input logic alloc_i,
|
||||
input logic alloc_cs_i,
|
||||
input hpdcache_nline_t alloc_nline_i,
|
||||
input hpdcache_req_tid_t alloc_req_id_i,
|
||||
input hpdcache_req_sid_t alloc_src_id_i,
|
||||
input hpdcache_word_t alloc_word_i,
|
||||
input logic alloc_need_rsp_i,
|
||||
input logic alloc_is_prefetch_i,
|
||||
output logic alloc_full_o,
|
||||
output mshr_set_t alloc_set_o,
|
||||
output mshr_tag_t alloc_tag_o,
|
||||
output mshr_way_t alloc_way_o,
|
||||
|
||||
// Acknowledge interface
|
||||
input logic ack_i,
|
||||
input logic ack_cs_i,
|
||||
input mshr_set_t ack_set_i,
|
||||
input mshr_way_t ack_way_i,
|
||||
output hpdcache_req_tid_t ack_req_id_o,
|
||||
output hpdcache_req_sid_t ack_src_id_o,
|
||||
output hpdcache_nline_t ack_nline_o,
|
||||
output hpdcache_word_t ack_word_o,
|
||||
output logic ack_need_rsp_o,
|
||||
output logic ack_is_prefetch_o
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Definition of constants and types
|
||||
// {{{
|
||||
typedef struct packed {
|
||||
mshr_tag_t tag;
|
||||
hpdcache_req_tid_t req_id;
|
||||
hpdcache_req_sid_t src_id;
|
||||
hpdcache_word_t word_idx;
|
||||
logic need_rsp;
|
||||
logic is_prefetch;
|
||||
} mshr_entry_t;
|
||||
|
||||
|
||||
// Compute the width of MSHR entries depending on the support of write
|
||||
// bitmask or not (write byte enable)
|
||||
localparam int unsigned HPDCACHE_MSHR_ENTRY_BITS = $bits(mshr_entry_t);
|
||||
|
||||
localparam int unsigned HPDCACHE_MSHR_RAM_ENTRY_BITS =
|
||||
HPDCACHE_MSHR_RAM_WBYTEENABLE ?
|
||||
((HPDCACHE_MSHR_ENTRY_BITS + 7)/8) * 8 : // align to 8 bits
|
||||
HPDCACHE_MSHR_ENTRY_BITS; // or use the exact number of bits
|
||||
|
||||
typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_data_t;
|
||||
// }}}
|
||||
|
||||
// Definition of internal wires and registers
|
||||
// {{{
|
||||
logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_q, mshr_valid_d;
|
||||
mshr_set_t check_set_q;
|
||||
mshr_set_t alloc_set;
|
||||
mshr_tag_t alloc_tag;
|
||||
hpdcache_set_t alloc_dcache_set;
|
||||
mshr_way_t ack_way_q;
|
||||
mshr_set_t ack_set_q;
|
||||
hpdcache_set_t ack_dcache_set;
|
||||
hpdcache_tag_t ack_dcache_tag;
|
||||
|
||||
logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_set, mshr_valid_rst;
|
||||
mshr_entry_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wentry;
|
||||
mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wdata;
|
||||
mshr_entry_t [HPDCACHE_MSHR_WAYS-1:0] mshr_rentry;
|
||||
mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_rdata;
|
||||
|
||||
logic mshr_we;
|
||||
logic mshr_cs;
|
||||
mshr_set_t mshr_addr;
|
||||
logic check;
|
||||
// }}}
|
||||
|
||||
// Control part for the allocation and check operations
|
||||
// {{{
|
||||
|
||||
// The allocation operation has priority over the check operation
assign check = ~alloc_i & check_i;

// Split the line address of the allocation:
// - the least significant bits select the MSHR set and the HPDcache set
// - the bits above the MSHR set form the MSHR tag
assign alloc_set        = alloc_nline_i[0 +: HPDCACHE_MSHR_SET_WIDTH];
assign alloc_dcache_set = alloc_nline_i[0 +: HPDCACHE_SET_WIDTH];
assign alloc_tag        = alloc_nline_i[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH];
|
||||
|
||||
// Select the way used by an allocation: the free way with the lowest index in
// the allocation set (way 0 is returned when no way is free)
always_comb
begin
    automatic mshr_way_t free_way;

    free_way = 0;

    // Scan the ways from the last one down to the first one, so that the
    // free way with the lowest index is the one that remains selected
    for (int w = int'(HPDCACHE_MSHR_WAYS) - 1; w >= 0; w--) begin
        if (!mshr_valid_q[w*HPDCACHE_MSHR_SETS + int'(alloc_set)]) begin
            free_way = mshr_way_t'(w);
        end
    end

    alloc_way_o = free_way;
end
|
||||
|
||||
// Indicate whether the MSHR can accept the checked line (in case of
// allocation): the set of the last check is full when none of its ways is free
always_comb
begin
    // full unless a free way is found in the registered check set
    alloc_full_o = 1'b1;
    for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin
        if (!mshr_valid_q[w*HPDCACHE_MSHR_SETS + int'(check_set_q)]) begin
            alloc_full_o = 1'b0;
        end
    end
end
|
||||
|
||||
// Export the MSHR set and tag computed for the allocation
assign alloc_tag_o = alloc_tag;
assign alloc_set_o = alloc_set;

// The MSHR memory is written only by allocation operations
assign mshr_we = alloc_i;
|
||||
|
||||
// Translation table used to rebuild the HPDcache set of an acknowledged entry:
// written on allocations, read with the registered acknowledged slot
hpdcache_mshr_to_cache_set trlt_i (
    .clk_i               (clk_i),

    // write interface (allocation)
    .write_i             (mshr_we),
    .write_dcache_set_i  (alloc_dcache_set),
    .write_mshr_way_i    (alloc_way_o),

    // read interface (acknowledge)
    .read_mshr_way_i     (ack_way_q),
    .read_mshr_set_i     (ack_set_q),
    .read_dcache_set_o   (ack_dcache_set)
);
|
||||
|
||||
|
||||
// Build the entry written by an allocation. The same entry is presented to
// all the ways: the way actually written is selected by the write mask
always_comb
begin
    automatic mshr_entry_t new_entry;

    new_entry = '{
        tag        : alloc_tag,
        req_id     : alloc_req_id_i,
        src_id     : alloc_src_id_i,
        word_idx   : alloc_word_i,
        need_rsp   : alloc_need_rsp_i,
        is_prefetch: alloc_is_prefetch_i
    };

    for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin
        mshr_wentry[w] = new_entry;
    end
end
|
||||
// }}}
|
||||
|
||||
// Shared control signals
|
||||
// {{{
|
||||
// The MSHR memory is selected by any of the three operations
assign mshr_cs = ack_cs_i | alloc_cs_i | check_i;

// Address priority: acknowledge first, then allocation, then check
assign mshr_addr = ack_i   ? ack_set_i :
                   (alloc_i ? alloc_set : check_set_i);
|
||||
|
||||
// Compute the valid bits to set (allocation) and to reset (acknowledge).
// The valid bit of an entry is at index {way, set}.
always_comb
begin : mshr_valid_comb
    automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] alloc_slot;
    automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] ack_slot;

    ack_slot   = {ack_way_i, ack_set_i};
    alloc_slot = {alloc_way_o, alloc_set};

    for (int unsigned slot = 0; slot < HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS; slot++) begin
        mshr_valid_set[slot] = (slot == hpdcache_uint'(alloc_slot)) ? alloc_i : 1'b0;
        mshr_valid_rst[slot] = (slot == hpdcache_uint'(ack_slot))   ? ack_i   : 1'b0;
    end
end

// Next valid bits: keep the valid entries that are not acknowledged, and
// validate the allocated entry
assign mshr_valid_d = (mshr_valid_q & ~mshr_valid_rst) | (mshr_valid_set & ~mshr_valid_q);
|
||||
// }}}
|
||||
|
||||
// Read interface (ack)
|
||||
// {{{
|
||||
// Rebuild the HPDcache tag of the acknowledged entry
generate
    // The HPDcache tag is the HPDCACHE_TAG_WIDTH most significant bits of the
    // MSHR tag
    if (HPDCACHE_SETS >= HPDCACHE_MSHR_SETS) begin : ack_dcache_set_ge_mshr_set_gen
        assign ack_dcache_tag = mshr_rentry[ack_way_q].tag[
                HPDCACHE_MSHR_TAG_WIDTH - HPDCACHE_TAG_WIDTH +: HPDCACHE_TAG_WIDTH];
    end

    // The HPDcache tag is the MSHR tag concatenated with the most significant
    // bits of the MSHR set
    else begin : ack_dcache_set_lt_mshr_set_gen
        assign ack_dcache_tag = {
                mshr_rentry[ack_way_q].tag,
                ack_set_q[HPDCACHE_MSHR_SET_WIDTH - 1:HPDCACHE_SET_WIDTH]};
    end
endgenerate
|
||||
|
||||
// Outputs of the acknowledge interface: fields of the entry read from the
// MSHR memory, selected with the registered acknowledged way
assign ack_nline_o       = {ack_dcache_tag, ack_dcache_set};
assign ack_word_o        = mshr_rentry[ack_way_q].word_idx;
assign ack_req_id_o      = mshr_rentry[ack_way_q].req_id;
assign ack_src_id_o      = mshr_rentry[ack_way_q].src_id;
assign ack_need_rsp_o    = mshr_rentry[ack_way_q].need_rsp;
assign ack_is_prefetch_o = mshr_rentry[ack_way_q].is_prefetch;
|
||||
// }}}
|
||||
|
||||
// Global control signals
|
||||
// {{{
|
||||
// The MSHR is full when all the entries are valid, and empty when none is
assign full_o  = &mshr_valid_q;
assign empty_o = ~|mshr_valid_q;
|
||||
|
||||
// Hit detection: there is a hit when a valid entry of the registered check
// set has a tag equal to the checked tag
always_comb
begin : hit_comb
    automatic bit any_way_hit;

    any_way_hit = 1'b0;
    for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin
        automatic bit entry_valid;
        automatic bit tag_match;

        entry_valid = mshr_valid_q[w*HPDCACHE_MSHR_SETS + int'(check_set_q)];
        tag_match   = (mshr_rentry[w].tag == check_tag_i);

        any_way_hit = any_way_hit | (entry_valid && tag_match);
    end

    hit_o = any_way_hit;
end
|
||||
// }}}
|
||||
|
||||
// Internal state assignment
|
||||
// {{{
|
||||
// Valid bits and registered slots of the last acknowledge and check
// operations (asynchronous, active-low reset)
always_ff @(posedge clk_i or negedge rst_ni)
begin : mshr_ff_set
    if (!rst_ni) begin
        check_set_q  <= '0;
        ack_set_q    <= '0;
        ack_way_q    <= '0;
        mshr_valid_q <= '0;
    end else begin
        // remember the set of the last check operation
        if (check) begin
            check_set_q <= check_set_i;
        end

        // remember the slot of the last acknowledge operation
        if (ack_i) begin
            ack_set_q <= ack_set_i;
            ack_way_q <= ack_way_i;
        end

        mshr_valid_q <= mshr_valid_d;
    end
end
|
||||
// }}}
|
||||
|
||||
// Internal components
|
||||
// {{{
|
||||
// MSHR memory: one word per MSHR set, containing the entries of all the ways.
// Only the way selected by an allocation is enabled in the write mask.
generate
    // Memory with a write byte enable
    if (HPDCACHE_MSHR_RAM_WBYTEENABLE) begin : mshr_wbyteenable_gen
        typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS/8-1:0] mshr_sram_wbyteenable_t;
        mshr_sram_wbyteenable_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wbyteenable;

        // enable all the bytes of the allocated way, and none of the others
        always_comb
        begin : mshr_wbyteenable_comb
            for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin
                mshr_wbyteenable[w] = (w == int'(alloc_way_o)) ? '1 : '0;
            end
        end

        // implementation based on a register bank
        if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen
            hpdcache_regbank_wbyteenable_1rw #(
                .ADDR_SIZE      (HPDCACHE_MSHR_SET_WIDTH),
                .DATA_SIZE      (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS)
            ) mshr_mem(
                .clk            (clk_i),
                .rst_n          (rst_ni),
                .cs             (mshr_cs),
                .we             (mshr_we),
                .addr           (mshr_addr),
                .wdata          (mshr_wdata),
                .wbyteenable    (mshr_wbyteenable),
                .rdata          (mshr_rdata)
            );
        end

        // implementation based on a SRAM
        else begin : mshr_sram_gen
            hpdcache_sram_wbyteenable #(
                .ADDR_SIZE      (HPDCACHE_MSHR_SET_WIDTH),
                .DATA_SIZE      (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS)
            ) mshr_mem(
                .clk            (clk_i),
                .rst_n          (rst_ni),
                .cs             (mshr_cs),
                .we             (mshr_we),
                .addr           (mshr_addr),
                .wdata          (mshr_wdata),
                .wbyteenable    (mshr_wbyteenable),
                .rdata          (mshr_rdata)
            );
        end
    end

    // Memory with a write bit mask
    else begin : mshr_wmask_gen
        typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_wmask_t;
        mshr_sram_wmask_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wmask;

        // enable all the bits of the allocated way, and none of the others
        always_comb
        begin : mshr_wmask_comb
            for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin
                mshr_wmask[w] = (w == int'(alloc_way_o)) ? '1 : '0;
            end
        end

        // implementation based on a register bank
        if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen
            hpdcache_regbank_wmask_1rw #(
                .ADDR_SIZE      (HPDCACHE_MSHR_SET_WIDTH),
                .DATA_SIZE      (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS)
            ) mshr_mem(
                .clk            (clk_i),
                .rst_n          (rst_ni),
                .cs             (mshr_cs),
                .we             (mshr_we),
                .addr           (mshr_addr),
                .wdata          (mshr_wdata),
                .wmask          (mshr_wmask),
                .rdata          (mshr_rdata)
            );
        end

        // implementation based on a SRAM
        else begin : mshr_sram_gen
            hpdcache_sram_wmask #(
                .ADDR_SIZE      (HPDCACHE_MSHR_SET_WIDTH),
                .DATA_SIZE      (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS)
            ) mshr_mem(
                .clk            (clk_i),
                .rst_n          (rst_ni),
                .cs             (mshr_cs),
                .we             (mshr_we),
                .addr           (mshr_addr),
                .wdata          (mshr_wdata),
                .wmask          (mshr_wmask),
                .rdata          (mshr_rdata)
            );
        end
    end
endgenerate
|
||||
|
||||
// Conversion between the MSHR entries and the words of the MSHR memory (which
// may be wider than an entry): the entry uses the least significant bits
always_comb
begin : ram_word_fitting_comb
    for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin
        // read: keep only the bits of the entry
        mshr_rentry[w] = mshr_entry_t'(mshr_rdata[w][HPDCACHE_MSHR_ENTRY_BITS-1:0]);

        // write: extend the entry to the width of the memory word
        mshr_wdata[w]  = mshr_sram_data_t'(mshr_wentry[w]);
    end
end
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// {{{
|
||||
// pragma translate_off
|
||||
// An acknowledge shall never be concurrent with an allocation or a check
one_command_assert: assert property (@(posedge clk_i)
        (!ack_i || !(alloc_i || check_i)))
    else $error("MSHR: ack with concurrent alloc or check");
|
||||
// pragma translate_on
|
||||
// }}}
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache MSHR set translation table
|
||||
* History :
|
||||
*/
|
||||
// Translation from a MSHR slot (set, way) to the corresponding HPDcache set.
//
// When the HPDcache has more sets than the MSHR, the most significant bits of
// the HPDcache set cannot be deduced from the MSHR set. They are stored in a
// table (in flip-flops, without reset) when an entry is written, and they are
// concatenated with the MSHR set when the entry is read. Otherwise, the
// HPDcache set is obtained by casting the MSHR set.
module hpdcache_mshr_to_cache_set
import hpdcache_pkg::*;
    // Ports
    // {{{
(
    // Clock signal
    input  logic          clk_i,

    // Write interface: HPDcache set and MSHR way of the written entry
    input  logic          write_i,
    input  hpdcache_set_t write_dcache_set_i,
    input  mshr_way_t     write_mshr_way_i,

    // Read interface: MSHR slot to translate and resulting HPDcache set
    input  mshr_way_t     read_mshr_way_i,
    input  mshr_set_t     read_mshr_set_i,
    output hpdcache_set_t read_dcache_set_o
);
    // }}}

    generate
        // There are more HPDcache sets than MSHR sets: a translation table
        // is needed
        // {{{
        if (HPDCACHE_SETS > HPDCACHE_MSHR_SETS) begin : hpdcache_sets_gt_mshr_sets_gen
            // Number of bits of the HPDcache set above the MSHR set
            localparam hpdcache_uint TRLT_TAB_ENTRY_WIDTH =
                    HPDCACHE_SET_WIDTH - HPDCACHE_MSHR_SET_WIDTH;
            typedef logic [TRLT_TAB_ENTRY_WIDTH-1:0] trlt_entry_t;

            // Translation table: one entry per MSHR slot, containing the most
            // significant bits of the HPDcache set
            trlt_entry_t [HPDCACHE_MSHR_SETS-1:0][HPDCACHE_MSHR_WAYS-1:0] tab;
            trlt_entry_t tab_wdata;
            mshr_set_t   write_mshr_set;

            // Write operation
            // {{{
            // The least significant bits of the HPDcache set select the MSHR
            // set; the remaining bits are the data stored in the table
            assign write_mshr_set = write_dcache_set_i[0 +: HPDCACHE_MSHR_SET_WIDTH];
            assign tab_wdata      = write_dcache_set_i[HPDCACHE_MSHR_SET_WIDTH +:
                                                       TRLT_TAB_ENTRY_WIDTH];

            always_ff @(posedge clk_i)
            begin
                if (write_i) begin
                    tab[write_mshr_set][write_mshr_way_i] <= tab_wdata;
                end
            end
            // }}}

            // Read operation
            // {{{
            // Concatenate the bits stored in the table with the MSHR set
            assign read_dcache_set_o = {tab[read_mshr_set_i][read_mshr_way_i],
                                        read_mshr_set_i};
            // }}}
        end
        // }}}

        // There are at most as many HPDcache sets as MSHR sets: no
        // translation table is needed
        // {{{
        else begin : hpdcache_sets_le_mshr_sets_gen
            assign read_dcache_set_o = hpdcache_set_t'(read_mshr_set_i);
        end
        // }}}
    endgenerate

    // Assertions
    // {{{
    // pragma translate_off
    // pragma translate_on
    // }}}
endmodule
|
||||
|
|
@ -0,0 +1,623 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Write-Through (WT), High-Throughput (HTPUT) HPDcache Package
|
||||
* History :
|
||||
*/
|
||||
package hpdcache_pkg;
|
||||
// Definition of global constants for the HPDcache data and directory
|
||||
// {{{
|
||||
|
||||
// HPDcache physical address width (bits)
|
||||
localparam int unsigned HPDCACHE_PA_WIDTH = hpdcache_params_pkg::PARAM_PA_WIDTH;
|
||||
|
||||
// HPDcache number of sets
|
||||
localparam int unsigned HPDCACHE_SETS = hpdcache_params_pkg::PARAM_SETS;
|
||||
|
||||
// HPDcache number of ways
|
||||
localparam int unsigned HPDCACHE_WAYS = hpdcache_params_pkg::PARAM_WAYS;
|
||||
|
||||
// HPDcache word width (bits)
|
||||
localparam int unsigned HPDCACHE_WORD_WIDTH = hpdcache_params_pkg::PARAM_WORD_WIDTH;
|
||||
|
||||
// HPDcache cache-line size (in words)
|
||||
localparam int unsigned HPDCACHE_CL_WORDS = hpdcache_params_pkg::PARAM_CL_WORDS;
|
||||
|
||||
// HPDcache number of words in the request data channels (request and response)
|
||||
localparam int unsigned HPDCACHE_REQ_WORDS = hpdcache_params_pkg::PARAM_REQ_WORDS;
|
||||
|
||||
// HPDcache request transaction ID width (bits)
|
||||
localparam int unsigned HPDCACHE_REQ_TRANS_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_TRANS_ID_WIDTH;
|
||||
|
||||
// HPDcache request source ID width (bits)
|
||||
localparam int unsigned HPDCACHE_REQ_SRC_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_SRC_ID_WIDTH;
|
||||
// }}}
|
||||
|
||||
// Utility definitions
|
||||
// {{{
|
||||
typedef logic unsigned [31:0] hpdcache_uint;
|
||||
typedef logic signed [31:0] hpdcache_int;
|
||||
typedef logic unsigned [31:0] hpdcache_uint32;
|
||||
typedef logic signed [31:0] hpdcache_int32;
|
||||
typedef logic unsigned [63:0] hpdcache_uint64;
|
||||
typedef logic signed [63:0] hpdcache_int64;
|
||||
// }}}
|
||||
|
||||
// Definition of constants and types for HPDcache directory memory
|
||||
// {{{
|
||||
localparam int unsigned HPDCACHE_CL_WIDTH = HPDCACHE_CL_WORDS*HPDCACHE_WORD_WIDTH;
|
||||
localparam int unsigned HPDCACHE_OFFSET_WIDTH = $clog2(HPDCACHE_CL_WIDTH/8);
|
||||
localparam int unsigned HPDCACHE_NLINE_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_OFFSET_WIDTH;
|
||||
localparam int unsigned HPDCACHE_SET_WIDTH = $clog2(HPDCACHE_SETS);
|
||||
localparam int unsigned HPDCACHE_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_SET_WIDTH;
|
||||
localparam int unsigned HPDCACHE_WORD_IDX_WIDTH = $clog2(HPDCACHE_CL_WORDS);
|
||||
|
||||
typedef logic unsigned [ HPDCACHE_OFFSET_WIDTH-1:0] hpdcache_offset_t;
|
||||
typedef logic unsigned [ HPDCACHE_NLINE_WIDTH-1:0] hpdcache_nline_t;
|
||||
typedef logic unsigned [ HPDCACHE_SET_WIDTH-1:0] hpdcache_set_t;
|
||||
typedef logic unsigned [ HPDCACHE_TAG_WIDTH-1:0] hpdcache_tag_t;
|
||||
typedef logic unsigned [ $clog2(HPDCACHE_WAYS)-1:0] hpdcache_way_t;
|
||||
typedef logic unsigned [ HPDCACHE_WAYS-1:0] hpdcache_way_vector_t;
|
||||
typedef logic unsigned [HPDCACHE_WORD_IDX_WIDTH-1:0] hpdcache_word_t;
|
||||
|
||||
typedef struct packed {
|
||||
hpdcache_tag_t tag;
|
||||
logic [1:0] reserved;
|
||||
} hpdcache_dir_entry_t;
|
||||
|
||||
localparam int unsigned HPDCACHE_DIR_RAM_WIDTH = $bits(hpdcache_dir_entry_t);
|
||||
localparam int unsigned HPDCACHE_DIR_RAM_DEPTH = HPDCACHE_SETS;
|
||||
localparam int unsigned HPDCACHE_DIR_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DIR_RAM_DEPTH);
|
||||
|
||||
typedef logic [HPDCACHE_DIR_RAM_ADDR_WIDTH-1:0] hpdcache_dir_addr_t;
|
||||
|
||||
// Converts a way bit-vector into a way index.
// The result is the index of the lowest-numbered bit that is set in `way`;
// when no bit is set, the result is 0.
function automatic hpdcache_way_t hpdcache_way_vector_to_index(input hpdcache_way_vector_t way);
    hpdcache_way_t index;

    index = 0;
    // Scan from the highest way down so the lowest set bit is recorded last
    for (int w = int'(HPDCACHE_WAYS) - 1; w >= 0; w--) begin
        if (way[w]) begin
            index = hpdcache_way_t'(w);
        end
    end
    return index;
endfunction
|
||||
|
||||
// }}}
|
||||
|
||||
// Definition of constants and types for HPDcache data memory
|
||||
// {{{
|
||||
localparam int unsigned HPDCACHE_DATA_WAYS_PER_RAM_WORD =
|
||||
hpdcache_params_pkg::PARAM_DATA_WAYS_PER_RAM_WORD;
|
||||
|
||||
localparam int unsigned HPDCACHE_DATA_SETS_PER_RAM = /* FIXME this parameter is currently ignored */
|
||||
hpdcache_params_pkg::PARAM_DATA_SETS_PER_RAM;
|
||||
|
||||
// HPDcache DATA RAM implements write byte enable
|
||||
localparam bit HPDCACHE_DATA_RAM_WBYTEENABLE =
|
||||
hpdcache_params_pkg::PARAM_DATA_RAM_WBYTEENABLE;
|
||||
|
||||
// Define the number of memory contiguous words that can be accessed
|
||||
// simultaneously from the cache.
|
||||
// - This limits the maximum width for the data channel from requesters
|
||||
// - This impacts the refill latency
|
||||
localparam int unsigned HPDCACHE_ACCESS_WORDS = hpdcache_params_pkg::PARAM_ACCESS_WORDS;
|
||||
|
||||
|
||||
localparam int unsigned HPDCACHE_DATA_RAM_WIDTH =
|
||||
HPDCACHE_DATA_WAYS_PER_RAM_WORD*HPDCACHE_WORD_WIDTH;
|
||||
localparam int unsigned HPDCACHE_DATA_RAM_Y_CUTS = HPDCACHE_WAYS/HPDCACHE_DATA_WAYS_PER_RAM_WORD;
|
||||
localparam int unsigned HPDCACHE_DATA_RAM_X_CUTS = HPDCACHE_ACCESS_WORDS;
|
||||
localparam int unsigned HPDCACHE_DATA_RAM_ACCESS_WIDTH = HPDCACHE_ACCESS_WORDS*HPDCACHE_WORD_WIDTH;
|
||||
localparam int unsigned HPDCACHE_DATA_RAM_ENTR_PER_SET = HPDCACHE_CL_WORDS/HPDCACHE_ACCESS_WORDS;
|
||||
localparam int unsigned HPDCACHE_DATA_RAM_DEPTH = HPDCACHE_SETS*HPDCACHE_DATA_RAM_ENTR_PER_SET;
|
||||
localparam int unsigned HPDCACHE_DATA_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DATA_RAM_DEPTH);
|
||||
|
||||
typedef logic [ HPDCACHE_WORD_WIDTH-1:0] hpdcache_data_word_t;
|
||||
typedef logic [ HPDCACHE_WORD_WIDTH/8-1:0] hpdcache_data_be_t;
|
||||
typedef logic [ $clog2(HPDCACHE_DATA_RAM_Y_CUTS)-1:0] hpdcache_data_ram_row_idx_t;
|
||||
typedef logic [ $clog2(HPDCACHE_DATA_WAYS_PER_RAM_WORD)-1:0] hpdcache_data_ram_way_idx_t;
|
||||
|
||||
typedef logic [HPDCACHE_DATA_RAM_ADDR_WIDTH-1:0] hpdcache_data_ram_addr_t;
|
||||
typedef hpdcache_data_word_t[HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_data_t;
|
||||
typedef hpdcache_data_be_t [HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_be_t;
|
||||
|
||||
typedef hpdcache_data_ram_data_t
|
||||
[HPDCACHE_DATA_RAM_Y_CUTS-1:0]
|
||||
[HPDCACHE_DATA_RAM_X_CUTS-1:0]
|
||||
hpdcache_data_entry_t;
|
||||
|
||||
typedef hpdcache_data_ram_be_t
|
||||
[HPDCACHE_DATA_RAM_Y_CUTS-1:0]
|
||||
[HPDCACHE_DATA_RAM_X_CUTS-1:0]
|
||||
hpdcache_data_be_entry_t;
|
||||
|
||||
typedef logic
|
||||
[HPDCACHE_DATA_RAM_X_CUTS-1:0]
|
||||
hpdcache_data_row_enable_t;
|
||||
|
||||
typedef hpdcache_data_row_enable_t
|
||||
[HPDCACHE_DATA_RAM_Y_CUTS-1:0]
|
||||
hpdcache_data_enable_t;
|
||||
|
||||
typedef hpdcache_data_ram_addr_t
|
||||
[HPDCACHE_DATA_RAM_Y_CUTS-1:0]
|
||||
[HPDCACHE_DATA_RAM_X_CUTS-1:0]
|
||||
hpdcache_data_addr_t;
|
||||
// }}}
|
||||
|
||||
// Definition of interface with miss handler
|
||||
// {{{
|
||||
localparam int unsigned HPDCACHE_REFILL_DATA_WIDTH = HPDCACHE_DATA_RAM_ACCESS_WIDTH;
|
||||
|
||||
// Use feedthrough FIFOs from the refill handler to the core. This
|
||||
// reduces the latency (by one cycle) but adds an additional timing path
|
||||
localparam bit HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH =
|
||||
hpdcache_params_pkg::PARAM_REFILL_CORE_RSP_FEEDTHROUGH;
|
||||
|
||||
typedef hpdcache_data_word_t[HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_data_t;
|
||||
typedef hpdcache_data_be_t [HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_be_t;
|
||||
// }}}
|
||||
|
||||
// Definition of interface with requesters
|
||||
// {{{
|
||||
localparam int unsigned HPDCACHE_REQ_DATA_WIDTH = HPDCACHE_REQ_WORDS*HPDCACHE_WORD_WIDTH;
|
||||
localparam int unsigned HPDCACHE_REQ_DATA_BYTES = HPDCACHE_REQ_DATA_WIDTH/8;
|
||||
localparam int unsigned HPDCACHE_REQ_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_WORDS);
|
||||
localparam int unsigned HPDCACHE_REQ_BYTE_OFFSET_WIDTH = $clog2(HPDCACHE_REQ_DATA_BYTES);
|
||||
localparam int unsigned HPDCACHE_REQ_OFFSET_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_TAG_WIDTH;
|
||||
|
||||
typedef logic [HPDCACHE_PA_WIDTH-1:0] hpdcache_req_addr_t;
|
||||
typedef logic [HPDCACHE_REQ_OFFSET_WIDTH-1:0] hpdcache_req_offset_t;
|
||||
typedef hpdcache_data_word_t [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_data_t;
|
||||
typedef hpdcache_data_be_t [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_be_t;
|
||||
typedef logic [2:0] hpdcache_req_size_t;
|
||||
typedef logic [HPDCACHE_REQ_SRC_ID_WIDTH-1:0] hpdcache_req_sid_t;
|
||||
typedef logic [HPDCACHE_REQ_TRANS_ID_WIDTH-1:0] hpdcache_req_tid_t;
|
||||
|
||||
// Definition of operation codes
|
||||
// {{{
|
||||
typedef enum logic [3:0] {
|
||||
HPDCACHE_REQ_LOAD = 4'h0,
|
||||
HPDCACHE_REQ_STORE = 4'h1,
|
||||
// RESERVED = 4'h2,
|
||||
// RESERVED = 4'h3,
|
||||
HPDCACHE_REQ_AMO_LR = 4'h4,
|
||||
HPDCACHE_REQ_AMO_SC = 4'h5,
|
||||
HPDCACHE_REQ_AMO_SWAP = 4'h6,
|
||||
HPDCACHE_REQ_AMO_ADD = 4'h7,
|
||||
HPDCACHE_REQ_AMO_AND = 4'h8,
|
||||
HPDCACHE_REQ_AMO_OR = 4'h9,
|
||||
HPDCACHE_REQ_AMO_XOR = 4'ha,
|
||||
HPDCACHE_REQ_AMO_MAX = 4'hb,
|
||||
HPDCACHE_REQ_AMO_MAXU = 4'hc,
|
||||
HPDCACHE_REQ_AMO_MIN = 4'hd,
|
||||
HPDCACHE_REQ_AMO_MINU = 4'he,
|
||||
HPDCACHE_REQ_CMO = 4'hf
|
||||
} hpdcache_req_op_t;
|
||||
// }}}
|
||||
|
||||
// Definition of CMO codes
|
||||
// {{{
|
||||
typedef enum hpdcache_req_size_t {
|
||||
HPDCACHE_REQ_CMO_FENCE = 3'h0,
|
||||
// RESERVED = 3'h1,
|
||||
HPDCACHE_REQ_CMO_INVAL_NLINE = 3'h2,
|
||||
HPDCACHE_REQ_CMO_INVAL_SET_WAY = 3'h3,
|
||||
HPDCACHE_REQ_CMO_INVAL_ALL = 3'h4,
|
||||
HPDCACHE_REQ_CMO_PREFETCH = 3'h5
|
||||
} hpdcache_req_cmo_t;
|
||||
// }}}
|
||||
|
||||
// Definition of PMA flags
|
||||
// {{{
|
||||
typedef struct packed
|
||||
{
|
||||
logic uncacheable;
|
||||
logic io; // FIXME: for future use
|
||||
} hpdcache_pma_t;
|
||||
// }}}
|
||||
|
||||
// Definition of interfaces
|
||||
// {{{
|
||||
// Request Interface
|
||||
typedef struct packed
|
||||
{
|
||||
hpdcache_req_offset_t addr_offset;
|
||||
hpdcache_req_data_t wdata;
|
||||
hpdcache_req_op_t op;
|
||||
hpdcache_req_be_t be;
|
||||
hpdcache_req_size_t size;
|
||||
hpdcache_req_sid_t sid;
|
||||
hpdcache_req_tid_t tid;
|
||||
logic need_rsp;
|
||||
|
||||
// only valid in case of physically indexed requests
|
||||
logic phys_indexed;
|
||||
hpdcache_tag_t addr_tag;
|
||||
hpdcache_pma_t pma;
|
||||
} hpdcache_req_t;
|
||||
|
||||
// Response Interface
|
||||
typedef struct packed
|
||||
{
|
||||
hpdcache_req_data_t rdata;
|
||||
hpdcache_req_sid_t sid;
|
||||
hpdcache_req_tid_t tid;
|
||||
logic error;
|
||||
logic aborted;
|
||||
} hpdcache_rsp_t;
|
||||
// }}}
|
||||
|
||||
// Definition of functions
|
||||
// {{{
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_LOAD, 1'b0 otherwise.
function automatic logic is_load(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_LOAD) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_STORE, 1'b0 otherwise.
function automatic logic is_store(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_STORE) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is any of the HPDCACHE_REQ_AMO_*
// codes (LR, SC, SWAP, ADD, AND, OR, XOR, MAX, MAXU, MIN, MINU), 1'b0
// otherwise.
function automatic logic is_amo(input hpdcache_req_op_t op);
    if ((op == HPDCACHE_REQ_AMO_LR)   ||
        (op == HPDCACHE_REQ_AMO_SC)   ||
        (op == HPDCACHE_REQ_AMO_SWAP) ||
        (op == HPDCACHE_REQ_AMO_ADD)  ||
        (op == HPDCACHE_REQ_AMO_AND)  ||
        (op == HPDCACHE_REQ_AMO_OR)   ||
        (op == HPDCACHE_REQ_AMO_XOR)  ||
        (op == HPDCACHE_REQ_AMO_MAX)  ||
        (op == HPDCACHE_REQ_AMO_MAXU) ||
        (op == HPDCACHE_REQ_AMO_MIN)  ||
        (op == HPDCACHE_REQ_AMO_MINU)) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_LR, 1'b0 otherwise.
function automatic logic is_amo_lr(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_LR) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_SC, 1'b0 otherwise.
function automatic logic is_amo_sc(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_SC) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_SWAP, 1'b0 otherwise.
function automatic logic is_amo_swap(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_SWAP) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_ADD, 1'b0 otherwise.
function automatic logic is_amo_add(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_ADD) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_AND, 1'b0 otherwise.
function automatic logic is_amo_and(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_AND) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_OR, 1'b0 otherwise.
function automatic logic is_amo_or(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_OR) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_XOR, 1'b0 otherwise.
function automatic logic is_amo_xor(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_XOR) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_MAX, 1'b0 otherwise.
function automatic logic is_amo_max(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_MAX) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_MAXU, 1'b0 otherwise.
function automatic logic is_amo_maxu(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_MAXU) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_MIN, 1'b0 otherwise.
function automatic logic is_amo_min(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_MIN) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the operation code is HPDCACHE_REQ_AMO_MINU, 1'b0 otherwise.
function automatic logic is_amo_minu(input hpdcache_req_op_t op);
    if (op == HPDCACHE_REQ_AMO_MINU) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
|
||||
// Returns 1'b1 when the request is a CMO (op == HPDCACHE_REQ_CMO) whose
// size field encodes one of the invalidation commands (by nline, by
// set/way, or all). Returns 1'b0 in every other case.
function automatic logic is_cmo_inval(
        input hpdcache_req_op_t   op,
        input hpdcache_req_size_t sz);
    if (op == HPDCACHE_REQ_CMO) begin
        // For CMO requests, the size field carries the CMO command
        if ((sz == HPDCACHE_REQ_CMO_INVAL_NLINE)   ||
            (sz == HPDCACHE_REQ_CMO_INVAL_SET_WAY) ||
            (sz == HPDCACHE_REQ_CMO_INVAL_ALL)) begin
            return 1'b1;
        end
    end
    return 1'b0;
endfunction
|
||||
|
||||
// Result of comparing the size field against HPDCACHE_REQ_CMO_INVAL_NLINE.
// The operation code is not checked here.
function automatic logic is_cmo_inval_by_nline(input hpdcache_req_size_t sz);
    is_cmo_inval_by_nline = (sz == HPDCACHE_REQ_CMO_INVAL_NLINE);
endfunction
|
||||
|
||||
// Result of comparing the size field against HPDCACHE_REQ_CMO_INVAL_SET_WAY.
// The operation code is not checked here.
function automatic logic is_cmo_inval_by_set(input hpdcache_req_size_t sz);
    is_cmo_inval_by_set = (sz == HPDCACHE_REQ_CMO_INVAL_SET_WAY);
endfunction
|
||||
|
||||
// Result of comparing the size field against HPDCACHE_REQ_CMO_INVAL_ALL.
// The operation code is not checked here.
function automatic logic is_cmo_inval_all(input hpdcache_req_size_t sz);
    is_cmo_inval_all = (sz == HPDCACHE_REQ_CMO_INVAL_ALL);
endfunction
|
||||
|
||||
// For a CMO request (op == HPDCACHE_REQ_CMO), returns the result of
// comparing the size field against HPDCACHE_REQ_CMO_FENCE. Returns 1'b0 for
// any other operation code.
function automatic logic is_cmo_fence(
        input hpdcache_req_op_t   op,
        input hpdcache_req_size_t sz);
    if (op == HPDCACHE_REQ_CMO) begin
        return (sz == HPDCACHE_REQ_CMO_FENCE);
    end
    return 1'b0;
endfunction
|
||||
|
||||
// For a CMO request (op == HPDCACHE_REQ_CMO), returns the result of
// comparing the size field against HPDCACHE_REQ_CMO_PREFETCH. Returns 1'b0
// for any other operation code.
function automatic logic is_cmo_prefetch(
        input hpdcache_req_op_t   op,
        input hpdcache_req_size_t sz);
    if (op == HPDCACHE_REQ_CMO) begin
        return (sz == HPDCACHE_REQ_CMO_PREFETCH);
    end
    return 1'b0;
endfunction
|
||||
|
||||
// Extracts the tag field of a request address: the HPDCACHE_TAG_WIDTH bits
// located just above the cache-line offset and set-index fields.
function automatic hpdcache_tag_t hpdcache_get_req_addr_tag(input hpdcache_req_addr_t addr);
    hpdcache_get_req_addr_tag =
            addr[(HPDCACHE_OFFSET_WIDTH + HPDCACHE_SET_WIDTH) +: HPDCACHE_TAG_WIDTH];
endfunction
|
||||
|
||||
// Extracts the set-index field of a request address: the HPDCACHE_SET_WIDTH
// bits located just above the cache-line offset field.
function automatic hpdcache_set_t hpdcache_get_req_addr_set(input hpdcache_req_addr_t addr);
    hpdcache_get_req_addr_set = addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH];
endfunction
|
||||
|
||||
// Extracts the word index within the cache-line from a request address:
// HPDCACHE_WORD_IDX_WIDTH bits starting right above the byte-in-word bits.
function automatic hpdcache_word_t hpdcache_get_req_addr_word(input hpdcache_req_addr_t addr);
    hpdcache_get_req_addr_word =
            addr[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH];
endfunction
|
||||
|
||||
// Extracts the byte offset within the cache-line from a request address:
// the HPDCACHE_OFFSET_WIDTH least significant bits.
function automatic hpdcache_offset_t hpdcache_get_req_addr_offset(input hpdcache_req_addr_t addr);
    hpdcache_get_req_addr_offset = addr[0 +: HPDCACHE_OFFSET_WIDTH];
endfunction
|
||||
|
||||
// Extracts the cache-line number (nline) from a request address: the
// HPDCACHE_NLINE_WIDTH bits located above the cache-line offset field.
function automatic hpdcache_nline_t hpdcache_get_req_addr_nline(input hpdcache_req_addr_t addr);
    hpdcache_get_req_addr_nline = addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH];
endfunction
|
||||
|
||||
// Extracts the set-index field from a request offset: the
// HPDCACHE_SET_WIDTH bits located just above the cache-line offset field.
function automatic hpdcache_set_t hpdcache_get_req_offset_set(input hpdcache_req_offset_t offset);
    hpdcache_get_req_offset_set = offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH];
endfunction
|
||||
|
||||
// Extracts the word index within the cache-line from a request offset:
// HPDCACHE_WORD_IDX_WIDTH bits starting right above the byte-in-word bits.
function automatic hpdcache_word_t hpdcache_get_req_offset_word(input hpdcache_req_offset_t offset);
    hpdcache_get_req_offset_word =
            offset[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH];
endfunction
|
||||
|
||||
// }}}
|
||||
// }}}
|
||||
|
||||
// Definition of constants and types for the Miss Status Holding Register (MSHR)
|
||||
// {{{
|
||||
|
||||
// HPDcache MSHR number of sets
|
||||
localparam int unsigned HPDCACHE_MSHR_SETS =
|
||||
hpdcache_params_pkg::PARAM_MSHR_SETS;
|
||||
|
||||
// HPDcache MSHR number of ways
|
||||
localparam int unsigned HPDCACHE_MSHR_WAYS =
|
||||
hpdcache_params_pkg::PARAM_MSHR_WAYS;
|
||||
|
||||
// HPDcache MSHR number of ways in the same SRAM word
|
||||
localparam int unsigned HPDCACHE_MSHR_WAYS_PER_RAM_WORD =
|
||||
hpdcache_params_pkg::PARAM_MSHR_WAYS_PER_RAM_WORD; /* FIXME this parameter is currently ignored */
|
||||
|
||||
// HPDcache MSHR number of sets in the same SRAM
|
||||
localparam int unsigned HPDCACHE_MSHR_SETS_PER_RAM =
|
||||
hpdcache_params_pkg::PARAM_MSHR_SETS_PER_RAM; /* FIXME this parameter is currently ignored */
|
||||
|
||||
// HPDcache MSHR implements write byte enable
|
||||
localparam bit HPDCACHE_MSHR_RAM_WBYTEENABLE =
|
||||
hpdcache_params_pkg::PARAM_MSHR_RAM_WBYTEENABLE;
|
||||
localparam bit HPDCACHE_MSHR_USE_REGBANK =
|
||||
hpdcache_params_pkg::PARAM_MSHR_USE_REGBANK;
|
||||
|
||||
localparam int unsigned HPDCACHE_MSHR_SET_WIDTH = $clog2(HPDCACHE_MSHR_SETS);
|
||||
localparam int unsigned HPDCACHE_MSHR_WAY_WIDTH = $clog2(HPDCACHE_MSHR_WAYS);
|
||||
localparam int unsigned HPDCACHE_MSHR_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_MSHR_SET_WIDTH;
|
||||
|
||||
typedef logic unsigned [HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_set_t;
|
||||
typedef logic unsigned [HPDCACHE_MSHR_TAG_WIDTH-1:0] mshr_tag_t;
|
||||
typedef logic unsigned [HPDCACHE_MSHR_WAY_WIDTH-1:0] mshr_way_t;
|
||||
// }}}
|
||||
|
||||
// Definition of interface with memory
|
||||
// {{{
|
||||
typedef logic [7:0] hpdcache_mem_len_t;
|
||||
typedef logic [2:0] hpdcache_mem_size_t;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
HPDCACHE_MEM_RESP_OK = 2'b00,
|
||||
HPDCACHE_MEM_RESP_NOK = 2'b01
|
||||
} hpdcache_mem_error_e;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
HPDCACHE_MEM_READ = 2'b00,
|
||||
HPDCACHE_MEM_WRITE = 2'b01,
|
||||
HPDCACHE_MEM_ATOMIC = 2'b10
|
||||
// Reserved = 2'b11 - TODO: CMO ?
|
||||
} hpdcache_mem_command_e;
|
||||
|
||||
typedef enum logic [3:0] {
|
||||
HPDCACHE_MEM_ATOMIC_ADD = 4'b0000,
|
||||
HPDCACHE_MEM_ATOMIC_CLR = 4'b0001,
|
||||
HPDCACHE_MEM_ATOMIC_SET = 4'b0010,
|
||||
HPDCACHE_MEM_ATOMIC_EOR = 4'b0011,
|
||||
HPDCACHE_MEM_ATOMIC_SMAX = 4'b0100,
|
||||
HPDCACHE_MEM_ATOMIC_SMIN = 4'b0101,
|
||||
HPDCACHE_MEM_ATOMIC_UMAX = 4'b0110,
|
||||
HPDCACHE_MEM_ATOMIC_UMIN = 4'b0111,
|
||||
HPDCACHE_MEM_ATOMIC_SWAP = 4'b1000,
|
||||
// Reserved = 4'b1001,
|
||||
// Reserved = 4'b1010,
|
||||
// Reserved = 4'b1011,
|
||||
HPDCACHE_MEM_ATOMIC_LDEX = 4'b1100,
|
||||
HPDCACHE_MEM_ATOMIC_STEX = 4'b1101
|
||||
// Reserved = 4'b1110,
|
||||
// Reserved = 4'b1111
|
||||
} hpdcache_mem_atomic_e;
|
||||
|
||||
// Returns the size code for a memory transfer of `bytes` bytes.
//
// The code is the base-2 logarithm of the transfer size, rounded up to the
// next power of two:
//   0..1 bytes -> 0,   2 -> 1,   3..4 -> 2,    5..8 -> 3,
//   9..16 -> 4,  17..32 -> 5,  33..64 -> 6,  65..128 -> 7
//
// A single-byte transfer maps to code 0 (2^0 = 1 byte). The previous
// `bytes == 0` test let 1 fall through to code 1, which describes a 2-byte
// transfer.
//
// Sizes above 128 bytes are not supported: in simulation an error is
// reported and the function falls through without an explicit return value.
function automatic hpdcache_mem_size_t get_hpdcache_mem_size(int unsigned bytes);
    if      (bytes <=   1) return 0;
    else if (bytes <=   2) return 1;
    else if (bytes <=   4) return 2;
    else if (bytes <=   8) return 3;
    else if (bytes <=  16) return 4;
    else if (bytes <=  32) return 5;
    else if (bytes <=  64) return 6;
    else if (bytes <= 128) return 7;
    // pragma translate_off
    else $error("hpdcache: unsupported number of bytes");
    // pragma translate_on
endfunction
|
||||
// }}}
|
||||
|
||||
// Definition of constants and types for the Write Buffer (WBUF)
|
||||
// {{{
|
||||
localparam int unsigned HPDCACHE_WBUF_DIR_ENTRIES =
|
||||
hpdcache_params_pkg::PARAM_WBUF_DIR_ENTRIES;
|
||||
|
||||
localparam int unsigned HPDCACHE_WBUF_DATA_ENTRIES =
|
||||
hpdcache_params_pkg::PARAM_WBUF_DATA_ENTRIES;
|
||||
|
||||
localparam int unsigned HPDCACHE_WBUF_WORDS =
|
||||
hpdcache_params_pkg::PARAM_WBUF_WORDS;
|
||||
|
||||
localparam int unsigned HPDCACHE_WBUF_TIMECNT_WIDTH =
|
||||
hpdcache_params_pkg::PARAM_WBUF_TIMECNT_WIDTH;
|
||||
|
||||
// Use feedthrough FIFOs from the write-buffer to the NoC. This reduces
|
||||
// the latency (by one cycle) but adds an additional timing path
|
||||
localparam bit HPDCACHE_WBUF_SEND_FEEDTHROUGH =
|
||||
hpdcache_params_pkg::PARAM_WBUF_SEND_FEEDTHROUGH;
|
||||
|
||||
localparam int unsigned HPDCACHE_WBUF_DATA_WIDTH = HPDCACHE_REQ_DATA_WIDTH*
|
||||
HPDCACHE_WBUF_WORDS;
|
||||
localparam int unsigned HPDCACHE_WBUF_DATA_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DATA_ENTRIES);
|
||||
localparam int unsigned HPDCACHE_WBUF_DIR_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DIR_ENTRIES);
|
||||
|
||||
typedef hpdcache_req_addr_t wbuf_addr_t;
|
||||
typedef hpdcache_nline_t wbuf_match_t;
|
||||
typedef hpdcache_req_data_t wbuf_data_t;
|
||||
typedef hpdcache_req_be_t wbuf_be_t;
|
||||
typedef wbuf_data_t[HPDCACHE_WBUF_WORDS-1:0] wbuf_data_buf_t;
|
||||
typedef wbuf_be_t [HPDCACHE_WBUF_WORDS-1:0] wbuf_be_buf_t;
|
||||
typedef logic unsigned [ HPDCACHE_WBUF_TIMECNT_WIDTH-1:0] wbuf_timecnt_t;
|
||||
typedef logic unsigned [ HPDCACHE_WBUF_DIR_PTR_WIDTH-1:0] wbuf_dir_ptr_t;
|
||||
typedef logic unsigned [HPDCACHE_WBUF_DATA_PTR_WIDTH-1:0] wbuf_data_ptr_t;
|
||||
// }}}
|
||||
|
||||
// Definition of constants and types for the Replay Table (RTAB)
|
||||
// {{{
|
||||
localparam int HPDCACHE_RTAB_ENTRIES = hpdcache_params_pkg::PARAM_RTAB_ENTRIES;
|
||||
|
||||
typedef logic [$clog2(HPDCACHE_RTAB_ENTRIES)-1:0] rtab_ptr_t;
|
||||
// }}}
|
||||
|
||||
// Definition of constants and types for the uncacheable request handler (UC)
|
||||
// {{{
|
||||
typedef struct packed {
|
||||
logic is_ld;
|
||||
logic is_st;
|
||||
logic is_amo_lr;
|
||||
logic is_amo_sc;
|
||||
logic is_amo_swap;
|
||||
logic is_amo_add;
|
||||
logic is_amo_and;
|
||||
logic is_amo_or;
|
||||
logic is_amo_xor;
|
||||
logic is_amo_max;
|
||||
logic is_amo_maxu;
|
||||
logic is_amo_min;
|
||||
logic is_amo_minu;
|
||||
} hpdcache_uc_op_t;
|
||||
// }}}
|
||||
|
||||
// Definition of constants and types for the CMO request handler (CMOH)
|
||||
// {{{
|
||||
typedef struct packed {
|
||||
logic is_inval_by_nline;
|
||||
logic is_inval_by_set;
|
||||
logic is_inval_all;
|
||||
logic is_fence;
|
||||
} hpdcache_cmoh_op_t;
|
||||
// }}}
|
||||
endpackage
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : May, 2021
|
||||
* Description : HPDcache Pseudo-LRU replacement policy
|
||||
* History :
|
||||
*/
|
||||
// Pseudo-LRU replacement policy.
//
// One PLRU bit is kept per way and per set. A bit is set when the way is
// accessed (update interface) or selected as a victim (replacement
// interface). When setting a bit would make all bits of the set equal to
// one, every bit of that set is cleared except the one of the way being
// accessed.
module hpdcache_plru
    //  Parameters
    //  {{{
#(
    parameter int unsigned SETS = 0,
    parameter int unsigned WAYS = 0,

    localparam type set_t = logic [$clog2(SETS)-1:0],
    localparam type way_vector_t = logic [WAYS-1:0]
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic         clk_i,
    input  logic         rst_ni,

    //  PLRU update interface
    input  logic         updt_i,
    input  set_t         updt_set_i,
    input  way_vector_t  updt_way_i,

    //  Victim replacement interface
    input  logic         repl_i,
    input  set_t         repl_set_i,
    input  way_vector_t  repl_dir_valid_i,
    input  logic         repl_updt_plru_i,

    output way_vector_t  victim_way_o
);
    //  }}}

    //  Internal signals and registers
    //  {{{
    way_vector_t [SETS-1:0] plru_q, plru_d;
    way_vector_t            updt_plru;
    way_vector_t            repl_plru;
    way_vector_t            used_victim_way, unused_victim_way;
    //  }}}

    //  Victim way selection
    //  {{{
    //  Candidate among the ways whose PLRU bit is cleared in the target set
    hpdcache_prio_1hot_encoder #(.N(WAYS))
        used_victim_select_i (
            .val_i     (~plru_q[repl_set_i]),
            .val_o     (used_victim_way)
        );

    //  Candidate among the ways flagged as not valid in the directory
    hpdcache_prio_1hot_encoder #(.N(WAYS))
        unused_victim_select_i (
            .val_i     (~repl_dir_valid_i),
            .val_o     (unused_victim_way)
        );

    //  A free directory entry (valid == 0), if any, is preferred as victim
    assign victim_way_o = (|unused_victim_way) ? unused_victim_way : used_victim_way;
    //  }}}

    //  Pseudo-LRU update process
    //  {{{
    //  PLRU bits of the targeted set with the accessed/victim way marked
    assign updt_plru = plru_q[updt_set_i] | updt_way_i;
    assign repl_plru = plru_q[repl_set_i] | victim_way_o;

    always_comb
    begin : plru_update_comb
        plru_d = plru_q;

        //  A replacement takes priority over a simple update
        if (repl_i) begin
            //  When replacing a cache-line, set the PLRU bit of the new line
            //  (only if the requester asks for the PLRU state to be updated)
            if (repl_updt_plru_i) begin
                //  If all PLRU bits of the set would end up set, clear them
                //  all except the one of the victim way
                if (&repl_plru) begin
                    plru_d[repl_set_i] = victim_way_o;
                end else begin
                    plru_d[repl_set_i] = repl_plru;
                end
            end
        end else if (updt_i) begin
            //  When accessing a cache-line, set the corresponding PLRU bit.
            //  If all PLRU bits of the set would end up set, clear them all
            //  except the one of the accessed way
            if (&updt_plru) begin
                plru_d[updt_set_i] = updt_way_i;
            end else begin
                plru_d[updt_set_i] = updt_plru;
            end
        end
    end
    //  }}}

    //  Set state process
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : lru_ff
        if (!rst_ni) begin
            plru_q <= '0;
        end else if (updt_i || repl_i) begin
            plru_q <= plru_d;
        end
    end
    //  }}}

endmodule
|
||||
|
|
@ -0,0 +1,666 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : September, 2021
|
||||
* Description : HPDcache Replay Table
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_rtab
|
||||
import hpdcache_pkg::*;
|
||||
// Parameters
|
||||
// {{{
|
||||
#(
|
||||
parameter type rtab_entry_t = logic
|
||||
)
|
||||
// }}}
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
// Clock and reset signals
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// Global control signals
|
||||
output logic empty_o, // RTAB is empty
|
||||
output logic full_o, // RTAB is full
|
||||
|
||||
// Check RTAB signals
|
||||
// This interface allows checking whether there is an address-overlapping
|
||||
// request in the RTAB with respect to the given nline.
|
||||
input logic check_i, // Check for hit (nline) in the RTAB
|
||||
input hpdcache_nline_t check_nline_i,
|
||||
output logic check_hit_o,
|
||||
|
||||
// Allocate signals
|
||||
// This interface allows allocating a new request in a new linked list
|
||||
input logic alloc_i,
|
||||
input logic alloc_and_link_i,
|
||||
input rtab_entry_t alloc_req_i,
|
||||
input logic alloc_mshr_hit_i,
|
||||
input logic alloc_mshr_full_i,
|
||||
input logic alloc_mshr_ready_i,
|
||||
input logic alloc_wbuf_hit_i,
|
||||
input logic alloc_wbuf_not_ready_i,
|
||||
|
||||
// Pop signals
|
||||
// This interface allows reading (and removing) a request from the RTAB
|
||||
output logic pop_try_valid_o, // Request ready to be replayed
|
||||
input logic pop_try_i,
|
||||
output rtab_entry_t pop_try_req_o,
|
||||
output rtab_ptr_t pop_try_ptr_o,
|
||||
|
||||
// Pop Commit signals
|
||||
// This interface allows actually removing a popped request
|
||||
input logic pop_commit_i,
|
||||
input rtab_ptr_t pop_commit_ptr_i,
|
||||
|
||||
// Pop Rollback signals
|
||||
// This interface allows putting back a popped request
|
||||
input logic pop_rback_i,
|
||||
input rtab_ptr_t pop_rback_ptr_i,
|
||||
input logic pop_rback_mshr_hit_i,
|
||||
input logic pop_rback_mshr_full_i,
|
||||
input logic pop_rback_mshr_ready_i,
|
||||
input logic pop_rback_wbuf_hit_i,
|
||||
input logic pop_rback_wbuf_not_ready_i,
|
||||
|
||||
|
||||
// Control signals from/to WBUF
|
||||
output hpdcache_req_addr_t wbuf_addr_o, // address to check against ongoing writes
|
||||
output logic wbuf_is_read_o, // monitored request is read
|
||||
input logic wbuf_hit_open_i, // Hit on open entry in the write buf
|
||||
input logic wbuf_hit_pend_i, // Hit on pend entry in the write buf
|
||||
input logic wbuf_hit_sent_i, // Hit on sent entry in the write buf
|
||||
input logic wbuf_not_ready_i, // Write buffer cannot accept the write
|
||||
|
||||
// Control signals from the Miss Handler
|
||||
input logic miss_ready_i, // Miss Handler is ready
|
||||
|
||||
// Control signals from the Refill Handler
|
||||
input logic refill_i, // Active refill
|
||||
input hpdcache_nline_t refill_nline_i, // Cache-line index being refilled
|
||||
|
||||
// Configuration parameters
|
||||
input logic cfg_single_entry_i // Enable only one entry of the table
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Definition of constants, types and functions
|
||||
// {{{
|
||||
localparam int N = HPDCACHE_RTAB_ENTRIES;
|
||||
|
||||
// Converts a bit-vector into an RTAB pointer.
// The result is the index of the lowest-numbered bit that is set in `bv`;
// when no bit is set, the result is 0.
function automatic rtab_ptr_t rtab_bv_to_index(
        input logic [N-1:0] bv);
    rtab_ptr_t index;

    index = 0;
    // Scan from the top entry down so the lowest set bit is recorded last
    for (int k = N - 1; k >= 0; k--) begin
        if (bv[k]) begin
            index = rtab_ptr_t'(k);
        end
    end
    return index;
endfunction
|
||||
|
||||
// Converts an RTAB pointer into an N-bit vector in which bit k holds the
// result of comparing k against `index`.
function automatic logic [N-1:0] rtab_index_to_bv(
        input rtab_ptr_t index);
    logic [N-1:0] mask;

    for (int k = N - 1; k >= 0; k--) begin
        mask[k] = (index == rtab_ptr_t'(k));
    end
    return mask;
endfunction
|
||||
|
||||
// Compares the HPDCACHE_MSHR_SET_WIDTH least significant bits of two
// cache-line numbers, i.e. tells whether both map to the same MSHR set.
function automatic bit rtab_mshr_set_equal(
        input hpdcache_nline_t x,
        input hpdcache_nline_t y);
    rtab_mshr_set_equal =
            (x[0 +: HPDCACHE_MSHR_SET_WIDTH] == y[0 +: HPDCACHE_MSHR_SET_WIDTH]);
endfunction
|
||||
|
||||
// Looks up entry `x` of the `next` pointer array and returns that pointer
// converted to a bit-vector by rtab_index_to_bv.
function automatic logic [N-1:0] rtab_next(rtab_ptr_t [N-1:0] next, rtab_ptr_t x);
    rtab_ptr_t successor;

    successor = next[x];
    return rtab_index_to_bv(successor);
endfunction
|
||||
|
||||
typedef enum {
|
||||
POP_TRY_HEAD,
|
||||
POP_TRY_NEXT,
|
||||
POP_TRY_NEXT_WAIT
|
||||
} rtab_pop_try_state_e;
|
||||
// }}}
|
||||
|
||||
// Internal signals and registers
|
||||
// {{{
|
||||
rtab_entry_t [N-1:0] req_q;
|
||||
rtab_ptr_t [N-1:0] next_q;
|
||||
|
||||
rtab_pop_try_state_e pop_try_state_q, pop_try_state_d;
|
||||
logic [N-1:0] pop_try_next_q, pop_try_next_d;
|
||||
|
||||
logic [N-1:0] valid_q;
|
||||
logic [N-1:0] valid_set, valid_rst;
|
||||
logic [N-1:0] alloc_valid_set;
|
||||
logic [N-1:0] pop_commit_valid_rst;
|
||||
|
||||
// Bits indicating if the corresponding entry is the head of a linked list
|
||||
logic [N-1:0] head_q;
|
||||
logic [N-1:0] head_set, head_rst;
|
||||
logic [N-1:0] alloc_head_set, alloc_head_rst;
|
||||
logic [N-1:0] pop_try_head_rst;
|
||||
logic [N-1:0] pop_commit_head_set;
|
||||
logic [N-1:0] pop_rback_head_set;
|
||||
|
||||
// Bits indicating if the corresponding entry is the tail of a linked list
|
||||
logic [N-1:0] tail_q;
|
||||
logic [N-1:0] tail_set, tail_rst;
|
||||
logic [N-1:0] alloc_tail_set, alloc_tail_rst;
|
||||
|
||||
// There is a pending miss on the target nline
|
||||
logic [N-1:0] deps_mshr_hit_q;
|
||||
logic [N-1:0] deps_mshr_hit_set, deps_mshr_hit_rst;
|
||||
logic [N-1:0] alloc_deps_mshr_hit_set;
|
||||
logic [N-1:0] pop_rback_deps_mshr_hit_set;
|
||||
|
||||
// The MSHR has no available slot for the new miss
|
||||
logic [N-1:0] deps_mshr_full_q;
|
||||
logic [N-1:0] deps_mshr_full_set, deps_mshr_full_rst;
|
||||
logic [N-1:0] alloc_deps_mshr_full_set;
|
||||
logic [N-1:0] pop_rback_deps_mshr_full_set;
|
||||
|
||||
// The MSHR is not ready to send a new miss request
|
||||
logic [N-1:0] deps_mshr_ready_q;
|
||||
logic [N-1:0] deps_mshr_ready_set, deps_mshr_ready_rst;
|
||||
logic [N-1:0] alloc_deps_mshr_ready_set;
|
||||
logic [N-1:0] pop_rback_deps_mshr_ready_set;
|
||||
|
||||
// Hit on a non-empty entry of the write buffer
|
||||
logic [N-1:0] deps_wbuf_hit_q;
|
||||
logic [N-1:0] deps_wbuf_hit_set, deps_wbuf_hit_rst;
|
||||
logic [N-1:0] alloc_deps_wbuf_hit_set;
|
||||
logic [N-1:0] pop_rback_deps_wbuf_hit_set;
|
||||
|
||||
// Hit on a pend entry of the write buffer
|
||||
logic [N-1:0] deps_wbuf_not_ready_q;
|
||||
logic [N-1:0] deps_wbuf_not_ready_set, deps_wbuf_not_ready_rst;
|
||||
logic [N-1:0] alloc_deps_wbuf_not_ready_set;
|
||||
logic [N-1:0] pop_rback_deps_wbuf_not_ready_set;
|
||||
|
||||
logic [N-1:0] nodeps;
|
||||
hpdcache_nline_t [N-1:0] nline;
|
||||
hpdcache_req_addr_t [N-1:0] addr;
|
||||
logic [N-1:0] is_read;
|
||||
logic [N-1:0] check_hit;
|
||||
logic [N-1:0] match_check_nline;
|
||||
logic [N-1:0] match_check_tail;
|
||||
logic [N-1:0] match_refill_nline;
|
||||
logic [N-1:0] match_refill_mshr_set;
|
||||
|
||||
logic [N-1:0] free;
|
||||
logic [N-1:0] free_alloc;
|
||||
logic alloc;
|
||||
|
||||
logic [N-1:0] pop_match_next;
|
||||
logic [N-1:0] pop_rback_ptr_bv;
|
||||
logic [N-1:0] pop_try_bv;
|
||||
logic [N-1:0] ready;
|
||||
|
||||
genvar gen_i;
|
||||
// }}}
|
||||
|
||||
// Compute global control signals
|
||||
// {{{
|
||||
// compute if entries are ready to be replayed
|
||||
// An entry is free of dependencies when none of its dependency flags is set
assign nodeps = ~deps_mshr_hit_q
              & ~deps_mshr_full_q
              & ~deps_mshr_ready_q
              & ~deps_wbuf_hit_q
              & ~deps_wbuf_not_ready_q;

// An entry can be replayed when it is valid, is the head of its list and has
// no remaining dependency
assign ready = nodeps & head_q & valid_q;

// Entries that are not valid are available for allocation
assign free = ~valid_q;
|
||||
|
||||
// compute the free vector (one-hot signal)
|
||||
// Derive the allocation vector (free_alloc) from the vector of free entries
hpdcache_prio_1hot_encoder #(.N (N)) free_encoder_i (
    .val_o (free_alloc),
    .val_i (free)
);
|
||||
|
||||
// full and empty signals
|
||||
// Empty: no entry is valid
assign empty_o = ~(|valid_q);

// Full: every entry is valid, or at least one entry is valid while the
// single-entry configuration is enabled
assign full_o = (cfg_single_entry_i & (|valid_q)) | (&valid_q);
|
||||
// }}}
|
||||
|
||||
// Check interface
|
||||
// {{{
|
||||
generate
    for (gen_i = 0; gen_i < N; gen_i++) begin : check_gen
        // Rebuild the request address of the entry from its stored tag and offset
        assign addr[gen_i] = {req_q[gen_i].addr_tag, req_q[gen_i].addr_offset};

        // Cache-line index targeted by the entry
        assign nline[gen_i] = hpdcache_get_req_addr_nline(addr[gen_i]);

        // The entry targets the cache line currently being checked
        assign match_check_nline[gen_i] = (nline[gen_i] == check_nline_i);

        // The entry holds a load or a CMO prefetch
        assign is_read[gen_i] =
                is_load(req_q[gen_i].op) |
                is_cmo_prefetch(req_q[gen_i].op, req_q[gen_i].size);
    end
endgenerate
|
||||
|
||||
// Valid entries targeting the checked cache line
assign check_hit = match_check_nline & valid_q;

// At least one valid entry targets the checked cache line
assign check_hit_o = |check_hit;

// Among the matching entries, the one(s) flagged as tail of a list
assign match_check_tail = tail_q & check_hit;
|
||||
// }}}
|
||||
|
||||
// Allocation process
|
||||
// {{{
|
||||
// Any kind of allocation request (plain allocation or allocation-and-link)
assign alloc = alloc_and_link_i | alloc_i;

// The entry designated by free_alloc becomes valid on any allocation
assign alloc_valid_set = {N{alloc}} & free_alloc;

// Flags set by an allocation:
//  - head: only on a plain allocation (alloc_i)
//  - tail: on every allocation
assign alloc_head_set = {N{alloc_i}} & free_alloc;
assign alloc_tail_set = alloc_valid_set;

// Flags cleared by an allocation-and-link:
//  - head: for the entry designated by free_alloc
//  - tail: for the tail entry matching the checked cache line
assign alloc_head_rst = {N{alloc_and_link_i}} & free_alloc;
assign alloc_tail_rst = {N{alloc_and_link_i}} & match_check_tail;

// Dependency flags recorded for the allocated entry
assign alloc_deps_mshr_hit_set       = {N{alloc_mshr_hit_i}}       & alloc_valid_set;
assign alloc_deps_mshr_full_set      = {N{alloc_mshr_full_i}}      & alloc_valid_set;
assign alloc_deps_mshr_ready_set     = {N{alloc_mshr_ready_i}}     & alloc_valid_set;
assign alloc_deps_wbuf_hit_set       = {N{alloc_wbuf_hit_i}}       & alloc_valid_set;
assign alloc_deps_wbuf_not_ready_set = {N{alloc_wbuf_not_ready_i}} & alloc_valid_set;
|
||||
// }}}
|
||||
|
||||
// Update replay table dependencies
|
||||
// {{{
|
||||
// Update write buffer hit dependencies
|
||||
// {{{
|
||||
// Build a bit-vector with HEAD requests waiting for a conflict in the wbuf
|
||||
logic [N-1:0] wbuf_rd_pending;
logic [N-1:0] wbuf_wr_pending;
logic [N-1:0] wbuf_rd_gnt;
logic [N-1:0] wbuf_wr_gnt;
logic [1:0]   wbuf_pending;
logic [1:0]   wbuf_gnt;
logic         wbuf_ready;
logic [N-1:0] wbuf_sel;

// Valid head entries still flagged with a write-buffer hit dependency
assign wbuf_rd_pending = deps_wbuf_hit_q & head_q & valid_q;

// Valid head entries still flagged with a write-buffer not-ready dependency
assign wbuf_wr_pending = deps_wbuf_not_ready_q & head_q & valid_q;
|
||||
|
||||
// Choose in a round-robin manner a ready transaction waiting for a conflict in the wbuf
|
||||
// Arbitrate among the entries of wbuf_rd_pending. The arbiter is told that
// its grant was consumed when wbuf_ready is high and the read side
// (wbuf_gnt[0]) is the one selected.
hpdcache_rrarb #(.N (N)) wbuf_rd_pending_arb_i (
    .clk_i   (clk_i),
    .rst_ni  (rst_ni),
    .req_i   (wbuf_rd_pending),
    .gnt_o   (wbuf_rd_gnt),
    .ready_i (wbuf_ready & wbuf_gnt[0])
);
|
||||
|
||||
// Arbitrate among the entries of wbuf_wr_pending. The arbiter is told that
// its grant was consumed when wbuf_ready is high and the write side
// (wbuf_gnt[1]) is the one selected.
hpdcache_rrarb #(.N (N)) wbuf_wr_pending_arb_i (
    .clk_i   (clk_i),
    .rst_ni  (rst_ni),
    .req_i   (wbuf_wr_pending),
    .gnt_o   (wbuf_wr_gnt),
    .ready_i (wbuf_ready & wbuf_gnt[1])
);
|
||||
|
||||
// Bit 0: the read-side arbiter has a grant; bit 1: the write-side arbiter has a grant
assign wbuf_pending[0] = |wbuf_rd_gnt;
assign wbuf_pending[1] = |wbuf_wr_gnt;

// One of the granted entries is being popped in this cycle
assign wbuf_ready = |((wbuf_wr_gnt | wbuf_rd_gnt) & pop_try_bv);
|
||||
|
||||
// Choose between the read-side (bit 0) and write-side (bit 1) candidates
hpdcache_fxarb #(.N (2)) wbuf_pending_arb_i (
    .clk_i   (clk_i),
    .rst_ni  (rst_ni),
    .req_i   (wbuf_pending),
    .gnt_o   (wbuf_gnt),
    .ready_i (wbuf_ready)
);
|
||||
|
||||
// Entry presented to the write buffer: the read-side grant when wbuf_gnt[0]
// is set, else the write-side grant when wbuf_gnt[1] is set, else no entry.
assign wbuf_sel = wbuf_gnt[0] ? wbuf_rd_gnt :
|
||||
wbuf_gnt[1] ? wbuf_wr_gnt : '0;
|
||||
|
||||
// Forward the address of the entry selected by wbuf_sel
hpdcache_mux #(
    .ONE_HOT_SEL (1'b1),
    .DATA_WIDTH  ($bits(hpdcache_req_addr_t)),
    .NINPUT      (N)
) wbuf_pending_addr_mux_i (
    .sel_i  (wbuf_sel),
    .data_i (addr),
    .data_o (wbuf_addr_o)
);
|
||||
|
||||
// Forward the is_read flag of the entry selected by wbuf_sel
hpdcache_mux #(
    .ONE_HOT_SEL (1'b1),
    .DATA_WIDTH  (1),
    .NINPUT      (N)
) wbuf_pending_is_read_mux_i (
    .sel_i  (wbuf_sel),
    .data_i (is_read),
    .data_o (wbuf_is_read_o)
);
|
||||
|
||||
// reset write buffer dependency bits with the output from the write buffer
|
||||
// Clear the write-buffer hit dependency of the selected entry when the write
// buffer reports no hit of any kind (open, pend or sent)
assign deps_wbuf_hit_rst =
        wbuf_sel & {N{~wbuf_hit_open_i & ~wbuf_hit_pend_i & ~wbuf_hit_sent_i}};

// Clear the write-buffer not-ready dependency of the selected entry when the
// write buffer no longer reports not-ready
assign deps_wbuf_not_ready_rst =
        wbuf_sel & {N{~wbuf_not_ready_i}};
|
||||
// }}}
|
||||
|
||||
// Update miss handler dependency
|
||||
// {{{
|
||||
assign deps_mshr_ready_rst = {N{miss_ready_i}};
|
||||
// }}}
|
||||
|
||||
// Update refill dependencies
|
||||
// {{{
|
||||
generate
    for (gen_i = 0; gen_i < N; gen_i++) begin : match_refill_gen
        // The refilled line and the entry's line share the same low
        // HPDCACHE_MSHR_SET_WIDTH bits
        assign match_refill_mshr_set[gen_i] =
                rtab_mshr_set_equal(refill_nline_i, nline[gen_i]);

        // The refilled line is exactly the entry's line
        assign match_refill_nline[gen_i] =
                (nline[gen_i] == refill_nline_i);
    end
endgenerate

// On an active refill, clear the MSHR-full dependency of the entries matching
// the refilled MSHR set, and the MSHR-hit dependency of the entries matching
// the refilled cache line
assign deps_mshr_full_rst = match_refill_mshr_set & {N{refill_i}};
assign deps_mshr_hit_rst  = match_refill_nline    & {N{refill_i}};
|
||||
// }}}
|
||||
// }}}
|
||||
|
||||
// Pop interface
|
||||
// {{{
|
||||
logic [N-1:0] pop_sel;
|
||||
logic [N-1:0] pop_commit_bv;
|
||||
|
||||
assign pop_commit_bv = rtab_index_to_bv(pop_commit_ptr_i);
|
||||
|
||||
// Pop try process
|
||||
// {{{
|
||||
logic         pop_head;
logic [N-1:0] pop_gnt;

// Arbitrate among the entries that are ready to be replayed. pop_head tells
// the arbiter that the granted head entry was consumed.
hpdcache_rrarb #(.N (N)) pop_arb_i (
    .clk_i   (clk_i),
    .rst_ni  (rst_ni),
    .req_i   (ready),
    .gnt_o   (pop_gnt),
    .ready_i (pop_head)
);
|
||||
|
||||
// Valid flag of the pop-try interface:
//  - POP_TRY_HEAD: valid when at least one entry is ready
//  - POP_TRY_NEXT / POP_TRY_NEXT_WAIT: always valid
//  - any other state value: not valid
always_comb
begin : req_valid_comb
    if (pop_try_state_q == POP_TRY_HEAD) begin
        pop_try_valid_o = |ready;
    end else if ((pop_try_state_q == POP_TRY_NEXT) ||
                 (pop_try_state_q == POP_TRY_NEXT_WAIT)) begin
        pop_try_valid_o = 1'b1;
    end else begin
        pop_try_valid_o = 1'b0;
    end
end
|
||||
|
||||
// Next-state and output logic of the pop-try FSM.
//
// Outputs computed here:
//   pop_sel         : bit-vector of the entry presented on the pop-try interface
//   pop_head        : pulsed when the entry granted by the pop arbiter is accepted
//                     (it drives the ready_i input of pop_arb_i)
//   pop_try_state_d : next FSM state
//   pop_try_next_d  : next value of the registered successor bit-vector
//
// NOTE: pop_try_ptr_o, read below by the rtab_next() calls, is derived from
// pop_sel by a continuous assignment elsewhere in this module; pop_sel is
// therefore assigned before pop_try_ptr_o is read in every state.
always_comb
|
||||
begin : pop_entry_sel_comb
|
||||
// Defaults: hold the state and the successor, select no entry
pop_try_state_d = pop_try_state_q;
|
||||
pop_try_next_d = pop_try_next_q;
|
||||
pop_head = 1'b0;
|
||||
pop_sel = '0;
|
||||
|
||||
case (pop_try_state_q)
|
||||
POP_TRY_HEAD: begin
|
||||
// This FSM may be in this state after forwarding the tail of
|
||||
// a list. In that case, a rollback may arrive in this cycle.
|
||||
pop_sel = pop_gnt;
|
||||
// Nothing is consumed while a rollback is signalled or when no entry is valid
if (!pop_rback_i && pop_try_valid_o) begin
|
||||
if (pop_try_i) begin
|
||||
// If the request interface accepts the request, go to the next request
|
||||
// in the list (if the current request is not the tail). Otherwise, stay in
|
||||
// the same state to forward a request from a new list
|
||||
pop_head = 1'b1;
|
||||
// The granted entry is not flagged as a tail: follow its next pointer
if ((pop_gnt & ~tail_q) != 0) begin
|
||||
pop_try_state_d = POP_TRY_NEXT;
|
||||
pop_try_next_d = rtab_next(next_q, pop_try_ptr_o);
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
POP_TRY_NEXT: begin
|
||||
pop_sel = pop_try_next_q;
|
||||
// A rollback sends the FSM back to POP_TRY_HEAD
if (pop_rback_i) begin
|
||||
pop_try_state_d = POP_TRY_HEAD;
|
||||
end else begin
|
||||
if (pop_try_i) begin
|
||||
// If the request interface accepts the new request, go to the next request
|
||||
// in the list (if the current request is not the tail). Otherwise, return
|
||||
// to the POP_TRY_HEAD state to forward a request from a new list
|
||||
if ((pop_try_next_q & ~tail_q) != 0) begin
|
||||
pop_try_state_d = POP_TRY_NEXT;
|
||||
pop_try_next_d = rtab_next(next_q, pop_try_ptr_o);
|
||||
end else begin
|
||||
pop_try_state_d = POP_TRY_HEAD;
|
||||
end
|
||||
end else begin
|
||||
// If the request interface is not ready to consume the new request, wait
|
||||
// until it is
|
||||
pop_try_state_d = POP_TRY_NEXT_WAIT;
|
||||
end
|
||||
end
|
||||
end
|
||||
POP_TRY_NEXT_WAIT: begin
|
||||
// Wait for the current request to be accepted. Then go to the next request in the
|
||||
// list or to a new list
|
||||
// NOTE: pop_rback_i is not examined in this state
pop_sel = pop_try_next_q;
|
||||
if (pop_try_i) begin
|
||||
if ((pop_try_next_q & ~tail_q) != 0) begin
|
||||
pop_try_state_d = POP_TRY_NEXT;
|
||||
pop_try_next_d = rtab_next(next_q, pop_try_ptr_o);
|
||||
end else begin
|
||||
pop_try_state_d = POP_TRY_HEAD;
|
||||
end
|
||||
end
|
||||
end
|
||||
// Any other state value: keep the defaults assigned above
default: begin
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign pop_commit_head_set = '0;
|
||||
|
||||
// Forward the stored request of the entry selected by pop_sel
hpdcache_mux #(
    .ONE_HOT_SEL (1'b1),
    .DATA_WIDTH  ($bits(rtab_entry_t)),
    .NINPUT      (N)
) pop_mux_i (
    .sel_i  (pop_sel),
    .data_i (req_q),
    .data_o (pop_try_req_o)
);
|
||||
|
||||
// Temporarily unset the head bit of the popped request to prevent it to be rescheduled
|
||||
// Entry taken by the request interface in this cycle (none when pop_try_i is low)
assign pop_try_bv = {N{pop_try_i}} & pop_sel;

// The head flag of the taken entry is cleared
assign pop_try_head_rst = pop_try_bv;
|
||||
|
||||
|
||||
// Forward the index of the entry being popped. This is used later by the
|
||||
// commit or rollback operations
|
||||
assign pop_try_ptr_o = rtab_bv_to_index(pop_sel);
|
||||
|
||||
// }}}
|
||||
|
||||
// Pop commit process
|
||||
// {{{
|
||||
// Invalidate the entry being popped (head of the linked list).
// pop_commit_bv already holds the decoded form of pop_commit_ptr_i, so it is
// reused here instead of decoding the pointer a second time.
assign pop_commit_valid_rst = {N{pop_commit_i}} & pop_commit_bv;
|
||||
// }}}
|
||||
|
||||
// Pop rollback process
|
||||
// {{{
|
||||
// Set again the head bit of the rolled-back request
|
||||
// Decoded form of the pointer of the entry being rolled back
assign pop_rback_ptr_bv = rtab_index_to_bv(pop_rback_ptr_i);

// On a rollback, the head flag of the designated entry is set again
assign pop_rback_head_set = pop_rback_ptr_bv & {N{pop_rback_i}};

// On a rollback, the dependency flags reported along with it are set on the
// same entry (pop_rback_head_set already combines the rollback strobe with
// the decoded pointer)
assign pop_rback_deps_mshr_hit_set       = pop_rback_head_set & {N{pop_rback_mshr_hit_i}};
assign pop_rback_deps_mshr_full_set      = pop_rback_head_set & {N{pop_rback_mshr_full_i}};
assign pop_rback_deps_mshr_ready_set     = pop_rback_head_set & {N{pop_rback_mshr_ready_i}};
assign pop_rback_deps_wbuf_hit_set       = pop_rback_head_set & {N{pop_rback_wbuf_hit_i}};
assign pop_rback_deps_wbuf_not_ready_set = pop_rback_head_set & {N{pop_rback_wbuf_not_ready_i}};
|
||||
// }}}
|
||||
// }}}
|
||||
|
||||
// Internal state assignment
|
||||
// {{{
|
||||
// Head flags: set by allocation, commit or rollback; cleared by
// allocation-and-link or by a pop try
assign head_set = pop_rback_head_set | pop_commit_head_set | alloc_head_set;
assign head_rst = pop_try_head_rst | alloc_head_rst;

// Tail flags: only changed by allocations
assign tail_set = alloc_tail_set;
assign tail_rst = alloc_tail_rst;

// Valid flags: set by allocation, cleared by commit
assign valid_set = alloc_valid_set;
assign valid_rst = pop_commit_valid_rst;

// Dependency flags: set either at allocation or on a rollback
assign deps_mshr_hit_set       = pop_rback_deps_mshr_hit_set       | alloc_deps_mshr_hit_set;
assign deps_mshr_full_set      = pop_rback_deps_mshr_full_set      | alloc_deps_mshr_full_set;
assign deps_mshr_ready_set     = pop_rback_deps_mshr_ready_set     | alloc_deps_mshr_ready_set;
assign deps_wbuf_hit_set       = pop_rback_deps_wbuf_hit_set       | alloc_deps_wbuf_hit_set;
assign deps_wbuf_not_ready_set = pop_rback_deps_wbuf_not_ready_set | alloc_deps_wbuf_not_ready_set;
|
||||
|
||||
// Registers holding the per-entry control state: valid/head/tail flags,
// dependency flags and next pointers. All of them are cleared by the
// asynchronous, active-low reset.
//
// Every flag follows the same update rule: a bit that is currently 0 takes
// the value of its set request, and a bit that is currently 1 is kept unless
// its reset request is raised.
always_ff @(posedge clk_i or negedge rst_ni)
begin : rtab_valid_ff
    if (!rst_ni) begin
        next_q                <= '0;
        deps_wbuf_not_ready_q <= '0;
        deps_wbuf_hit_q       <= '0;
        deps_mshr_ready_q     <= '0;
        deps_mshr_full_q      <= '0;
        deps_mshr_hit_q       <= '0;
        tail_q                <= '0;
        head_q                <= '0;
        valid_q               <= '0;
    end else begin
        // validity of the entries
        valid_q <= (valid_q & ~valid_rst) | (~valid_q & valid_set);

        // list position flags
        head_q <= (head_q & ~head_rst) | (~head_q & head_set);
        tail_q <= (tail_q & ~tail_rst) | (~tail_q & tail_set);

        // dependency flags
        deps_mshr_hit_q <=
                (deps_mshr_hit_q & ~deps_mshr_hit_rst) |
                (~deps_mshr_hit_q & deps_mshr_hit_set);
        deps_mshr_full_q <=
                (deps_mshr_full_q & ~deps_mshr_full_rst) |
                (~deps_mshr_full_q & deps_mshr_full_set);
        deps_mshr_ready_q <=
                (deps_mshr_ready_q & ~deps_mshr_ready_rst) |
                (~deps_mshr_ready_q & deps_mshr_ready_set);
        deps_wbuf_hit_q <=
                (deps_wbuf_hit_q & ~deps_wbuf_hit_rst) |
                (~deps_wbuf_hit_q & deps_wbuf_hit_set);
        deps_wbuf_not_ready_q <=
                (deps_wbuf_not_ready_q & ~deps_wbuf_not_ready_rst) |
                (~deps_wbuf_not_ready_q & deps_wbuf_not_ready_set);

        // On an allocation-and-link, the matching tail entry records the index
        // of the newly allocated entry as its successor
        for (int entry = N - 1; entry >= 0; entry--) begin
            if (match_check_tail[entry] && alloc_and_link_i) begin
                next_q[entry] <= rtab_bv_to_index(free_alloc);
            end
        end
    end
end
|
||||
|
||||
// State registers of the pop-try FSM: the current state and the registered
// successor bit-vector. Asynchronous, active-low reset.
always_ff @(posedge clk_i or negedge rst_ni)
|
||||
begin : pop_try_ff
|
||||
if (!rst_ni) begin
|
||||
// Out of reset the FSM starts in POP_TRY_HEAD with no successor recorded
pop_try_state_q <= POP_TRY_HEAD;
|
||||
pop_try_next_q <= '0;
|
||||
end else begin
|
||||
// Otherwise take the values computed by the combinational process
pop_try_state_q <= pop_try_state_d;
|
||||
pop_try_next_q <= pop_try_next_d;
|
||||
end
|
||||
end
|
||||
|
||||
// Storage of the request payloads. These registers have no reset: an entry
// is only written when its valid_set bit is raised, in which case it captures
// the request being allocated.
always_ff @(posedge clk_i)
begin : rtab_ff
    for (int slot = N - 1; slot >= 0; slot--) begin
        if (valid_set[slot]) begin
            req_q[slot] <= alloc_req_i;
        end
    end
end
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// {{{
|
||||
// pragma translate_off
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
check_i |-> $onehot0(match_check_tail)) else
|
||||
$error("rtab: more than one entry matching");
|
||||
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
alloc_and_link_i |-> (check_i & check_hit_o)) else
|
||||
$error("rtab: alloc and link shall be performed in case of check hit");
|
||||
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
alloc_and_link_i |->
|
||||
({alloc_req_i.addr_tag, hpdcache_get_req_offset_set(alloc_req_i.addr_offset)} ==
|
||||
check_nline_i)) else
|
||||
$error("rtab: nline for alloc and link shall match the one being checked");
|
||||
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
alloc_i |-> !alloc_and_link_i) else
|
||||
$error("rtab: only one allocation per cycle is allowed");
|
||||
|
||||
`ifndef VERILATOR
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
pop_try_i |-> ##1 (pop_commit_i | pop_rback_i)) else
|
||||
$error("rtab: a pop try shall be followed by a commit or rollback");
|
||||
`endif
|
||||
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
pop_commit_i |-> valid_q[pop_commit_ptr_i]) else
|
||||
$error("rtab: commiting an invalid entry");
|
||||
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
pop_rback_i |-> valid_q[pop_rback_ptr_i]) else
|
||||
$error("rtab: rolling-back an invalid entry");
|
||||
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
pop_rback_i |-> !pop_try_i) else
|
||||
$error("rtab: cache shall not accept a new request while rolling back");
|
||||
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
alloc |-> ~full_o) else
|
||||
$error("rtab: trying to allocate while the table is full");
|
||||
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
alloc_and_link_i |-> ~cfg_single_entry_i) else
|
||||
$error("rtab: trying to link a request in single entry mode");
|
||||
// pragma translate_on
|
||||
// }}}
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,965 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : May, 2021
|
||||
* Description : HPDcache uncached and AMO request handler
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_uncached
|
||||
import hpdcache_pkg::*;
|
||||
// Parameters
|
||||
// {{{
|
||||
#(
|
||||
parameter int HPDcacheMemIdWidth = 8,
|
||||
parameter int HPDcacheMemDataWidth = 512,
|
||||
parameter type hpdcache_mem_req_t = logic,
|
||||
parameter type hpdcache_mem_req_w_t = logic,
|
||||
parameter type hpdcache_mem_resp_r_t = logic,
|
||||
parameter type hpdcache_mem_resp_w_t = logic,
|
||||
|
||||
localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
|
||||
)
|
||||
// }}}
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// Global control signals
|
||||
// {{{
|
||||
input logic wbuf_empty_i,
|
||||
input logic mshr_empty_i,
|
||||
input logic rtab_empty_i,
|
||||
input logic ctrl_empty_i,
|
||||
// }}}
|
||||
|
||||
// Cache-side request interface
|
||||
// {{{
|
||||
input logic req_valid_i,
|
||||
output logic req_ready_o,
|
||||
input hpdcache_uc_op_t req_op_i,
|
||||
input hpdcache_req_addr_t req_addr_i,
|
||||
input hpdcache_req_size_t req_size_i,
|
||||
input hpdcache_req_data_t req_data_i,
|
||||
input hpdcache_req_be_t req_be_i,
|
||||
input logic req_uc_i,
|
||||
input hpdcache_req_sid_t req_sid_i,
|
||||
input hpdcache_req_tid_t req_tid_i,
|
||||
input logic req_need_rsp_i,
|
||||
// }}}
|
||||
|
||||
// Write buffer interface
|
||||
// {{{
|
||||
output logic wbuf_flush_all_o,
|
||||
// }}}
|
||||
|
||||
// AMO Cache Interface
|
||||
// {{{
|
||||
output logic dir_amo_match_o,
|
||||
output hpdcache_set_t dir_amo_match_set_o,
|
||||
output hpdcache_tag_t dir_amo_match_tag_o,
|
||||
output logic dir_amo_update_plru_o,
|
||||
input hpdcache_way_vector_t dir_amo_hit_way_i,
|
||||
|
||||
output logic data_amo_write_o,
|
||||
output logic data_amo_write_enable_o,
|
||||
output hpdcache_set_t data_amo_write_set_o,
|
||||
output hpdcache_req_size_t data_amo_write_size_o,
|
||||
output hpdcache_word_t data_amo_write_word_o,
|
||||
output logic [63:0] data_amo_write_data_o,
|
||||
output logic [7:0] data_amo_write_be_o,
|
||||
// }}}
|
||||
|
||||
// LR/SC reservation buffer
|
||||
// {{{
|
||||
input logic lrsc_snoop_i,
|
||||
input hpdcache_req_addr_t lrsc_snoop_addr_i,
|
||||
input hpdcache_req_size_t lrsc_snoop_size_i,
|
||||
// }}}
|
||||
|
||||
// Core response interface
|
||||
// {{{
|
||||
input logic core_rsp_ready_i,
|
||||
output logic core_rsp_valid_o,
|
||||
output hpdcache_rsp_t core_rsp_o,
|
||||
// }}}
|
||||
|
||||
// MEMORY interfaces
|
||||
// {{{
|
||||
// Memory request unique identifier
|
||||
input hpdcache_mem_id_t mem_read_id_i,
|
||||
input hpdcache_mem_id_t mem_write_id_i,
|
||||
|
||||
// Read interface
|
||||
input logic mem_req_read_ready_i,
|
||||
output logic mem_req_read_valid_o,
|
||||
output hpdcache_mem_req_t mem_req_read_o,
|
||||
|
||||
output logic mem_resp_read_ready_o,
|
||||
input logic mem_resp_read_valid_i,
|
||||
input hpdcache_mem_resp_r_t mem_resp_read_i,
|
||||
|
||||
// Write interface
|
||||
input logic mem_req_write_ready_i,
|
||||
output logic mem_req_write_valid_o,
|
||||
output hpdcache_mem_req_t mem_req_write_o,
|
||||
|
||||
input logic mem_req_write_data_ready_i,
|
||||
output logic mem_req_write_data_valid_o,
|
||||
output hpdcache_mem_req_w_t mem_req_write_data_o,
|
||||
|
||||
output logic mem_resp_write_ready_o,
|
||||
input logic mem_resp_write_valid_i,
|
||||
input hpdcache_mem_resp_w_t mem_resp_write_i,
|
||||
// }}}
|
||||
|
||||
// Configuration interface
|
||||
// {{{
|
||||
input logic cfg_error_on_cacheable_amo_i
|
||||
// }}}
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Definition of constants and types
|
||||
// {{{
|
||||
localparam hpdcache_uint MEM_REQ_RATIO = HPDcacheMemDataWidth/HPDCACHE_REQ_DATA_WIDTH;
|
||||
localparam hpdcache_uint MEM_REQ_WORD_INDEX_WIDTH = $clog2(MEM_REQ_RATIO);
|
||||
|
||||
typedef enum {
|
||||
UC_IDLE,
|
||||
UC_WAIT_PENDING,
|
||||
UC_MEM_REQ,
|
||||
UC_MEM_W_REQ,
|
||||
UC_MEM_WDATA_REQ,
|
||||
UC_MEM_WAIT_RSP,
|
||||
UC_CORE_RSP,
|
||||
UC_AMO_READ_DIR,
|
||||
UC_AMO_WRITE_DATA
|
||||
} hpdcache_uc_fsm_t;
|
||||
|
||||
localparam logic AMO_SC_SUCCESS = 1'b0;
|
||||
localparam logic AMO_SC_FAILURE = 1'b1;
|
||||
|
||||
// Build the 64-bit operand of an AMO from a 64-bit data word.
//   data_i        : 64-bit input data
//   size_i        : access size; the value 3 is handled as a 64-bit AMO, any
//                   other value as a 32-bit AMO
//   addr_i        : request address; bit 2 selects the 32-bit half of data_i
//   sign_extend_i : when set, a 32-bit operand is sign-extended to 64 bits,
//                   otherwise it is zero-extended
// Returns data_i unchanged for 64-bit AMOs, or the selected 32-bit word
// extended to 64 bits for 32-bit AMOs.
function automatic logic [63:0] prepare_amo_data_operand(
        input logic [63:0]        data_i,
        input hpdcache_req_size_t size_i,
        input hpdcache_req_addr_t addr_i,
        input logic               sign_extend_i
);
    logic [31:0] word;
    logic        fill;

    // 64-bits AMOs are already aligned, thus do nothing
    if (size_i == hpdcache_req_size_t'(3)) begin
        return data_i;
    end

    // 32-bits AMOs: pick the word designated by address bit 2...
    if (addr_i[2] == 1'b1) begin
        word = data_i[63:32];
    end else begin
        word = data_i[31:0];
    end

    // ...then extend it with either its sign bit or zeros
    if (sign_extend_i) begin
        fill = word[31];
    end else begin
        fill = 1'b0;
    end

    return {{32{fill}}, word};
endfunction
|
||||
|
||||
// Format the result of an AMO as a 64-bit word.
//   data_i : 64-bit result
//   size_i : access size; the value 3 is handled as a 64-bit AMO, any other
//            value as a 32-bit AMO
// Returns data_i unchanged for 64-bit AMOs; for 32-bit AMOs the low 32 bits
// of data_i are replicated on both halves of the returned word.
function automatic logic [63:0] prepare_amo_data_result(
        input logic [63:0]        data_i,
        input hpdcache_req_size_t size_i
);
    // 64-bits AMOs are already aligned, thus do nothing
    if (size_i == hpdcache_req_size_t'(3)) begin
        return data_i;
    end

    // 32-bits AMOs
    return {2{data_i[31:0]}};
endfunction
|
||||
|
||||
// Tell whether the operands of an AMO must be sign-extended.
//   op : decoded uncached/AMO operation
// Returns 1 for the add, max and min operations, 0 for any other operation.
function automatic logic amo_need_sign_extend(hpdcache_uc_op_t op);
    if (op.is_amo_add || op.is_amo_max || op.is_amo_min) begin
        return 1'b1;
    end else begin
        return 1'b0;
    end
endfunction
|
||||
// }}}
|
||||
|
||||
// Internal signals and registers
|
||||
// {{{
|
||||
hpdcache_uc_fsm_t uc_fsm_q, uc_fsm_d;
|
||||
hpdcache_uc_op_t req_op_q;
|
||||
hpdcache_req_addr_t req_addr_q;
|
||||
hpdcache_req_size_t req_size_q;
|
||||
hpdcache_req_data_t req_data_q;
|
||||
hpdcache_req_be_t req_be_q;
|
||||
logic req_uc_q;
|
||||
hpdcache_req_sid_t req_sid_q;
|
||||
hpdcache_req_tid_t req_tid_q;
|
||||
logic req_need_rsp_q;
|
||||
|
||||
logic uc_sc_retcode_q, uc_sc_retcode_d;
|
||||
|
||||
hpdcache_req_data_t rsp_rdata_q, rsp_rdata_d;
|
||||
logic rsp_error_set, rsp_error_rst;
|
||||
logic rsp_error_q;
|
||||
logic mem_resp_write_valid_q, mem_resp_write_valid_d;
|
||||
logic mem_resp_read_valid_q, mem_resp_read_valid_d;
|
||||
|
||||
hpdcache_req_data_t mem_req_write_data;
|
||||
logic [63:0] amo_req_ld_data;
|
||||
logic [63:0] amo_ld_data;
|
||||
logic [63:0] amo_req_st_data;
|
||||
logic [63:0] amo_st_data;
|
||||
logic [ 7:0] amo_st_be;
|
||||
logic [63:0] amo_result;
|
||||
// }}}
|
||||
|
||||
// LR/SC reservation buffer logic
|
||||
// {{{
|
||||
logic lrsc_rsrv_valid_q;
|
||||
hpdcache_req_addr_t lrsc_rsrv_addr_q, lrsc_rsrv_addr_d;
|
||||
hpdcache_nline_t lrsc_rsrv_nline;
|
||||
hpdcache_offset_t lrsc_rsrv_word;
|
||||
|
||||
hpdcache_offset_t lrsc_snoop_words;
|
||||
hpdcache_nline_t lrsc_snoop_nline;
|
||||
hpdcache_offset_t lrsc_snoop_base, lrsc_snoop_end;
|
||||
logic lrsc_snoop_hit;
|
||||
logic lrsc_snoop_reset;
|
||||
|
||||
hpdcache_nline_t lrsc_uc_nline;
|
||||
hpdcache_offset_t lrsc_uc_word;
|
||||
logic lrsc_uc_hit;
|
||||
logic lrsc_uc_set, lrsc_uc_reset;
|
||||
|
||||
// NOTE: Reservation set for LR instruction is always 8-bytes in this
|
||||
// implementation.
|
||||
assign lrsc_rsrv_nline = hpdcache_get_req_addr_nline(lrsc_rsrv_addr_q),
|
||||
lrsc_rsrv_word = hpdcache_get_req_addr_offset(lrsc_rsrv_addr_q) >> 3;
|
||||
|
||||
// Check hit on LR/SC reservation for snoop port (normal write accesses)
|
||||
// Number of 8-byte words covered by the snooped access: 1 when the size code
// is below 3, otherwise (1 << size) / 8
assign lrsc_snoop_words =
        (lrsc_snoop_size_i < 3) ? 1 : hpdcache_offset_t'((8'h1 << lrsc_snoop_size_i) >> 3);

// Cache line targeted by the snooped access
assign lrsc_snoop_nline = hpdcache_get_req_addr_nline(lrsc_snoop_addr_i);

// First 8-byte word of the snooped access within its cache line...
assign lrsc_snoop_base = hpdcache_get_req_addr_offset(lrsc_snoop_addr_i) >> 3;

// ...and the word just past its last one
assign lrsc_snoop_end = lrsc_snoop_base + lrsc_snoop_words;

// The snooped access hits the reservation when a reservation is valid, both
// target the same cache line, and the reserved word lies in [base, end)
assign lrsc_snoop_hit = (lrsc_rsrv_word <  lrsc_snoop_end ) &
                        (lrsc_rsrv_word >= lrsc_snoop_base) &
                        (lrsc_rsrv_nline == lrsc_snoop_nline) &
                        lrsc_rsrv_valid_q;

// Request to drop the reservation: a snoop is signalled and it hits
assign lrsc_snoop_reset = lrsc_snoop_hit & lrsc_snoop_i;
|
||||
|
||||
// Check hit on LR/SC reservation for AMOs and SC
|
||||
// Cache line and 8-byte word targeted by the incoming request
assign lrsc_uc_nline = hpdcache_get_req_addr_nline(req_addr_i);
assign lrsc_uc_word  = hpdcache_get_req_addr_offset(req_addr_i) >> 3;

// The incoming request hits the reservation when a reservation is valid and
// both the cache line and the 8-byte word are the same
assign lrsc_uc_hit = (lrsc_rsrv_word == lrsc_uc_word) &
                     (lrsc_rsrv_nline == lrsc_uc_nline) &
                     lrsc_rsrv_valid_q;
|
||||
// }}}
|
||||
|
||||
// Uncacheable request FSM
|
||||
// {{{
|
||||
always_comb
|
||||
begin : uc_fsm_comb
|
||||
mem_resp_write_valid_d = mem_resp_write_valid_q;
|
||||
mem_resp_read_valid_d = mem_resp_read_valid_q;
|
||||
rsp_error_set = 1'b0;
|
||||
rsp_error_rst = 1'b0;
|
||||
lrsc_rsrv_addr_d = lrsc_rsrv_addr_q;
|
||||
uc_sc_retcode_d = uc_sc_retcode_q;
|
||||
wbuf_flush_all_o = 1'b0;
|
||||
lrsc_uc_set = 1'b0;
|
||||
lrsc_uc_reset = 1'b0;
|
||||
|
||||
uc_fsm_d = uc_fsm_q;
|
||||
|
||||
case (uc_fsm_q)
|
||||
// Wait for a request
|
||||
// {{{
|
||||
UC_IDLE: begin
|
||||
|
||||
if (req_valid_i) begin
|
||||
wbuf_flush_all_o = 1'b1;
|
||||
|
||||
unique case (1'b1)
|
||||
req_op_i.is_ld,
|
||||
req_op_i.is_st: begin
|
||||
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
|
||||
uc_fsm_d = UC_MEM_REQ;
|
||||
end else begin
|
||||
uc_fsm_d = UC_WAIT_PENDING;
|
||||
end
|
||||
end
|
||||
|
||||
req_op_i.is_amo_swap,
|
||||
req_op_i.is_amo_add,
|
||||
req_op_i.is_amo_and,
|
||||
req_op_i.is_amo_or,
|
||||
req_op_i.is_amo_xor,
|
||||
req_op_i.is_amo_max,
|
||||
req_op_i.is_amo_maxu,
|
||||
req_op_i.is_amo_min,
|
||||
req_op_i.is_amo_minu,
|
||||
req_op_i.is_amo_lr: begin
|
||||
// Reset LR/SC reservation if AMO matches its address
|
||||
lrsc_uc_reset = ~req_op_i.is_amo_lr & lrsc_uc_hit;
|
||||
|
||||
if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin
|
||||
rsp_error_set = 1'b1;
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end else begin
|
||||
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
|
||||
uc_fsm_d = UC_MEM_REQ;
|
||||
end else begin
|
||||
uc_fsm_d = UC_WAIT_PENDING;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
req_op_i.is_amo_sc: begin
|
||||
if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin
|
||||
rsp_error_set = 1'b1;
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end else begin
|
||||
// Reset previous reservation (if any)
|
||||
lrsc_uc_reset = 1'b1;
|
||||
|
||||
// SC with valid reservation
|
||||
if (lrsc_uc_hit) begin
|
||||
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
|
||||
uc_fsm_d = UC_MEM_REQ;
|
||||
end else begin
|
||||
uc_fsm_d = UC_WAIT_PENDING;
|
||||
end
|
||||
end
|
||||
// SC with no valid reservation, thus respond with the failure code
|
||||
else begin
|
||||
uc_sc_retcode_d = AMO_SC_FAILURE;
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
if (req_need_rsp_i) begin
|
||||
rsp_error_set = 1'b1;
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Wait for the write buffer to be empty
|
||||
// {{{
|
||||
UC_WAIT_PENDING: begin
|
||||
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
|
||||
uc_fsm_d = UC_MEM_REQ;
|
||||
end else begin
|
||||
uc_fsm_d = UC_WAIT_PENDING;
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Send request to memory
|
||||
// {{{
|
||||
UC_MEM_REQ: begin
|
||||
uc_fsm_d = UC_MEM_REQ;
|
||||
|
||||
mem_resp_write_valid_d = 1'b0;
|
||||
mem_resp_read_valid_d = 1'b0;
|
||||
|
||||
case (1'b1)
|
||||
req_op_q.is_ld,
|
||||
req_op_q.is_amo_lr: begin
|
||||
if (mem_req_read_ready_i) begin
|
||||
uc_fsm_d = UC_MEM_WAIT_RSP;
|
||||
end
|
||||
end
|
||||
|
||||
req_op_q.is_st,
|
||||
req_op_q.is_amo_sc,
|
||||
req_op_q.is_amo_swap,
|
||||
req_op_q.is_amo_add,
|
||||
req_op_q.is_amo_and,
|
||||
req_op_q.is_amo_or,
|
||||
req_op_q.is_amo_xor,
|
||||
req_op_q.is_amo_max,
|
||||
req_op_q.is_amo_maxu,
|
||||
req_op_q.is_amo_min,
|
||||
req_op_q.is_amo_minu: begin
|
||||
if (mem_req_write_ready_i && mem_req_write_data_ready_i) begin
|
||||
uc_fsm_d = UC_MEM_WAIT_RSP;
|
||||
end else if (mem_req_write_ready_i) begin
|
||||
uc_fsm_d = UC_MEM_WDATA_REQ;
|
||||
end else if (mem_req_write_data_ready_i) begin
|
||||
uc_fsm_d = UC_MEM_W_REQ;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Send write address
|
||||
// {{{
|
||||
UC_MEM_W_REQ: begin
|
||||
mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i;
|
||||
mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i;
|
||||
|
||||
if (mem_req_write_ready_i) begin
|
||||
uc_fsm_d = UC_MEM_WAIT_RSP;
|
||||
end else begin
|
||||
uc_fsm_d = UC_MEM_W_REQ;
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Send write data
|
||||
// {{{
|
||||
UC_MEM_WDATA_REQ: begin
|
||||
mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i;
|
||||
mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i;
|
||||
|
||||
if (mem_req_write_data_ready_i) begin
|
||||
uc_fsm_d = UC_MEM_WAIT_RSP;
|
||||
end else begin
|
||||
uc_fsm_d = UC_MEM_WDATA_REQ;
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Wait for the response from the memory
|
||||
// {{{
|
||||
UC_MEM_WAIT_RSP: begin
|
||||
automatic bit rd_error;
|
||||
automatic bit wr_error;
|
||||
|
||||
uc_fsm_d = UC_MEM_WAIT_RSP;
|
||||
mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i;
|
||||
mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i;
|
||||
|
||||
rd_error = mem_resp_read_valid_i &&
|
||||
( mem_resp_read_i.mem_resp_r_error == HPDCACHE_MEM_RESP_NOK);
|
||||
wr_error = mem_resp_write_valid_i &&
|
||||
(mem_resp_write_i.mem_resp_w_error == HPDCACHE_MEM_RESP_NOK);
|
||||
rsp_error_set = req_need_rsp_q & (rd_error | wr_error);
|
||||
|
||||
case (1'b1)
|
||||
req_op_q.is_ld: begin
|
||||
if (mem_resp_read_valid_i) begin
|
||||
if (req_need_rsp_q) begin
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end else begin
|
||||
uc_fsm_d = UC_IDLE;
|
||||
end
|
||||
end
|
||||
end
|
||||
req_op_q.is_st: begin
|
||||
if (mem_resp_write_valid_i) begin
|
||||
if (req_need_rsp_q) begin
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end else begin
|
||||
uc_fsm_d = UC_IDLE;
|
||||
end
|
||||
end
|
||||
end
|
||||
req_op_q.is_amo_lr: begin
|
||||
if (mem_resp_read_valid_i) begin
|
||||
// set a new reservation
|
||||
if (!rd_error)
|
||||
begin
|
||||
lrsc_uc_set = 1'b1;
|
||||
lrsc_rsrv_addr_d = req_addr_q;
|
||||
end
|
||||
// in case of a memory error, do not make the reservation and
|
||||
// invalidate an existing one (if valid)
|
||||
else begin
|
||||
lrsc_uc_reset = 1'b1;
|
||||
end
|
||||
|
||||
if (req_uc_q || rd_error) begin
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end else begin
|
||||
uc_fsm_d = UC_AMO_READ_DIR;
|
||||
end
|
||||
end
|
||||
end
|
||||
req_op_q.is_amo_sc: begin
|
||||
if (mem_resp_write_valid_i) begin
|
||||
automatic bit is_atomic;
|
||||
|
||||
is_atomic = mem_resp_write_i.mem_resp_w_is_atomic && !wr_error;
|
||||
uc_sc_retcode_d = is_atomic ? AMO_SC_SUCCESS : AMO_SC_FAILURE;
|
||||
|
||||
if (req_uc_q || !is_atomic) begin
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end else begin
|
||||
uc_fsm_d = UC_AMO_READ_DIR;
|
||||
end
|
||||
end
|
||||
end
|
||||
req_op_q.is_amo_swap,
|
||||
req_op_q.is_amo_add,
|
||||
req_op_q.is_amo_and,
|
||||
req_op_q.is_amo_or,
|
||||
req_op_q.is_amo_xor,
|
||||
req_op_q.is_amo_max,
|
||||
req_op_q.is_amo_maxu,
|
||||
req_op_q.is_amo_min,
|
||||
req_op_q.is_amo_minu: begin
|
||||
// wait until both the old data and the write acknowledgement have been received
|
||||
if ((mem_resp_read_valid_i && mem_resp_write_valid_i) ||
|
||||
(mem_resp_read_valid_i && mem_resp_write_valid_q) ||
|
||||
(mem_resp_read_valid_q && mem_resp_write_valid_i))
|
||||
begin
|
||||
if (req_uc_q || rsp_error_q || rd_error || wr_error) begin
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end else begin
|
||||
uc_fsm_d = UC_AMO_READ_DIR;
|
||||
end
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Send the response to the requester
|
||||
// {{{
|
||||
UC_CORE_RSP: begin
|
||||
if (core_rsp_ready_i) begin
|
||||
rsp_error_rst = 1'b1;
|
||||
uc_fsm_d = UC_IDLE;
|
||||
end else begin
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Check for a cache hit on the AMO target address
|
||||
// {{{
|
||||
UC_AMO_READ_DIR: begin
|
||||
uc_fsm_d = UC_AMO_WRITE_DATA;
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Write the locally computed AMO result in the cache
|
||||
// {{{
|
||||
UC_AMO_WRITE_DATA: begin
|
||||
uc_fsm_d = UC_CORE_RSP;
|
||||
end
|
||||
// }}}
|
||||
endcase
|
||||
end
|
||||
// }}}
|
||||
|
||||
// AMO unit
|
||||
// {{{
|
||||
// Number of address bits used to pick one 64-bit word out of the request
// data (evaluates to 0 when HPDCACHE_REQ_DATA_WIDTH/64 is 1).
localparam hpdcache_uint AMO_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_DATA_WIDTH/64);

generate
    if (AMO_WORD_INDEX_WIDTH > 0) begin : amo_operand_mux_gen
        // The request data holds several 64-bit words. The same address
        // slice, req_addr_q[3 +: AMO_WORD_INDEX_WIDTH], drives the select
        // input of the three multiplexers below.

        // Byte-enable of the store operand (8 bits per 64-bit word)
        hpdcache_mux #(
            .ONE_HOT_SEL (1'b0),
            .DATA_WIDTH  (8),
            .NINPUT      (HPDCACHE_REQ_DATA_WIDTH/64)
        ) amo_st_be_mux_i (
            .sel_i       (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]),
            .data_i      (req_be_q),
            .data_o      (amo_st_be)
        );

        // Store operand, taken from the registered request data
        hpdcache_mux #(
            .ONE_HOT_SEL (1'b0),
            .DATA_WIDTH  (64),
            .NINPUT      (HPDCACHE_REQ_DATA_WIDTH/64)
        ) amo_st_data_mux_i (
            .sel_i       (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]),
            .data_i      (req_data_q),
            .data_o      (amo_req_st_data)
        );

        // Load operand, taken from the registered memory read data
        hpdcache_mux #(
            .ONE_HOT_SEL (1'b0),
            .DATA_WIDTH  (64),
            .NINPUT      (HPDCACHE_REQ_DATA_WIDTH/64)
        ) amo_ld_data_mux_i (
            .sel_i       (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]),
            .data_i      (rsp_rdata_q),
            .data_o      (amo_req_ld_data)
        );

    end else begin
        // Only one 64-bit word: the registered values are forwarded as they are
        assign amo_st_be       = req_be_q,
               amo_req_st_data = req_data_q,
               amo_req_ld_data = rsp_rdata_q;
    end
endgenerate
|
||||
|
||||
// Operands of the AMO unit. Both the word read from memory and the word to
// store go through prepare_amo_data_operand() together with the request
// size, the request address and the amo_need_sign_extend() flag computed
// from the registered operation.
assign amo_st_data = prepare_amo_data_operand(
                         amo_req_st_data,
                         req_size_q,
                         req_addr_q,
                         amo_need_sign_extend(req_op_q)),
       amo_ld_data = prepare_amo_data_operand(
                         amo_req_ld_data,
                         req_size_q,
                         req_addr_q,
                         amo_need_sign_extend(req_op_q));

// AMO unit: produces amo_result from the two operands and the operation
hpdcache_amo amo_unit_i (
    .op_i      (req_op_q),
    .ld_data_i (amo_ld_data),
    .st_data_i (amo_st_data),
    .result_o  (amo_result)
);
|
||||
|
||||
// Directory interface: the match request is raised while the FSM is in
// UC_AMO_READ_DIR; set and tag are extracted from the registered address.
// The PLRU update request simply follows the match request.
assign dir_amo_match_o       = (uc_fsm_q == UC_AMO_READ_DIR);
assign dir_amo_match_set_o   = hpdcache_get_req_addr_set(req_addr_q);
assign dir_amo_match_tag_o   = hpdcache_get_req_addr_tag(req_addr_q);
assign dir_amo_update_plru_o = dir_amo_match_o;

// Data interface: the write request is raised while the FSM is in
// UC_AMO_WRITE_DATA; it is enabled only if at least one bit of
// dir_amo_hit_way_i is set.
assign data_amo_write_o        = (uc_fsm_q == UC_AMO_WRITE_DATA);
assign data_amo_write_enable_o = |dir_amo_hit_way_i;
assign data_amo_write_set_o    = hpdcache_get_req_addr_set(req_addr_q);
assign data_amo_write_word_o   = hpdcache_get_req_addr_word(req_addr_q);
assign data_amo_write_size_o   = req_size_q;
assign data_amo_write_be_o     = amo_st_be;
assign data_amo_write_data_o   = prepare_amo_data_result(amo_result, req_size_q);
|
||||
// }}}
|
||||
|
||||
// Core response outputs
|
||||
// {{{
|
||||
// A new request is accepted only in UC_IDLE; the response to the requester
// is valid for as long as the FSM stays in UC_CORE_RSP.
assign core_rsp_valid_o = (uc_fsm_q == UC_CORE_RSP);
assign req_ready_o      = (uc_fsm_q == UC_IDLE);
|
||||
// }}}
|
||||
|
||||
// Memory read request outputs
|
||||
// {{{
|
||||
always_comb
begin : mem_req_read_comb
    // Fields shared by every read request issued by this handler
    mem_req_read_o.mem_req_id        = mem_read_id_i;
    mem_req_read_o.mem_req_addr      = req_addr_q;
    mem_req_read_o.mem_req_size      = req_size_q;
    mem_req_read_o.mem_req_len       = 0;
    mem_req_read_o.mem_req_cacheable = 1'b0;

    // Defaults: plain read command, no request on the read channel
    mem_req_read_o.mem_req_command   = HPDCACHE_MEM_READ;
    mem_req_read_o.mem_req_atomic    = HPDCACHE_MEM_ATOMIC_ADD;
    mem_req_read_valid_o             = 1'b0;

    // Only loads and LR use the read channel, and only while the FSM is in
    // UC_MEM_REQ. LR is sent as an atomic LDEX instead of a plain read.
    unique case (1'b1)
        req_op_q.is_ld: begin
            mem_req_read_valid_o = (uc_fsm_q == UC_MEM_REQ);
        end
        req_op_q.is_amo_lr: begin
            mem_req_read_valid_o           = (uc_fsm_q == UC_MEM_REQ);
            mem_req_read_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_read_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_LDEX;
        end
        default: begin
            /* keep the defaults */
        end
    endcase
end
|
||||
// }}}
|
||||
|
||||
// Memory write request outputs
|
||||
// {{{
|
||||
always_comb
begin : mem_req_write_comb
    // Fields shared by every write request issued by this handler
    mem_req_write_o.mem_req_id        = mem_write_id_i;
    mem_req_write_o.mem_req_addr      = req_addr_q;
    mem_req_write_o.mem_req_size      = req_size_q;
    mem_req_write_o.mem_req_len       = 0;
    mem_req_write_o.mem_req_cacheable = 1'b0;

    // Defaults: plain write of the registered request data. The AMO
    // operations below override the command and the atomic opcode.
    mem_req_write_data                = req_data_q;
    mem_req_write_o.mem_req_command   = HPDCACHE_MEM_WRITE;
    mem_req_write_o.mem_req_atomic    = HPDCACHE_MEM_ATOMIC_ADD;

    unique case (1'b1)
        req_op_q.is_amo_minu: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_UMIN;
        end
        req_op_q.is_amo_min: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_SMIN;
        end
        req_op_q.is_amo_maxu: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_UMAX;
        end
        req_op_q.is_amo_max: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_SMAX;
        end
        req_op_q.is_amo_xor: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_EOR;
        end
        req_op_q.is_amo_or: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_SET;
        end
        req_op_q.is_amo_and: begin
            // AND is sent as an atomic CLR with the bitwise-inverted operand
            mem_req_write_data              = ~req_data_q;
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_CLR;
        end
        req_op_q.is_amo_add: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_ADD;
        end
        req_op_q.is_amo_swap: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_SWAP;
        end
        req_op_q.is_amo_sc: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_STEX;
        end
        default: begin
            /* plain write: keep the defaults */
        end
    endcase

    // Valid signals of the write address and write data channels.
    // Both are low unless one of the states below raises them.
    mem_req_write_valid_o      = 1'b0;
    mem_req_write_data_valid_o = 1'b0;

    unique case (uc_fsm_q)
        // First attempt: address and data are presented together, but only
        // for operations that use the write channel
        UC_MEM_REQ: begin
            unique case (1'b1)
                req_op_q.is_st,
                req_op_q.is_amo_sc,
                req_op_q.is_amo_swap,
                req_op_q.is_amo_add,
                req_op_q.is_amo_and,
                req_op_q.is_amo_or,
                req_op_q.is_amo_xor,
                req_op_q.is_amo_max,
                req_op_q.is_amo_maxu,
                req_op_q.is_amo_min,
                req_op_q.is_amo_minu: begin
                    mem_req_write_valid_o      = 1'b1;
                    mem_req_write_data_valid_o = 1'b1;
                end

                default: begin
                    /* no write request */
                end
            endcase
        end

        // Only the write address is still pending
        UC_MEM_W_REQ: begin
            mem_req_write_valid_o = 1'b1;
        end

        // Only the write data is still pending
        UC_MEM_WDATA_REQ: begin
            mem_req_write_data_valid_o = 1'b1;
        end

        default: begin
            /* no write request */
        end
    endcase
end
|
||||
|
||||
generate
    // The last-beat flag of the write data channel is tied high
    assign mem_req_write_data_o.mem_req_w_last = 1'b1;

    // Memory data bus wider than the core interface (MEM_REQ_RATIO > 1)
    if (MEM_REQ_RATIO > 1) begin : mem_req_data_gen
        // The byte-enable goes through a demultiplexer selected by the
        // word-index bits of the request address
        hpdcache_demux #(
            .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH/8),
            .NOUTPUT     (MEM_REQ_RATIO)
        ) mem_write_be_demux_i (
            .sel_i       (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]),
            .data_i      (req_be_q),
            .data_o      (mem_req_write_data_o.mem_req_w_be)
        );

        // The write data is replicated MEM_REQ_RATIO times
        assign mem_req_write_data_o.mem_req_w_data = {MEM_REQ_RATIO{mem_req_write_data}};
    end

    // Otherwise data and byte-enable are forwarded unchanged
    else begin
        assign mem_req_write_data_o.mem_req_w_be   = req_be_q;
        assign mem_req_write_data_o.mem_req_w_data = mem_req_write_data;
    end
endgenerate
|
||||
// }}}
|
||||
|
||||
// Response handling
|
||||
// {{{
|
||||
// SC return code, zero-extended to 64 bits and then formatted for the
// request size by prepare_amo_data_result()
logic [63:0] sc_retcode, sc_rdata;

assign sc_retcode = {{63{1'b0}}, uc_sc_retcode_q};
assign sc_rdata   = prepare_amo_data_result(sc_retcode, req_size_q);

// Response to the requester: identifiers come from the registered request.
// For SC the read data is the formatted return code replicated
// HPDCACHE_REQ_WORDS times; otherwise it is the registered memory read data.
assign core_rsp_o.sid     = req_sid_q;
assign core_rsp_o.tid     = req_tid_q;
assign core_rsp_o.aborted = 1'b0;
assign core_rsp_o.error   = rsp_error_q;
assign core_rsp_o.rdata   = req_op_q.is_amo_sc ? {HPDCACHE_REQ_WORDS{sc_rdata}} : rsp_rdata_q;
|
||||
|
||||
// Resize the memory response data to the core response width
|
||||
generate
    // Memory data bus wider than the core interface: a multiplexer selected
    // by the word-index bits of the request address produces rsp_rdata_d
    if (MEM_REQ_RATIO > 1) begin : core_rsp_data_gen
        hpdcache_mux #(
            .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH),
            .NINPUT      (MEM_REQ_RATIO)
        ) data_read_rsp_mux_i(
            .sel_i       (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]),
            .data_i      (mem_resp_read_i.mem_resp_r_data),
            .data_o      (rsp_rdata_d)
        );
    end

    // Same width on both sides: the memory read data is used directly
    else begin
        assign rsp_rdata_d = mem_resp_read_i.mem_resp_r_data;
    end
endgenerate
|
||||
|
||||
// This FSM is always ready to accept the response
|
||||
// Both response channels are permanently ready
assign mem_resp_write_ready_o = 1'b1;
assign mem_resp_read_ready_o  = 1'b1;
|
||||
// }}}
|
||||
|
||||
// Set cache request registers
|
||||
// {{{
|
||||
always_ff @(posedge clk_i)
begin : req_ff
    // Capture the whole request on the cycle it is accepted
    // (valid and ready both high). These registers have no reset.
    if (req_valid_i && req_ready_o) begin
        // operation and attributes
        req_op_q       <= req_op_i;
        req_uc_q       <= req_uc_i;
        req_need_rsp_q <= req_need_rsp_i;
        // requester identifiers
        req_sid_q      <= req_sid_i;
        req_tid_q      <= req_tid_i;
        // address and payload
        req_addr_q     <= req_addr_i;
        req_size_q     <= req_size_i;
        req_be_q       <= req_be_i;
        req_data_q     <= req_data_i;
    end
end
|
||||
// }}}
|
||||
|
||||
// Uncacheable request FSM set state
|
||||
// {{{
|
||||
// Set/reset requests for the LR/SC reservation valid bit.
// The reset request is raised by either the FSM or the snoop logic.
logic lrsc_rsrv_valid_set;
logic lrsc_rsrv_valid_reset;

assign lrsc_rsrv_valid_reset = lrsc_snoop_reset | lrsc_uc_reset;
assign lrsc_rsrv_valid_set   = lrsc_uc_set;

always_ff @(posedge clk_i or negedge rst_ni)
begin : uc_fsm_ff
    if (!rst_ni) begin
        // asynchronous reset: no reservation, FSM idle
        lrsc_rsrv_valid_q <= 1'b0;
        uc_fsm_q          <= UC_IDLE;
    end else begin
        // while set, the bit is cleared by a reset request;
        // while clear, it is raised by a set request
        lrsc_rsrv_valid_q <= ( lrsc_rsrv_valid_q & ~lrsc_rsrv_valid_reset) |
                             (~lrsc_rsrv_valid_q &  lrsc_rsrv_valid_set  );
        uc_fsm_q          <= uc_fsm_d;
    end
end
|
||||
|
||||
always_ff @(posedge clk_i)
begin : uc_amo_ff
    // Registered every cycle, without reset: SC return code and
    // LR/SC reservation address
    uc_sc_retcode_q  <= uc_sc_retcode_d;
    lrsc_rsrv_addr_q <= lrsc_rsrv_addr_d;
end
|
||||
// }}}
|
||||
|
||||
// Response registers
|
||||
// {{{
|
||||
always_ff @(posedge clk_i)
begin
    // "Response seen" flags follow their next-state value every cycle
    mem_resp_read_valid_q  <= mem_resp_read_valid_d;
    mem_resp_write_valid_q <= mem_resp_write_valid_d;

    // The read data is captured only when a read response is valid
    if (mem_resp_read_valid_i) begin
        rsp_rdata_q <= rsp_rdata_d;
    end
end
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni)
begin
    if (!rst_ni) begin
        rsp_error_q <= 1'b0;
    end else begin
        // while set, the flag is cleared by rsp_error_rst;
        // while clear, it is raised by rsp_error_set
        rsp_error_q <= ( rsp_error_q & ~rsp_error_rst) |
                       (~rsp_error_q &  rsp_error_set);
    end
end
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// {{{
|
||||
// pragma translate_off
|
||||
// Check-only helper: a valid request carrying any AMO operation
// (LR, SC or one of the read-modify-write operations)
logic amo_req_valid_chk;

assign amo_req_valid_chk = req_valid_i && (req_op_i.is_amo_lr   ||
                                           req_op_i.is_amo_sc   ||
                                           req_op_i.is_amo_swap ||
                                           req_op_i.is_amo_add  ||
                                           req_op_i.is_amo_and  ||
                                           req_op_i.is_amo_or   ||
                                           req_op_i.is_amo_xor  ||
                                           req_op_i.is_amo_max  ||
                                           req_op_i.is_amo_maxu ||
                                           req_op_i.is_amo_min  ||
                                           req_op_i.is_amo_minu);

// Loads reaching this handler must be flagged uncacheable
assert property (@(posedge clk_i) disable iff (!rst_ni)
        (req_valid_i && req_op_i.is_ld) -> req_uc_i) else
        $error("uc_handler: unexpected load request on cacheable region");

// Stores reaching this handler must be flagged uncacheable
assert property (@(posedge clk_i) disable iff (!rst_ni)
        (req_valid_i && req_op_i.is_st) -> req_uc_i) else
        $error("uc_handler: unexpected store request on cacheable region");

// Every AMO request must ask for a response
assert property (@(posedge clk_i) disable iff (!rst_ni)
        amo_req_valid_chk -> req_need_rsp_i) else
        $error("uc_handler: amo requests shall need a response");

// Every AMO request must use size code 2 or 3
assert property (@(posedge clk_i) disable iff (!rst_ni)
        amo_req_valid_chk -> (req_size_i inside {2,3})) else
        $error("uc_handler: amo requests shall be 4 or 8 bytes wide");

// Memory responses are only expected while waiting in UC_MEM_WAIT_RSP
assert property (@(posedge clk_i) disable iff (!rst_ni)
        (mem_resp_write_valid_i || mem_resp_read_valid_i) -> (uc_fsm_q == UC_MEM_WAIT_RSP)) else
        $error("uc_handler: unexpected response from memory");
|
||||
// pragma translate_on
|
||||
// }}}
|
||||
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,678 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache Write Buffer
|
||||
* History :
|
||||
*/
|
||||
module hpdcache_wbuf
|
||||
// Parameters
|
||||
// {{{
|
||||
#(
|
||||
// Number of entries in the directory part of the Write Buffer
|
||||
parameter int unsigned WBUF_DIR_ENTRIES = 0,
|
||||
// Number of entries in the data part of the Write Buffer
|
||||
parameter int unsigned WBUF_DATA_ENTRIES = 0,
|
||||
// Width in bits of the write words
|
||||
parameter int unsigned WBUF_WORD_WIDTH = 0,
|
||||
// Number of words per line in the write buffer
|
||||
parameter int unsigned WBUF_WORDS = 0,
|
||||
// Width in bits of the physical address
|
||||
parameter int unsigned WBUF_PA_WIDTH = 0,
|
||||
// Maximum value of the time counter
|
||||
parameter int unsigned WBUF_TIMECNT_MAX = 8,
|
||||
// Number of most significant bits to check for read conflicts
|
||||
parameter int unsigned WBUF_READ_MATCH_WIDTH = 0,
|
||||
// Use a feedthrough FIFO on the send interface
|
||||
parameter bit WBUF_SEND_FEEDTHROUGH = 0,
|
||||
|
||||
localparam int unsigned WBUF_OFFSET_WIDTH = $clog2((WBUF_WORD_WIDTH*WBUF_WORDS)/8),
|
||||
localparam int unsigned WBUF_TAG_WIDTH = WBUF_PA_WIDTH - WBUF_OFFSET_WIDTH,
|
||||
localparam int unsigned WBUF_WORD_OFFSET = $clog2(WBUF_WORD_WIDTH/8),
|
||||
localparam int unsigned WBUF_DATA_PTR_WIDTH = $clog2(WBUF_DATA_ENTRIES),
|
||||
localparam int unsigned WBUF_DIR_PTR_WIDTH = $clog2(WBUF_DIR_ENTRIES),
|
||||
localparam int unsigned WBUF_TIMECNT_WIDTH = $clog2(WBUF_TIMECNT_MAX),
|
||||
localparam type wbuf_addr_t = logic unsigned [ WBUF_PA_WIDTH-1:0],
|
||||
localparam type wbuf_dir_ptr_t = logic unsigned [ WBUF_DIR_PTR_WIDTH-1:0],
|
||||
localparam type wbuf_data_ptr_t = logic unsigned [ WBUF_DATA_PTR_WIDTH-1:0],
|
||||
localparam type wbuf_data_t = logic [ WBUF_WORD_WIDTH-1:0],
|
||||
localparam type wbuf_be_t = logic [ WBUF_WORD_WIDTH/8-1:0],
|
||||
localparam type wbuf_data_buf_t = wbuf_data_t [ WBUF_WORDS-1:0],
|
||||
localparam type wbuf_be_buf_t = wbuf_be_t [ WBUF_WORDS-1:0],
|
||||
localparam type wbuf_tag_t = logic unsigned [ WBUF_TAG_WIDTH-1:0],
|
||||
localparam type wbuf_match_t = logic unsigned [WBUF_READ_MATCH_WIDTH-1:0],
|
||||
localparam type wbuf_timecnt_t = logic unsigned [ WBUF_TIMECNT_WIDTH-1:0]
|
||||
)
|
||||
// }}}
|
||||
// Ports
|
||||
// {{{
|
||||
(
|
||||
// Clock and reset signals
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// Global control signals
|
||||
output logic empty_o,
|
||||
output logic full_o,
|
||||
input logic flush_all_i,
|
||||
|
||||
// Configuration signals
|
||||
// Timer threshold
|
||||
input wbuf_timecnt_t cfg_threshold_i,
|
||||
// Reset timer on write
|
||||
input logic cfg_reset_timecnt_on_write_i,
|
||||
// Sequentialize write-after-write hazards
|
||||
input logic cfg_sequential_waw_i,
|
||||
// Inhibit write coalescing
|
||||
input logic cfg_inhibit_write_coalescing_i,
|
||||
|
||||
// Write interface
|
||||
input logic write_i,
|
||||
output logic write_ready_o,
|
||||
input wbuf_addr_t write_addr_i,
|
||||
input wbuf_data_t write_data_i,
|
||||
input wbuf_be_t write_be_i, // byte-enable
|
||||
input logic write_uc_i, // uncacheable write
|
||||
|
||||
// Read hit interface
|
||||
input wbuf_addr_t read_addr_i,
|
||||
output logic read_hit_o,
|
||||
input logic read_flush_hit_i,
|
||||
|
||||
// Replay hit interface
|
||||
input wbuf_addr_t replay_addr_i,
|
||||
input logic replay_is_read_i,
|
||||
output logic replay_open_hit_o,
|
||||
output logic replay_pend_hit_o,
|
||||
output logic replay_sent_hit_o,
|
||||
output logic replay_not_ready_o,
|
||||
|
||||
// Send interface
|
||||
input logic send_meta_ready_i,
|
||||
output logic send_meta_valid_o,
|
||||
output wbuf_addr_t send_addr_o,
|
||||
output wbuf_dir_ptr_t send_id_o,
|
||||
output logic send_uc_o,
|
||||
|
||||
input logic send_data_ready_i,
|
||||
output logic send_data_valid_o,
|
||||
output wbuf_addr_t send_data_tag_o,
|
||||
output wbuf_data_buf_t send_data_o,
|
||||
output wbuf_be_buf_t send_be_o,
|
||||
|
||||
// Acknowledge interface
|
||||
input logic ack_i,
|
||||
input wbuf_dir_ptr_t ack_id_i,
|
||||
input logic ack_error_i
|
||||
);
|
||||
// }}}
|
||||
|
||||
// Definition of constants, types and functions
|
||||
// {{{
|
||||
// NOTE(review): presumably the depth of the FIFO on the send interface; its use is not visible here - confirm
localparam int WBUF_SEND_FIFO_DEPTH = WBUF_DATA_ENTRIES;
|
||||
|
||||
// 32-bit unsigned integer helper type
typedef logic unsigned [31:0] wbuf_uint;
|
||||
|
||||
// State of one write-buffer directory slot
typedef enum logic [1:0] {
|
||||
WBUF_FREE = 2'b00, // unused/free slot
|
||||
WBUF_OPEN = 2'b01, // there are pending writes in this slot
|
||||
WBUF_PEND = 2'b10, // the slot is waiting to be sent
|
||||
WBUF_SENT = 2'b11 // the slot is sent and waits for the memory acknowledge
|
||||
} wbuf_state_e;
|
||||
|
||||
// Directory entry: metadata of one write-buffer slot
typedef struct packed {
|
||||
wbuf_data_ptr_t ptr; // index of the data entry used by this slot (set from the data free pointer on allocation)
|
||||
wbuf_timecnt_t cnt; // time counter: cleared on allocation, compared against cfg_threshold_i - 1
|
||||
wbuf_tag_t tag; // upper address bits of the write (write_addr_i above WBUF_OFFSET_WIDTH)
|
||||
logic uc; // copy of write_uc_i taken on allocation
|
||||
} wbuf_dir_entry_t;
|
||||
|
||||
// Data entry: one line of write data with its per-byte enables
typedef struct packed {
|
||||
wbuf_data_buf_t data; // WBUF_WORDS words of WBUF_WORD_WIDTH bits
|
||||
wbuf_be_buf_t be; // one byte-enable bit per data byte
|
||||
} wbuf_data_entry_t;
|
||||
|
||||
// Information kept for the data part of a send
typedef struct packed {
|
||||
wbuf_data_ptr_t send_data_ptr; // data entry being sent; becomes the data free pointer once the data is accepted
|
||||
wbuf_tag_t send_data_tag;
|
||||
} wbuf_send_data_t;
|
||||
|
||||
// Information kept for the metadata part of a send
// NOTE(review): field meanings inferred from their names and types - confirm against the send logic
typedef struct packed {
|
||||
wbuf_tag_t send_meta_tag;
|
||||
wbuf_dir_ptr_t send_meta_id;
|
||||
logic send_meta_uc;
|
||||
} wbuf_send_meta_t;
|
||||
|
||||
// Circular search in the directory state vector.
//
// Starting at the slot right after curr_ptr and wrapping around, returns the
// index of the first slot whose state equals `state`. The slot curr_ptr
// itself is the last one examined. When no slot matches, curr_ptr is
// returned unchanged.
function automatic wbuf_dir_ptr_t wbuf_dir_find_next(
        input wbuf_dir_ptr_t                      curr_ptr,
        input wbuf_state_e [WBUF_DIR_ENTRIES-1:0] dir_state,
        input wbuf_state_e                        state);
    automatic wbuf_dir_ptr_t candidate;
    // step is the distance from curr_ptr, from 1 up to a full turn
    for (int unsigned step = 1; step <= WBUF_DIR_ENTRIES; step++) begin
        candidate = wbuf_dir_ptr_t'((int'(curr_ptr) + step) % WBUF_DIR_ENTRIES);
        if (dir_state[candidate] == state) begin
            return candidate;
        end
    end
    return curr_ptr;
endfunction
|
||||
|
||||
// Circular search in the data valid vector.
//
// Starting at the entry right after curr_ptr and wrapping around, returns
// the index of the first entry whose valid bit equals `state`. The entry
// curr_ptr itself is the last one examined. When no entry matches, curr_ptr
// is returned unchanged.
function automatic wbuf_data_ptr_t wbuf_data_find_next(
        input wbuf_data_ptr_t               curr_ptr,
        input logic [WBUF_DATA_ENTRIES-1:0] data_valid,
        input logic                         state);
    automatic wbuf_data_ptr_t candidate;
    // step is the distance from curr_ptr, from 1 up to a full turn
    for (int unsigned step = 1; step <= WBUF_DATA_ENTRIES; step++) begin
        candidate = wbuf_data_ptr_t'((int'(curr_ptr) + step) % WBUF_DATA_ENTRIES);
        if (data_valid[candidate] == state) begin
            return candidate;
        end
    end
    return curr_ptr;
endfunction
|
||||
|
||||
// Merge a new write into the contents of a write-buffer line.
//
// For every byte, the new data is taken when its bit in wbuf_new_be is set;
// otherwise the old data is kept. The resulting byte-enable is the bitwise
// OR of the old and new byte-enables.
function automatic void wbuf_data_write(
        output wbuf_data_buf_t wbuf_ret_data,
        output wbuf_be_buf_t   wbuf_ret_be,
        input  wbuf_data_buf_t wbuf_old_data,
        input  wbuf_be_buf_t   wbuf_old_be,
        input  wbuf_data_buf_t wbuf_new_data,
        input  wbuf_be_buf_t   wbuf_new_be);
    // byte-enables of the whole line at once
    wbuf_ret_be = wbuf_old_be | wbuf_new_be;

    // per-byte selection between new and old data
    for (int unsigned word = 0; word < WBUF_WORDS; word++) begin
        for (int unsigned lane = 0; lane < WBUF_WORD_WIDTH/8; lane++) begin
            wbuf_ret_data[word][8*lane +: 8] = wbuf_new_be[word][lane] ?
                    wbuf_new_data[word][8*lane +: 8] :
                    wbuf_old_data[word][8*lane +: 8];
        end
    end
endfunction
|
||||
|
||||
// Returns the WBUF_READ_MATCH_WIDTH most significant bits of a tag
function automatic wbuf_match_t wbuf_tag_to_match_addr(wbuf_tag_t tag);
    wbuf_tag_to_match_addr =
            tag[WBUF_TAG_WIDTH - 1:WBUF_TAG_WIDTH - WBUF_READ_MATCH_WIDTH];
endfunction
|
||||
// }}}
|
||||
|
||||
// Definition of internal wires and registers
|
||||
// {{{
|
||||
wbuf_state_e [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_state_q, wbuf_dir_state_d;
|
||||
wbuf_dir_entry_t [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_q, wbuf_dir_d;
|
||||
logic [WBUF_DATA_ENTRIES-1:0] wbuf_data_valid_q, wbuf_data_valid_d;
|
||||
wbuf_data_entry_t [WBUF_DATA_ENTRIES-1:0] wbuf_data_q, wbuf_data_d;
|
||||
|
||||
wbuf_dir_ptr_t wbuf_dir_free_ptr_q, wbuf_dir_free_ptr_d;
|
||||
logic wbuf_dir_free;
|
||||
wbuf_dir_ptr_t wbuf_dir_send_ptr_q, wbuf_dir_send_ptr_d;
|
||||
wbuf_data_ptr_t wbuf_data_free_ptr_q, wbuf_data_free_ptr_d;
|
||||
logic wbuf_data_free;
|
||||
|
||||
logic wbuf_write_free;
|
||||
logic wbuf_write_hit_open;
|
||||
logic wbuf_write_hit_pend;
|
||||
logic wbuf_write_hit_sent;
|
||||
wbuf_dir_ptr_t wbuf_write_hit_open_dir_ptr;
|
||||
wbuf_dir_ptr_t wbuf_write_hit_pend_dir_ptr;
|
||||
|
||||
logic send_meta_valid;
|
||||
logic send_meta_ready;
|
||||
wbuf_send_meta_t send_meta_wdata, send_meta_rdata;
|
||||
|
||||
logic send_data_wok;
|
||||
logic send_data_w;
|
||||
wbuf_send_data_t send_data_d;
|
||||
wbuf_send_data_t send_data_q;
|
||||
|
||||
wbuf_tag_t write_tag;
|
||||
wbuf_data_buf_t write_data;
|
||||
wbuf_be_buf_t write_be;
|
||||
|
||||
logic [WBUF_DIR_ENTRIES-1:0] replay_match;
|
||||
logic [WBUF_DIR_ENTRIES-1:0] replay_open_hit;
|
||||
logic [WBUF_DIR_ENTRIES-1:0] replay_pend_hit;
|
||||
logic [WBUF_DIR_ENTRIES-1:0] replay_sent_hit;
|
||||
|
||||
genvar gen_i;
|
||||
// }}}
|
||||
|
||||
// Global control signals
|
||||
// {{{
|
||||
always_comb
begin : empty_comb
    // one flag per directory slot: set when the slot is in WBUF_FREE
    automatic logic [WBUF_DIR_ENTRIES-1:0] slot_is_free;

    for (int unsigned slot = 0; slot < WBUF_DIR_ENTRIES; slot++) begin
        slot_is_free[slot] = (wbuf_dir_state_q[slot] == WBUF_FREE);
    end

    // empty when every slot is free
    empty_o = &slot_is_free;
end
|
||||
|
||||
always_comb
begin : full_comb
    // one flag per directory slot: set when the slot is not in WBUF_FREE
    automatic logic [WBUF_DIR_ENTRIES-1:0] slot_is_used;

    for (int unsigned slot = 0; slot < WBUF_DIR_ENTRIES; slot++) begin
        slot_is_used[slot] = (wbuf_dir_state_q[slot] != WBUF_FREE);
    end

    // full when no slot is free
    full_o = &slot_is_used;
end
|
||||
// }}}
|
||||
|
||||
// Write control
|
||||
// {{{
|
||||
// Tag of the incoming write: address bits above the line offset
assign write_tag = write_addr_i[WBUF_PA_WIDTH-1:WBUF_OFFSET_WIDTH];

always_comb
begin : wbuf_write_data_comb
    // The incoming word is copied into every word position of the line;
    // the byte-enables decide which position is actually written.
    write_data = {WBUF_WORDS{write_data_i}};
end
|
||||
|
||||
generate
    // The line offset is wider than the word offset: the word-index bits of
    // the write address select the single word that receives write_be_i.
    if (WBUF_OFFSET_WIDTH > WBUF_WORD_OFFSET) begin : wbuf_write_be_gt_gen
        always_comb
        begin : wbuf_write_be_comb
            automatic int target_word;

            target_word = int'(write_addr_i[WBUF_OFFSET_WIDTH-1:WBUF_WORD_OFFSET]);

            for (int unsigned word = 0; word < WBUF_WORDS; word++) begin
                // byte-enables are cleared for all words but the target one
                write_be[word] = '0;
                if (word == target_word) begin
                    write_be[word] = write_be_i;
                end
            end
        end

    // Otherwise every word position receives write_be_i
    end else begin : wbuf_write_be_le_gen
        always_comb
        begin : wbuf_write_be_comb
            write_be = {WBUF_WORDS{write_be_i}};
        end
    end
endgenerate
|
||||
|
||||
always_comb
begin : wbuf_free_comb
    // Directory free pointer, by priority:
    //   1. an acknowledge makes the acknowledged slot the next free one
    //   2. a write allocating a new slot moves the pointer to the next
    //      slot found in WBUF_FREE state
    //   3. otherwise the pointer keeps its value
    if (ack_i) begin
        wbuf_dir_free_ptr_d = ack_id_i;
    end else if (write_i && wbuf_write_free) begin
        wbuf_dir_free_ptr_d = wbuf_dir_find_next(wbuf_dir_free_ptr_q, wbuf_dir_state_q, WBUF_FREE);
    end else begin
        wbuf_dir_free_ptr_d = wbuf_dir_free_ptr_q;
    end

    // Data free pointer, by priority:
    //   1. a data transfer accepted on the send interface makes the sent
    //      data entry the next free one
    //   2. a write allocating a new slot moves the pointer to the next
    //      data entry whose valid bit is clear
    //   3. otherwise the pointer keeps its value
    if (send_data_valid_o && send_data_ready_i) begin
        wbuf_data_free_ptr_d = send_data_q.send_data_ptr;
    end else if (write_i && wbuf_write_free) begin
        wbuf_data_free_ptr_d = wbuf_data_find_next(wbuf_data_free_ptr_q, wbuf_data_valid_q, 1'b0);
    end else begin
        wbuf_data_free_ptr_d = wbuf_data_free_ptr_q;
    end
end

// The free pointers may point to an entry that is in use: these flags tell
// whether the pointed directory slot / data entry is really available
assign wbuf_data_free = ~wbuf_data_valid_q[wbuf_data_free_ptr_q],
       wbuf_dir_free  = (wbuf_dir_state_q[wbuf_dir_free_ptr_q] == WBUF_FREE);
|
||||
|
||||
always_comb
begin : wbuf_write_hit_comb
    // Defaults: no hit, pointers at slot 0
    wbuf_write_hit_open_dir_ptr = '0;
    wbuf_write_hit_pend_dir_ptr = '0;
    wbuf_write_hit_open         = 1'b0;
    wbuf_write_hit_pend         = 1'b0;
    wbuf_write_hit_sent         = 1'b0;

    // Compare the tag of the incoming write with the tag of every slot and
    // report the hit according to the state of the matching slot. If several
    // slots in the same state match, the pointer of the highest index wins.
    for (int unsigned slot = 0; slot < WBUF_DIR_ENTRIES; slot++) begin
        if (wbuf_dir_q[slot].tag == write_tag) begin
            unique case (wbuf_dir_state_q[slot])
                WBUF_SENT: begin
                    wbuf_write_hit_sent = 1'b1;
                end
                WBUF_PEND: begin
                    wbuf_write_hit_pend_dir_ptr = wbuf_dir_ptr_t'(slot);
                    wbuf_write_hit_pend         = 1'b1;
                end
                WBUF_OPEN: begin
                    wbuf_write_hit_open_dir_ptr = wbuf_dir_ptr_t'(slot);
                    wbuf_write_hit_open         = 1'b1;
                end
                default: begin
                    /* free slot: its tag is ignored */
                end
            endcase
        end
    end
end
|
||||
|
||||
// Check if there is a match between the read address and the tag of one
|
||||
// of the used slots in the write buffer directory
|
||||
always_comb
begin : read_hit_comb
    automatic logic [WBUF_DIR_ENTRIES-1:0] read_hit;
    automatic wbuf_match_t                 read_tag;
    automatic wbuf_addr_t                  slot_addr;

    // upper WBUF_READ_MATCH_WIDTH bits of the read address
    read_tag = read_addr_i[WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH];
    read_hit = '0;

    for (int unsigned slot = 0; slot < WBUF_DIR_ENTRIES; slot++) begin
        // address of the line held by this slot, rebuilt from its tag
        slot_addr = wbuf_addr_t'(wbuf_dir_q[slot].tag) << WBUF_OFFSET_WIDTH;

        // only slots that are in use take part in the comparison
        unique case (wbuf_dir_state_q[slot])
            WBUF_OPEN, WBUF_PEND, WBUF_SENT: begin
                read_hit[slot] =
                        (read_tag == slot_addr[WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]);
            end
            default: begin
                /* free slot: no hit */
            end
        endcase
    end

    // hit when at least one slot matches
    read_hit_o = |read_hit;
end
|
||||
|
||||
// Check if there is a match between the replay address and the tag of one
|
||||
// of the used slots in the write buffer directory
|
||||
generate
    for (gen_i = 0; gen_i < WBUF_DIR_ENTRIES; gen_i++) begin : replay_match_gen
        // Address comparison for slot gen_i:
        //  - replayed read : only the WBUF_READ_MATCH_WIDTH upper bits are
        //                    compared (address block tag, e.g. cache line)
        //  - replayed write: the complete write-buffer tag is compared
        assign replay_match[gen_i] = replay_is_read_i ?
                (wbuf_tag_to_match_addr(wbuf_dir_q[gen_i].tag) ==
                 replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]) :
                (wbuf_dir_q[gen_i].tag ==
                 replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_TAG_WIDTH]);

        // The match is then qualified by the state of the slot
        assign replay_sent_hit[gen_i] =
                replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_SENT);
        assign replay_pend_hit[gen_i] =
                replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_PEND);
        assign replay_open_hit[gen_i] =
                replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_OPEN);
    end
endgenerate

// A hit is reported when at least one slot hits in the given state
assign replay_sent_hit_o = |replay_sent_hit;
assign replay_pend_hit_o = |replay_pend_hit;
assign replay_open_hit_o = |replay_open_hit;
|
||||
|
||||
always_comb
begin : replay_wbuf_not_ready_comb
    if (replay_pend_hit_o) begin
        // hit on a slot waiting to be sent
        replay_not_ready_o = 1'b1;
    end else if (replay_sent_hit_o && cfg_sequential_waw_i) begin
        // hit on a sent slot while write-after-write hazards are sequentialized
        replay_not_ready_o = 1'b1;
    end else if (!replay_open_hit_o && !(wbuf_dir_free && wbuf_data_free)) begin
        // no open slot hit, and no free directory slot or data entry
        replay_not_ready_o = 1'b1;
    end else begin
        replay_not_ready_o = 1'b0;
    end
end
|
||||
|
||||
// A write can allocate a new slot when it hits neither an open nor a pending
// slot, does not hit a sent slot while write-after-write hazards are
// sequentialized, and both a directory slot and a data entry are free.
assign wbuf_write_free =
        ~wbuf_write_hit_open
      & ~wbuf_write_hit_pend
      & ~(cfg_sequential_waw_i & wbuf_write_hit_sent)
      &  wbuf_data_free
      &  wbuf_dir_free;

// The write is accepted when it can allocate a new slot, or when it hits an
// open or pending slot and write coalescing is not inhibited.
assign write_ready_o =
        (~cfg_inhibit_write_coalescing_i
            & (wbuf_write_hit_pend | wbuf_write_hit_open))
      | wbuf_write_free;
|
||||
// }}}
|
||||
|
||||
// Update control
|
||||
// {{{
|
||||
always_comb
|
||||
begin : wbuf_update_comb
|
||||
automatic bit timeout;
|
||||
automatic bit write_hit;
|
||||
automatic bit read_hit;
|
||||
automatic bit match_open_ptr;
|
||||
automatic bit match_pend_ptr;
|
||||
automatic bit match_free;
|
||||
automatic bit send;
|
||||
|
||||
timeout = 1'b0;
|
||||
write_hit = 1'b0;
|
||||
read_hit = 1'b0;
|
||||
match_open_ptr = 1'b0;
|
||||
match_pend_ptr = 1'b0;
|
||||
match_free = 1'b0;
|
||||
send = 1'b0;
|
||||
|
||||
wbuf_dir_state_d = wbuf_dir_state_q;
|
||||
wbuf_dir_d = wbuf_dir_q;
|
||||
wbuf_data_d = wbuf_data_q;
|
||||
|
||||
send_data_w = 1'b0;
|
||||
send_meta_valid = 1'b0;
|
||||
|
||||
for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
|
||||
case (wbuf_dir_state_q[i])
|
||||
WBUF_FREE: begin
|
||||
match_free = wbuf_write_free && (i == int'(wbuf_dir_free_ptr_q));
|
||||
|
||||
if (write_i && match_free) begin
|
||||
send = (cfg_threshold_i == 0)
|
||||
| write_uc_i
|
||||
| flush_all_i
|
||||
| cfg_inhibit_write_coalescing_i;
|
||||
|
||||
wbuf_dir_state_d[i] = send ? WBUF_PEND : WBUF_OPEN;
|
||||
wbuf_dir_d[i].tag = write_tag;
|
||||
wbuf_dir_d[i].cnt = 0;
|
||||
wbuf_dir_d[i].ptr = wbuf_data_free_ptr_q;
|
||||
wbuf_dir_d[i].uc = write_uc_i;
|
||||
|
||||
wbuf_data_write(
|
||||
wbuf_data_d[wbuf_data_free_ptr_q].data,
|
||||
wbuf_data_d[wbuf_data_free_ptr_q].be,
|
||||
'0,
|
||||
'0,
|
||||
write_data,
|
||||
write_be
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
WBUF_OPEN: begin
|
||||
match_open_ptr = (i == int'(wbuf_write_hit_open_dir_ptr));
|
||||
timeout = (wbuf_dir_q[i].cnt == (cfg_threshold_i - 1));
|
||||
read_hit = read_flush_hit_i & wbuf_write_hit_open & match_open_ptr;
|
||||
write_hit = write_i
|
||||
& wbuf_write_hit_open
|
||||
& match_open_ptr
|
||||
& ~cfg_inhibit_write_coalescing_i;
|
||||
|
||||
if (!flush_all_i) begin
|
||||
if (write_hit && cfg_reset_timecnt_on_write_i) begin
|
||||
timeout = 1'b0;
|
||||
wbuf_dir_d[i].cnt = 0;
|
||||
end else if (!timeout) begin
|
||||
wbuf_dir_d[i].cnt = wbuf_dir_q[i].cnt + 1;
|
||||
end
|
||||
|
||||
if (read_hit | timeout | cfg_inhibit_write_coalescing_i) begin
|
||||
wbuf_dir_state_d[i] = WBUF_PEND;
|
||||
end
|
||||
end else begin
|
||||
wbuf_dir_state_d[i] = WBUF_PEND;
|
||||
end
|
||||
|
||||
if (write_hit) begin
|
||||
wbuf_data_write(
|
||||
wbuf_data_d[wbuf_dir_q[i].ptr].data,
|
||||
wbuf_data_d[wbuf_dir_q[i].ptr].be,
|
||||
wbuf_data_q[wbuf_dir_q[i].ptr].data,
|
||||
wbuf_data_q[wbuf_dir_q[i].ptr].be,
|
||||
write_data,
|
||||
write_be
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
WBUF_PEND: begin
|
||||
match_pend_ptr = (i == int'(wbuf_write_hit_pend_dir_ptr));
|
||||
write_hit = write_i
|
||||
& wbuf_write_hit_pend
|
||||
& match_pend_ptr
|
||||
& ~cfg_inhibit_write_coalescing_i;
|
||||
|
||||
if (write_hit) begin
|
||||
wbuf_data_write(
|
||||
wbuf_data_d[wbuf_dir_q[i].ptr].data,
|
||||
wbuf_data_d[wbuf_dir_q[i].ptr].be,
|
||||
wbuf_data_q[wbuf_dir_q[i].ptr].data,
|
||||
wbuf_data_q[wbuf_dir_q[i].ptr].be,
|
||||
write_data,
|
||||
write_be
|
||||
);
|
||||
end
|
||||
|
||||
if (i == int'(wbuf_dir_send_ptr_q)) begin
|
||||
send_data_w = send_meta_ready;
|
||||
send_meta_valid = send_data_wok;
|
||||
if (send_meta_ready && send_data_wok) begin
|
||||
wbuf_dir_state_d[i] = WBUF_SENT;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
WBUF_SENT: begin
|
||||
if (ack_i && (i == int'(ack_id_i))) begin
|
||||
wbuf_dir_state_d[i] = WBUF_FREE;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
always_comb
|
||||
begin : wbuf_data_valid_comb
|
||||
wbuf_data_valid_d = wbuf_data_valid_q;
|
||||
|
||||
// allocate a free data buffer on new write
|
||||
if (write_i && wbuf_write_free) begin
|
||||
wbuf_data_valid_d[wbuf_data_free_ptr_q] = 1'b1;
|
||||
end
|
||||
|
||||
// de-allocate a data buffer as soon as it is sent
|
||||
if (send_data_valid_o && send_data_ready_i) begin
|
||||
wbuf_data_valid_d[send_data_q.send_data_ptr] = 1'b0;
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Send control
|
||||
// {{{
|
||||
// Data channel
|
||||
hpdcache_fifo_reg #(
|
||||
.FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH),
|
||||
.FEEDTHROUGH (WBUF_SEND_FEEDTHROUGH),
|
||||
.fifo_data_t (wbuf_send_data_t)
|
||||
) send_data_ptr_fifo_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.w_i (send_data_w),
|
||||
.wok_o (send_data_wok),
|
||||
.wdata_i (send_data_d),
|
||||
.r_i (send_data_ready_i),
|
||||
.rok_o (send_data_valid_o),
|
||||
.rdata_o (send_data_q)
|
||||
);
|
||||
|
||||
assign send_data_d.send_data_ptr = wbuf_dir_q[wbuf_dir_send_ptr_q].ptr,
|
||||
send_data_d.send_data_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag;
|
||||
|
||||
assign send_data_tag_o = wbuf_addr_t'(send_data_q.send_data_tag),
|
||||
send_data_o = wbuf_data_q[send_data_q.send_data_ptr].data,
|
||||
send_be_o = wbuf_data_q[send_data_q.send_data_ptr].be;
|
||||
|
||||
// Meta-data channel
|
||||
hpdcache_fifo_reg #(
|
||||
.FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH),
|
||||
.FEEDTHROUGH (WBUF_SEND_FEEDTHROUGH),
|
||||
.fifo_data_t (wbuf_send_meta_t)
|
||||
) send_meta_fifo_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.w_i (send_meta_valid),
|
||||
.wok_o (send_meta_ready),
|
||||
.wdata_i (send_meta_wdata),
|
||||
.r_i (send_meta_ready_i),
|
||||
.rok_o (send_meta_valid_o),
|
||||
.rdata_o (send_meta_rdata)
|
||||
);
|
||||
|
||||
assign send_meta_wdata.send_meta_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag,
|
||||
send_meta_wdata.send_meta_id = wbuf_dir_send_ptr_q,
|
||||
send_meta_wdata.send_meta_uc = wbuf_dir_q[wbuf_dir_send_ptr_q].uc;
|
||||
|
||||
assign send_addr_o = { send_meta_rdata.send_meta_tag, {WBUF_OFFSET_WIDTH{1'b0}} },
|
||||
send_id_o = send_meta_rdata.send_meta_id,
|
||||
send_uc_o = send_meta_rdata.send_meta_uc;
|
||||
|
||||
// Send pointer
|
||||
always_comb
|
||||
begin : wbuf_send_comb
|
||||
wbuf_dir_send_ptr_d = wbuf_dir_find_next(wbuf_dir_send_ptr_q, wbuf_dir_state_q, WBUF_PEND);
|
||||
if (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND) begin
|
||||
if (!send_meta_valid || !send_meta_ready) begin
|
||||
wbuf_dir_send_ptr_d = wbuf_dir_send_ptr_q;
|
||||
end
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Internal state assignment
|
||||
// {{{
|
||||
always_ff @(posedge clk_i) wbuf_data_q <= wbuf_data_d;
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni)
|
||||
begin : wbuf_state_ff
|
||||
if (!rst_ni) begin
|
||||
wbuf_dir_q <= '0;
|
||||
wbuf_dir_state_q <= {WBUF_DIR_ENTRIES{WBUF_FREE}};
|
||||
wbuf_data_valid_q <= '0;
|
||||
wbuf_dir_free_ptr_q <= 0;
|
||||
wbuf_dir_send_ptr_q <= 0;
|
||||
wbuf_data_free_ptr_q <= 0;
|
||||
end else begin
|
||||
wbuf_dir_q <= wbuf_dir_d;
|
||||
wbuf_dir_state_q <= wbuf_dir_state_d;
|
||||
wbuf_data_valid_q <= wbuf_data_valid_d;
|
||||
wbuf_dir_free_ptr_q <= wbuf_dir_free_ptr_d;
|
||||
wbuf_dir_send_ptr_q <= wbuf_dir_send_ptr_d;
|
||||
wbuf_data_free_ptr_q <= wbuf_data_free_ptr_d;
|
||||
end
|
||||
end
|
||||
// }}}
|
||||
|
||||
// Assertions
|
||||
// {{{
|
||||
// pragma translate_off
|
||||
initial assert(WBUF_WORDS inside {1, 2, 4, 8, 16}) else
|
||||
$error("WBUF: width of data buffers must be a power of 2");
|
||||
ack_sent_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
(ack_i -> (wbuf_dir_state_q[ack_id_i] == WBUF_SENT))) else
|
||||
$error("WBUF: acknowledging a not SENT slot");
|
||||
send_pend_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
(send_meta_valid -> (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND))) else
|
||||
$error("WBUF: sending a not PEND slot");
|
||||
send_valid_data_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
|
||||
(send_data_valid_o -> (wbuf_data_valid_q[send_data_q.send_data_ptr] == 1'b1))) else
|
||||
$error("WBUF: sending a not valid data");
|
||||
// pragma translate_on
|
||||
// }}}
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,228 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : HPDcache Write Buffer Wrapper
|
||||
* History :
|
||||
*/
|
||||
/* This wrapper adapts the send interface of the write buffer to the memory
|
||||
* interface of the cache.
|
||||
*/
|
||||
module hpdcache_wbuf_wrapper
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    //  Width (bits) of the transaction identifier carried by mem_req_write_o
    parameter int  HPDcacheMemIdWidth    = 8,
    //  Width (bits) of the data bus of the memory interface
    parameter int  HPDcacheMemDataWidth  = 512,
    //  Types of the memory interface channels (overridden by the instantiating module)
    parameter type hpdcache_mem_req_t    = logic,
    parameter type hpdcache_mem_req_w_t  = logic,
    parameter type hpdcache_mem_resp_w_t = logic,

    localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
)
    //  }}}
    //  Ports
    //  {{{
(
    //  Clock and reset signals
    input  logic                 clk_i,
    input  logic                 rst_ni,

    //  Global control signals
    output logic                 empty_o,
    output logic                 full_o,
    input  logic                 flush_all_i,

    //  Configuration signals
    //    Timer threshold
    input  wbuf_timecnt_t        cfg_threshold_i,
    //    Reset timer on write
    input  logic                 cfg_reset_timecnt_on_write_i,
    //    Sequentialize write-after-write hazards
    input  logic                 cfg_sequential_waw_i,
    //    Inhibit write coalescing
    input  logic                 cfg_inhibit_write_coalescing_i,

    //  Write interface
    input  logic                 write_i,
    output logic                 write_ready_o,
    input  wbuf_addr_t           write_addr_i,
    input  wbuf_data_t           write_data_i,
    input  wbuf_be_t             write_be_i, // byte-enable
    input  logic                 write_uc_i, // uncacheable write

    //  Read hit interface
    input  wbuf_addr_t           read_addr_i,
    output logic                 read_hit_o,
    input  logic                 read_flush_hit_i,

    //  Replay hit interface
    input  wbuf_addr_t           replay_addr_i,
    input  logic                 replay_is_read_i,
    output logic                 replay_open_hit_o,
    output logic                 replay_pend_hit_o,
    output logic                 replay_sent_hit_o,
    output logic                 replay_not_ready_o,

    //  Memory interface
    //    Write request (meta-data) channel
    input  logic                 mem_req_write_ready_i,
    output logic                 mem_req_write_valid_o,
    output hpdcache_mem_req_t    mem_req_write_o,

    //    Write data channel
    input  logic                 mem_req_write_data_ready_i,
    output logic                 mem_req_write_data_valid_o,
    output hpdcache_mem_req_w_t  mem_req_write_data_o,

    //    Write response channel
    output logic                 mem_resp_write_ready_o,
    input  logic                 mem_resp_write_valid_i,
    input  hpdcache_mem_resp_w_t mem_resp_write_i
);
    //  }}}

    //  Internal signals
    //  {{{
    //    Meta-data channel of the write buffer send interface
    wbuf_addr_t     send_addr;
    wbuf_dir_ptr_t  send_id;
    logic           send_uc;
    //    Data channel of the write buffer send interface
    wbuf_addr_t     send_data_tag;
    wbuf_data_buf_t send_data;
    wbuf_be_buf_t   send_be;
    //    Acknowledgement decoded from the memory write response
    wbuf_dir_ptr_t  ack_id;
    logic           ack_error;
    //  }}}

    //  Wrapped write buffer
    //  {{{
    hpdcache_wbuf #(
        .WBUF_DIR_ENTRIES               (HPDCACHE_WBUF_DIR_ENTRIES),
        .WBUF_DATA_ENTRIES              (HPDCACHE_WBUF_DATA_ENTRIES),
        .WBUF_WORD_WIDTH                (HPDCACHE_REQ_DATA_WIDTH),
        .WBUF_WORDS                     (HPDCACHE_WBUF_WORDS),
        .WBUF_PA_WIDTH                  (HPDCACHE_PA_WIDTH),
        .WBUF_TIMECNT_MAX               ((2**HPDCACHE_WBUF_TIMECNT_WIDTH) - 1),
        .WBUF_READ_MATCH_WIDTH          (HPDCACHE_NLINE_WIDTH),
        .WBUF_SEND_FEEDTHROUGH          (HPDCACHE_WBUF_SEND_FEEDTHROUGH)
    ) hpdcache_wbuf_i (
        .clk_i,
        .rst_ni,
        .empty_o,
        .full_o,
        .flush_all_i,
        .cfg_threshold_i,
        .cfg_reset_timecnt_on_write_i,
        .cfg_sequential_waw_i,
        .cfg_inhibit_write_coalescing_i,
        .write_i,
        .write_ready_o,
        .write_addr_i,
        .write_data_i,
        .write_be_i,
        .write_uc_i,
        .read_addr_i,
        .read_hit_o,
        .read_flush_hit_i,
        .replay_addr_i,
        .replay_is_read_i,
        .replay_open_hit_o,
        .replay_pend_hit_o,
        .replay_sent_hit_o,
        .replay_not_ready_o,
        .send_meta_ready_i              (mem_req_write_ready_i),
        .send_meta_valid_o              (mem_req_write_valid_o),
        .send_addr_o                    (send_addr),
        .send_id_o                      (send_id),
        .send_uc_o                      (send_uc),
        .send_data_ready_i              (mem_req_write_data_ready_i),
        .send_data_valid_o              (mem_req_write_data_valid_o),
        .send_data_tag_o                (send_data_tag),
        .send_data_o                    (send_data),
        .send_be_o                      (send_be),
        .ack_i                          (mem_resp_write_valid_i),
        .ack_id_i                       (ack_id),
        .ack_error_i                    (ack_error)
    );
    //  }}}

    //  Memory interface
    //  {{{
    //  Every write buffer transaction is a single-beat write (len = 0) of one
    //  write buffer entry. The directory pointer is used as transaction ID so
    //  that the response can be routed back to the corresponding entry.
    //  The atomic field is driven with a constant (the command is a plain write).
    assign mem_req_write_o.mem_req_addr      = send_addr,
           mem_req_write_o.mem_req_len       = 0,
           mem_req_write_o.mem_req_size      = get_hpdcache_mem_size(HPDCACHE_WBUF_DATA_WIDTH/8),
           mem_req_write_o.mem_req_id        = hpdcache_mem_id_t'(send_id),
           mem_req_write_o.mem_req_command   = HPDCACHE_MEM_WRITE,
           mem_req_write_o.mem_req_atomic    = HPDCACHE_MEM_ATOMIC_ADD,
           mem_req_write_o.mem_req_cacheable = ~send_uc;

    generate
        //  Number of write buffer data words that fit in one memory data word
        localparam int unsigned WBUF_MEM_DATA_RATIO = HPDcacheMemDataWidth/HPDCACHE_WBUF_DATA_WIDTH;
        localparam int unsigned WBUF_MEM_DATA_WORD_INDEX_WIDTH = $clog2(WBUF_MEM_DATA_RATIO);

        //  Single-beat transfers: the only data beat is always the last one
        assign mem_req_write_data_o.mem_req_w_last = 1'b1;

        if (WBUF_MEM_DATA_RATIO > 1)
        begin : wbuf_data_upsizing_gen
            //  One byte-enable word per write buffer data word of the memory
            //  bus: WBUF_MEM_DATA_RATIO words of HPDCACHE_WBUF_DATA_WIDTH/8 bits.
            //  The outer dimension is the word index so that the declaration
            //  matches the NOUTPUT x DATA_WIDTH shape configured on the demux.
            logic [WBUF_MEM_DATA_RATIO-1:0][HPDCACHE_WBUF_DATA_WIDTH/8-1:0] mem_req_be;

            //  demux send BE: the low-order bits of the data tag select the
            //  byte-enable word that receives send_be
            hpdcache_demux #(
                .NOUTPUT     (WBUF_MEM_DATA_RATIO),
                .DATA_WIDTH  (HPDCACHE_WBUF_DATA_WIDTH/8),
                .ONE_HOT_SEL (1'b0)
            ) mem_write_be_demux_i (
                .data_i      (send_be),
                .sel_i       (send_data_tag[0 +: WBUF_MEM_DATA_WORD_INDEX_WIDTH]),
                .data_o      (mem_req_be)
            );

            //  The data is replicated on every word of the memory bus; the
            //  byte-enables select the word that is actually written
            assign mem_req_write_data_o.mem_req_w_data = {WBUF_MEM_DATA_RATIO{send_data}},
                   mem_req_write_data_o.mem_req_w_be   = mem_req_be;

        end else if (WBUF_MEM_DATA_RATIO == 1)
        begin : wbuf_data_forwarding_gen
            //  Same width on both sides: forward data and byte-enables as is
            assign mem_req_write_data_o.mem_req_w_data = send_data,
                   mem_req_write_data_o.mem_req_w_be   = send_be;
        end

        //  Assertions
        //  {{{
        //  pragma translate_off
        initial assert(WBUF_MEM_DATA_RATIO > 0) else
                $error($sformatf("WBUF: data width of mem interface (%d) shall be g.e. to wbuf data width(%d)",
                                 HPDcacheMemDataWidth, HPDCACHE_WBUF_DATA_WIDTH));
        //  pragma translate_on
        //  }}}
    endgenerate

    //  Write responses are always accepted. The low-order bits of the response
    //  ID identify the acknowledged write buffer directory entry.
    assign mem_resp_write_ready_o = 1'b1,
           ack_id                 = mem_resp_write_i.mem_resp_w_id[0 +: HPDCACHE_WBUF_DIR_PTR_WIDTH],
           ack_error              = (mem_resp_write_i.mem_resp_w_error != HPDCACHE_MEM_RESP_OK);
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    //  The first argument of $fatal is the finish number (1 is the default)
    initial assert (HPDCACHE_WBUF_DIR_PTR_WIDTH <= HPDcacheMemIdWidth) else
            $fatal(1, "HPDcacheMemIdWidth is not wide enough to fit all possible write buffer transactions");
    //  pragma translate_on
    //  }}}

endmodule
|
||||
|
|
@ -0,0 +1,374 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Riccardo Alidori, Cesar Fuguet
|
||||
* Maintainers(s): Cesar Fuguet
|
||||
* Creation Date : June, 2021
|
||||
* Description : HPDcache Linear Hardware Memory Prefetcher.
|
||||
* History :
|
||||
*/
|
||||
module hwpf_stride
import hwpf_stride_pkg::*;
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    parameter int CACHE_LINE_BYTES = 64
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic                  clk_i,
    input  logic                  rst_ni,

    //  CSR
    //    Each *_set_i strobe loads the corresponding user-visible register
    input  logic                  csr_base_set_i,
    input  hwpf_stride_base_t     csr_base_i,
    input  logic                  csr_param_set_i,
    input  hwpf_stride_param_t    csr_param_i,
    input  logic                  csr_throttle_set_i,
    input  hwpf_stride_throttle_t csr_throttle_i,

    //    Current value of the user-visible registers
    output hwpf_stride_base_t     csr_base_o,
    output hwpf_stride_param_t    csr_param_o,
    output hwpf_stride_throttle_t csr_throttle_o,

    //  If high, the prefetcher is enabled and active
    output logic                  busy_o,

    //  Snooping
    //    Address to snoop on requests ports
    output hpdcache_nline_t       snoop_nline_o,
    //    If set to one, the snoop address matched one of the requests
    input  logic                  snoop_match_i,

    //  D-Cache interface
    output logic                  hpdcache_req_valid_o,
    input  logic                  hpdcache_req_ready_i,
    output hpdcache_req_t         hpdcache_req_o,
    input  logic                  hpdcache_rsp_valid_i,
    input  hpdcache_rsp_t         hpdcache_rsp_i
);
    //  }}}

    import hpdcache_pkg::hpdcache_req_addr_t;

    //  Definition of constants
    //  {{{
    localparam int STRIDE_WIDTH     = $bits(csr_param_i.stride);
    localparam int NBLOCKS_WIDTH    = $bits(csr_param_i.nblocks);
    localparam int NLINES_WIDTH     = $bits(csr_param_i.nlines);
    localparam int NWAIT_WIDTH      = $bits(csr_throttle_i.nwait);
    localparam int INFLIGHT_WIDTH   = $bits(csr_throttle_i.ninflight);
    localparam int NLINES_CNT_WIDTH = NLINES_WIDTH;
    //  }}}

    //  Internal registers and signals
    //  {{{
    //  FSM
    //    IDLE     : wait for the enable bit of the base register
    //    SNOOP    : wait for a snoop match on the base cacheline
    //    SEND_REQ : issue one prefetch request to the cache
    //    WAIT     : throttle (nwait cycles and/or inflight requests limit)
    //    DONE     : all requests issued, wait for the pending responses
    //    ABORT    : prefetcher disabled while running, drain pending responses
    enum {
        IDLE,
        SNOOP,
        SEND_REQ,
        WAIT,
        DONE,
        ABORT
    } state_d, state_q;

    logic [NBLOCKS_WIDTH-1:0]    nblocks_cnt_d, nblocks_cnt_q;
    logic [NLINES_CNT_WIDTH-1:0] nlines_cnt_d, nlines_cnt_q;
    logic [NWAIT_WIDTH-1:0]      nwait_cnt_d, nwait_cnt_q;
    logic [INFLIGHT_WIDTH-1:0]   inflight_cnt_d, inflight_cnt_q;
    logic                        inflight_inc, inflight_dec;

    //  csr_*_q are the user-visible registers; shadow_*_q are the working
    //  copies used by the FSM while a prefetch sequence is in progress
    hwpf_stride_base_t           csr_base_q;
    hwpf_stride_base_t           shadow_base_q, shadow_base_d;
    hwpf_stride_param_t          csr_param_q;
    hwpf_stride_param_t          shadow_param_q, shadow_param_d;
    hwpf_stride_throttle_t       csr_throttle_q;
    hwpf_stride_throttle_t       shadow_throttle_q, shadow_throttle_d;
    hpdcache_nline_t             request_nline_q, request_nline_d;

    hpdcache_set_t               hpdcache_req_set;
    hpdcache_tag_t               hpdcache_req_tag;

    logic                        csr_base_update;
    hpdcache_nline_t             increment_stride;
    logic                        is_inflight_max;

    //  Default assignment
    //    Distance (in cachelines) between the first lines of two consecutive blocks
    assign increment_stride = hpdcache_nline_t'(shadow_param_q.stride) + 1'b1;
    //    Every response from the cache retires one inflight request
    assign inflight_dec     = hpdcache_rsp_valid_i;
    assign snoop_nline_o    = shadow_base_q.base_cline;
    //    A ninflight value of zero disables the limit on inflight requests
    assign is_inflight_max  = ( shadow_throttle_q.ninflight == '0 ) ?
                              1'b0 : ( inflight_cnt_q >= shadow_throttle_q.ninflight );
    assign csr_base_o       = csr_base_q;
    assign csr_param_o      = csr_param_q;
    assign csr_throttle_o   = csr_throttle_q;
    //  }}}

    //  Dcache outputs
    //  {{{
    assign hpdcache_req_set = request_nline_q[0 +: HPDCACHE_SET_WIDTH],
           hpdcache_req_tag = request_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];

    assign hpdcache_req_o.addr_offset     = { hpdcache_req_set, {HPDCACHE_OFFSET_WIDTH{1'b0}} },
           hpdcache_req_o.wdata           = '0,
           hpdcache_req_o.op              = HPDCACHE_REQ_CMO,
           hpdcache_req_o.be              = '1,
           hpdcache_req_o.size            = HPDCACHE_REQ_CMO_PREFETCH,
           hpdcache_req_o.sid             = '0, // this is set when connecting to the dcache
           hpdcache_req_o.tid             = '0, // this is set by the wrapper of the prefetcher
           hpdcache_req_o.need_rsp        = 1'b1,
           hpdcache_req_o.phys_indexed    = 1'b1,
           hpdcache_req_o.addr_tag        = hpdcache_req_tag,
           hpdcache_req_o.pma.uncacheable = 1'b0,
           hpdcache_req_o.pma.io          = 1'b0;
    //  }}}

    //  Set state of internal registers
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin
        if (!rst_ni) begin
            csr_base_q        <= '0;
            csr_param_q       <= '0;
            //  The throttle register is reset like the other CSRs; otherwise it
            //  stays unknown until written, is visible on csr_throttle_o, and is
            //  copied into the shadow register when the prefetcher is armed.
            csr_throttle_q    <= '0;
            shadow_base_q     <= '0;
            shadow_param_q    <= '0;
            shadow_throttle_q <= '0;
            request_nline_q   <= '0;
            state_q           <= IDLE;
        end else begin
            //  A write from the CSR interface has priority over the hardware
            //  update of the base register
            if (csr_base_set_i)       csr_base_q <= csr_base_i;
            else if (csr_base_update) csr_base_q <= shadow_base_d;
            if (csr_param_set_i)      csr_param_q <= csr_param_i;
            if (csr_throttle_set_i)   csr_throttle_q <= csr_throttle_i;
            shadow_base_q     <= shadow_base_d;
            shadow_param_q    <= shadow_param_d;
            shadow_throttle_q <= shadow_throttle_d;
            request_nline_q   <= request_nline_d;
            state_q           <= state_d;
        end
    end
    //  }}}

    //  Update internal counters
    //  {{{
    always_comb begin : inflight_cnt
        inflight_cnt_d = inflight_cnt_q;

        //  Every time we send a dcache request, increment the counter
        if ( inflight_inc ) begin
            inflight_cnt_d++;
        end

        //  Every time we got a response from the cache, decrement the counter
        if ( inflight_dec && ( inflight_cnt_q > 0 )) begin
            inflight_cnt_d--;
        end
    end

    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            nblocks_cnt_q  <= '0;
            nlines_cnt_q   <= '0;
            nwait_cnt_q    <= '0;
            inflight_cnt_q <= '0;
        end else begin
            nblocks_cnt_q  <= nblocks_cnt_d;
            nlines_cnt_q   <= nlines_cnt_d;
            nwait_cnt_q    <= nwait_cnt_d;
            inflight_cnt_q <= inflight_cnt_d;
        end
    end
    //  }}}

    //  FSM
    //  {{{
    always_comb begin : fsm_control
        //  default assignments
        hpdcache_req_valid_o = 1'b0;
        nblocks_cnt_d        = nblocks_cnt_q;
        nlines_cnt_d         = nlines_cnt_q;
        nwait_cnt_d          = nwait_cnt_q;
        inflight_inc         = 1'b0;
        busy_o               = 1'b0;
        csr_base_update      = 1'b0;

        shadow_base_d        = shadow_base_q;
        shadow_param_d       = shadow_param_q;
        shadow_throttle_d    = shadow_throttle_q;
        request_nline_d      = request_nline_q;
        state_d              = state_q;

        case ( state_q )

            IDLE: begin
                //  If enabled, go snooping the dcache ports
                if ( csr_base_q.enable ) begin
                    shadow_base_d = csr_base_q;
                    if (( csr_param_q.nlines > 0 ) || ( csr_param_q.nblocks > 0 )) begin
                        shadow_param_d    = csr_param_q;
                        shadow_throttle_d = csr_throttle_q;
                        state_d           = SNOOP;
                    end else begin
                        //  no prefetch needed, disarm immediately
                        shadow_base_d.enable = 1'b0;
                        csr_base_update      = 1'b1;
                    end
                end
            end


            SNOOP: begin
                if ( csr_base_q.enable ) begin
                    //  If a snooper matched an address, send the request
                    if ( snoop_match_i ) begin
                        state_d = SEND_REQ;

                        if ( shadow_param_q.nlines == 0 ) begin
                            //  skip the first block
                            request_nline_d = shadow_base_q.base_cline +
                                              hpdcache_nline_t'(increment_stride);
                            nblocks_cnt_d   = ( shadow_param_q.nblocks > 0 ) ?
                                              shadow_param_q.nblocks - 1 : 0;
                            nlines_cnt_d    = 0;

                            //  update the base cacheline to the first one of the next block
                            shadow_base_d.base_cline = request_nline_d;
                        end else begin
                            //  skip the first cacheline (of the first block)
                            request_nline_d = shadow_base_q.base_cline + 1'b1;
                            nblocks_cnt_d   = shadow_param_q.nblocks;
                            nlines_cnt_d    = shadow_param_q.nlines - 1;
                        end
                    end
                end else begin
                    state_d = IDLE;
                end
            end


            SEND_REQ: begin
                busy_o = 1'b1;

                //  make the prefetch request to memory
                hpdcache_req_valid_o = 1'b1;

                //  we've got a grant, so we can move to the next request
                if ( hpdcache_req_ready_i ) begin
                    inflight_inc = 1'b1;

                    if ( nlines_cnt_q == 0 ) begin
                        //  go to the first cacheline of the next block
                        request_nline_d = shadow_base_q.base_cline +
                                          hpdcache_nline_t'(increment_stride);
                        nblocks_cnt_d   = ( nblocks_cnt_q > 0 ) ? nblocks_cnt_q - 1 : 0;
                        nlines_cnt_d    = shadow_param_q.nlines;

                        //  update the base cacheline to the first one of the next block
                        shadow_base_d.base_cline = request_nline_d;
                    end else begin
                        //  go to the next cacheline (within the same block)
                        request_nline_d = request_nline_q + 1'b1;
                        nlines_cnt_d    = nlines_cnt_q - 1;
                    end

                    if (( nblocks_cnt_q == 0 ) && ( nlines_cnt_q == 0 )) begin
                        //  this was the last cacheline of the last block
                        state_d = DONE;
                    end else if ( shadow_throttle_q.nwait == 0 ) begin
                        //  if the NWAIT parameter is equal 0, we can issue a request every cycle
                        //  Wait if the number of inflight requests is greater than
                        //  the maximum indicated. Otherwise, send the next request
                        state_d = is_inflight_max ? WAIT : SEND_REQ;
                    end else begin
                        //  Wait the indicated cycles before sending the next request
                        nwait_cnt_d = shadow_throttle_q.nwait;
                        state_d     = WAIT;
                    end

                    //  the prefetcher was disabled by software: stop after this request
                    if ( !csr_base_q.enable ) state_d = ABORT;
                end
            end


            WAIT: begin
                //  Wait until:
                //  - the indicated number of wait cycles between requests is reached (nwait)
                //  - the number of inflight requests is below the indicated maximum (ninflight)
                busy_o = 1'b1;
                if ( csr_base_q.enable ) begin
                    if ( !is_inflight_max && ( nwait_cnt_q == 0 )) begin
                        state_d = SEND_REQ;
                    end

                    if ( nwait_cnt_q > 0 ) begin
                        nwait_cnt_d = nwait_cnt_q - 1;
                    end
                end else begin
                    state_d = ABORT;
                end
            end


            DONE: begin
                busy_o = 1'b1;
                if ( csr_base_q.enable ) begin
                    if (( inflight_cnt_q == 0 ) && !is_inflight_max && ( nwait_cnt_q == 0 )) begin
                        //  Copy back shadow base register into the user visible one
                        csr_base_update = 1'b1;

                        //  Check the rearm bit
                        if ( shadow_base_q.rearm ) begin
                            state_d = SNOOP;
                        end else begin
                            state_d = IDLE;

                            //  disarm the prefetcher
                            shadow_base_d.enable = 1'b0;
                        end

                        //  Check the cycle bit
                        if ( shadow_base_q.cycle ) begin
                            //  restore the base address
                            shadow_base_d.base_cline = csr_base_q.base_cline;
                        end
                    end

                    if ( nwait_cnt_q > 0 ) begin
                        nwait_cnt_d = nwait_cnt_q - 1;
                    end
                end else begin
                    state_d = ABORT;
                end
            end

            ABORT: begin
                //  Stay busy until every inflight request has been answered
                busy_o = 1'b1;
                if ( inflight_cnt_q == 0 ) begin
                    state_d = IDLE;
                end
            end
        endcase
    end
    //  }}}
endmodule
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Author(s) : Riccardo Alidori, Cesar Fuguet
|
||||
* Creation Date : June, 2021
|
||||
* Description : Hw prefetchers arbiter
|
||||
* History :
|
||||
*/
|
||||
module hwpf_stride_arb
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    parameter NUM_HW_PREFETCH = 4
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic                                clk_i,
    input  logic                                rst_ni,

    // Dcache input interface
    input  logic          [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid_i,
    output logic          [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready_o,
    input  hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req_i,
    output logic          [NUM_HW_PREFETCH-1:0] hwpf_stride_rsp_valid_o,
    output hpdcache_rsp_t [NUM_HW_PREFETCH-1:0] hwpf_stride_rsp_o, // Not used

    // Dcache output interface
    output logic                                hpdcache_req_valid_o,
    input  logic                                hpdcache_req_ready_i,
    output hpdcache_req_t                       hpdcache_req_o,
    input  logic                                hpdcache_rsp_valid_i,
    input  hpdcache_rsp_t                       hpdcache_rsp_i // Not used
);
    //  }}}

    //  Internal signals
    //  {{{
    //  Grant vector computed by the arbiter (one bit per prefetcher)
    logic [NUM_HW_PREFETCH-1:0] arb_req_gnt;
    //  }}}

    //  Request path
    //  {{{
    //  Selection among the requesting prefetchers
    hpdcache_rrarb #(
        .N                  (NUM_HW_PREFETCH)
    ) hwpf_stride_req_arbiter_i (
        .clk_i,
        .rst_ni,
        .req_i              (hwpf_stride_req_valid_i),
        .gnt_o              (arb_req_gnt),
        .ready_i            (hpdcache_req_ready_i)
    );

    //  The granted request is forwarded to the cache
    hpdcache_mux #(
        .NINPUT             (NUM_HW_PREFETCH),
        .DATA_WIDTH         ($bits(hpdcache_req_t)),
        .ONE_HOT_SEL        (1'b1)
    ) hwpf_stride_req_mux_i (
        .data_i             (hwpf_stride_req_i),
        .sel_i              (arb_req_gnt),
        .data_o             (hpdcache_req_o)
    );

    //  A request is presented to the cache whenever a grant is given
    assign hpdcache_req_valid_o = |arb_req_gnt;

    //  A prefetcher sees its request accepted when it holds the grant and the
    //  cache is ready
    assign hwpf_stride_req_ready_o = arb_req_gnt & {NUM_HW_PREFETCH{hpdcache_req_ready_i}};
    //  }}}

    //  Response path
    //  {{{
    //  The TID field of the response carries the index of the prefetcher that
    //  issued the request (the prefetchers do not otherwise need the TID).
    //  All prefetchers therefore share one SID, and hence one port to the
    //  cache. The response payload is broadcast; only the valid is routed.
    generate
        for (genvar pf = 0; pf < NUM_HW_PREFETCH; pf++) begin : gen_hwpf_stride_rsp
            assign hwpf_stride_rsp_o[pf]       = hpdcache_rsp_i;
            assign hwpf_stride_rsp_valid_o[pf] =
                    (int'(hpdcache_rsp_i.tid) == pf) && hpdcache_rsp_valid_i;
        end
    endgenerate
    //  }}}
endmodule
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : January, 2023
|
||||
* Description : High-Performance, Data-cache (HPDcache) HW memory
|
||||
* prefetcher package
|
||||
* History :
|
||||
*/
|
||||
// Register layouts (packed structs) of the stride hardware memory prefetcher.
// In a packed struct the first member occupies the most significant bits, so
// each field's declared range below matches its position in the register.
package hwpf_stride_pkg;
|
||||
// Base address configuration register of the hardware memory prefetcher (64 bits)
|
||||
// {{{
|
||||
typedef struct packed {
|
||||
logic [63:6] base_cline; // bits 63:6 - presumably the base cache-line address (64-byte aligned) - TODO confirm
|
||||
logic [5:3] unused; // bits 5:3 - padding
|
||||
logic cycle; // bit 2 - NOTE(review): semantics not visible here, see the hwpf_stride engine
|
||||
logic rearm; // bit 1 - NOTE(review): semantics not visible here, see the hwpf_stride engine
|
||||
logic enable; // bit 0 - per-engine enable flag (hwpf_stride_wrapper reports it in the status register)
|
||||
} hwpf_stride_base_t;
|
||||
// }}}
|
||||
|
||||
// Parameters configuration register of the hardware memory prefetcher (64 bits)
|
||||
// {{{
|
||||
typedef struct packed {
|
||||
logic [63:48] nblocks; // bits 63:48 - presumably the number of blocks to prefetch - TODO confirm
|
||||
logic [47:32] nlines; // bits 47:32 - presumably the number of cache lines per block - TODO confirm
|
||||
logic [31:0] stride; // bits 31:0 - presumably the stride between blocks - TODO confirm units
|
||||
} hwpf_stride_param_t;
|
||||
// }}}
|
||||
|
||||
// Throttle configuration register of the hardware memory prefetcher (32 bits)
|
||||
// {{{
|
||||
typedef struct packed {
|
||||
logic [31:16] ninflight; // bits 31:16 - presumably the limit of in-flight requests - TODO confirm
|
||||
logic [15:0] nwait; // bits 15:0 - presumably a wait count between requests - TODO confirm
|
||||
} hwpf_stride_throttle_t;
|
||||
// }}}
|
||||
|
||||
// Status register of the hardware memory prefetcher (64 bits)
|
||||
// The field contents below are as driven by hwpf_stride_wrapper.
|
||||
// {{{
|
||||
typedef struct packed {
|
||||
logic [63:48] unused1; // bits 63:48 - driven to zero
|
||||
logic [47:32] busy; // bits 47:32 - one busy flag per engine, zero-padded
|
||||
logic free; // bit 31 - set when at least one engine is free (neither enabled nor busy)
|
||||
logic [30:20] unused0; // bits 30:20 - driven to zero
|
||||
logic [19:16] free_index; // bits 19:16 - lowest index of a free engine (0 when none is free)
|
||||
logic [15:0] enabled; // bits 15:0 - one enable flag per engine, zero-padded
|
||||
} hwpf_stride_status_t;
|
||||
// }}}
|
||||
|
||||
endpackage
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Riccardo Alidori, Cesar Fuguet
|
||||
* Creation Date : June, 2021
|
||||
* Description : Snooper used by the hardware memory prefetcher
|
||||
* History :
|
||||
*/
|
||||
//  Combinational address snooper used by the hardware memory prefetcher.
//  Raises snoop_match_o when the snooper is enabled and the snooped cache-line
//  address equals the reference cache-line address.
module hwpf_stride_snooper
import hpdcache_pkg::*;
(
    input  logic            en_i,          // Snooper enable bit
    input  hpdcache_nline_t base_nline_i,  // Reference address to compare against
    input  hpdcache_nline_t snoop_addr_i,  // Snooped input address
    output                  snoop_match_o  // High when enabled and both addresses are equal
);

    //  Result of the raw address comparison, before gating with the enable
    logic nline_equal;

    assign nline_equal   = (base_nline_i == snoop_addr_i);

    //  A match is only reported while the snooper is enabled
    assign snoop_match_o = en_i && nline_equal;

endmodule
|
||||
|
|
@ -0,0 +1,265 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Riccardo Alidori, Cesar Fuguet
|
||||
* Creation Date : June, 2021
|
||||
* Description : Linear Hardware Memory Prefetcher wrapper.
|
||||
* History :
|
||||
*/
|
||||
//  Wrapper around NUM_HW_PREFETCH stride prefetcher engines (hwpf_stride).
//
//  It instantiates the engines, computes the snoop-match signal of each engine
//  from the snooped request ports, builds the global status register, and
//  funnels the engine requests to a single cache port through hwpf_stride_arb.
module hwpf_stride_wrapper
import hwpf_stride_pkg::*;
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    parameter NUM_HW_PREFETCH = 4,  //  number of engines (at most 16, see assertion below)
    parameter NUM_SNOOP_PORTS = 1   //  number of snooped request ports
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic                                        clk_i,
    input  logic                                        rst_ni,

    //  CSR (one set/in/out triplet per register kind, one entry per engine)
    //  {{{
    input  logic                  [NUM_HW_PREFETCH-1:0] hwpf_stride_base_set_i,
    input  hwpf_stride_base_t     [NUM_HW_PREFETCH-1:0] hwpf_stride_base_i,
    output hwpf_stride_base_t     [NUM_HW_PREFETCH-1:0] hwpf_stride_base_o,

    input  logic                  [NUM_HW_PREFETCH-1:0] hwpf_stride_param_set_i,
    input  hwpf_stride_param_t    [NUM_HW_PREFETCH-1:0] hwpf_stride_param_i,
    output hwpf_stride_param_t    [NUM_HW_PREFETCH-1:0] hwpf_stride_param_o,

    input  logic                  [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_set_i,
    input  hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_i,
    output hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_o,

    output hwpf_stride_status_t                         hwpf_stride_status_o,
    //  }}}

    //  Snooping
    //  {{{
    input  logic                  [NUM_SNOOP_PORTS-1:0] snoop_valid_i,
    input  logic                  [NUM_SNOOP_PORTS-1:0] snoop_abort_i,
    input  hpdcache_req_offset_t  [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_i,
    input  hpdcache_tag_t         [NUM_SNOOP_PORTS-1:0] snoop_addr_tag_i,
    input  logic                  [NUM_SNOOP_PORTS-1:0] snoop_phys_indexed_i,
    //  }}}

    //  Dcache interface
    //  {{{
    input  hpdcache_req_sid_t                           hpdcache_req_sid_i,
    output logic                                        hpdcache_req_valid_o,
    input  logic                                        hpdcache_req_ready_i,
    output hpdcache_req_t                               hpdcache_req_o,
    output logic                                        hpdcache_req_abort_o,
    output hpdcache_tag_t                               hpdcache_req_tag_o,
    output hpdcache_pma_t                               hpdcache_req_pma_o,
    input  logic                                        hpdcache_rsp_valid_i,
    input  hpdcache_rsp_t                               hpdcache_rsp_i
    //  }}}
);
    //  }}}

    //  Internal registers
    //  {{{
    //  Snoop valid/offset captured on cycles where the snooped request is
    //  physically indexed; used by snoop_comb when it is not.
    logic                 [NUM_SNOOP_PORTS-1:0] snoop_valid_q;
    hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_q;
    //  }}}

    //  Internal signals
    //  {{{
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_enable;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_free;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_status_busy;
    logic            [3:0]                 hwpf_stride_status_free_idx;

    hpdcache_nline_t [NUM_HW_PREFETCH-1:0] hwpf_snoop_nline;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_snoop_match;

    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready;
    hpdcache_req_t   [NUM_HW_PREFETCH-1:0] hwpf_stride_req;

    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_valid;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_ready;
    hpdcache_req_t   [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp_valid;
    hpdcache_rsp_t   [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp;
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    initial
    begin
        //  The status register only has 16 busy/enable bits and a 4-bit free index
        max_hwpf_stride_assert: assert (NUM_HW_PREFETCH <= 16) else
                $error("hwpf_stride: maximum number of HW prefetchers is 16");
    end
    //  pragma translate_on
    //  }}}

    //  Compute the status information
    //  {{{
    //  Priority encoder: index of the lowest-numbered free engine (0 if none).
    //  The scan runs from the highest index downwards so that the last
    //  assignment, i.e. the lowest free index, is the one that sticks.
    always_comb begin: hwpf_stride_priority_encoder
        hwpf_stride_status_free_idx = '0;
        for (int k = NUM_HW_PREFETCH - 1; k >= 0; k--) begin
            if (hwpf_stride_free[k]) begin
                hwpf_stride_status_free_idx = k;
            end
        end
    end

    //  An engine is free when it is neither enabled nor busy
    assign hwpf_stride_free            = ~(hwpf_stride_enable | hwpf_stride_status_busy);

    //  Bits 63:32: busy flags, zero-extended
    assign hwpf_stride_status_o[63:32] = 32'(hwpf_stride_status_busy);
    //  Bit 31: global free flag (at least one engine is free)
    assign hwpf_stride_status_o[31]    = |hwpf_stride_free;
    //  Bits 30:16: free index, zero-extended
    assign hwpf_stride_status_o[30:16] = 15'(hwpf_stride_status_free_idx);
    //  Bits 15:0: enable flags, zero-extended
    assign hwpf_stride_status_o[15:0]  = 16'(hwpf_stride_enable);
    //  }}}

    //  Hardware prefetcher engines
    //  {{{
    generate
        //  Per snoop port: capture valid/offset whenever the snooped request
        //  is physically indexed.
        for (genvar sp = 0; sp < NUM_SNOOP_PORTS; sp++) begin
            always_ff @(posedge clk_i or negedge rst_ni)
            begin : snoop_ff
                if (!rst_ni) begin
                    snoop_valid_q[sp]       <= 1'b0;
                    snoop_addr_offset_q[sp] <= '0;
                end else if (snoop_phys_indexed_i[sp]) begin
                    snoop_valid_q[sp]       <= snoop_valid_i[sp];
                    snoop_addr_offset_q[sp] <= snoop_addr_offset_i[sp];
                end
            end
        end

        for (genvar i = 0; i < NUM_HW_PREFETCH; i++) begin
            assign hwpf_stride_enable[i] = hwpf_stride_base_o[i].enable;

            //  Compute snoop match signals
            //  {{{
            //  Engine i matches when any snoop port carries a valid,
            //  non-aborted request on the cache line the engine is watching.
            always_comb
            begin : snoop_comb
                hwpf_snoop_match[i] = 1'b0;
                for (int p = 0; p < NUM_SNOOP_PORTS; p++) begin
                    automatic logic                 snoop_valid;
                    automatic hpdcache_req_offset_t snoop_offset;
                    automatic hpdcache_nline_t      snoop_nline;

                    //  Physically indexed: use the current valid/offset.
                    //  Otherwise: use the registered copy.
                    if (snoop_phys_indexed_i[p]) begin
                        snoop_valid  = snoop_valid_i[p];
                        snoop_offset = snoop_addr_offset_i[p];
                    end else begin
                        snoop_valid  = snoop_valid_q[p];
                        snoop_offset = snoop_addr_offset_q[p];
                    end

                    //  The tag always comes from the current cycle
                    snoop_nline = {snoop_addr_tag_i[p], snoop_offset};

                    hwpf_snoop_match[i] = hwpf_snoop_match[i] |
                            (snoop_valid && !snoop_abort_i[p] &&
                             (hwpf_snoop_nline[i] == snoop_nline));
                end
            end
            //  }}}

            hwpf_stride #(
                .CACHE_LINE_BYTES     ( HPDCACHE_CL_WIDTH/8 )
            ) hwpf_stride_i(
                .clk_i,
                .rst_ni,

                //  Configuration registers
                .csr_base_set_i       ( hwpf_stride_base_set_i[i] ),
                .csr_base_i           ( hwpf_stride_base_i[i] ),
                .csr_base_o           ( hwpf_stride_base_o[i] ),
                .csr_param_set_i      ( hwpf_stride_param_set_i[i] ),
                .csr_param_i          ( hwpf_stride_param_i[i] ),
                .csr_param_o          ( hwpf_stride_param_o[i] ),
                .csr_throttle_set_i   ( hwpf_stride_throttle_set_i[i] ),
                .csr_throttle_i       ( hwpf_stride_throttle_i[i] ),
                .csr_throttle_o       ( hwpf_stride_throttle_o[i] ),

                .busy_o               ( hwpf_stride_status_busy[i] ),

                //  Snooping
                .snoop_nline_o        ( hwpf_snoop_nline[i] ),
                .snoop_match_i        ( hwpf_snoop_match[i] ),

                //  Request/response channel towards the arbiter
                .hpdcache_req_valid_o ( hwpf_stride_req_valid[i] ),
                .hpdcache_req_ready_i ( hwpf_stride_req_ready[i] ),
                .hpdcache_req_o       ( hwpf_stride_req[i] ),
                .hpdcache_rsp_valid_i ( hwpf_stride_arb_in_rsp_valid[i] ),
                .hpdcache_rsp_i       ( hwpf_stride_arb_in_rsp[i] )
            );

            //  Handshake between the engine and the arbiter
            assign hwpf_stride_req_ready[i]        = hwpf_stride_arb_in_req_ready[i];
            assign hwpf_stride_arb_in_req_valid[i] = hwpf_stride_req_valid[i];

            //  Fields forwarded unchanged from the engine request
            assign hwpf_stride_arb_in_req[i].addr_offset  = hwpf_stride_req[i].addr_offset;
            assign hwpf_stride_arb_in_req[i].wdata        = hwpf_stride_req[i].wdata;
            assign hwpf_stride_arb_in_req[i].op           = hwpf_stride_req[i].op;
            assign hwpf_stride_arb_in_req[i].be           = hwpf_stride_req[i].be;
            assign hwpf_stride_arb_in_req[i].size         = hwpf_stride_req[i].size;
            assign hwpf_stride_arb_in_req[i].need_rsp     = hwpf_stride_req[i].need_rsp;
            assign hwpf_stride_arb_in_req[i].phys_indexed = hwpf_stride_req[i].phys_indexed;

            //  Fields overridden by the wrapper: all engines share the same
            //  SID, the TID carries the engine index, tag and PMA are zeroed.
            assign hwpf_stride_arb_in_req[i].sid          = hpdcache_req_sid_i;
            assign hwpf_stride_arb_in_req[i].tid          = hpdcache_req_tid_t'(i);
            assign hwpf_stride_arb_in_req[i].addr_tag     = '0;
            assign hwpf_stride_arb_in_req[i].pma          = '0;
        end
    endgenerate
    //  }}}

    //  Hardware prefetcher arbiter between engines
    //  {{{
    hwpf_stride_arb #(
        .NUM_HW_PREFETCH          ( NUM_HW_PREFETCH )
    ) hwpf_stride_arb_i (
        .clk_i,
        .rst_ni,

        //  DCache input interface
        .hwpf_stride_req_valid_i  ( hwpf_stride_arb_in_req_valid ),
        .hwpf_stride_req_ready_o  ( hwpf_stride_arb_in_req_ready ),
        .hwpf_stride_req_i        ( hwpf_stride_arb_in_req ),
        .hwpf_stride_rsp_valid_o  ( hwpf_stride_arb_in_rsp_valid ),
        .hwpf_stride_rsp_o        ( hwpf_stride_arb_in_rsp ),

        //  DCache output interface
        .hpdcache_req_valid_o,
        .hpdcache_req_ready_i,
        .hpdcache_req_o,
        .hpdcache_rsp_valid_i,
        .hpdcache_rsp_i
    );

    //  The three outputs below are unused on physically indexed requests
    assign hpdcache_req_abort_o = 1'b0;
    assign hpdcache_req_tag_o   = '0;
    assign hpdcache_req_pma_o   = '0;
    //  }}}

endmodule
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Dcache Memory Read Request Channel Arbiter
|
||||
* History :
|
||||
*/
|
||||
//  Arbiter for the memory read request channel.
//
//  N requesters present valid/request pairs; a fixed-priority arbiter
//  (hpdcache_fxarb) selects one, whose request is multiplexed onto the single
//  output channel. Only the granted requester sees the downstream ready.
module hpdcache_mem_req_read_arbiter
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    parameter hpdcache_uint N = 0,                //  number of requesters
    parameter type hpdcache_mem_req_t = logic     //  request payload type
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic              clk_i,
    input  logic              rst_ni,

    //  Requesters side (unpacked arrays, one entry per requester)
    output logic              mem_req_read_ready_o [N-1:0],
    input  logic              mem_req_read_valid_i [N-1:0],
    input  hpdcache_mem_req_t mem_req_read_i       [N-1:0],

    //  Memory side
    input  logic              mem_req_read_ready_i,
    output logic              mem_req_read_valid_o,
    output hpdcache_mem_req_t mem_req_read_o
);
    //  }}}

    //  Packed copies of the unpacked input arrays, and the arbiter grant vector
    logic              [N-1:0] mem_read_arb_req_valid;
    hpdcache_mem_req_t [N-1:0] mem_read_arb_req;
    logic              [N-1:0] mem_read_arb_req_gnt;

    logic                      req_valid;

    //  Pack inputs
    generate
        for (genvar idx = 0; idx < int'(N); idx++) begin : pack_inputs_gen
            assign mem_read_arb_req_valid[idx] = mem_req_read_valid_i[idx];
            assign mem_read_arb_req      [idx] = mem_req_read_i[idx];
        end
    endgenerate

    //  Fixed-priority arbiter
    //  NOTE(review): the instance name says "write" although this is the read
    //  arbiter; kept unchanged to preserve hierarchical paths.
    hpdcache_fxarb #(
        .N                  (N)
    ) hpdcache_fxarb_mem_req_write_i (
        .clk_i,
        .rst_ni,
        .req_i              (mem_read_arb_req_valid),
        .gnt_o              (mem_read_arb_req_gnt),
        .ready_i            (mem_req_read_ready_i)
    );

    //  The output is valid when a granted requester is presenting a request
    assign req_valid            = |(mem_read_arb_req_valid & mem_read_arb_req_gnt);
    assign mem_req_read_valid_o = req_valid;

    //  Demultiplexor for the ready signal: only a granted, valid requester
    //  observes the downstream ready
    generate
        for (genvar idx = 0; idx < int'(N); idx++) begin : req_ready_gen
            assign mem_req_read_ready_o[idx] = mem_read_arb_req_gnt[idx] &
                                               mem_read_arb_req_valid[idx] &
                                               mem_req_read_ready_i;
        end
    endgenerate

    //  Multiplexor for requests, selected by the one-hot grant vector
    hpdcache_mux #(
        .NINPUT             (N),
        .DATA_WIDTH         ($bits(hpdcache_mem_req_t)),
        .ONE_HOT_SEL        (1'b1)
    ) mem_read_req_mux_i (
        .data_i             (mem_read_arb_req),
        .sel_i              (mem_read_arb_req_gnt),
        .data_o             (mem_req_read_o)
    );

endmodule
|
||||
|
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Dcache Memory Write Channels Arbiter
|
||||
* History :
|
||||
*/
|
||||
//  Arbiter for the memory write channels (request metadata + write data).
//
//  N requesters each present a request channel and a data channel. A
//  fixed-priority arbiter (hpdcache_fxarb) selects one requester; a small FSM
//  keeps the arbiter from advancing (its ready input stays low) until both the
//  request and its data have been accepted downstream, so that request and
//  data of a given requester stay paired.
module hpdcache_mem_req_write_arbiter
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    parameter hpdcache_uint N = 0,                  //  number of requesters
    parameter type hpdcache_mem_req_t = logic,      //  request (metadata) payload type
    parameter type hpdcache_mem_req_w_t = logic     //  write data payload type
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic                clk_i,
    input  logic                rst_ni,

    //  Requesters side, request channel (one entry per requester)
    output logic                mem_req_write_ready_o      [N-1:0],
    input  logic                mem_req_write_valid_i      [N-1:0],
    input  hpdcache_mem_req_t   mem_req_write_i            [N-1:0],

    //  Requesters side, data channel (one entry per requester)
    output logic                mem_req_write_data_ready_o [N-1:0],
    input  logic                mem_req_write_data_valid_i [N-1:0],
    input  hpdcache_mem_req_w_t mem_req_write_data_i       [N-1:0],

    //  Memory side, request channel
    input  logic                mem_req_write_ready_i,
    output logic                mem_req_write_valid_o,
    output hpdcache_mem_req_t   mem_req_write_o,

    //  Memory side, data channel
    input  logic                mem_req_write_data_ready_i,
    output logic                mem_req_write_data_valid_o,
    output hpdcache_mem_req_w_t mem_req_write_data_o
);
    //  }}}

    //  State of the request/data pairing FSM
    typedef enum {
        REQ_IDLE,       //  nothing of the current transfer has been sent yet
        REQ_META_SENT,  //  request sent, data still pending
        REQ_DATA_SENT   //  data sent, request still pending
    } req_send_fsm_t;

    req_send_fsm_t               req_send_fsm_q, req_send_fsm_d;
    logic                        req_valid;
    logic                        req_data_valid;

    //  Packed copies of the unpacked input arrays, and the arbiter signals
    logic                [N-1:0] mem_write_arb_req_valid;
    hpdcache_mem_req_t   [N-1:0] mem_write_arb_req;
    logic                [N-1:0] mem_write_arb_req_data_valid;
    hpdcache_mem_req_w_t [N-1:0] mem_write_arb_req_data;
    logic                [N-1:0] mem_write_arb_req_gnt;
    logic                        mem_write_arb_req_ready;

    genvar                       gen_i;

    //  Pack inputs
    generate
        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen
            assign mem_write_arb_req_valid     [gen_i] = mem_req_write_valid_i[gen_i],
                   mem_write_arb_req           [gen_i] = mem_req_write_i[gen_i],
                   mem_write_arb_req_data_valid[gen_i] = mem_req_write_data_valid_i[gen_i],
                   mem_write_arb_req_data      [gen_i] = mem_req_write_data_i[gen_i];
        end
    endgenerate

    //  Fixed-priority arbiter
    //  The arbiter width follows N so that it matches the N-bit request and
    //  grant vectors (it was previously hard-coded to 2, which is only
    //  correct for N == 2).
    hpdcache_fxarb #(
        .N                  (N)
    ) hpdcache_fxarb_mem_req_write_i (
        .clk_i,
        .rst_ni,
        .req_i              (mem_write_arb_req_valid),
        .gnt_o              (mem_write_arb_req_gnt),
        .ready_i            (mem_write_arb_req_ready)
    );

    //  Request / data valid of the currently granted requester
    assign req_valid      = |(mem_write_arb_req_gnt & mem_write_arb_req_valid);
    assign req_data_valid = |(mem_write_arb_req_gnt & mem_write_arb_req_data_valid);

    //  Request sent FSM
    //
    //  This FSM allows to make sure that the request and its corresponding
    //  data are sent in order. This is, when a requester sends a request, this
    //  FSM keeps the grant signal on this requester until it has sent the
    //  corresponding data.
    //
    //  {{{
    always_comb
    begin : req_send_fsm_comb
        //  Defaults: hold the state and do not let the arbiter advance
        req_send_fsm_d = req_send_fsm_q;
        mem_write_arb_req_ready = 1'b0;
        case (req_send_fsm_q)
            REQ_IDLE:
                if (req_valid && mem_req_write_ready_i) begin
                    //  NOTE(review): when the request is accepted here while
                    //  no data is valid, the state stays REQ_IDLE - confirm
                    //  that requesters always present data with the request.
                    if (req_data_valid) begin
                        if (mem_req_write_data_ready_i) begin
                            //  Request and data accepted in the same cycle
                            mem_write_arb_req_ready = 1'b1;
                            req_send_fsm_d = REQ_IDLE;
                        end else begin
                            //  Request accepted, data still to be sent
                            req_send_fsm_d = REQ_META_SENT;
                        end
                    end
                end else if (req_data_valid && mem_req_write_data_ready_i) begin
                    //  Data accepted first, request still to be sent
                    req_send_fsm_d = REQ_DATA_SENT;
                end

            REQ_META_SENT:
                if (req_data_valid && mem_req_write_data_ready_i) begin
                    mem_write_arb_req_ready = 1'b1;
                    req_send_fsm_d = REQ_IDLE;
                end

            REQ_DATA_SENT:
                if (req_valid && mem_req_write_ready_i) begin
                    mem_write_arb_req_ready = 1'b1;
                    req_send_fsm_d = REQ_IDLE;
                end
        endcase
    end

    always_ff @(posedge clk_i or negedge rst_ni)
    begin : req_send_fsm_ff
        if (!rst_ni) begin
            req_send_fsm_q <= REQ_IDLE;
        end else begin
            req_send_fsm_q <= req_send_fsm_d;
        end
    end
    //  }}}

    //  Ready demultiplexors: only the granted requester observes the
    //  downstream ready, and a channel is blocked once its half of the
    //  transfer has already been sent.
    generate
        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen
            assign mem_req_write_ready_o[gen_i] =
                    (mem_write_arb_req_gnt[gen_i] & mem_req_write_ready_i) &
                    (req_send_fsm_q != REQ_META_SENT);

            assign mem_req_write_data_ready_o[gen_i] =
                    (mem_write_arb_req_gnt[gen_i] & mem_req_write_data_ready_i) &
                    (req_send_fsm_q != REQ_DATA_SENT);
        end
    endgenerate

    //  Output assignments
    //  {{{
    //  A channel's valid is masked once its half of the transfer has been sent
    assign mem_req_write_valid_o      = req_valid      & (req_send_fsm_q != REQ_META_SENT);
    assign mem_req_write_data_valid_o = req_data_valid & (req_send_fsm_q != REQ_DATA_SENT);

    //  Request multiplexor, selected by the one-hot grant vector
    hpdcache_mux #(
        .NINPUT             (N),
        .DATA_WIDTH         ($bits(hpdcache_mem_req_t)),
        .ONE_HOT_SEL        (1'b1)
    ) mem_write_req_mux_i (
        .data_i             (mem_write_arb_req),
        .sel_i              (mem_write_arb_req_gnt),
        .data_o             (mem_req_write_o)
    );

    //  Write data multiplexor, selected by the same grant vector
    hpdcache_mux #(
        .NINPUT             (N),
        .DATA_WIDTH         ($bits(hpdcache_mem_req_w_t)),
        .ONE_HOT_SEL        (1'b1)
    ) mem_write_data_req_mux_i (
        .data_i             (mem_write_arb_req_data),
        .sel_i              (mem_write_arb_req_gnt),
        .data_o             (mem_req_write_data_o)
    );
    //  }}}

endmodule
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : June, 2022
|
||||
* Description : Dcache Memory Response Demultiplexer
|
||||
* History :
|
||||
*/
|
||||
//  Memory response demultiplexer.
//
//  Routes a single incoming response channel to one of N output ports. The
//  target port is read from the routing table mem_resp_rt_i, indexed by the
//  response ID. The ready of the selected output port is returned upstream.
module hpdcache_mem_resp_demux
    //  Parameters
    //  {{{
#(
    parameter int  N = 0,                 //  number of output ports
    parameter type resp_t = logic,        //  response payload type
    parameter type resp_id_t = logic,     //  response ID type

    //  Routing table: one entry per possible response ID value
    localparam int  RT_DEPTH = (1 << $bits(resp_id_t)),
    localparam type rt_t = resp_id_t [RT_DEPTH-1:0]
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic     clk_i,
    input  logic     rst_ni,

    //  Incoming response channel
    output logic     mem_resp_ready_o,
    input  logic     mem_resp_valid_i,
    input  resp_id_t mem_resp_id_i,
    input  resp_t    mem_resp_i,

    //  Outgoing response channels (one entry per output port)
    input  logic     mem_resp_ready_i [N-1:0],
    output logic     mem_resp_valid_o [N-1:0],
    output resp_t    mem_resp_o       [N-1:0],

    //  Routing table (response ID -> output port)
    input  rt_t      mem_resp_rt_i
);
    //  }}}

    //  Binary-encoded output port selector
    //  NOTE(review): for N == 1 this range is [-1:0] - confirm single-port use
    typedef logic [$clog2(N)-1:0] sel_t;

    //  Packed views of the unpacked per-port arrays
    logic  [N-1:0] mem_resp_demux_valid;
    resp_t [N-1:0] mem_resp_demux;
    logic  [N-1:0] mem_resp_demux_ready;
    sel_t          mem_resp_demux_sel;

    //  Pack/unpack responses
    generate
        for (genvar k = 0; k < int'(N); k++) begin : pack_unpack_resp_gen
            assign mem_resp_demux_ready [k] = mem_resp_ready_i     [k],
                   mem_resp_valid_o     [k] = mem_resp_demux_valid [k],
                   mem_resp_o           [k] = mem_resp_demux       [k];
        end
    endgenerate

    //  Route the response according to the response ID and the routing table
    assign mem_resp_demux_sel = mem_resp_rt_i[int'(mem_resp_id_i)];

    //  Forward the valid to the selected output port
    hpdcache_demux #(
        .NOUTPUT     (N),
        .DATA_WIDTH  (1),
        .ONE_HOT_SEL (0)
    ) i_resp_valid_demux (
        .data_i      (mem_resp_valid_i),
        .sel_i       (mem_resp_demux_sel),
        .data_o      (mem_resp_demux_valid)
    );

    //  Forward the response payload to the selected output port
    hpdcache_demux #(
        .NOUTPUT     (N),
        .DATA_WIDTH  ($bits(resp_t)),
        .ONE_HOT_SEL (0)
    ) i_resp_demux (
        .data_i      (mem_resp_i),
        .sel_i       (mem_resp_demux_sel),
        .data_o      (mem_resp_demux)
    );

    //  Return the ready of the selected output port upstream
    hpdcache_mux #(
        .NINPUT      (N),
        .DATA_WIDTH  (1),
        .ONE_HOT_SEL (0)
    ) i_resp_ready_mux (
        .data_i      (mem_resp_demux_ready),
        .sel_i       (mem_resp_demux_sel),
        .data_o      (mem_resp_ready_o)
    );

endmodule : hpdcache_mem_resp_demux
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Dcache memory request to axi read channels
|
||||
* History :
|
||||
*/
|
||||
//  Purely combinational adapter from the cache memory read request/response
//  channels to the AXI AR/R channels.
//
//  Requests map to AR (always INCR bursts; the lock bit is set for LDEX
//  atomics); R beats map back to responses, with SLVERR/DECERR reported as
//  HPDCACHE_MEM_RESP_NOK and every other response code as HPDCACHE_MEM_RESP_OK.
module hpdcache_mem_to_axi_read
import hpdcache_pkg::*;
#(
    parameter type hpdcache_mem_req_t = logic,      //  memory request type
    parameter type hpdcache_mem_resp_r_t = logic,   //  memory read response type
    parameter type ar_chan_t = logic,               //  AXI AR channel type
    parameter type r_chan_t = logic                 //  AXI R channel type
)
(
    //  Memory request channel
    output logic                 req_ready_o,
    input  logic                 req_valid_i,
    input  hpdcache_mem_req_t    req_i,

    //  Memory read response channel
    input  logic                 resp_ready_i,
    output logic                 resp_valid_o,
    output hpdcache_mem_resp_r_t resp_o,

    //  AXI AR channel
    output logic                 axi_ar_valid_o,
    output ar_chan_t             axi_ar_o,
    input  logic                 axi_ar_ready_i,

    //  AXI R channel
    input  logic                 axi_r_valid_i,
    input  r_chan_t              axi_r_i,
    output logic                 axi_r_ready_o
);

    //  AxCACHE attributes used for cacheable requests
    localparam axi_pkg::cache_t CACHEABLE_ATTR = axi_pkg::CACHE_BUFFERABLE |
                                                 axi_pkg::CACHE_MODIFIABLE |
                                                 axi_pkg::CACHE_RD_ALLOC   |
                                                 axi_pkg::CACHE_WR_ALLOC;

    logic                lock;
    axi_pkg::cache_t     cache;
    hpdcache_mem_error_e resp;

    //  Exclusive access is requested only for atomic load-exclusive requests
    assign lock = (req_i.mem_req_atomic  == HPDCACHE_MEM_ATOMIC_LDEX) &&
                  (req_i.mem_req_command == HPDCACHE_MEM_ATOMIC);

    //  Non-cacheable requests use all-zero cache attributes
    assign cache = req_i.mem_req_cacheable ? CACHEABLE_ATTR : '0;

    //  Translate the AXI response code into the cache error code
    always_comb
    begin : resp_decode_comb
        if ((axi_r_i.resp == axi_pkg::RESP_SLVERR) ||
            (axi_r_i.resp == axi_pkg::RESP_DECERR)) begin
            resp = HPDCACHE_MEM_RESP_NOK;
        end else begin
            resp = HPDCACHE_MEM_RESP_OK;
        end
    end

    //  Request -> AR channel (handshake passes straight through)
    assign req_ready_o     = axi_ar_ready_i;
    assign axi_ar_valid_o  = req_valid_i;

    assign axi_ar_o.id     = req_i.mem_req_id;
    assign axi_ar_o.addr   = req_i.mem_req_addr;
    assign axi_ar_o.len    = req_i.mem_req_len;
    assign axi_ar_o.size   = req_i.mem_req_size;
    assign axi_ar_o.burst  = axi_pkg::BURST_INCR;
    assign axi_ar_o.lock   = lock;
    assign axi_ar_o.cache  = cache;

    //  Fields tied to zero
    assign axi_ar_o.prot   = '0;
    assign axi_ar_o.qos    = '0;
    assign axi_ar_o.region = '0;
    assign axi_ar_o.user   = '0;

    //  R channel -> response (handshake passes straight through)
    assign axi_r_ready_o   = resp_ready_i;
    assign resp_valid_o    = axi_r_valid_i;

    assign resp_o.mem_resp_r_error = resp;
    assign resp_o.mem_resp_r_id    = axi_r_i.id;
    assign resp_o.mem_resp_r_data  = axi_r_i.data;
    assign resp_o.mem_resp_r_last  = axi_r_i.last;

endmodule
|
||||
|
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Copyright 2023 CEA*
|
||||
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
*
|
||||
* Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
|
||||
* may not use this file except in compliance with the License, or, at your
|
||||
* option, the Apache License version 2.0. You may obtain a copy of the
|
||||
* License at
|
||||
*
|
||||
* https://solderpad.org/licenses/SHL-2.1/
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, any work
|
||||
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
/*
|
||||
* Authors : Cesar Fuguet
|
||||
* Creation Date : April, 2021
|
||||
* Description : Dcache memory request to axi write channels
|
||||
* History :
|
||||
*/
|
||||
// hpdcache_mem_to_axi_write
//
// Purely combinational bridge from the HPDcache memory write interface
// (request, write data, write response) onto the AXI AW, W and B channels.
// Handshake signals are wired straight through in both directions; the only
// real logic is
//   * mapping the memory command/atomic opcode onto AXI lock and atop,
//   * deriving the AXI cache attributes from the cacheable flag, and
//   * collapsing the AXI B response code into the HPDcache error enum.
module hpdcache_mem_to_axi_write
import hpdcache_pkg::*;
#(
    parameter type hpdcache_mem_req_t = logic,
    parameter type hpdcache_mem_req_w_t = logic,
    parameter type hpdcache_mem_resp_w_t = logic,
    parameter type aw_chan_t = logic,
    parameter type w_chan_t = logic,
    parameter type b_chan_t = logic
)
(
    // memory-side write request (address phase)
    output logic                  req_ready_o,
    input  logic                  req_valid_i,
    input  hpdcache_mem_req_t     req_i,

    // memory-side write data
    output logic                  req_data_ready_o,
    input  logic                  req_data_valid_i,
    input  hpdcache_mem_req_w_t   req_data_i,

    // memory-side write response
    input  logic                  resp_ready_i,
    output logic                  resp_valid_o,
    output hpdcache_mem_resp_w_t  resp_o,

    // AXI AW channel
    output logic                  axi_aw_valid_o,
    output aw_chan_t              axi_aw_o,
    input  logic                  axi_aw_ready_i,

    // AXI W channel
    output logic                  axi_w_valid_o,
    output w_chan_t               axi_w_o,
    input  logic                  axi_w_ready_i,

    // AXI B channel
    input  logic                  axi_b_valid_i,
    input  b_chan_t               axi_b_i,
    output logic                  axi_b_ready_o
);

    logic                aw_lock;    // exclusive access flag for AW
    axi_pkg::atop_t      aw_atop;    // AXI atomic operation encoding for AW
    axi_pkg::cache_t     aw_cache;   // AXI cache attributes for AW
    hpdcache_mem_error_e b_error;    // decoded B channel status

    // Translate the memory atomic opcode into AXI lock / atop.
    // Anything that is not an atomic command (or an atomic opcode without an
    // entry below) leaves both at zero.
    always_comb
    begin : atop_comb
        aw_atop = '0;
        aw_lock = 1'b0;
        if (req_i.mem_req_command == HPDCACHE_MEM_ATOMIC) begin
            case (req_i.mem_req_atomic)
                // store-exclusive: expressed through the lock bit, no atop
                HPDCACHE_MEM_ATOMIC_STEX: begin
                    aw_lock = 1'b1;
                end
                // read-modify-write operations: atomic load, little endian
                HPDCACHE_MEM_ATOMIC_ADD: begin
                    aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
                end
                HPDCACHE_MEM_ATOMIC_CLR: begin
                    aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
                end
                HPDCACHE_MEM_ATOMIC_SET: begin
                    aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
                end
                HPDCACHE_MEM_ATOMIC_EOR: begin
                    aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
                end
                HPDCACHE_MEM_ATOMIC_SMAX: begin
                    aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
                end
                HPDCACHE_MEM_ATOMIC_SMIN: begin
                    aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
                end
                HPDCACHE_MEM_ATOMIC_UMAX: begin
                    aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
                end
                HPDCACHE_MEM_ATOMIC_UMIN: begin
                    aw_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
                end
                // swap has its own dedicated atop encoding
                HPDCACHE_MEM_ATOMIC_SWAP: begin
                    aw_atop = axi_pkg::ATOP_ATOMICSWAP;
                end
            endcase
        end
    end

    // Non-cacheable or exclusive accesses carry no cache attributes; all other
    // writes are bufferable, modifiable and allocate on both read and write.
    assign aw_cache = (!req_i.mem_req_cacheable || aw_lock) ? '0 :
                      (axi_pkg::CACHE_BUFFERABLE |
                       axi_pkg::CACHE_MODIFIABLE |
                       axi_pkg::CACHE_RD_ALLOC   |
                       axi_pkg::CACHE_WR_ALLOC);

    // SLVERR and DECERR are reported as NOK, every other response code as OK.
    always_comb
    begin : resp_decode_comb
        b_error = HPDCACHE_MEM_RESP_OK;
        case (axi_b_i.resp)
            axi_pkg::RESP_SLVERR: b_error = HPDCACHE_MEM_RESP_NOK;
            axi_pkg::RESP_DECERR: b_error = HPDCACHE_MEM_RESP_NOK;
            default: ;
        endcase
    end

    // ---- AW channel: request handshake and address attributes ----
    assign req_ready_o     = axi_aw_ready_i;
    assign axi_aw_valid_o  = req_valid_i;
    assign axi_aw_o.id     = req_i.mem_req_id;
    assign axi_aw_o.addr   = req_i.mem_req_addr;
    assign axi_aw_o.len    = req_i.mem_req_len;
    assign axi_aw_o.size   = req_i.mem_req_size;
    assign axi_aw_o.burst  = axi_pkg::BURST_INCR;
    assign axi_aw_o.lock   = aw_lock;
    assign axi_aw_o.cache  = aw_cache;
    assign axi_aw_o.prot   = '0;
    assign axi_aw_o.qos    = '0;
    assign axi_aw_o.region = '0;
    assign axi_aw_o.atop   = aw_atop;
    assign axi_aw_o.user   = '0;

    // ---- W channel: write data handshake and payload ----
    assign req_data_ready_o = axi_w_ready_i;
    assign axi_w_valid_o    = req_data_valid_i;
    assign axi_w_o.data     = req_data_i.mem_req_w_data;
    assign axi_w_o.strb     = req_data_i.mem_req_w_be;
    assign axi_w_o.last     = req_data_i.mem_req_w_last;
    assign axi_w_o.user     = '0;

    // ---- B channel: write response handshake and payload ----
    assign axi_b_ready_o               = resp_ready_i;
    assign resp_valid_o                = axi_b_valid_i;
    assign resp_o.mem_resp_w_error     = b_error;
    assign resp_o.mem_resp_w_id        = axi_b_i.id;
    // flagged when the slave answered with EXOKAY
    assign resp_o.mem_resp_w_is_atomic = (axi_b_i.resp == axi_pkg::RESP_EXOKAY);

endmodule
|
||||
|
|
@ -0,0 +1,826 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 12.11.2017
|
||||
// Description: Handles cache misses.
|
||||
|
||||
// --------------
|
||||
// MISS Handler
|
||||
// --------------
|
||||
|
||||
// miss_handler
//
// Central unit behind the data cache controllers. It
//   * serves cache-line refills: a non-bypass miss request from one of the
//     NR_PORTS requesters is latched into the MSHR (mshr_q), a victim way is
//     selected (first invalid way, otherwise the way picked by the LFSR), a
//     dirty victim is written back and the line is then fetched and stored;
//   * flushes the cache by walking cnt_q over the index range and writing
//     back every dirty way it finds;
//   * serves AMOs: an AMO first triggers the same flush walk and is then
//     issued on a dedicated bypass port;
//   * forwards bypass requests of all requesters (plus the AMO port) through
//     axi_adapter_arbiter to a single axi_adapter.
// One FSM (state_q) sequences refill, write-back, flush, init and AMO
// handling, so only one of those activities is in progress at a time.
module miss_handler
|
||||
import ariane_pkg::*;
|
||||
import std_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter int unsigned NR_PORTS = 4,
|
||||
parameter type axi_req_t = logic,
|
||||
parameter type axi_rsp_t = logic
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic flush_i, // flush request
|
||||
output logic flush_ack_o, // acknowledge successful flush
|
||||
output logic miss_o,
|
||||
input logic busy_i, // dcache is busy with something
|
||||
// Bypass or miss
|
||||
input logic [NR_PORTS-1:0][$bits(miss_req_t)-1:0] miss_req_i,
|
||||
// Bypass handling
|
||||
output logic [NR_PORTS-1:0] bypass_gnt_o,
|
||||
output logic [NR_PORTS-1:0] bypass_valid_o,
|
||||
output logic [NR_PORTS-1:0][63:0] bypass_data_o,
|
||||
|
||||
// AXI port
|
||||
output axi_req_t axi_bypass_o,
|
||||
input axi_rsp_t axi_bypass_i,
|
||||
|
||||
// Miss handling (~> cacheline refill)
|
||||
output logic [NR_PORTS-1:0] miss_gnt_o,
|
||||
output logic [NR_PORTS-1:0] active_serving_o,
|
||||
|
||||
output logic [63:0] critical_word_o,
|
||||
output logic critical_word_valid_o,
|
||||
output axi_req_t axi_data_o,
|
||||
input axi_rsp_t axi_data_i,
|
||||
|
||||
input logic [NR_PORTS-1:0][55:0] mshr_addr_i,
|
||||
output logic [NR_PORTS-1:0] mshr_addr_matches_o,
|
||||
output logic [NR_PORTS-1:0] mshr_index_matches_o,
|
||||
// AMO
|
||||
input amo_req_t amo_req_i,
|
||||
output amo_resp_t amo_resp_o,
|
||||
// Port to SRAMs, for refill and eviction
|
||||
output logic [DCACHE_SET_ASSOC-1:0] req_o,
|
||||
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
|
||||
output cache_line_t data_o,
|
||||
output cl_be_t be_o,
|
||||
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
|
||||
output logic we_o
|
||||
);
|
||||
|
||||
// NR_PORTS requester ports + one AMO port (the AMO port is packed last)
|
||||
parameter NR_BYPASS_PORTS = NR_PORTS + 1;
|
||||
|
||||
// FSM states
|
||||
enum logic [3:0] {
|
||||
IDLE, // 0
|
||||
FLUSHING, // 1
|
||||
FLUSH, // 2
|
||||
WB_CACHELINE_FLUSH, // 3
|
||||
FLUSH_REQ_STATUS, // 4
|
||||
WB_CACHELINE_MISS, // 5
|
||||
WAIT_GNT_SRAM, // 6
|
||||
MISS, // 7
|
||||
REQ_CACHELINE, // 8
|
||||
MISS_REPL, // 9
|
||||
SAVE_CACHELINE, // A
|
||||
INIT, // B
|
||||
AMO_REQ, // C
|
||||
AMO_WAIT_RESP // D
|
||||
}
|
||||
state_d, state_q;
|
||||
|
||||
// Registers
|
||||
mshr_t mshr_d, mshr_q;
|
||||
logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
|
||||
logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
|
||||
// cache line to evict
|
||||
cache_line_t evict_cl_d, evict_cl_q;
|
||||
|
||||
logic serve_amo_d, serve_amo_q;
|
||||
// Request from one FSM
|
||||
logic [ NR_PORTS-1:0] miss_req_valid;
|
||||
logic [ NR_PORTS-1:0] miss_req_bypass;
|
||||
logic [ NR_PORTS-1:0][63:0] miss_req_addr;
|
||||
logic [ NR_PORTS-1:0][63:0] miss_req_wdata;
|
||||
logic [ NR_PORTS-1:0] miss_req_we;
|
||||
logic [ NR_PORTS-1:0][ 7:0] miss_req_be;
|
||||
logic [ NR_PORTS-1:0][ 1:0] miss_req_size;
|
||||
|
||||
// Bypass AMO port
|
||||
bypass_req_t amo_bypass_req;
|
||||
bypass_rsp_t amo_bypass_rsp;
|
||||
|
||||
// Bypass ports <-> Arbiter
|
||||
bypass_req_t [ NR_BYPASS_PORTS-1:0] bypass_ports_req;
|
||||
bypass_rsp_t [ NR_BYPASS_PORTS-1:0] bypass_ports_rsp;
|
||||
|
||||
// Arbiter <-> Bypass AXI adapter
|
||||
bypass_req_t bypass_adapter_req;
|
||||
bypass_rsp_t bypass_adapter_rsp;
|
||||
|
||||
// Cache Line Refill <-> AXI
|
||||
logic req_fsm_miss_valid;
|
||||
logic [ 63:0] req_fsm_miss_addr;
|
||||
logic [ DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata;
|
||||
logic req_fsm_miss_we;
|
||||
logic [ (DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be;
|
||||
ariane_pkg::ad_req_t req_fsm_miss_req;
|
||||
logic [ 1:0] req_fsm_miss_size;
|
||||
|
||||
logic gnt_miss_fsm;
|
||||
logic valid_miss_fsm;
|
||||
logic [ (DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm;
|
||||
|
||||
// Cache Management <-> LFSR
|
||||
logic lfsr_enable;
|
||||
logic [ DCACHE_SET_ASSOC-1:0] lfsr_oh;
|
||||
// NOTE(review): the width below is $clog2(DCACHE_SET_ASSOC-1), not
// $clog2(DCACHE_SET_ASSOC); confirm it matches the refill_way_bin port of
// lfsr_8bit for every supported associativity.
logic [$clog2(DCACHE_SET_ASSOC-1)-1:0] lfsr_bin;
|
||||
// AMOs
|
||||
ariane_pkg::amo_t amo_op;
|
||||
logic [ 63:0] amo_operand_b;
|
||||
|
||||
// ------------------------------
|
||||
// Cache Management
|
||||
// ------------------------------
|
||||
always_comb begin : cache_management
|
||||
automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way;
|
||||
|
||||
// per way: evict_way = valid and dirty (needs write-back), valid_way = valid
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin
|
||||
evict_way[i] = data_i[i].valid & data_i[i].dirty;
|
||||
valid_way[i] = data_i[i].valid;
|
||||
end
|
||||
// ----------------------
|
||||
// Default Assignments
|
||||
// ----------------------
|
||||
// memory array
|
||||
req_o = '0;
|
||||
addr_o = '0;
|
||||
data_o = '0;
|
||||
be_o = '0;
|
||||
we_o = '0;
|
||||
// Cache controller
|
||||
miss_gnt_o = '0;
|
||||
active_serving_o = '0;
|
||||
// LFSR replacement unit
|
||||
lfsr_enable = 1'b0;
|
||||
// to AXI refill
|
||||
req_fsm_miss_valid = 1'b0;
|
||||
req_fsm_miss_addr = '0;
|
||||
req_fsm_miss_wdata = '0;
|
||||
req_fsm_miss_we = 1'b0;
|
||||
req_fsm_miss_be = '0;
|
||||
req_fsm_miss_req = ariane_pkg::CACHE_LINE_REQ;
|
||||
req_fsm_miss_size = 2'b11;
|
||||
// to AXI bypass
|
||||
amo_bypass_req.req = 1'b0;
|
||||
amo_bypass_req.reqtype = ariane_pkg::SINGLE_REQ;
|
||||
amo_bypass_req.amo = ariane_pkg::AMO_NONE;
|
||||
amo_bypass_req.addr = '0;
|
||||
amo_bypass_req.we = 1'b0;
|
||||
amo_bypass_req.wdata = '0;
|
||||
amo_bypass_req.be = '0;
|
||||
amo_bypass_req.size = 2'b11;
|
||||
amo_bypass_req.id = 4'b1011;
|
||||
// core
|
||||
flush_ack_o = 1'b0;
|
||||
miss_o = 1'b0; // to performance counter
|
||||
serve_amo_d = serve_amo_q;
|
||||
// --------------------------------
|
||||
// Flush and Miss operation
|
||||
// --------------------------------
|
||||
state_d = state_q;
|
||||
cnt_d = cnt_q;
|
||||
evict_way_d = evict_way_q;
|
||||
evict_cl_d = evict_cl_q;
|
||||
mshr_d = mshr_q;
|
||||
// communicate to the requester which unit we are currently serving
|
||||
active_serving_o[mshr_q.id] = mshr_q.valid;
|
||||
// AMOs
|
||||
amo_resp_o.ack = 1'b0;
|
||||
amo_resp_o.result = '0;
|
||||
amo_operand_b = '0;
|
||||
|
||||
case (state_q)
|
||||
|
||||
// Later assignments in this state override earlier ones, so the effective
// priority is: miss request > flush > AMO.
IDLE: begin
|
||||
// lowest priority are AMOs, wait until everything else is served before going for the AMOs
|
||||
if (amo_req_i.req && !busy_i) begin
|
||||
// 1. Flush the cache
|
||||
state_d = FLUSH_REQ_STATUS;
|
||||
serve_amo_d = 1'b1;
|
||||
cnt_d = '0;
|
||||
end
|
||||
// check if we want to flush and can flush e.g.: we are not busy anymore
|
||||
// TODO: Check that the busy flag is indeed needed
|
||||
if (flush_i && !busy_i) begin
|
||||
state_d = FLUSH_REQ_STATUS;
|
||||
cnt_d = '0;
|
||||
end
|
||||
|
||||
// check if one of the state machines missed
|
||||
// (the lowest-numbered requesting port wins because of the break)
for (int unsigned i = 0; i < NR_PORTS; i++) begin
|
||||
// here comes the refill portion of code
|
||||
if (miss_req_valid[i] && !miss_req_bypass[i]) begin
|
||||
state_d = MISS;
|
||||
// we are taking another request so don't take the AMO
|
||||
serve_amo_d = 1'b0;
|
||||
// save to MSHR
|
||||
mshr_d.valid = 1'b1;
|
||||
mshr_d.we = miss_req_we[i];
|
||||
mshr_d.id = i;
|
||||
mshr_d.addr = miss_req_addr[i][DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:0];
|
||||
mshr_d.wdata = miss_req_wdata[i];
|
||||
mshr_d.be = miss_req_be[i];
|
||||
break;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// ~> we missed on the cache
|
||||
MISS: begin
|
||||
// 1. Check if there is an empty cache-line
|
||||
// 2. If not -> evict one
|
||||
req_o = '1;
|
||||
addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
|
||||
state_d = MISS_REPL;
|
||||
miss_o = 1'b1;
|
||||
end
|
||||
|
||||
// ~> second miss cycle
|
||||
MISS_REPL: begin
|
||||
// if all are valid we need to evict one, pseudo random from LFSR
|
||||
if (&valid_way) begin
|
||||
lfsr_enable = 1'b1;
|
||||
evict_way_d = lfsr_oh;
|
||||
// do we need to write back the cache line?
|
||||
if (data_i[lfsr_bin].dirty) begin
|
||||
state_d = WB_CACHELINE_MISS;
|
||||
evict_cl_d.tag = data_i[lfsr_bin].tag;
|
||||
evict_cl_d.data = data_i[lfsr_bin].data;
|
||||
cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
|
||||
// no - we can request a cache line now
|
||||
end else state_d = REQ_CACHELINE;
|
||||
// we have at least one free way
|
||||
end else begin
|
||||
// get victim cache-line by looking for the first non-valid bit
|
||||
evict_way_d = get_victim_cl(~valid_way);
|
||||
state_d = REQ_CACHELINE;
|
||||
end
|
||||
end
|
||||
|
||||
// ~> we can just load the cache-line, the way is stored in evict_way_q
|
||||
REQ_CACHELINE: begin
|
||||
req_fsm_miss_valid = 1'b1;
|
||||
req_fsm_miss_addr = mshr_q.addr;
|
||||
|
||||
if (gnt_miss_fsm) begin
|
||||
state_d = SAVE_CACHELINE;
|
||||
miss_gnt_o[mshr_q.id] = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// ~> replace the cacheline
|
||||
SAVE_CACHELINE: begin
|
||||
// calculate cacheline offset
|
||||
// (bit offset of the addressed 64-bit word inside the line: word index * 64)
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
|
||||
cl_offset = mshr_q.addr[DCACHE_BYTE_OFFSET-1:3] << 6;
|
||||
// we've got a valid response from refill unit
|
||||
if (valid_miss_fsm) begin
|
||||
|
||||
addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
|
||||
req_o = evict_way_q;
|
||||
we_o = 1'b1;
|
||||
be_o = '1;
|
||||
be_o.vldrty = evict_way_q;
|
||||
data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
|
||||
data_o.data = data_miss_fsm;
|
||||
data_o.valid = 1'b1;
|
||||
data_o.dirty = 1'b0;
|
||||
|
||||
// is this a write?
|
||||
if (mshr_q.we) begin
|
||||
// Yes, so save the updated data now
|
||||
for (int i = 0; i < 8; i++) begin
|
||||
// check if we really want to write the corresponding byte
|
||||
if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i];
|
||||
end
|
||||
// its immediately dirty if we write
|
||||
data_o.dirty = 1'b1;
|
||||
end
|
||||
// reset MSHR
|
||||
mshr_d.valid = 1'b0;
|
||||
// go back to idle
|
||||
state_d = IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
// ------------------------------
|
||||
// Write Back Operation
|
||||
// ------------------------------
|
||||
// ~> evict a cache line from way saved in evict_way_q
|
||||
WB_CACHELINE_FLUSH, WB_CACHELINE_MISS: begin
|
||||
|
||||
req_fsm_miss_valid = 1'b1;
|
||||
// write-back address: victim tag, current index (cnt_q), zeroed line offset
req_fsm_miss_addr = {
|
||||
evict_cl_q.tag,
|
||||
cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET],
|
||||
{{DCACHE_BYTE_OFFSET} {1'b0}}
|
||||
};
|
||||
req_fsm_miss_be = '1;
|
||||
req_fsm_miss_we = 1'b1;
|
||||
req_fsm_miss_wdata = evict_cl_q.data;
|
||||
|
||||
// we've got a grant --> this is timing critical, think about it
|
||||
if (gnt_miss_fsm) begin
|
||||
// write status array
|
||||
addr_o = cnt_q;
|
||||
// NOTE(review): 1'b1 zero-extends, so only req_o[0] is asserted here;
// presumably enough to trigger the status (valid/dirty) write - confirm.
req_o = 1'b1;
|
||||
we_o = 1'b1;
|
||||
data_o.valid = INVALIDATE_ON_FLUSH ? 1'b0 : 1'b1;
|
||||
// invalidate
|
||||
be_o.vldrty = evict_way_q;
|
||||
// go back to handling the miss or flushing, depending on where we came from
|
||||
state_d = (state_q == WB_CACHELINE_MISS) ? MISS : FLUSH_REQ_STATUS;
|
||||
end
|
||||
end
|
||||
|
||||
// ------------------------------
|
||||
// Flushing & Initialization
|
||||
// ------------------------------
|
||||
// ~> make another request to check the same cache-line if there are still some valid entries
|
||||
FLUSH_REQ_STATUS: begin
|
||||
req_o = '1;
|
||||
addr_o = cnt_q;
|
||||
state_d = FLUSHING;
|
||||
end
|
||||
|
||||
FLUSHING: begin
|
||||
// this has priority
|
||||
// at least one of the cache lines is dirty
|
||||
if (|evict_way) begin
|
||||
// evict cache line, look for the first cache-line which is dirty
|
||||
evict_way_d = get_victim_cl(evict_way);
|
||||
evict_cl_d = data_i[one_hot_to_bin(evict_way)];
|
||||
state_d = WB_CACHELINE_FLUSH;
|
||||
// not dirty ~> increment and continue
|
||||
end else begin
|
||||
// increment and re-request
|
||||
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
|
||||
state_d = FLUSH_REQ_STATUS;
|
||||
addr_o = cnt_q;
|
||||
req_o = 1'b1;
|
||||
be_o.vldrty = INVALIDATE_ON_FLUSH ? '1 : '0;
|
||||
we_o = 1'b1;
|
||||
// finished with flushing operation, go back to idle
|
||||
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) begin
|
||||
// only acknowledge if the flush wasn't triggered by an atomic
|
||||
flush_ack_o = ~serve_amo_q;
|
||||
// if we are flushing because of an AMO go to serve it
|
||||
if (serve_amo_q) begin
|
||||
state_d = AMO_REQ;
|
||||
serve_amo_d = 1'b0;
|
||||
end else begin
|
||||
state_d = IDLE;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// ~> only called after reset
|
||||
INIT: begin
|
||||
// initialize status array
|
||||
addr_o = cnt_q;
|
||||
req_o = 1'b1;
|
||||
we_o = 1'b1;
|
||||
// only write the dirty array
|
||||
be_o.vldrty = '1;
|
||||
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
|
||||
// finished initialization
|
||||
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) state_d = IDLE;
|
||||
end
|
||||
// ----------------------
|
||||
// AMOs
|
||||
// ----------------------
|
||||
// ~> we are here because we need to do the AMO, the cache is clean at this point
|
||||
AMO_REQ: begin
|
||||
amo_bypass_req.req = 1'b1;
|
||||
amo_bypass_req.reqtype = ariane_pkg::SINGLE_REQ;
|
||||
amo_bypass_req.amo = amo_req_i.amo_op;
|
||||
// address is in operand a
|
||||
amo_bypass_req.addr = amo_req_i.operand_a;
|
||||
if (amo_req_i.amo_op != AMO_LR) begin
|
||||
amo_bypass_req.we = 1'b1;
|
||||
end
|
||||
amo_bypass_req.size = amo_req_i.size;
|
||||
// AXI implements CLR op instead of AND, negate operand
|
||||
if (amo_req_i.amo_op == AMO_AND) begin
|
||||
amo_operand_b = ~amo_req_i.operand_b;
|
||||
end else begin
|
||||
amo_operand_b = amo_req_i.operand_b;
|
||||
end
|
||||
// align data and byte-enable to correct byte lanes
|
||||
amo_bypass_req.wdata = amo_operand_b;
|
||||
if (amo_req_i.size == 2'b11) begin
|
||||
// 64b transfer
|
||||
amo_bypass_req.be = 8'b11111111;
|
||||
end else begin
|
||||
// 32b transfer
|
||||
if (amo_req_i.operand_a[2:0] == '0) begin
|
||||
// 64b aligned -> activate lower 4 byte lanes
|
||||
amo_bypass_req.be = 8'b00001111;
|
||||
end else begin
|
||||
// 64b unaligned -> activate upper 4 byte lanes
|
||||
amo_bypass_req.be = 8'b11110000;
|
||||
amo_bypass_req.wdata = amo_operand_b[31:0] << 32;
|
||||
end
|
||||
end
|
||||
|
||||
// when request is accepted, wait for response
|
||||
if (amo_bypass_rsp.gnt) begin
|
||||
// response already valid in the grant cycle: rdata is forwarded as-is
// (no 32b lane select / sign-extension on this path, unlike AMO_WAIT_RESP)
if (amo_bypass_rsp.valid) begin
|
||||
state_d = IDLE;
|
||||
amo_resp_o.ack = 1'b1;
|
||||
amo_resp_o.result = amo_bypass_rsp.rdata;
|
||||
end else begin
|
||||
state_d = AMO_WAIT_RESP;
|
||||
end
|
||||
end
|
||||
end
|
||||
AMO_WAIT_RESP: begin
|
||||
if (amo_bypass_rsp.valid) begin
|
||||
state_d = IDLE;
|
||||
amo_resp_o.ack = 1'b1;
|
||||
// Request is assumed to be still valid (ack not granted yet)
|
||||
if (amo_req_i.size == 2'b10) begin
|
||||
// 32b request
|
||||
logic [31:0] halfword;
|
||||
if (amo_req_i.operand_a[2:0] == '0) begin
|
||||
// 64b aligned -> result is in the lower 32 bits of the response
|
||||
halfword = amo_bypass_rsp.rdata[31:0];
|
||||
end else begin
|
||||
// 64b unaligned -> result is in the upper 32 bits of the response
|
||||
halfword = amo_bypass_rsp.rdata[63:32];
|
||||
end
|
||||
// Sign-extend 32b requests as per RISC-V spec
|
||||
amo_resp_o.result = {{32{halfword[31]}}, halfword};
|
||||
end else begin
|
||||
// 64b request
|
||||
amo_resp_o.result = amo_bypass_rsp.rdata;
|
||||
end
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// check MSHR for aliasing
|
||||
always_comb begin
|
||||
|
||||
mshr_addr_matches_o = 'b0;
|
||||
mshr_index_matches_o = 'b0;
|
||||
|
||||
for (int i = 0; i < NR_PORTS; i++) begin
|
||||
// check mshr for potential matching of other units, exclude the unit currently being served
|
||||
if (mshr_q.valid && mshr_addr_i[i][55:DCACHE_BYTE_OFFSET] == mshr_q.addr[55:DCACHE_BYTE_OFFSET]) begin
|
||||
mshr_addr_matches_o[i] = 1'b1;
|
||||
end
|
||||
|
||||
// same as previous, but checking only the index
|
||||
if (mshr_q.valid && mshr_addr_i[i][DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == mshr_q.addr[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]) begin
|
||||
mshr_index_matches_o[i] = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
// --------------------
|
||||
// Sequential Process
|
||||
// --------------------
|
||||
// the FSM comes out of reset in INIT
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (~rst_ni) begin
|
||||
mshr_q <= '0;
|
||||
state_q <= INIT;
|
||||
cnt_q <= '0;
|
||||
evict_way_q <= '0;
|
||||
evict_cl_q <= '0;
|
||||
serve_amo_q <= 1'b0;
|
||||
end else begin
|
||||
mshr_q <= mshr_d;
|
||||
state_q <= state_d;
|
||||
cnt_q <= cnt_d;
|
||||
evict_way_q <= evict_way_d;
|
||||
evict_cl_q <= evict_cl_d;
|
||||
serve_amo_q <= serve_amo_d;
|
||||
end
|
||||
end
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
// assert that at most one way is selected for eviction
|
||||
assert property (@(posedge clk_i) $onehot0(evict_way_q))
|
||||
else $warning("Evict-way should be one-hot encoded");
|
||||
`endif
|
||||
//pragma translate_on
|
||||
|
||||
// ----------------------
|
||||
// Pack bypass ports
|
||||
// ----------------------
|
||||
always_comb begin
|
||||
logic [$clog2(NR_BYPASS_PORTS)-1:0] id;
|
||||
|
||||
// Pack MSHR ports first
|
||||
for (id = 0; id < NR_PORTS; id++) begin
|
||||
bypass_ports_req[id].req = miss_req_valid[id] & miss_req_bypass[id];
|
||||
bypass_ports_req[id].reqtype = ariane_pkg::SINGLE_REQ;
|
||||
bypass_ports_req[id].amo = AMO_NONE;
|
||||
bypass_ports_req[id].id = 4'b1000 | 4'(id);
|
||||
bypass_ports_req[id].addr = miss_req_addr[id];
|
||||
bypass_ports_req[id].wdata = miss_req_wdata[id];
|
||||
bypass_ports_req[id].we = miss_req_we[id];
|
||||
bypass_ports_req[id].be = miss_req_be[id];
|
||||
bypass_ports_req[id].size = miss_req_size[id];
|
||||
|
||||
bypass_gnt_o[id] = bypass_ports_rsp[id].gnt;
|
||||
bypass_valid_o[id] = bypass_ports_rsp[id].valid;
|
||||
bypass_data_o[id] = bypass_ports_rsp[id].rdata;
|
||||
end
|
||||
|
||||
// AMO port has lowest priority
|
||||
// (id is left at NR_PORTS by the loop above, i.e. the last bypass port)
bypass_ports_req[id] = amo_bypass_req;
|
||||
amo_bypass_rsp = bypass_ports_rsp[id];
|
||||
end
|
||||
|
||||
// ----------------------
|
||||
// Arbitrate bypass ports
|
||||
// ----------------------
|
||||
axi_adapter_arbiter #(
|
||||
.NR_PORTS (NR_BYPASS_PORTS),
|
||||
.MAX_OUTSTANDING_REQ(CVA6Cfg.MaxOutstandingStores),
|
||||
.req_t (bypass_req_t),
|
||||
.rsp_t (bypass_rsp_t)
|
||||
) i_bypass_arbiter (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
// Master Side
|
||||
.req_i (bypass_ports_req),
|
||||
.rsp_o (bypass_ports_rsp),
|
||||
// Slave Side
|
||||
.req_o (bypass_adapter_req),
|
||||
.rsp_i (bypass_adapter_rsp)
|
||||
);
|
||||
|
||||
// ----------------------
|
||||
// Bypass AXI Interface
|
||||
// ----------------------
|
||||
// Cast bypass_adapter_req.addr to axi_adapter port size
|
||||
logic [riscv::XLEN-1:0] bypass_addr;
|
||||
assign bypass_addr = bypass_adapter_req.addr;
|
||||
|
||||
axi_adapter #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.DATA_WIDTH (64),
|
||||
.CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET),
|
||||
.axi_req_t (axi_req_t),
|
||||
.axi_rsp_t (axi_rsp_t)
|
||||
) i_bypass_axi_adapter (
|
||||
.clk_i(clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
.req_i(bypass_adapter_req.req),
|
||||
.type_i(bypass_adapter_req.reqtype),
|
||||
.amo_i(bypass_adapter_req.amo),
|
||||
.id_i(({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, bypass_adapter_req.id})),
|
||||
.addr_i(bypass_addr),
|
||||
.wdata_i(bypass_adapter_req.wdata),
|
||||
.we_i(bypass_adapter_req.we),
|
||||
.be_i(bypass_adapter_req.be),
|
||||
.size_i(bypass_adapter_req.size),
|
||||
.gnt_o(bypass_adapter_rsp.gnt),
|
||||
.valid_o(bypass_adapter_rsp.valid),
|
||||
.rdata_o(bypass_adapter_rsp.rdata),
|
||||
.id_o(), // not used, single outstanding request in arbiter
|
||||
.critical_word_o(), // not used for single requests
|
||||
.critical_word_valid_o(), // not used for single requests
|
||||
.axi_req_o(axi_bypass_o),
|
||||
.axi_resp_i(axi_bypass_i)
|
||||
);
|
||||
|
||||
// ----------------------
|
||||
// Cache Line AXI Refill
|
||||
// ----------------------
|
||||
// Cast req_fsm_miss_addr to axi_adapter port size
|
||||
logic [riscv::XLEN-1:0] miss_addr;
|
||||
assign miss_addr = req_fsm_miss_addr;
|
||||
|
||||
axi_adapter #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.DATA_WIDTH (DCACHE_LINE_WIDTH),
|
||||
.CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET),
|
||||
.axi_req_t (axi_req_t),
|
||||
.axi_rsp_t (axi_rsp_t)
|
||||
) i_miss_axi_adapter (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.req_i (req_fsm_miss_valid),
|
||||
.type_i (req_fsm_miss_req),
|
||||
.amo_i (AMO_NONE),
|
||||
.gnt_o (gnt_miss_fsm),
|
||||
.addr_i (miss_addr),
|
||||
.we_i (req_fsm_miss_we),
|
||||
.wdata_i (req_fsm_miss_wdata),
|
||||
.be_i (req_fsm_miss_be),
|
||||
.size_i (req_fsm_miss_size),
|
||||
.id_i ({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, 4'b0111}),
|
||||
.valid_o (valid_miss_fsm),
|
||||
.rdata_o (data_miss_fsm),
|
||||
.id_o (),
|
||||
.critical_word_o (critical_word_o),
|
||||
.critical_word_valid_o(critical_word_valid_o),
|
||||
.axi_req_o (axi_data_o),
|
||||
.axi_resp_i (axi_data_i)
|
||||
);
|
||||
|
||||
// -----------------
|
||||
// Replacement LFSR
|
||||
// -----------------
|
||||
lfsr_8bit #(
|
||||
.WIDTH(DCACHE_SET_ASSOC)
|
||||
) i_lfsr (
|
||||
.en_i (lfsr_enable),
|
||||
.refill_way_oh (lfsr_oh),
|
||||
.refill_way_bin(lfsr_bin),
|
||||
.*
|
||||
);
|
||||
|
||||
// -----------------
|
||||
// Struct Split
|
||||
// -----------------
|
||||
// Hack as system verilog support in modelsim seems to be buggy here
|
||||
// (each flattened miss_req_i entry is cast back to miss_req_t and its fields
// are fanned out into the per-port miss_req_* vectors)
always_comb begin
|
||||
automatic miss_req_t miss_req;
|
||||
|
||||
for (int unsigned i = 0; i < NR_PORTS; i++) begin
|
||||
miss_req = miss_req_t'(miss_req_i[i]);
|
||||
miss_req_valid[i] = miss_req.valid;
|
||||
miss_req_bypass[i] = miss_req.bypass;
|
||||
miss_req_addr[i] = miss_req.addr;
|
||||
miss_req_wdata[i] = miss_req.wdata;
|
||||
miss_req_we[i] = miss_req.we;
|
||||
miss_req_be[i] = miss_req.be;
|
||||
miss_req_size[i] = miss_req.size;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
||||
// --------------
|
||||
// AXI Arbiter
|
||||
// --------------
|
||||
//
|
||||
// Description: Arbitrates access to AXI refill/bypass
|
||||
//
|
||||
// axi_adapter_arbiter
//
// Funnels NR_PORTS request/response master ports onto a single slave port.
//   * While idle, the lowest-numbered requesting port is selected and its
//     request is passed through combinationally.
//   * While serving, the selected port may issue further requests only when
//     no other port is requesting and the in-flight counter allows it.
//   * Responses are routed to the selected port; once the last in-flight
//     transaction completes the arbiter returns to idle.
// NOTE(review): the in-flight limit is compared with `!=` against
// MAX_OUTSTANDING_REQ - 1; behaviour for MAX_OUTSTANDING_REQ <= 1 should be
// confirmed against the intended use.
module axi_adapter_arbiter #(
    parameter NR_PORTS = 4,
    parameter MAX_OUTSTANDING_REQ = 0,
    parameter type req_t = std_cache_pkg::bypass_req_t,
    parameter type rsp_t = std_cache_pkg::bypass_rsp_t
) (
    input  logic                clk_i,   // Clock
    input  logic                rst_ni,  // Asynchronous reset active low
    // Master ports
    input  req_t [NR_PORTS-1:0] req_i,
    output rsp_t [NR_PORTS-1:0] rsp_o,
    // Slave port
    output req_t                req_o,
    input  rsp_t                rsp_i
);

  // Counter must be able to hold 0..MAX_OUTSTANDING_REQ and is at least 1 bit.
  localparam CNT_WIDTH_RAW = $clog2(MAX_OUTSTANDING_REQ + 1);
  localparam MAX_OUTSTANDING_CNT_WIDTH = (CNT_WIDTH_RAW > 0) ? CNT_WIDTH_RAW : 1;

  typedef logic [MAX_OUTSTANDING_CNT_WIDTH-1:0] outstanding_cnt_t;

  enum logic {
    IDLE,
    SERVING
  }
      state_d, state_q;

  // registered copy of the request currently presented to the slave
  req_t                held_req_d, held_req_q;
  // index of the port being served
  logic [NR_PORTS-1:0] port_sel_d, port_sel_q;
  // number of granted transactions still waiting for their response
  outstanding_cnt_t    inflight_d, inflight_q;

  // request bits of all master ports, one bit per port
  logic [NR_PORTS-1:0] port_req;
  // set when a port other than the selected one is requesting
  logic                other_port_req;

  for (genvar i = 0; i < NR_PORTS; i++) begin : gen_req_flat
    assign port_req[i] = req_i[i].req;
  end
  assign other_port_req = |(port_req & ~(1 << port_sel_q));

  always_comb begin
    // registers hold their value unless changed below
    state_d    = state_q;
    port_sel_d = port_sel_q;
    held_req_d = held_req_q;
    inflight_d = inflight_q;

    // slave sees the registered request by default
    req_o = held_req_q;

    // read data is always routed to the selected port, everything else is 0
    rsp_o = '0;
    rsp_o[port_sel_q].rdata = rsp_i.rdata;

    case (state_q)

      IDLE: begin
        // priority scan: walking downwards lets the lowest requesting index
        // perform the final (winning) assignment
        for (int i = NR_PORTS - 1; i >= 0; i--) begin
          if (req_i[i].req == 1'b1) begin
            port_sel_d = i[$bits(port_sel_d)-1:0];
            state_d    = SERVING;
          end
        end

        // pass the chosen port's request straight through and grant it
        held_req_d = req_i[port_sel_d];
        req_o      = req_i[port_sel_d];
        rsp_o[port_sel_d].gnt = req_i[port_sel_d].req;

        // a granted request becomes an in-flight transaction and must not be
        // replayed from the register
        if (req_o.req && rsp_i.gnt) begin
          held_req_d.req = 1'b0;
          inflight_d     = inflight_d + 1;
        end
      end

      SERVING: begin
        // The selected port may stack another transaction, but only while no
        // request is pending towards the slave, every other port is quiet
        // and the in-flight limit has not been hit.
        if ((!req_o.req) && !other_port_req &&
            (inflight_q != (MAX_OUTSTANDING_REQ - 1)) &&
            req_i[port_sel_q].req) begin
          held_req_d = req_i[port_sel_q];
          req_o      = req_i[port_sel_q];
          rsp_o[port_sel_q].gnt = 1'b1;
          state_d    = SERVING;
        end

        // request accepted by the slave -> one more transaction in flight
        if (req_o.req && rsp_i.gnt) begin
          held_req_d.req = 1'b0;
          inflight_d     = inflight_d + 1;
        end

        // response returned -> one transaction retired
        if (rsp_i.valid) begin
          inflight_d = inflight_d - 1;
          rsp_o[port_sel_q].valid = 1'b1;

          // release the port once nothing is in flight and no request is
          // left waiting for its grant
          if ((inflight_d == 0) && !(req_o.req && !rsp_i.gnt)) state_d = IDLE;
        end
      end

      default:  /* default */;
    endcase
  end

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
      state_q    <= IDLE;
      port_sel_q <= '0;
      held_req_q <= '0;
      inflight_q <= '0;
    end else begin
      state_q    <= state_d;
      port_sel_q <= port_sel_d;
      held_req_q <= held_req_d;
      inflight_q <= inflight_d;
    end
  end

  // ------------
  // Assertions
  // ------------

  //pragma translate_off
`ifndef VERILATOR
  // every grant must eventually be followed by a response
  assert property (@(posedge clk_i) rsp_i.gnt |-> ##[1:$] rsp_i.valid)
  else begin
    $error("There was a grant without a rvalid");
    $stop();
  end
  // a grant is only legal while a request is being presented
  assert property (@(negedge clk_i) rsp_i.gnt |-> req_o.req)
  else begin
    $error("There was a grant without a request.");
    $stop();
  end
  // the address of a presented request must be fully known
  assert property (@(posedge clk_i) (req_o.req) |-> (!$isunknown(req_o.addr)))
  else begin
    $error("address contains X when request is set");
    $stop();
  end

`endif
  //pragma translate_on
endmodule
|
||||
|
|
@ -0,0 +1,315 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 15.08.2018
|
||||
// Description: Standard Ariane cache subsystem with instruction cache and
|
||||
// write-back data cache.
|
||||
|
||||
|
||||
module std_cache_subsystem
|
||||
import ariane_pkg::*;
|
||||
import std_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter int unsigned NumPorts = 4,
|
||||
parameter type axi_ar_chan_t = logic,
|
||||
parameter type axi_aw_chan_t = logic,
|
||||
parameter type axi_w_chan_t = logic,
|
||||
parameter type axi_req_t = logic,
|
||||
parameter type axi_rsp_t = logic
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input riscv::priv_lvl_t priv_lvl_i,
|
||||
// I$
|
||||
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
|
||||
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
|
||||
output logic icache_miss_o, // to performance counter
|
||||
// address translation requests
|
||||
input icache_areq_t icache_areq_i, // to/from frontend
|
||||
output icache_arsp_t icache_areq_o,
|
||||
// data requests
|
||||
input icache_dreq_t icache_dreq_i, // to/from frontend
|
||||
output icache_drsp_t icache_dreq_o,
|
||||
// AMOs
|
||||
input amo_req_t amo_req_i,
|
||||
output amo_resp_t amo_resp_o,
|
||||
// D$
|
||||
// Cache management
|
||||
input logic dcache_enable_i, // from CSR
|
||||
input logic dcache_flush_i, // high until acknowledged
|
||||
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
|
||||
output logic dcache_miss_o, // we missed on a ld/st
|
||||
output logic wbuffer_empty_o, // statically set to 1, as there is no wbuffer in this cache system
|
||||
// Request ports
|
||||
input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU
|
||||
output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU
|
||||
// memory side
|
||||
output axi_req_t axi_req_o,
|
||||
input axi_rsp_t axi_resp_i
|
||||
);
|
||||
|
||||
assign wbuffer_empty_o = 1'b1;
|
||||
|
||||
axi_req_t axi_req_icache;
|
||||
axi_rsp_t axi_resp_icache;
|
||||
axi_req_t axi_req_bypass;
|
||||
axi_rsp_t axi_resp_bypass;
|
||||
axi_req_t axi_req_data;
|
||||
axi_rsp_t axi_resp_data;
|
||||
|
||||
cva6_icache_axi_wrapper #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.axi_req_t(axi_req_t),
|
||||
.axi_rsp_t(axi_rsp_t)
|
||||
) i_cva6_icache_axi_wrapper (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.priv_lvl_i(priv_lvl_i),
|
||||
.flush_i (icache_flush_i),
|
||||
.en_i (icache_en_i),
|
||||
.miss_o (icache_miss_o),
|
||||
.areq_i (icache_areq_i),
|
||||
.areq_o (icache_areq_o),
|
||||
.dreq_i (icache_dreq_i),
|
||||
.dreq_o (icache_dreq_o),
|
||||
.axi_req_o (axi_req_icache),
|
||||
.axi_resp_i(axi_resp_icache)
|
||||
);
|
||||
|
||||
// decreasing priority
|
||||
// Port 0: PTW
|
||||
// Port 1: Load Unit
|
||||
// Port 2: Accelerator
|
||||
// Port 3: Store Unit
|
||||
std_nbdcache #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.NumPorts (NumPorts),
|
||||
.axi_req_t(axi_req_t),
|
||||
.axi_rsp_t(axi_rsp_t)
|
||||
) i_nbdcache (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.enable_i (dcache_enable_i),
|
||||
.flush_i (dcache_flush_i),
|
||||
.flush_ack_o (dcache_flush_ack_o),
|
||||
.miss_o (dcache_miss_o),
|
||||
.axi_bypass_o(axi_req_bypass),
|
||||
.axi_bypass_i(axi_resp_bypass),
|
||||
.axi_data_o (axi_req_data),
|
||||
.axi_data_i (axi_resp_data),
|
||||
.req_ports_i (dcache_req_ports_i),
|
||||
.req_ports_o (dcache_req_ports_o),
|
||||
.amo_req_i,
|
||||
.amo_resp_o
|
||||
);
|
||||
|
||||
// -----------------------
|
||||
// Arbitrate AXI Ports
|
||||
// -----------------------
|
||||
logic [1:0] w_select, w_select_fifo, w_select_arbiter;
|
||||
logic [1:0] w_fifo_usage;
|
||||
logic w_fifo_empty, w_fifo_full;
|
||||
|
||||
|
||||
// AR Channel
|
||||
stream_arbiter #(
|
||||
.DATA_T(axi_ar_chan_t),
|
||||
.N_INP (3)
|
||||
) i_stream_arbiter_ar (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.inp_data_i ({axi_req_icache.ar, axi_req_bypass.ar, axi_req_data.ar}),
|
||||
.inp_valid_i({axi_req_icache.ar_valid, axi_req_bypass.ar_valid, axi_req_data.ar_valid}),
|
||||
.inp_ready_o({axi_resp_icache.ar_ready, axi_resp_bypass.ar_ready, axi_resp_data.ar_ready}),
|
||||
.oup_data_o (axi_req_o.ar),
|
||||
.oup_valid_o(axi_req_o.ar_valid),
|
||||
.oup_ready_i(axi_resp_i.ar_ready)
|
||||
);
|
||||
|
||||
// AW Channel
|
||||
stream_arbiter #(
|
||||
.DATA_T(axi_aw_chan_t),
|
||||
.N_INP (3)
|
||||
) i_stream_arbiter_aw (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.inp_data_i ({axi_req_icache.aw, axi_req_bypass.aw, axi_req_data.aw}),
|
||||
.inp_valid_i({axi_req_icache.aw_valid, axi_req_bypass.aw_valid, axi_req_data.aw_valid}),
|
||||
.inp_ready_o({axi_resp_icache.aw_ready, axi_resp_bypass.aw_ready, axi_resp_data.aw_ready}),
|
||||
.oup_data_o (axi_req_o.aw),
|
||||
.oup_valid_o(axi_req_o.aw_valid),
|
||||
.oup_ready_i(axi_resp_i.aw_ready)
|
||||
);
|
||||
|
||||
// WID has been removed in AXI 4 so we need to keep track which AW request has been accepted
|
||||
// to forward the correct write data.
|
||||
// Decode which master owns the AW request currently presented on axi_req_o.
// The resulting index matches the input order of the W-channel stream_mux:
// 2 = dcache, 1 = bypass, 0 = icache.
always_comb begin
  unique casez (axi_req_o.aw.id)
    4'b0111: w_select = 2'd2;  // dcache
    4'b1???: w_select = 2'd1;  // bypass
    default: w_select = 2'd0;  // icache (also taken for any other ID)
  endcase
end
|
||||
|
||||
// W Channel
|
||||
fifo_v3 #(
|
||||
.DATA_WIDTH (2),
|
||||
// we can have a maximum of 4 outstanding transactions as each port is blocking
|
||||
.DEPTH (4),
|
||||
.FALL_THROUGH(1'b1)
|
||||
) i_fifo_w_channel (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (1'b0),
|
||||
.testmode_i(1'b0),
|
||||
.full_o (w_fifo_full),
|
||||
.empty_o (), // leave open
|
||||
.usage_o (w_fifo_usage),
|
||||
.data_i (w_select),
|
||||
// a new transaction was requested and granted
|
||||
.push_i (axi_req_o.aw_valid & axi_resp_i.aw_ready),
|
||||
// write ID to select the output MUX
|
||||
.data_o (w_select_fifo),
|
||||
// transaction has finished
|
||||
.pop_i (axi_req_o.w_valid & axi_resp_i.w_ready & axi_req_o.w.last)
|
||||
);
|
||||
|
||||
// In fall-through mode, the empty_o will be low when push_i is high (on zero usage).
|
||||
// We do not want this here. Also, usage_o is missing the MSB, so on full fifo, usage_o is zero.
|
||||
assign w_fifo_empty = w_fifo_usage == 0 && !w_fifo_full;
|
||||
|
||||
// icache will never write so select it as default (e.g.: when no arbitration is active)
|
||||
// this is equal to setting it to zero
|
||||
assign w_select_arbiter = w_fifo_empty ? (axi_req_o.aw_valid ? w_select : 0) : w_select_fifo;
|
||||
|
||||
stream_mux #(
|
||||
.DATA_T(axi_w_chan_t),
|
||||
.N_INP (3)
|
||||
) i_stream_mux_w (
|
||||
.inp_data_i ({axi_req_data.w, axi_req_bypass.w, axi_req_icache.w}),
|
||||
.inp_valid_i({axi_req_data.w_valid, axi_req_bypass.w_valid, axi_req_icache.w_valid}),
|
||||
.inp_ready_o({axi_resp_data.w_ready, axi_resp_bypass.w_ready, axi_resp_icache.w_ready}),
|
||||
.inp_sel_i (w_select_arbiter),
|
||||
.oup_data_o (axi_req_o.w),
|
||||
.oup_valid_o(axi_req_o.w_valid),
|
||||
.oup_ready_i(axi_resp_i.w_ready)
|
||||
);
|
||||
|
||||
// Route responses based on ID
|
||||
// 0000 -> I$
|
||||
// 0111 -> D$
|
||||
// 1??? -> Bypass
|
||||
// R Channel
|
||||
assign axi_resp_icache.r = axi_resp_i.r;
|
||||
assign axi_resp_bypass.r = axi_resp_i.r;
|
||||
assign axi_resp_data.r = axi_resp_i.r;
|
||||
|
||||
logic [1:0] r_select;
|
||||
|
||||
// Map the ID of the incoming R beat to the demux output that receives it.
// Indices follow the {icache, bypass, dcache} concatenation on the R demux
// ports: 2 = icache, 1 = bypass, 0 = dcache.
always_comb begin
  unique casez (axi_resp_i.r.id)
    4'b0000: r_select = 2'd2;  // icache
    4'b1???: r_select = 2'd1;  // bypass
    4'b0111: r_select = 2'd0;  // dcache
    default: r_select = 2'd0;  // any other ID is steered to output 0
  endcase
end
|
||||
|
||||
stream_demux #(
|
||||
.N_OUP(3)
|
||||
) i_stream_demux_r (
|
||||
.inp_valid_i(axi_resp_i.r_valid),
|
||||
.inp_ready_o(axi_req_o.r_ready),
|
||||
.oup_sel_i (r_select),
|
||||
.oup_valid_o({axi_resp_icache.r_valid, axi_resp_bypass.r_valid, axi_resp_data.r_valid}),
|
||||
.oup_ready_i({axi_req_icache.r_ready, axi_req_bypass.r_ready, axi_req_data.r_ready})
|
||||
);
|
||||
|
||||
// B Channel
|
||||
logic [1:0] b_select;
|
||||
|
||||
assign axi_resp_icache.b = axi_resp_i.b;
|
||||
assign axi_resp_bypass.b = axi_resp_i.b;
|
||||
assign axi_resp_data.b = axi_resp_i.b;
|
||||
|
||||
// Map the ID of the incoming B response to the demux output that receives it.
// Indices follow the {icache, bypass, dcache} concatenation on the B demux
// ports: 2 = icache, 1 = bypass, 0 = dcache.
always_comb begin
  unique casez (axi_resp_i.b.id)
    4'b0000: b_select = 2'd2;  // icache
    4'b1???: b_select = 2'd1;  // bypass
    4'b0111: b_select = 2'd0;  // dcache
    default: b_select = 2'd0;  // any other ID is steered to output 0
  endcase
end
|
||||
|
||||
stream_demux #(
|
||||
.N_OUP(3)
|
||||
) i_stream_demux_b (
|
||||
.inp_valid_i(axi_resp_i.b_valid),
|
||||
.inp_ready_o(axi_req_o.b_ready),
|
||||
.oup_sel_i (b_select),
|
||||
.oup_valid_o({axi_resp_icache.b_valid, axi_resp_bypass.b_valid, axi_resp_data.b_valid}),
|
||||
.oup_ready_i({axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready})
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// assertions
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
|
||||
// Simulation-only sanity checks on the data crossing the cache interfaces.
// Each check fires when the reduction-OR of the observed word evaluates to X,
// i.e. the word contains unknown bits and no bit that is known to be 1.
//
// Note: $warning takes only a message/argument list. The leading finish
// number accepted by $fatal is not part of its signature and would be
// printed as a stray argument, so it is not passed here.

// Instruction data returned to the frontend while icache_dreq_o.valid is set.
a_invalid_instruction_fetch :
assert property (
  @(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
else
  $warning(
      "[l1 icache] reading invalid instructions: vaddr=%08X, data=%08X",
      icache_dreq_o.vaddr,
      icache_dreq_o.data
  );

// Write data presented on the last request port (index NumPorts-1) while a
// request with at least one active byte enable is issued.
a_invalid_write_data :
assert property (
  @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> |dcache_req_ports_i[NumPorts-1].data_be |-> (|dcache_req_ports_i[NumPorts-1].data_wdata) !== 1'hX)
else
  $warning(
      "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
      {
        dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
      },
      dcache_req_ports_i[NumPorts-1].data_be,
      dcache_req_ports_i[NumPorts-1].data_wdata
  );

// Read data returned on every other request port (indices 0 .. NumPorts-2).
generate
  for (genvar j = 0; j < NumPorts - 1; j++) begin
    a_invalid_read_data :
    assert property (
      @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
    else
      $warning(
          "[l1 dcache] reading invalid data on port %01d: data=%016X",
          j,
          dcache_req_ports_o[j].data_rdata
      );
  end
endgenerate
|
||||
|
||||
`endif
|
||||
//pragma translate_on
|
||||
endmodule // std_cache_subsystem
|
||||
|
|
@ -0,0 +1,279 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 13.10.2017
|
||||
// Description: Nonblocking private L1 dcache
|
||||
|
||||
|
||||
module std_nbdcache
|
||||
import std_cache_pkg::*;
|
||||
import ariane_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter int unsigned NumPorts = 4,
|
||||
parameter type axi_req_t = logic,
|
||||
parameter type axi_rsp_t = logic
|
||||
) (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
// Cache management
|
||||
input logic enable_i, // from CSR
|
||||
input logic flush_i, // high until acknowledged
|
||||
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
|
||||
output logic miss_o, // we missed on a LD/ST
|
||||
// AMOs
|
||||
input amo_req_t amo_req_i,
|
||||
output amo_resp_t amo_resp_o,
|
||||
// Request ports
|
||||
input dcache_req_i_t [NumPorts-1:0] req_ports_i, // request ports
|
||||
output dcache_req_o_t [NumPorts-1:0] req_ports_o, // request ports
|
||||
// Cache AXI refill port
|
||||
output axi_req_t axi_data_o,
|
||||
input axi_rsp_t axi_data_i,
|
||||
output axi_req_t axi_bypass_o,
|
||||
input axi_rsp_t axi_bypass_i
|
||||
);
|
||||
|
||||
import std_cache_pkg::*;
|
||||
|
||||
// -------------------------------
|
||||
// Controller <-> Arbiter
|
||||
// -------------------------------
|
||||
// 1. Miss handler
|
||||
// 2. PTW
|
||||
// 3. Load Unit
|
||||
// 4. Accelerator
|
||||
// 5. Store unit
|
||||
logic [ NumPorts:0][ DCACHE_SET_ASSOC-1:0] req;
|
||||
logic [ NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr;
|
||||
logic [ NumPorts:0] gnt;
|
||||
cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata;
|
||||
logic [ NumPorts:0][ DCACHE_TAG_WIDTH-1:0] tag;
|
||||
|
||||
cache_line_t [ NumPorts:0] wdata;
|
||||
logic [ NumPorts:0] we;
|
||||
cl_be_t [ NumPorts:0] be;
|
||||
logic [ DCACHE_SET_ASSOC-1:0] hit_way;
|
||||
// -------------------------------
|
||||
// Controller <-> Miss unit
|
||||
// -------------------------------
|
||||
logic [ NumPorts-1:0] busy;
|
||||
logic [ NumPorts-1:0][ 55:0] mshr_addr;
|
||||
logic [ NumPorts-1:0] mshr_addr_matches;
|
||||
logic [ NumPorts-1:0] mshr_index_matches;
|
||||
logic [ 63:0] critical_word;
|
||||
logic critical_word_valid;
|
||||
|
||||
logic [ NumPorts-1:0][ $bits(miss_req_t)-1:0] miss_req;
|
||||
logic [ NumPorts-1:0] miss_gnt;
|
||||
logic [ NumPorts-1:0] active_serving;
|
||||
|
||||
logic [ NumPorts-1:0] bypass_gnt;
|
||||
logic [ NumPorts-1:0] bypass_valid;
|
||||
logic [ NumPorts-1:0][ 63:0] bypass_data;
|
||||
// -------------------------------
|
||||
// Arbiter <-> Data RAM
|
||||
// -------------------------------
|
||||
logic [ DCACHE_SET_ASSOC-1:0] req_ram;
|
||||
logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
|
||||
logic we_ram;
|
||||
cache_line_t wdata_ram;
|
||||
cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata_ram;
|
||||
cl_be_t be_ram;
|
||||
|
||||
// ------------------
|
||||
// Cache Controller
|
||||
// ------------------
|
||||
generate
|
||||
for (genvar i = 0; i < NumPorts; i++) begin : master_ports
|
||||
cache_ctrl #(
|
||||
.CVA6Cfg(CVA6Cfg)
|
||||
) i_cache_ctrl (
|
||||
.bypass_i (~enable_i),
|
||||
.busy_o (busy[i]),
|
||||
// from core
|
||||
.req_port_i(req_ports_i[i]),
|
||||
.req_port_o(req_ports_o[i]),
|
||||
// to SRAM array
|
||||
.req_o (req[i+1]),
|
||||
.addr_o (addr[i+1]),
|
||||
.gnt_i (gnt[i+1]),
|
||||
.data_i (rdata),
|
||||
.tag_o (tag[i+1]),
|
||||
.data_o (wdata[i+1]),
|
||||
.we_o (we[i+1]),
|
||||
.be_o (be[i+1]),
|
||||
.hit_way_i (hit_way),
|
||||
|
||||
.miss_req_o (miss_req[i]),
|
||||
.miss_gnt_i (miss_gnt[i]),
|
||||
.active_serving_i (active_serving[i]),
|
||||
.critical_word_i (critical_word),
|
||||
.critical_word_valid_i(critical_word_valid),
|
||||
.bypass_gnt_i (bypass_gnt[i]),
|
||||
.bypass_valid_i (bypass_valid[i]),
|
||||
.bypass_data_i (bypass_data[i]),
|
||||
|
||||
.mshr_addr_o (mshr_addr[i]),
|
||||
.mshr_addr_matches_i (mshr_addr_matches[i]),
|
||||
.mshr_index_matches_i(mshr_index_matches[i]),
|
||||
.*
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// ------------------
|
||||
// Miss Handling Unit
|
||||
// ------------------
|
||||
miss_handler #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.NR_PORTS (NumPorts),
|
||||
.axi_req_t(axi_req_t),
|
||||
.axi_rsp_t(axi_rsp_t)
|
||||
) i_miss_handler (
|
||||
.flush_i (flush_i),
|
||||
.busy_i (|busy),
|
||||
// AMOs
|
||||
.amo_req_i (amo_req_i),
|
||||
.amo_resp_o (amo_resp_o),
|
||||
.miss_req_i (miss_req),
|
||||
.miss_gnt_o (miss_gnt),
|
||||
.bypass_gnt_o (bypass_gnt),
|
||||
.bypass_valid_o (bypass_valid),
|
||||
.bypass_data_o (bypass_data),
|
||||
.critical_word_o (critical_word),
|
||||
.critical_word_valid_o(critical_word_valid),
|
||||
.mshr_addr_i (mshr_addr),
|
||||
.mshr_addr_matches_o (mshr_addr_matches),
|
||||
.mshr_index_matches_o (mshr_index_matches),
|
||||
.active_serving_o (active_serving),
|
||||
.req_o (req[0]),
|
||||
.addr_o (addr[0]),
|
||||
.data_i (rdata),
|
||||
.be_o (be[0]),
|
||||
.data_o (wdata[0]),
|
||||
.we_o (we[0]),
|
||||
.axi_bypass_o,
|
||||
.axi_bypass_i,
|
||||
.axi_data_o,
|
||||
.axi_data_i,
|
||||
.*
|
||||
);
|
||||
|
||||
assign tag[0] = '0;
|
||||
|
||||
// --------------
|
||||
// Memory Arrays
|
||||
// --------------
|
||||
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
|
||||
sram #(
|
||||
.DATA_WIDTH(DCACHE_LINE_WIDTH),
|
||||
.NUM_WORDS (DCACHE_NUM_WORDS)
|
||||
) data_sram (
|
||||
.req_i (req_ram[i]),
|
||||
.rst_ni (rst_ni),
|
||||
.we_i (we_ram),
|
||||
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
|
||||
.wuser_i('0),
|
||||
.wdata_i(wdata_ram.data),
|
||||
.be_i (be_ram.data),
|
||||
.ruser_o(),
|
||||
.rdata_o(rdata_ram[i].data),
|
||||
.*
|
||||
);
|
||||
|
||||
sram #(
|
||||
.DATA_WIDTH(DCACHE_TAG_WIDTH),
|
||||
.NUM_WORDS (DCACHE_NUM_WORDS)
|
||||
) tag_sram (
|
||||
.req_i (req_ram[i]),
|
||||
.rst_ni (rst_ni),
|
||||
.we_i (we_ram),
|
||||
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
|
||||
.wuser_i('0),
|
||||
.wdata_i(wdata_ram.tag),
|
||||
.be_i (be_ram.tag),
|
||||
.ruser_o(),
|
||||
.rdata_o(rdata_ram[i].tag),
|
||||
.*
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
// ----------------
|
||||
// Valid/Dirty Regs
|
||||
// ----------------
|
||||
|
||||
// align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
|
||||
// note: if you have an SRAM that supports flat bit enables for your target technology,
|
||||
// you can use it here to save the extra 4x overhead introduced by this workaround.
|
||||
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
|
||||
|
||||
// Pack/unpack the per-way valid and dirty flags. Each way occupies one byte
// lane of the valid/dirty SRAM word: bit 0 holds dirty, bit 1 holds valid.
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
  assign dirty_wdata[8*i]   = wdata_ram.dirty;
  assign dirty_wdata[8*i+1] = wdata_ram.valid;
  // The remaining six bits of the lane are padding that is never read back.
  // Tie them to zero so the SRAM write port does not see undriven (X) data.
  // NOTE(review): assumes 4*DCACHE_DIRTY_WIDTH == 8*DCACHE_SET_ASSOC, i.e. one
  // full byte lane per way as described by the alignment comment above -
  // confirm against ariane_pkg.
  assign dirty_wdata[8*i+7:8*i+2] = '0;
  assign rdata_ram[i].dirty = dirty_rdata[8*i];
  assign rdata_ram[i].valid = dirty_rdata[8*i+1];
end
|
||||
|
||||
sram #(
|
||||
.USER_WIDTH(1),
|
||||
.DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH),
|
||||
.NUM_WORDS (DCACHE_NUM_WORDS)
|
||||
) valid_dirty_sram (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.req_i (|req_ram),
|
||||
.we_i (we_ram),
|
||||
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
|
||||
.wuser_i('0),
|
||||
.wdata_i(dirty_wdata),
|
||||
.be_i (be_ram.vldrty),
|
||||
.ruser_o(),
|
||||
.rdata_o(dirty_rdata)
|
||||
);
|
||||
|
||||
// ------------------------------------------------
|
||||
// Tag Comparison and memory arbitration
|
||||
// ------------------------------------------------
|
||||
tag_cmp #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.NR_PORTS (NumPorts + 1),
|
||||
.ADDR_WIDTH (DCACHE_INDEX_WIDTH),
|
||||
.DCACHE_SET_ASSOC(DCACHE_SET_ASSOC)
|
||||
) i_tag_cmp (
|
||||
.req_i (req),
|
||||
.gnt_o (gnt),
|
||||
.addr_i (addr),
|
||||
.wdata_i (wdata),
|
||||
.we_i (we),
|
||||
.be_i (be),
|
||||
.rdata_o (rdata),
|
||||
.tag_i (tag),
|
||||
.hit_way_o(hit_way),
|
||||
|
||||
.req_o (req_ram),
|
||||
.addr_o (addr_ram),
|
||||
.wdata_o(wdata_ram),
|
||||
.we_o (we_ram),
|
||||
.be_o (be_ram),
|
||||
.rdata_i(rdata_ram),
|
||||
.*
|
||||
);
|
||||
|
||||
|
||||
//pragma translate_off
|
||||
initial begin
|
||||
assert (DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16})
|
||||
else $fatal(1, "Cache line size needs to be a power of two multiple of AxiDataWidth");
|
||||
end
|
||||
//pragma translate_on
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
|
||||
// --------------
|
||||
// Tag Compare
|
||||
// --------------
|
||||
//
|
||||
// Description: Arbitrates access to cache memories, simplified request grant protocol
|
||||
// checks for hit or miss on cache
|
||||
//
|
||||
module tag_cmp #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned NR_PORTS = 3,
    parameter int unsigned ADDR_WIDTH = 64,
    parameter type l_data_t = std_cache_pkg::cache_line_t,
    parameter type l_be_t = std_cache_pkg::cl_be_t,
    parameter int unsigned DCACHE_SET_ASSOC = 8
) (
    input logic clk_i,
    input logic rst_ni,

    // requester side: one slot per port
    input  logic    [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i,
    output logic    [NR_PORTS-1:0]                       gnt_o,
    input  logic    [NR_PORTS-1:0][ADDR_WIDTH-1:0]       addr_i,
    input  l_data_t [NR_PORTS-1:0]                       wdata_i,
    input  logic    [NR_PORTS-1:0]                       we_i,
    input  l_be_t   [NR_PORTS-1:0]                       be_i,
    output l_data_t [DCACHE_SET_ASSOC-1:0]               rdata_o,
    input  logic    [NR_PORTS-1:0][ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later
    output logic    [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way

    // memory side: single arbitrated access
    output logic    [DCACHE_SET_ASSOC-1:0] req_o,
    output logic    [ADDR_WIDTH-1:0]       addr_o,
    output l_data_t                        wdata_o,
    output logic                           we_o,
    output l_be_t                          be_o,
    input  l_data_t [DCACHE_SET_ASSOC-1:0] rdata_i
);

  // one-hot ID of the port selected this cycle (id_d) and its registered copy (id_q)
  logic [NR_PORTS-1:0] id_d, id_q;
  // tag of the port that was selected in the previous cycle
  logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] sel_tag;

  // read data is forwarded to all requesters unchanged
  assign rdata_o = rdata_i;

  // pick the tag belonging to the registered port ID; if several bits of id_q
  // were set, the highest-indexed port would win
  always_comb begin : tag_sel
    sel_tag = '0;
    for (int unsigned port = 0; port < NR_PORTS; port++) begin
      if (id_q[port]) sel_tag = tag_i[port];
    end
  end

  // a way hits when its stored tag equals the selected tag and the line is valid
  for (genvar way = 0; way < DCACHE_SET_ASSOC; way++) begin : tag_cmp
    assign hit_way_o[way] = (sel_tag == rdata_i[way].tag) & rdata_i[way].valid;
  end

  always_comb begin

    // defaults
    we_o    = '0;
    be_o    = '0;
    addr_o  = '0;
    req_o   = '0;
    wdata_o = '0;
    id_d    = '0;
    gnt_o   = '0;

    // Request side, priority select: walk the ports from index 0 upwards and
    // stop at the first one with a request pending. The signals of the port
    // visited last are forwarded to the memories. gnt_o is raised for every
    // port visited, i.e. for all ports up to and including the first
    // requester; without any request the loop runs to completion, so every
    // grant bit is set and the last port's signals are forwarded.
    for (int unsigned port = 0; port < NR_PORTS; port++) begin
      gnt_o[port] = 1'b1;
      id_d        = (1'b1 << port);
      req_o       = req_i[port];
      addr_o      = addr_i[port];
      wdata_o     = wdata_i[port];
      we_o        = we_i[port];
      be_o        = be_i[port];

      if (req_i[port]) break;
    end

`ifndef SYNTHESIS
`ifndef VERILATOR
    // the cache may hit on at most one way; checked in the cycle after every
    // bit of req_i was set
    onehot :
    assert property (@(posedge clk_i) disable iff (!rst_ni) &req_i |=> $onehot0(hit_way_o))
    else begin
      $fatal(1, "Hit should be one-hot encoded");
    end
`endif
`endif
  end

  // remember which port was selected so its tag can be picked up one cycle later
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      id_q <= '0;
    end else begin
      id_q <= id_d;
    end
  end

endmodule
|
||||
|
|
@ -0,0 +1,712 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 08.08.2018
|
||||
// Description: adapter module to connect the L1D$ and L1I$ to a 64bit AXI bus.
|
||||
//
|
||||
|
||||
|
||||
module wt_axi_adapter
|
||||
import ariane_pkg::*;
|
||||
import wt_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter int unsigned ReqFifoDepth = 2,
|
||||
parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX,
|
||||
parameter type axi_req_t = logic,
|
||||
parameter type axi_rsp_t = logic
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// icache
|
||||
input logic icache_data_req_i,
|
||||
output logic icache_data_ack_o,
|
||||
input icache_req_t icache_data_i,
|
||||
// returning packets must be consumed immediately
|
||||
output logic icache_rtrn_vld_o,
|
||||
output icache_rtrn_t icache_rtrn_o,
|
||||
|
||||
// dcache
|
||||
input logic dcache_data_req_i,
|
||||
output logic dcache_data_ack_o,
|
||||
input dcache_req_t dcache_data_i,
|
||||
// returning packets must be consumed immediately
|
||||
output logic dcache_rtrn_vld_o,
|
||||
output dcache_rtrn_t dcache_rtrn_o,
|
||||
|
||||
// AXI port
|
||||
output axi_req_t axi_req_o,
|
||||
input axi_rsp_t axi_resp_i,
|
||||
|
||||
// Invalidations
|
||||
input logic [63:0] inval_addr_i,
|
||||
input logic inval_valid_i,
|
||||
output logic inval_ready_o
|
||||
);
|
||||
|
||||
// support up to 512bit cache lines
|
||||
localparam AxiNumWords = (ariane_pkg::ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH > ariane_pkg::DCACHE_LINE_WIDTH) +
|
||||
(ariane_pkg::DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH <= ariane_pkg::DCACHE_LINE_WIDTH) ;
|
||||
localparam MaxNumWords = $clog2(CVA6Cfg.AxiDataWidth / 8);
|
||||
localparam AxiRdBlenIcache = ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
|
||||
localparam AxiRdBlenDcache = ariane_pkg::DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// request path
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
icache_req_t icache_data;
|
||||
logic icache_data_full, icache_data_empty;
|
||||
dcache_req_t dcache_data;
|
||||
logic dcache_data_full, dcache_data_empty;
|
||||
|
||||
logic [1:0] arb_req, arb_ack;
|
||||
logic arb_idx, arb_gnt;
|
||||
|
||||
logic axi_rd_req, axi_rd_gnt;
|
||||
logic axi_wr_req, axi_wr_gnt;
|
||||
logic axi_wr_valid, axi_rd_valid, axi_rd_rdy, axi_wr_rdy;
|
||||
logic axi_rd_lock, axi_wr_lock, axi_rd_exokay, axi_wr_exokay, wr_exokay;
|
||||
logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr, axi_wr_addr;
|
||||
logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen;
|
||||
logic [2:0] axi_rd_size, axi_wr_size;
|
||||
logic [CVA6Cfg.AxiIdWidth-1:0]
|
||||
axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out, wr_id_out;
|
||||
logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] axi_wr_data;
|
||||
logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] axi_wr_user;
|
||||
logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
|
||||
logic [CVA6Cfg.AxiUserWidth-1:0] axi_rd_user;
|
||||
logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] axi_wr_be;
|
||||
logic [5:0] axi_wr_atop;
|
||||
logic invalidate;
|
||||
logic [$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] amo_off_d, amo_off_q;
|
||||
// AMO generates r beat
|
||||
logic amo_gen_r_d, amo_gen_r_q;
|
||||
|
||||
logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] icache_rtrn_tid_d, icache_rtrn_tid_q;
|
||||
logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_tid_d, dcache_rtrn_tid_q;
|
||||
logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_rd_tid, dcache_rtrn_wr_tid;
|
||||
logic dcache_rd_pop, dcache_wr_pop;
|
||||
logic icache_rd_full, icache_rd_empty;
|
||||
logic dcache_rd_full, dcache_rd_empty;
|
||||
logic dcache_wr_full, dcache_wr_empty;
|
||||
|
||||
assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
|
||||
assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
|
||||
|
||||
// arbiter
|
||||
assign arb_req = {
|
||||
~(dcache_data_empty | dcache_wr_full | dcache_rd_full), ~(icache_data_empty | icache_rd_full)
|
||||
};
|
||||
|
||||
assign arb_gnt = axi_rd_gnt | axi_wr_gnt;
|
||||
|
||||
rr_arb_tree #(
|
||||
.NumIn (2),
|
||||
.DataWidth(1),
|
||||
.AxiVldRdy(1'b1),
|
||||
.LockIn (1'b1)
|
||||
) i_rr_arb_tree (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i('0),
|
||||
.rr_i ('0),
|
||||
.req_i (arb_req),
|
||||
.gnt_o (arb_ack),
|
||||
.data_i ('0),
|
||||
.gnt_i (arb_gnt),
|
||||
.req_o (),
|
||||
.data_o (),
|
||||
.idx_o (arb_idx)
|
||||
);
|
||||
|
||||
// request side
|
||||
always_comb begin : p_axi_req
|
||||
// write channel
|
||||
axi_wr_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx};
|
||||
axi_wr_data[0] = {(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.data}};
|
||||
axi_wr_user[0] = dcache_data.user;
|
||||
// Cast to AXI address width
|
||||
axi_wr_addr = {{CVA6Cfg.AxiAddrWidth-riscv::PLEN{1'b0}}, dcache_data.paddr};
|
||||
axi_wr_size = dcache_data.size;
|
||||
axi_wr_req = 1'b0;
|
||||
axi_wr_blen = '0;// single word writes
|
||||
axi_wr_be = '0;
|
||||
axi_wr_lock = '0;
|
||||
axi_wr_atop = '0;
|
||||
amo_off_d = amo_off_q;
|
||||
amo_gen_r_d = amo_gen_r_q;
|
||||
|
||||
// read channel
|
||||
axi_rd_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx};
|
||||
axi_rd_req = 1'b0;
|
||||
axi_rd_lock = '0;
|
||||
axi_rd_blen = '0;
|
||||
|
||||
if (dcache_data.paddr[2] == 1'b0) begin
|
||||
axi_wr_user = {{64 - CVA6Cfg.AxiUserWidth{1'b0}}, dcache_data.user};
|
||||
end else begin
|
||||
axi_wr_user = {dcache_data.user, {64 - CVA6Cfg.AxiUserWidth{1'b0}}};
|
||||
end
|
||||
|
||||
// arbiter mux
|
||||
if (arb_idx) begin
|
||||
// Cast to AXI address width
|
||||
axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, dcache_data.paddr};
|
||||
// If dcache_data.size MSB is set, we want to read as much as possible
|
||||
axi_rd_size = dcache_data.size[2] ? MaxNumWords[2:0] : dcache_data.size;
|
||||
if (dcache_data.size[2]) begin
|
||||
axi_rd_blen = AxiRdBlenDcache[$clog2(AxiNumWords)-1:0];
|
||||
end
|
||||
end else begin
|
||||
// Cast to AXI address width
|
||||
axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, icache_data.paddr};
|
||||
axi_rd_size = MaxNumWords[2:0]; // always request max number of words in case of ifill
|
||||
if (!icache_data.nc) begin
|
||||
axi_rd_blen = AxiRdBlenIcache[$clog2(AxiNumWords)-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
// signal that an invalidation message
|
||||
// needs to be generated
|
||||
invalidate = 1'b0;
|
||||
|
||||
// decode message type
|
||||
if (|arb_req) begin
|
||||
if (arb_idx == 0) begin
|
||||
//////////////////////////////////////
|
||||
// IMISS
|
||||
axi_rd_req = 1'b1;
|
||||
//////////////////////////////////////
|
||||
end else begin
|
||||
unique case (dcache_data.rtype)
|
||||
//////////////////////////////////////
|
||||
wt_cache_pkg::DCACHE_LOAD_REQ: begin
|
||||
axi_rd_req = 1'b1;
|
||||
end
|
||||
//////////////////////////////////////
|
||||
wt_cache_pkg::DCACHE_STORE_REQ: begin
|
||||
axi_wr_req = 1'b1;
|
||||
axi_wr_be = '0;
|
||||
unique case (dcache_data.size[1:0])
|
||||
2'b00:
|
||||
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
|
||||
2'b01:
|
||||
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = '1; // hword
|
||||
2'b10:
|
||||
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = '1; // word
|
||||
default:
|
||||
if (riscv::IS_XLEN64)
|
||||
axi_wr_be[0][dcache_data.paddr[$clog2(
|
||||
CVA6Cfg.AxiDataWidth/8
|
||||
)-1:0]+:8] = '1; // dword
|
||||
endcase
|
||||
end
|
||||
//////////////////////////////////////
|
||||
wt_cache_pkg::DCACHE_ATOMIC_REQ: begin
|
||||
if (CVA6Cfg.RVA) begin
|
||||
// default
|
||||
// push back an invalidation here.
|
||||
// since we only keep one read tx in flight, and since
|
||||
// the dcache drains all writes/reads before executing
|
||||
// an atomic, this is safe.
|
||||
invalidate = arb_gnt;
|
||||
axi_wr_req = 1'b1;
|
||||
axi_wr_be = '0;
|
||||
unique case (dcache_data.size[1:0])
|
||||
2'b00:
|
||||
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
|
||||
2'b01:
|
||||
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] =
|
||||
'1; // hword
|
||||
2'b10:
|
||||
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] =
|
||||
'1; // word
|
||||
default:
|
||||
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:8] =
|
||||
'1; // dword
|
||||
endcase
|
||||
amo_gen_r_d = 1'b1;
|
||||
// need to use a separate ID here, so concat an additional bit
|
||||
axi_wr_id_in[1] = 1'b1;
|
||||
|
||||
unique case (dcache_data.amo_op)
|
||||
AMO_LR: begin
|
||||
axi_rd_lock = 1'b1;
|
||||
axi_rd_req = 1'b1;
|
||||
axi_rd_id_in[1] = 1'b1;
|
||||
// tie to zero in this special case
|
||||
axi_wr_req = 1'b0;
|
||||
axi_wr_be = '0;
|
||||
end
|
||||
AMO_SC: begin
|
||||
axi_wr_lock = 1'b1;
|
||||
amo_gen_r_d = 1'b0;
|
||||
// needed to properly encode success. store the result at offset within the returned
|
||||
// AXI data word aligned with the requested word size.
|
||||
amo_off_d = dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-
|
||||
1:0] & ~((1 << dcache_data.size[1:0]) - 1);
|
||||
end
|
||||
// RISC-V atops have a load semantic
|
||||
AMO_SWAP: axi_wr_atop = axi_pkg::ATOP_ATOMICSWAP;
|
||||
AMO_ADD:
|
||||
axi_wr_atop = {
|
||||
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD
|
||||
};
|
||||
AMO_AND: begin
|
||||
// in this case we need to invert the data to get a "CLR"
|
||||
axi_wr_data[0] = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.data}};
|
||||
axi_wr_user = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.user}};
|
||||
axi_wr_atop = {
|
||||
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR
|
||||
};
|
||||
end
|
||||
AMO_OR:
|
||||
axi_wr_atop = {
|
||||
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET
|
||||
};
|
||||
AMO_XOR:
|
||||
axi_wr_atop = {
|
||||
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR
|
||||
};
|
||||
AMO_MAX:
|
||||
axi_wr_atop = {
|
||||
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX
|
||||
};
|
||||
AMO_MAXU:
|
||||
axi_wr_atop = {
|
||||
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX
|
||||
};
|
||||
AMO_MIN:
|
||||
axi_wr_atop = {
|
||||
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN
|
||||
};
|
||||
AMO_MINU:
|
||||
axi_wr_atop = {
|
||||
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN
|
||||
};
|
||||
default: ; // Do nothing
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: ; // Do nothing
|
||||
//////////////////////////////////////
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// icache request buffer (element type icache_req_t, ReqFifoDepth entries).
// data_i/push_i are driven by icache_data_i / icache_data_ack_o, the head
// entry is presented on icache_data and pop_i is driven by arb_ack[0].
// flush_i and testmode_i are tied low, usage_o is left open.
fifo_v3 #(
  .dtype(icache_req_t),
  .DEPTH(ReqFifoDepth)
) i_icache_data_fifo (
  // clock, reset and static tie-offs
  .clk_i     (clk_i),
  .rst_ni    (rst_ni),
  .flush_i   (1'b0),
  .testmode_i(1'b0),
  // write side
  .data_i    (icache_data_i),
  .push_i    (icache_data_ack_o),
  // read side
  .data_o    (icache_data),
  .pop_i     (arb_ack[0]),
  // status
  .full_o    (icache_data_full),
  .empty_o   (icache_data_empty),
  .usage_o   ()
);
|
||||
|
||||
// dcache request buffer (element type dcache_req_t, ReqFifoDepth entries).
// data_i/push_i are driven by dcache_data_i / dcache_data_ack_o, the head
// entry is presented on dcache_data and pop_i is driven by arb_ack[1].
// flush_i and testmode_i are tied low, usage_o is left open.
fifo_v3 #(
  .dtype(dcache_req_t),
  .DEPTH(ReqFifoDepth)
) i_dcache_data_fifo (
  // clock, reset and static tie-offs
  .clk_i     (clk_i),
  .rst_ni    (rst_ni),
  .flush_i   (1'b0),
  .testmode_i(1'b0),
  // write side
  .data_i    (dcache_data_i),
  .push_i    (dcache_data_ack_o),
  // read side
  .data_o    (dcache_data),
  .pop_i     (arb_ack[1]),
  // status
  .full_o    (dcache_data_full),
  .empty_o   (dcache_data_empty),
  .usage_o   ()
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// meta info feedback fifos
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// enables for shifting a received read beat into the icache / dcache
// return shift registers
logic icache_rtrn_rd_en;
logic dcache_rtrn_rd_en;
// return-valid flags: next-state (_d) and registered (_q) values
logic icache_rtrn_vld_d, icache_rtrn_vld_q;
logic dcache_rtrn_vld_d, dcache_rtrn_vld_q;
|
||||
|
||||
// Transaction-ID buffer for icache reads (CACHE_ID_WIDTH bits wide,
// MetaFifoDepth entries). icache_data.tid is pushed when the arbiter
// acknowledges input 0 and the read request is granted; the head entry
// feeds icache_rtrn_tid_d and pop_i is driven by icache_rtrn_vld_d.
fifo_v3 #(
  .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
  .DEPTH     (MetaFifoDepth)
) i_rd_icache_id (
  // clock, reset and static tie-offs
  .clk_i     (clk_i),
  .rst_ni    (rst_ni),
  .flush_i   (1'b0),
  .testmode_i(1'b0),
  // write side
  .data_i    (icache_data.tid),
  .push_i    (arb_ack[0] & axi_rd_gnt),
  // read side
  .data_o    (icache_rtrn_tid_d),
  .pop_i     (icache_rtrn_vld_d),
  // status
  .full_o    (icache_rd_full),
  .empty_o   (icache_rd_empty),
  .usage_o   ()
);
|
||||
|
||||
// Transaction-ID buffer for dcache requests issued on the read channel
// (CACHE_ID_WIDTH bits wide, MetaFifoDepth entries). dcache_data.tid is
// pushed when the arbiter acknowledges input 1 and the read request is
// granted; the head entry is dcache_rtrn_rd_tid, pop_i is dcache_rd_pop.
fifo_v3 #(
  .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
  .DEPTH     (MetaFifoDepth)
) i_rd_dcache_id (
  // clock, reset and static tie-offs
  .clk_i     (clk_i),
  .rst_ni    (rst_ni),
  .flush_i   (1'b0),
  .testmode_i(1'b0),
  // write side
  .data_i    (dcache_data.tid),
  .push_i    (arb_ack[1] & axi_rd_gnt),
  // read side
  .data_o    (dcache_rtrn_rd_tid),
  .pop_i     (dcache_rd_pop),
  // status
  .full_o    (dcache_rd_full),
  .empty_o   (dcache_rd_empty),
  .usage_o   ()
);
|
||||
|
||||
// Transaction-ID buffer for dcache requests issued on the write channel
// (CACHE_ID_WIDTH bits wide, MetaFifoDepth entries). dcache_data.tid is
// pushed when the arbiter acknowledges input 1 and the write request is
// granted; the head entry is dcache_rtrn_wr_tid, pop_i is dcache_wr_pop.
fifo_v3 #(
  .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
  .DEPTH     (MetaFifoDepth)
) i_wr_dcache_id (
  // clock, reset and static tie-offs
  .clk_i     (clk_i),
  .rst_ni    (rst_ni),
  .flush_i   (1'b0),
  .testmode_i(1'b0),
  // write side
  .data_i    (dcache_data.tid),
  .push_i    (arb_ack[1] & axi_wr_gnt),
  // read side
  .data_o    (dcache_rtrn_wr_tid),
  .pop_i     (dcache_wr_pop),
  // status
  .full_o    (dcache_wr_full),
  .empty_o   (dcache_wr_empty),
  .usage_o   ()
);
|
||||
|
||||
// Returned dcache transaction ID: taken from the write-ID buffer in cycles
// where that buffer is popped, otherwise from the read-ID buffer.
assign dcache_rtrn_tid_d = dcache_wr_pop ? dcache_rtrn_wr_tid
                                         : dcache_rtrn_rd_tid;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// return path
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// Write (B) response buffering.
// A response is accepted whenever the buffer below is not full; each entry
// stores the exclusive-okay flag together with the response ID.
logic b_full;
logic b_empty;
logic b_push;
logic b_pop;

// ready towards the write response channel, and the resulting handshake
assign axi_wr_rdy = ~b_full;
assign b_push     = axi_wr_valid & axi_wr_rdy;

fifo_v3 #(
  .DATA_WIDTH  (CVA6Cfg.AxiIdWidth + 1),
  .DEPTH       (MetaFifoDepth),
  .FALL_THROUGH(1'b1)
) i_b_fifo (
  // clock, reset and static tie-offs
  .clk_i     (clk_i),
  .rst_ni    (rst_ni),
  .flush_i   (1'b0),
  .testmode_i(1'b0),
  // write side: {exokay, id} of the incoming response
  .data_i    ({axi_wr_exokay, axi_wr_id_out}),
  .push_i    (b_push),
  // read side: consumed by the return decoder via b_pop
  .data_o    ({wr_exokay, wr_id_out}),
  .pop_i     (b_pop),
  // status
  .full_o    (b_full),
  .empty_o   (b_empty),
  .usage_o   ()
);
|
||||
|
||||
// Read responses are collected in shift registers, one per cache.

// "first beat" trackers (next-state / registered)
logic icache_first_d, icache_first_q;
logic dcache_first_d, dcache_first_q;

// user-bit shift registers, organised as AxiUserWidth-wide words
logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
    icache_rd_shift_user_d, icache_rd_shift_user_q;
logic [DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
    dcache_rd_shift_user_d, dcache_rd_shift_user_q;

// data shift registers, organised as AxiDataWidth-wide words
logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
    icache_rd_shift_d, icache_rd_shift_q;
logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
    dcache_rd_shift_d, dcache_rd_shift_q;

// dcache return message type and invalidation payload
wt_cache_pkg::dcache_in_t    dcache_rtrn_type_d, dcache_rtrn_type_q;
wt_cache_pkg::dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q;

// store-conditional result strobe and last-beat flag of the read channel
logic dcache_sc_rtrn;
logic axi_rd_last;
|
||||
|
||||
// Next-state logic of the read-return shift registers and assembly of the
// return messages towards the caches. The return outputs are driven purely
// from registered (_q) state.
always_comb begin : p_axi_rtrn_shift
  ///////////////////////////////////////////////////////
  // icache shift register
  ///////////////////////////////////////////////////////
  // hold by default
  icache_first_d         = icache_first_q;
  icache_rd_shift_d      = icache_rd_shift_q;
  icache_rd_shift_user_d = icache_rd_shift_user_q;

  if (icache_rtrn_rd_en) begin
    // the beat following a last beat is the first one of the next burst
    icache_first_d = axi_rd_last;

    if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
      // the shift register is a single word wide: just capture the beat
      icache_rd_shift_d[0] = axi_rd_data;
    end else begin
      // new beat enters at the top word, older words move down by one
      icache_rd_shift_d = {
        axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
      };
    end

    icache_rd_shift_user_d = {
      axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
    };

    // on the first beat also write word 0, so that a single-word
    // transaction ends up at offset 0
    if (icache_first_q) begin
      icache_rd_shift_d[0]      = axi_rd_data;
      icache_rd_shift_user_d[0] = axi_rd_user;
    end
  end

  ///////////////////////////////////////////////////////
  // dcache shift register
  ///////////////////////////////////////////////////////
  // hold by default
  dcache_first_d         = dcache_first_q;
  dcache_rd_shift_d      = dcache_rd_shift_q;
  dcache_rd_shift_user_d = dcache_rd_shift_user_q;

  if (dcache_rtrn_rd_en) begin
    // the beat following a last beat is the first one of the next burst
    dcache_first_d = axi_rd_last;

    if (DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
      // the shift register is a single word wide: just capture the beat
      dcache_rd_shift_d[0] = axi_rd_data;
    end else begin
      // new beat enters at the top word, older words move down by one
      dcache_rd_shift_d = {
        axi_rd_data, dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
      };
    end

    dcache_rd_shift_user_d = {
      axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
    };

    // on the first beat also write word 0, so that a single-word
    // transaction ends up at offset 0
    if (dcache_first_q) begin
      dcache_rd_shift_d[0]      = axi_rd_data;
      dcache_rd_shift_user_d[0] = axi_rd_user;
    end
  end else if (CVA6Cfg.RVA && dcache_sc_rtrn) begin
    // store-conditional result: clear word 0, then set the bit at byte
    // offset amo_off_q to 1 unless the write response was EXOKAY
    dcache_rd_shift_d[0]                   = '0;
    dcache_rd_shift_user_d[0]              = '0;
    dcache_rd_shift_d[0][amo_off_q*8]      = (wr_exokay) ? '0 : 1'b1;
    dcache_rd_shift_user_d[0][amo_off_q*8] = (wr_exokay) ? '0 : 1'b1;
  end

  ///////////////////////////////////////////////////////
  // return messages, driven directly from the registers
  ///////////////////////////////////////////////////////
  icache_rtrn_o       = '0;
  icache_rtrn_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
  icache_rtrn_o.tid   = icache_rtrn_tid_q;
  icache_rtrn_o.data  = icache_rd_shift_q;
  icache_rtrn_o.user  = icache_rd_shift_user_q;
  icache_rtrn_vld_o   = icache_rtrn_vld_q;

  dcache_rtrn_o       = '0;
  dcache_rtrn_o.rtype = dcache_rtrn_type_q;
  dcache_rtrn_o.inv   = dcache_rtrn_inv_q;
  dcache_rtrn_o.tid   = dcache_rtrn_tid_q;
  dcache_rtrn_o.data  = dcache_rd_shift_q;
  dcache_rtrn_o.user  = dcache_rd_shift_user_q;
  dcache_rtrn_vld_o   = dcache_rtrn_vld_q;
end
|
||||
|
||||
// Return-path decoder.
// Demultiplexes read (R) beats onto the icache / dcache return channels by
// response ID, turns buffered write (B) responses into store / atomic
// acknowledges, and produces dcache invalidation messages. For the dcache
// exactly one source is served per cycle, in this priority order:
//   1. external invalidation request (inval_valid_i)
//   2. self-invalidation requested by the request path (invalidate)
//   3. read beat with response ID bit 0 set
//   4. buffered write response
always_comb begin : p_axi_rtrn_decode
  // ---------------------------------------------------------------------
  // defaults: nothing returned, nothing popped
  // ---------------------------------------------------------------------
  icache_rtrn_rd_en  = 1'b0;
  icache_rtrn_vld_d  = 1'b0;

  dcache_rtrn_rd_en  = 1'b0;
  dcache_rtrn_vld_d  = 1'b0;
  dcache_rd_pop      = 1'b0;
  dcache_wr_pop      = 1'b0;
  dcache_rtrn_inv_d  = '0;
  dcache_rtrn_type_d = wt_cache_pkg::DCACHE_LOAD_ACK;
  dcache_sc_rtrn     = 1'b0;
  b_pop              = 1'b0;
  inval_ready_o      = 1'b0;

  // read beats are not accepted while an invalidation is being generated
  // (write responses are buffered separately in the B fifo)
  axi_rd_rdy = ~invalidate;

  // ---------------------------------------------------------------------
  // icache virtual channel: response ID 0, independent of the dcache
  // decoding below
  // ---------------------------------------------------------------------
  if (axi_rd_valid && axi_rd_id_out == 0 && axi_rd_rdy) begin
    icache_rtrn_rd_en = 1'b1;
    icache_rtrn_vld_d = axi_rd_last;
  end

  // ---------------------------------------------------------------------
  // dcache virtual channel
  // ---------------------------------------------------------------------
  if (inval_valid_i) begin
    // External invalidation request (from coprocessor). This is considered
    // safe because there are no other transactions while a coprocessor has
    // pending stores.
    inval_ready_o         = 1'b1;
    dcache_rtrn_type_d    = wt_cache_pkg::DCACHE_INV_REQ;
    dcache_rtrn_vld_d     = 1'b1;
    dcache_rtrn_inv_d.all = 1'b1;
    dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
  end else if (CVA6Cfg.RVA && invalidate) begin
    // Self-invalidation accompanying an atomic. There is no other read
    // transaction in flight than this atomic; handling it this way follows
    // from the write-through cache architecture, which is aligned with the
    // OpenPiton cache subsystem.
    dcache_rtrn_type_d    = wt_cache_pkg::DCACHE_INV_REQ;
    dcache_rtrn_vld_d     = 1'b1;

    dcache_rtrn_inv_d.all = 1'b1;
    dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
  end else if (axi_rd_valid && axi_rd_id_out[0] && axi_rd_rdy) begin
    // Read response for the dcache. In case of atomics the dcache
    // sequentializes requests and guarantees that there are no other
    // pending transactions in flight.
    dcache_rtrn_rd_en = 1'b1;
    dcache_rtrn_vld_d = axi_rd_last;

    if (CVA6Cfg.RVA && axi_rd_id_out[1]) begin
      // response ID bit 1 marks an atomic operation
      dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK;

      if (!dcache_wr_empty) begin
        // the transaction was issued over the write channel: pop that ID
        dcache_wr_pop = axi_rd_last;
      end else begin
        // otherwise there MUST be an ID in the read channel buffer (LR)
        dcache_rd_pop = axi_rd_last;
      end
    end else begin
      // regular load
      dcache_rd_pop = axi_rd_last;
    end
  end else if (!b_empty) begin
    // Buffered write response
    b_pop = 1'b1;

    if (CVA6Cfg.RVA && wr_id_out[1]) begin
      // response ID bit 1 marks an atomic operation
      dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK;

      // silently discard the B response if the ID was already popped with
      // an R beat (iff the AMO transaction generated an R beat)
      if (!amo_gen_r_q) begin
        dcache_rtrn_vld_d = 1'b1;
        dcache_wr_pop     = 1'b1;
        dcache_sc_rtrn    = 1'b1;
      end
    end else begin
      // regular store acknowledge
      dcache_rtrn_type_d = wt_cache_pkg::DCACHE_STORE_ACK;
      dcache_rtrn_vld_d  = 1'b1;
      dcache_wr_pop      = 1'b1;
    end
  end
end
|
||||
|
||||
// remote invalidations are not supported yet (this needs a cache coherence protocol)
|
||||
// note that the atomic transactions would also need a "master exclusive monitor" in that case
|
||||
// assign icache_rtrn_o.inv.idx = '0;
|
||||
// assign icache_rtrn_o.inv.way = '0;
|
||||
// assign icache_rtrn_o.inv.vld = '0;
|
||||
// assign icache_rtrn_o.inv.all = '0;
|
||||
|
||||
// assign dcache_rtrn_o.inv.idx = '0;
|
||||
// assign dcache_rtrn_o.inv.way = '0;
|
||||
// assign dcache_rtrn_o.inv.vld = '0;
|
||||
// assign dcache_rtrn_o.inv.all = '0;
|
||||
|
||||
// State registers of the return path (asynchronous, active-low reset).
// Every register simply samples its _d counterpart on the rising clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
  if (!rst_ni) begin
    // first-beat trackers come out of reset set
    icache_first_q         <= 1'b1;
    dcache_first_q         <= 1'b1;
    // shift registers (data and user bits)
    icache_rd_shift_q      <= '0;
    dcache_rd_shift_q      <= '0;
    icache_rd_shift_user_q <= '0;
    dcache_rd_shift_user_q <= '0;
    // return valid flags and transaction IDs
    icache_rtrn_vld_q      <= '0;
    icache_rtrn_tid_q      <= '0;
    dcache_rtrn_vld_q      <= '0;
    dcache_rtrn_tid_q      <= '0;
    // dcache return type and invalidation payload
    dcache_rtrn_type_q     <= wt_cache_pkg::DCACHE_LOAD_ACK;
    dcache_rtrn_inv_q      <= '0;
    // atomic bookkeeping
    amo_off_q              <= '0;
    amo_gen_r_q            <= 1'b0;
  end else begin
    icache_first_q         <= icache_first_d;
    dcache_first_q         <= dcache_first_d;
    icache_rd_shift_q      <= icache_rd_shift_d;
    dcache_rd_shift_q      <= dcache_rd_shift_d;
    icache_rd_shift_user_q <= icache_rd_shift_user_d;
    dcache_rd_shift_user_q <= dcache_rd_shift_user_d;
    icache_rtrn_vld_q      <= icache_rtrn_vld_d;
    icache_rtrn_tid_q      <= icache_rtrn_tid_d;
    dcache_rtrn_vld_q      <= dcache_rtrn_vld_d;
    dcache_rtrn_tid_q      <= dcache_rtrn_tid_d;
    dcache_rtrn_type_q     <= dcache_rtrn_type_d;
    dcache_rtrn_inv_q      <= dcache_rtrn_inv_d;
    amo_off_q              <= amo_off_d;
    amo_gen_r_q            <= amo_gen_r_d;
  end
end
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// axi protocol shim
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// AXI protocol shim: connects the adapter's internal read / write request
// and response signals to the external AXI request / response structs.
axi_shim #(
  .CVA6Cfg    (CVA6Cfg),
  .AxiNumWords(AxiNumWords),
  .axi_req_t  (axi_req_t),
  .axi_rsp_t  (axi_rsp_t)
) i_axi_shim (
  .clk_i      (clk_i),
  .rst_ni     (rst_ni),

  // read request
  .rd_req_i   (axi_rd_req),
  .rd_gnt_o   (axi_rd_gnt),
  .rd_addr_i  (axi_rd_addr),
  .rd_blen_i  (axi_rd_blen),
  .rd_size_i  (axi_rd_size),
  .rd_id_i    (axi_rd_id_in),
  .rd_lock_i  (axi_rd_lock),

  // read response
  .rd_rdy_i   (axi_rd_rdy),
  .rd_valid_o (axi_rd_valid),
  .rd_last_o  (axi_rd_last),
  .rd_data_o  (axi_rd_data),
  .rd_user_o  (axi_rd_user),
  .rd_id_o    (axi_rd_id_out),
  .rd_exokay_o(axi_rd_exokay),

  // write request
  .wr_req_i   (axi_wr_req),
  .wr_gnt_o   (axi_wr_gnt),
  .wr_addr_i  (axi_wr_addr),
  .wr_data_i  (axi_wr_data),
  .wr_user_i  (axi_wr_user),
  .wr_be_i    (axi_wr_be),
  .wr_blen_i  (axi_wr_blen),
  .wr_size_i  (axi_wr_size),
  .wr_id_i    (axi_wr_id_in),
  .wr_lock_i  (axi_wr_lock),
  .wr_atop_i  (axi_wr_atop),

  // write response
  .wr_rdy_i   (axi_wr_rdy),
  .wr_valid_o (axi_wr_valid),
  .wr_id_o    (axi_wr_id_out),
  .wr_exokay_o(axi_wr_exokay),

  // external AXI port
  .axi_req_o  (axi_req_o),
  .axi_resp_i (axi_resp_i)
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// assertions
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
|
||||
`endif
|
||||
//pragma translate_on
|
||||
|
||||
endmodule // wt_l15_adapter
|
||||
|
|
@ -0,0 +1,233 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 15.08.2018
|
||||
// Description: Ariane cache subsystem that is compatible with the OpenPiton
|
||||
// coherent memory system.
|
||||
//
|
||||
// Define PITON_ARIANE if you want to use this cache.
|
||||
// Define DCACHE_TYPE if you want to use this cache
|
||||
// with a standard 64 bit AXI interface instead of the OpenPiton
|
||||
// L1.5 interface.
|
||||
|
||||
|
||||
// Write-through cache subsystem wrapper.
//
// Instantiates the instruction cache (cva6_icache), the write-through data
// cache (wt_dcache) and one memory-side adapter that merges both caches onto
// the NoC port:
//   - wt_l15_adapter when PITON_ARIANE is defined,
//   - wt_axi_adapter otherwise.
//
// Parameters:
//   CVA6Cfg    - core configuration, forwarded to all sub-blocks
//   NumPorts   - number of dcache request ports, forwarded to wt_dcache
//   noc_req_t  - type of the memory-side request port
//   noc_resp_t - type of the memory-side response port
module wt_cache_subsystem
  import ariane_pkg::*;
  import wt_cache_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned NumPorts = 4,
    parameter type noc_req_t = logic,
    parameter type noc_resp_t = logic
) (
    input logic clk_i,
    input logic rst_ni,
    // I$
    input logic icache_en_i,  // enable icache (or bypass e.g: in debug mode)
    input logic icache_flush_i,  // flush the icache, flush and kill have to be asserted together
    output logic icache_miss_o,  // to performance counter
    // address translation requests
    input icache_areq_t icache_areq_i,  // to/from frontend
    output icache_arsp_t icache_areq_o,
    // data requests
    input icache_dreq_t icache_dreq_i,  // to/from frontend
    output icache_drsp_t icache_dreq_o,
    // D$
    // Cache management
    input logic dcache_enable_i,  // from CSR
    input logic dcache_flush_i,  // high until acknowledged
    output logic dcache_flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
    output logic dcache_miss_o,  // we missed on a ld/st
    // For Performance Counter
    output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
    // AMO interface
    input amo_req_t dcache_amo_req_i,
    output amo_resp_t dcache_amo_resp_o,
    // Request ports
    input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i,  // to/from LSU
    output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o,  // to/from LSU
    // writebuffer status
    output logic wbuffer_empty_o,
    output logic wbuffer_not_ni_o,
    // memory side
    output noc_req_t noc_req_o,
    input noc_resp_t noc_resp_i,
    // Invalidations
    input logic [63:0] inval_addr_i,
    input logic inval_valid_i,
    output logic inval_ready_o
    // TODO: interrupt interface
);

  // icache <-> adapter: request handshake, request payload, return payload
  logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld;
  wt_cache_pkg::icache_req_t icache_adapter;
  wt_cache_pkg::icache_rtrn_t adapter_icache;

  // dcache <-> adapter: request handshake, request payload, return payload
  logic dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld;
  wt_cache_pkg::dcache_req_t dcache_adapter;
  wt_cache_pkg::dcache_rtrn_t adapter_dcache;

  cva6_icache #(
      // use ID 0 for icache reads
      .CVA6Cfg(CVA6Cfg),
      .RdTxId (0)
  ) i_cva6_icache (
      .clk_i         (clk_i),
      .rst_ni        (rst_ni),
      .flush_i       (icache_flush_i),
      .en_i          (icache_en_i),
      .miss_o        (icache_miss_o),
      .areq_i        (icache_areq_i),
      .areq_o        (icache_areq_o),
      .dreq_i        (icache_dreq_i),
      .dreq_o        (icache_dreq_o),
      .mem_rtrn_vld_i(adapter_icache_rtrn_vld),
      .mem_rtrn_i    (adapter_icache),
      .mem_data_req_o(icache_adapter_data_req),
      .mem_data_ack_i(adapter_icache_data_ack),
      .mem_data_o    (icache_adapter)
  );


  // Note:
  // Ports 0/1 for PTW and LD unit are read only.
  // they have equal prio and are RR arbited
  // Port 2 is write only and goes into the merging write buffer
  // NOTE(review): the assertions below treat port NumPorts-1 as the write
  // port, so the port numbers in the note above presumably predate the
  // NumPorts parameter - confirm against wt_dcache.
  wt_dcache #(
      .CVA6Cfg  (CVA6Cfg),
      // keep the number of request ports consistent with the port arrays of
      // this wrapper (wt_dcache would otherwise fall back to its own default)
      .NumPorts (NumPorts),
      // use ID 1 for dcache reads and amos. note that the writebuffer
      // uses all IDs up to DCACHE_MAX_TX-1 for write transactions.
      .RdAmoTxId(1)
  ) i_wt_dcache (
      .clk_i           (clk_i),
      .rst_ni          (rst_ni),
      .enable_i        (dcache_enable_i),
      .flush_i         (dcache_flush_i),
      .flush_ack_o     (dcache_flush_ack_o),
      .miss_o          (dcache_miss_o),
      .wbuffer_empty_o (wbuffer_empty_o),
      .wbuffer_not_ni_o(wbuffer_not_ni_o),
      .amo_req_i       (dcache_amo_req_i),
      .amo_resp_o      (dcache_amo_resp_o),
      .req_ports_i     (dcache_req_ports_i),
      .req_ports_o     (dcache_req_ports_o),
      .miss_vld_bits_o (miss_vld_bits_o),
      .mem_rtrn_vld_i  (adapter_dcache_rtrn_vld),
      .mem_rtrn_i      (adapter_dcache),
      .mem_data_req_o  (dcache_adapter_data_req),
      .mem_data_ack_i  (adapter_dcache_data_ack),
      .mem_data_o      (dcache_adapter)
  );


  ///////////////////////////////////////////////////////
  // memory plumbing, either use 64bit AXI port or native
  // L15 cache interface (derived from OpenSPARC CCX).
  ///////////////////////////////////////////////////////

`ifdef PITON_ARIANE
  // NOTE(review): the inval_* ports are not connected in this configuration,
  // so inval_ready_o has no driver here - confirm that this is intended.
  wt_l15_adapter #(
      .CVA6Cfg(CVA6Cfg)
  ) i_adapter (
      .clk_i            (clk_i),
      .rst_ni           (rst_ni),
      .icache_data_req_i(icache_adapter_data_req),
      .icache_data_ack_o(adapter_icache_data_ack),
      .icache_data_i    (icache_adapter),
      .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
      .icache_rtrn_o    (adapter_icache),
      .dcache_data_req_i(dcache_adapter_data_req),
      .dcache_data_ack_o(adapter_dcache_data_ack),
      .dcache_data_i    (dcache_adapter),
      .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
      .dcache_rtrn_o    (adapter_dcache),
      .l15_req_o        (noc_req_o),
      .l15_rtrn_i       (noc_resp_i)
  );
`else
  wt_axi_adapter #(
      .CVA6Cfg  (CVA6Cfg),
      .axi_req_t(noc_req_t),
      .axi_rsp_t(noc_resp_t)
  ) i_adapter (
      .clk_i            (clk_i),
      .rst_ni           (rst_ni),
      .icache_data_req_i(icache_adapter_data_req),
      .icache_data_ack_o(adapter_icache_data_ack),
      .icache_data_i    (icache_adapter),
      .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
      .icache_rtrn_o    (adapter_icache),
      .dcache_data_req_i(dcache_adapter_data_req),
      .dcache_data_ack_o(adapter_dcache_data_ack),
      .dcache_data_i    (dcache_adapter),
      .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
      .dcache_rtrn_o    (adapter_dcache),
      .axi_req_o        (noc_req_o),
      .axi_resp_i       (noc_resp_i),
      .inval_addr_i     (inval_addr_i),
      .inval_valid_i    (inval_valid_i),
      .inval_ready_o    (inval_ready_o)
  );
`endif

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////

  //pragma translate_off
`ifndef VERILATOR
  // fetched instruction data must not be X while the response is valid
  a_invalid_instruction_fetch :
  assert property (
    @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
  else
    $warning(
        1,
        "[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
        icache_dreq_o.vaddr,
        icache_dreq_o.data
    );

  // every byte enabled on the last request port must carry known write data
  for (genvar j = 0; j < riscv::XLEN / 8; j++) begin : gen_invalid_write_assertion
    a_invalid_write_data :
    assert property (
      @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> dcache_req_ports_i[NumPorts-1].data_be[j] |-> (|dcache_req_ports_i[NumPorts-1].data_wdata[j*8+:8] !== 1'hX))
    else
      $warning(
          1,
          "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X, databe=%016X",
          {
            dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
          },
          dcache_req_ports_i[NumPorts-1].data_be,
          dcache_req_ports_i[NumPorts-1].data_wdata,
          dcache_req_ports_i[NumPorts-1].data_be & dcache_req_ports_i[NumPorts-1].data_wdata
      );
  end


  // read data on all other ports must be known when valid and not killed
  for (genvar j = 0; j < NumPorts - 1; j++) begin : gen_assertion
    a_invalid_read_data :
    assert property (
      @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
    else
      $warning(
          1,
          "[l1 dcache] reading invalid data on port %01d: data=%016X",
          j,
          dcache_req_ports_o[j].data_rdata
      );
  end
`endif
  //pragma translate_on


endmodule  // wt_cache_subsystem
|
||||
|
|
@ -0,0 +1,360 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 13.09.2018
|
||||
// Description: Write-Through Data cache that is compatible with openpiton.
|
||||
|
||||
|
||||
module wt_dcache
|
||||
import ariane_pkg::*;
|
||||
import wt_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter int unsigned NumPorts = 4, // number of miss ports
|
||||
// ID to be used for read and AMO transactions.
|
||||
// note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions
|
||||
parameter logic [CACHE_ID_WIDTH-1:0] RdAmoTxId = 1
|
||||
) (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
|
||||
// Cache management
|
||||
input logic enable_i, // from CSR
|
||||
input logic flush_i, // high until acknowledged
|
||||
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
|
||||
output logic miss_o, // we missed on a ld/st
|
||||
output logic wbuffer_empty_o,
|
||||
output logic wbuffer_not_ni_o,
|
||||
|
||||
// AMO interface
|
||||
input amo_req_t amo_req_i,
|
||||
output amo_resp_t amo_resp_o,
|
||||
|
||||
// Request ports
|
||||
input dcache_req_i_t [NumPorts-1:0] req_ports_i,
|
||||
output dcache_req_o_t [NumPorts-1:0] req_ports_o,
|
||||
|
||||
output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
|
||||
|
||||
input logic mem_rtrn_vld_i,
|
||||
input dcache_rtrn_t mem_rtrn_i,
|
||||
output logic mem_data_req_o,
|
||||
input logic mem_data_ack_i,
|
||||
output dcache_req_t mem_data_o
|
||||
);
|
||||
|
||||
// miss unit <-> read controllers
|
||||
logic cache_en;
|
||||
|
||||
// miss unit <-> memory
|
||||
logic wr_cl_vld;
|
||||
logic wr_cl_nc;
|
||||
logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we;
|
||||
logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag;
|
||||
logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx;
|
||||
logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off;
|
||||
logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data;
|
||||
logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user;
|
||||
logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be;
|
||||
logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits;
|
||||
logic [ DCACHE_SET_ASSOC-1:0] wr_req;
|
||||
logic wr_ack;
|
||||
logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_idx;
|
||||
logic [ DCACHE_OFFSET_WIDTH-1:0] wr_off;
|
||||
riscv::xlen_t wr_data;
|
||||
logic [ (riscv::XLEN/8)-1:0] wr_data_be;
|
||||
logic [ DCACHE_USER_WIDTH-1:0] wr_user;
|
||||
|
||||
// miss unit <-> controllers/wbuffer
|
||||
logic [ NumPorts-1:0] miss_req;
|
||||
logic [ NumPorts-1:0] miss_ack;
|
||||
logic [ NumPorts-1:0] miss_nc;
|
||||
logic [ NumPorts-1:0] miss_we;
|
||||
logic [ NumPorts-1:0][ riscv::XLEN-1:0] miss_wdata;
|
||||
logic [ NumPorts-1:0][ DCACHE_USER_WIDTH-1:0] miss_wuser;
|
||||
logic [ NumPorts-1:0][ riscv::PLEN-1:0] miss_paddr;
|
||||
logic [ NumPorts-1:0][ 2:0] miss_size;
|
||||
logic [ NumPorts-1:0][ CACHE_ID_WIDTH-1:0] miss_id;
|
||||
logic [ NumPorts-1:0] miss_replay;
|
||||
logic [ NumPorts-1:0] miss_rtrn_vld;
|
||||
logic [ CACHE_ID_WIDTH-1:0] miss_rtrn_id;
|
||||
|
||||
// memory <-> read controllers/miss unit
|
||||
logic [ NumPorts-1:0] rd_prio;
|
||||
logic [ NumPorts-1:0] rd_tag_only;
|
||||
logic [ NumPorts-1:0] rd_req;
|
||||
logic [ NumPorts-1:0] rd_ack;
|
||||
logic [ NumPorts-1:0][ DCACHE_TAG_WIDTH-1:0] rd_tag;
|
||||
logic [ NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx;
|
||||
logic [ NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off;
|
||||
riscv::xlen_t rd_data;
|
||||
logic [ DCACHE_USER_WIDTH-1:0] rd_user;
|
||||
logic [ DCACHE_SET_ASSOC-1:0] rd_vld_bits;
|
||||
logic [ DCACHE_SET_ASSOC-1:0] rd_hit_oh;
|
||||
|
||||
// miss unit <-> wbuffer
|
||||
logic [ DCACHE_MAX_TX-1:0][ riscv::PLEN-1:0] tx_paddr;
|
||||
logic [ DCACHE_MAX_TX-1:0] tx_vld;
|
||||
|
||||
// wbuffer <-> memory
|
||||
wbuffer_t [ DCACHE_WBUF_DEPTH-1:0] wbuffer_data;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// miss handling unit
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
wt_dcache_missunit #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.AmoTxId (RdAmoTxId),
|
||||
.NumPorts(NumPorts)
|
||||
) i_wt_dcache_missunit (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.enable_i (enable_i),
|
||||
.flush_i (flush_i),
|
||||
.flush_ack_o (flush_ack_o),
|
||||
.miss_o (miss_o),
|
||||
.wbuffer_empty_i(wbuffer_empty_o),
|
||||
.cache_en_o (cache_en),
|
||||
// amo interface
|
||||
.amo_req_i (amo_req_i),
|
||||
.amo_resp_o (amo_resp_o),
|
||||
// miss handling interface
|
||||
.miss_req_i (miss_req),
|
||||
.miss_ack_o (miss_ack),
|
||||
.miss_nc_i (miss_nc),
|
||||
.miss_we_i (miss_we),
|
||||
.miss_wdata_i (miss_wdata),
|
||||
.miss_wuser_i (miss_wuser),
|
||||
.miss_paddr_i (miss_paddr),
|
||||
.miss_vld_bits_i(miss_vld_bits_o),
|
||||
.miss_size_i (miss_size),
|
||||
.miss_id_i (miss_id),
|
||||
.miss_replay_o (miss_replay),
|
||||
.miss_rtrn_vld_o(miss_rtrn_vld),
|
||||
.miss_rtrn_id_o (miss_rtrn_id),
|
||||
// from writebuffer
|
||||
.tx_paddr_i (tx_paddr),
|
||||
.tx_vld_i (tx_vld),
|
||||
// cache memory interface
|
||||
.wr_cl_vld_o (wr_cl_vld),
|
||||
.wr_cl_nc_o (wr_cl_nc),
|
||||
.wr_cl_we_o (wr_cl_we),
|
||||
.wr_cl_tag_o (wr_cl_tag),
|
||||
.wr_cl_idx_o (wr_cl_idx),
|
||||
.wr_cl_off_o (wr_cl_off),
|
||||
.wr_cl_data_o (wr_cl_data),
|
||||
.wr_cl_user_o (wr_cl_user),
|
||||
.wr_cl_data_be_o(wr_cl_data_be),
|
||||
.wr_vld_bits_o (wr_vld_bits),
|
||||
// memory interface
|
||||
.mem_rtrn_vld_i (mem_rtrn_vld_i),
|
||||
.mem_rtrn_i (mem_rtrn_i),
|
||||
.mem_data_req_o (mem_data_req_o),
|
||||
.mem_data_ack_i (mem_data_ack_i),
|
||||
.mem_data_o (mem_data_o)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// read controllers (LD unit and PTW/MMU)
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// 0 is used by MMU, 1 by READ access requests
|
||||
for (genvar k = 0; k < NumPorts - 1; k++) begin : gen_rd_ports
|
||||
// set these to high prio ports
|
||||
if ((k == 0 && MMU_PRESENT) || (k == 1) || (k == 2 && CVA6Cfg.EnableAccelerator)) begin
|
||||
assign rd_prio[k] = 1'b1;
|
||||
wt_dcache_ctrl #(
|
||||
.CVA6Cfg(CVA6Cfg),
|
||||
.RdTxId (RdAmoTxId)
|
||||
) i_wt_dcache_ctrl (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.cache_en_i (cache_en),
|
||||
// reqs from core
|
||||
.req_port_i (req_ports_i[k]),
|
||||
.req_port_o (req_ports_o[k]),
|
||||
// miss interface
|
||||
.miss_req_o (miss_req[k]),
|
||||
.miss_ack_i (miss_ack[k]),
|
||||
.miss_we_o (miss_we[k]),
|
||||
.miss_wdata_o (miss_wdata[k]),
|
||||
.miss_wuser_o (miss_wuser[k]),
|
||||
.miss_vld_bits_o(miss_vld_bits_o[k]),
|
||||
.miss_paddr_o (miss_paddr[k]),
|
||||
.miss_nc_o (miss_nc[k]),
|
||||
.miss_size_o (miss_size[k]),
|
||||
.miss_id_o (miss_id[k]),
|
||||
.miss_replay_i (miss_replay[k]),
|
||||
.miss_rtrn_vld_i(miss_rtrn_vld[k]),
|
||||
// used to detect readout mux collisions
|
||||
.wr_cl_vld_i (wr_cl_vld),
|
||||
// cache mem interface
|
||||
.rd_tag_o (rd_tag[k]),
|
||||
.rd_idx_o (rd_idx[k]),
|
||||
.rd_off_o (rd_off[k]),
|
||||
.rd_req_o (rd_req[k]),
|
||||
.rd_tag_only_o (rd_tag_only[k]),
|
||||
.rd_ack_i (rd_ack[k]),
|
||||
.rd_data_i (rd_data),
|
||||
.rd_user_i (rd_user),
|
||||
.rd_vld_bits_i (rd_vld_bits),
|
||||
.rd_hit_oh_i (rd_hit_oh)
|
||||
);
|
||||
end else begin
|
||||
assign rd_prio[k] = 1'b0;
|
||||
assign req_ports_o[k] = '0;
|
||||
assign miss_req[k] = 1'b0;
|
||||
assign miss_we[k] = 1'b0;
|
||||
assign miss_wdata[k] = {{riscv::XLEN} {1'b0}};
|
||||
assign miss_wuser[k] = {{DCACHE_USER_WIDTH} {1'b0}};
|
||||
assign miss_vld_bits_o[k] = {{DCACHE_SET_ASSOC} {1'b0}};
|
||||
assign miss_paddr[k] = {{riscv::PLEN} {1'b0}};
|
||||
assign miss_nc[k] = 1'b0;
|
||||
assign miss_size[k] = 3'b0;
|
||||
assign miss_id[k] = {{CACHE_ID_WIDTH} {1'b0}};
|
||||
assign rd_tag[k] = {{DCACHE_TAG_WIDTH} {1'b0}};
|
||||
assign rd_idx[k] = {{DCACHE_CL_IDX_WIDTH} {1'b0}};
|
||||
assign rd_off[k] = {{DCACHE_OFFSET_WIDTH} {1'b0}};
|
||||
assign rd_req[k] = 1'b0;
|
||||
assign rd_tag_only[k] = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// store unit controller
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// set read port to low priority
|
||||
assign rd_prio[NumPorts-1] = 1'b0;
|
||||
|
||||
wt_dcache_wbuffer #(
|
||||
.CVA6Cfg(CVA6Cfg)
|
||||
) i_wt_dcache_wbuffer (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.empty_o (wbuffer_empty_o),
|
||||
.not_ni_o (wbuffer_not_ni_o),
|
||||
// TODO: fix this
|
||||
.cache_en_i (cache_en),
|
||||
// .cache_en_i ( '0 ),
|
||||
// request ports from core (store unit)
|
||||
.req_port_i (req_ports_i[NumPorts-1]),
|
||||
.req_port_o (req_ports_o[NumPorts-1]),
|
||||
// miss unit interface
|
||||
.miss_req_o (miss_req[NumPorts-1]),
|
||||
.miss_ack_i (miss_ack[NumPorts-1]),
|
||||
.miss_we_o (miss_we[NumPorts-1]),
|
||||
.miss_wdata_o (miss_wdata[NumPorts-1]),
|
||||
.miss_wuser_o (miss_wuser[NumPorts-1]),
|
||||
.miss_vld_bits_o(miss_vld_bits_o[NumPorts-1]),
|
||||
.miss_paddr_o (miss_paddr[NumPorts-1]),
|
||||
.miss_nc_o (miss_nc[NumPorts-1]),
|
||||
.miss_size_o (miss_size[NumPorts-1]),
|
||||
.miss_id_o (miss_id[NumPorts-1]),
|
||||
.miss_rtrn_vld_i(miss_rtrn_vld[NumPorts-1]),
|
||||
.miss_rtrn_id_i (miss_rtrn_id),
|
||||
// cache read interface
|
||||
.rd_tag_o (rd_tag[NumPorts-1]),
|
||||
.rd_idx_o (rd_idx[NumPorts-1]),
|
||||
.rd_off_o (rd_off[NumPorts-1]),
|
||||
.rd_req_o (rd_req[NumPorts-1]),
|
||||
.rd_tag_only_o (rd_tag_only[NumPorts-1]),
|
||||
.rd_ack_i (rd_ack[NumPorts-1]),
|
||||
.rd_data_i (rd_data),
|
||||
.rd_vld_bits_i (rd_vld_bits),
|
||||
.rd_hit_oh_i (rd_hit_oh),
|
||||
// incoming invalidations/cache refills
|
||||
.wr_cl_vld_i (wr_cl_vld),
|
||||
.wr_cl_idx_i (wr_cl_idx),
|
||||
// single word write interface
|
||||
.wr_req_o (wr_req),
|
||||
.wr_ack_i (wr_ack),
|
||||
.wr_idx_o (wr_idx),
|
||||
.wr_off_o (wr_off),
|
||||
.wr_data_o (wr_data),
|
||||
.wr_user_o (wr_user),
|
||||
.wr_data_be_o (wr_data_be),
|
||||
// write buffer forwarding
|
||||
.wbuffer_data_o (wbuffer_data),
|
||||
.tx_paddr_o (tx_paddr),
|
||||
.tx_vld_o (tx_vld)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// memory arrays, arbitration and tag comparison
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
wt_dcache_mem #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.NumPorts(NumPorts)
|
||||
) i_wt_dcache_mem (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
// read ports
|
||||
.rd_prio_i (rd_prio),
|
||||
.rd_tag_i (rd_tag),
|
||||
.rd_idx_i (rd_idx),
|
||||
.rd_off_i (rd_off),
|
||||
.rd_req_i (rd_req),
|
||||
.rd_tag_only_i (rd_tag_only),
|
||||
.rd_ack_o (rd_ack),
|
||||
.rd_vld_bits_o (rd_vld_bits),
|
||||
.rd_hit_oh_o (rd_hit_oh),
|
||||
.rd_data_o (rd_data),
|
||||
.rd_user_o (rd_user),
|
||||
// cacheline write port
|
||||
.wr_cl_vld_i (wr_cl_vld),
|
||||
.wr_cl_nc_i (wr_cl_nc),
|
||||
.wr_cl_we_i (wr_cl_we),
|
||||
.wr_cl_tag_i (wr_cl_tag),
|
||||
.wr_cl_idx_i (wr_cl_idx),
|
||||
.wr_cl_off_i (wr_cl_off),
|
||||
.wr_cl_data_i (wr_cl_data),
|
||||
.wr_cl_user_i (wr_cl_user),
|
||||
.wr_cl_data_be_i(wr_cl_data_be),
|
||||
.wr_vld_bits_i (wr_vld_bits),
|
||||
// single word write port
|
||||
.wr_req_i (wr_req),
|
||||
.wr_ack_o (wr_ack),
|
||||
.wr_idx_i (wr_idx),
|
||||
.wr_off_i (wr_off),
|
||||
.wr_data_i (wr_data),
|
||||
.wr_user_i (wr_user),
|
||||
.wr_data_be_i (wr_data_be),
|
||||
// write buffer forwarding
|
||||
.wbuffer_data_i (wbuffer_data)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// assertions
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// check for concurrency issues
|
||||
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
flush :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (!rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o)
|
||||
else $fatal(1, "[l1 dcache] flushed cache implies flushed wbuffer");
|
||||
|
||||
initial begin
|
||||
// assert wrong parameterizations
|
||||
assert (DCACHE_INDEX_WIDTH <= 12)
|
||||
else $fatal(1, "[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
|
||||
end
|
||||
`endif
|
||||
//pragma translate_on
|
||||
|
||||
endmodule // wt_dcache
|
||||
|
|
@ -0,0 +1,299 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 13.09.2018
|
||||
// Description: DCache controller for read port
|
||||
|
||||
|
||||
module wt_dcache_ctrl
|
||||
import ariane_pkg::*;
|
||||
import wt_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 1
|
||||
) (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic cache_en_i,
|
||||
// core request ports
|
||||
input dcache_req_i_t req_port_i,
|
||||
output dcache_req_o_t req_port_o,
|
||||
// interface to miss handler
|
||||
output logic miss_req_o,
|
||||
input logic miss_ack_i,
|
||||
output logic miss_we_o, // unused (set to 0)
|
||||
output riscv::xlen_t miss_wdata_o, // unused (set to 0)
|
||||
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, // unused (set to 0)
|
||||
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index
|
||||
output logic [riscv::PLEN-1:0] miss_paddr_o,
|
||||
output logic miss_nc_o, // request to I/O space
|
||||
output logic [2:0] miss_size_o, // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline
|
||||
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // set to constant ID
|
||||
input logic miss_replay_i, // request collided with pending miss - have to replay the request
|
||||
input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
|
||||
// used to detect readout mux collisions
|
||||
input logic wr_cl_vld_i,
|
||||
// cache memory interface
|
||||
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
|
||||
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
|
||||
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
|
||||
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
|
||||
output logic rd_tag_only_o, // set to zero here
|
||||
input logic rd_ack_i,
|
||||
input riscv::xlen_t rd_data_i,
|
||||
input logic [DCACHE_USER_WIDTH-1:0] rd_user_i,
|
||||
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,
|
||||
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
|
||||
);
|
||||
|
||||
// controller FSM
|
||||
// State encoding of the read-port controller FSM (see p_fsm for the transitions).
typedef enum logic [2:0] {
|
||||
// no request in flight; a core request is forwarded to the cache memory and granted on rd_ack_i
IDLE,
|
||||
// lookup cycle after a grant: waits for tag_valid, then resolves to hit, miss or replay
READ,
|
||||
// miss_req_o is held high until the miss unit acks or signals a replay
MISS_REQ,
|
||||
// miss was acked; waits for miss_rtrn_vld_i before reporting rvalid to the core
MISS_WAIT,
|
||||
// request was killed after the miss was acked; waits for miss_rtrn_vld_i without reporting rvalid again
KILL_MISS,
|
||||
// request was killed before the miss was acked; miss_req_o stays high until ack or replay
KILL_MISS_ACK,
|
||||
// re-issues the read to the cache memory until rd_ack_i is seen
REPLAY_REQ,
|
||||
// same handling as READ, but the previously saved tag is reused (save_tag stays low)
REPLAY_READ
|
||||
} state_e;
|
||||
state_e state_d, state_q;
|
||||
|
||||
logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q;
|
||||
logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q;
|
||||
logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q;
|
||||
logic [DCACHE_TID_WIDTH-1:0] id_d, id_q;
|
||||
logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q;
|
||||
logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q;
|
||||
logic [1:0] data_size_d, data_size_q;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// misc
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// map address to tag/idx/offset and save
|
||||
// Request capture and constant outputs of the read controller.
// rd_req_q is rd_req_o delayed by one cycle, so the valid bits are sampled in the
// cycle after a read request was driven and held otherwise.
assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q;
|
||||
// the tag is latched only when the FSM raises save_tag
assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q;
|
||||
// index, offset, transaction id and size are latched when the request is granted
assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q;
|
||||
assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q;
|
||||
assign id_d = (req_port_o.data_gnt) ? req_port_i.data_id : id_q;
|
||||
assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q;
|
||||
// the memory interface is driven from the *_d values, i.e. newly captured fields
// are visible on rd_tag_o/rd_idx_o/rd_off_o in the same cycle they are latched
assign rd_tag_o = address_tag_d;
|
||||
assign rd_idx_o = address_idx_d;
|
||||
assign rd_off_o = address_off_d;
|
||||
|
||||
// read data and user bits are passed through unregistered; the id comes from the latched request
assign req_port_o.data_rdata = rd_data_i;
|
||||
assign req_port_o.data_ruser = rd_user_i;
|
||||
assign req_port_o.data_rid = id_q;
|
||||
|
||||
// to miss unit
|
||||
assign miss_vld_bits_o = vld_data_q;
|
||||
assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q};
|
||||
// noncacheable requests use the zero-extended size of the core request, all others use 3'b111
assign miss_size_o = (miss_nc_o) ? {1'b0, data_size_q} : 3'b111;
|
||||
|
||||
// noncacheable if request goes to I/O space, or if cache is disabled
|
||||
// the address handed to is_inside_cacheable_regions is 64 bit wide: zero padding,
// the latched tag, and DCACHE_INDEX_WIDTH zero bits in place of index/offset
assign miss_nc_o = (~cache_en_i) | (~config_pkg::is_inside_cacheable_regions(
|
||||
CVA6Cfg,
|
||||
{{{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {DCACHE_INDEX_WIDTH{1'b0}}}
|
||||
));
|
||||
|
||||
|
||||
// this controller never writes: write enable, data and user bits are tied to zero
assign miss_we_o = '0;
|
||||
assign miss_wdata_o = '0;
|
||||
assign miss_wuser_o = '0;
|
||||
// every miss of this port carries the constant id given by the RdTxId parameter
assign miss_id_o = RdTxId;
|
||||
// one-cycle delayed copies of rd_req_o / rd_ack_i (registered in p_regs)
assign rd_req_d = rd_req_o;
|
||||
assign rd_ack_d = rd_ack_i;
|
||||
assign rd_tag_only_o = '0;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// main control logic
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// Next-state and output logic of the read controller.
// Inputs:  state_q, req_port_i (data_req, tag_valid, kill_req), rd_ack_i, rd_ack_q,
//          rd_hit_oh_i, cache_en_i, wr_cl_vld_i, miss_ack_i, miss_replay_i, miss_rtrn_vld_i.
// Outputs: state_d, save_tag, rd_req_o, miss_req_o, req_port_o.data_gnt, req_port_o.data_rvalid.
// All outputs get a default first; the case branches only override what differs.
always_comb begin : p_fsm
|
||||
// default assignment
|
||||
state_d = state_q;
|
||||
save_tag = 1'b0;
|
||||
rd_req_o = 1'b0;
|
||||
miss_req_o = 1'b0;
|
||||
req_port_o.data_rvalid = 1'b0;
|
||||
req_port_o.data_gnt = 1'b0;
|
||||
|
||||
// interfaces
|
||||
unique case (state_q)
|
||||
//////////////////////////////////
|
||||
// wait for an incoming request
|
||||
IDLE: begin
|
||||
if (req_port_i.data_req) begin
|
||||
rd_req_o = 1'b1;
|
||||
// if read ack then ack the `req_port_o`, and goto `READ` state
|
||||
if (rd_ack_i) begin
|
||||
state_d = READ;
|
||||
|
||||
req_port_o.data_gnt = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// check whether we have a hit
|
||||
// in case the cache is disabled,
|
||||
// or in case the address is NC, we
|
||||
// reuse the miss mechanism to handle
|
||||
// the request
|
||||
READ, REPLAY_READ: begin
|
||||
// speculatively request cache line
|
||||
rd_req_o = 1'b1;
|
||||
|
||||
// kill -> go back to IDLE
|
||||
// (rvalid is still raised for the killed request)
if (req_port_i.kill_req) begin
|
||||
state_d = IDLE;
|
||||
req_port_o.data_rvalid = 1'b1;
|
||||
// `==` binds tighter than `|`: proceed when the tag is valid, or always in REPLAY_READ
end else if (req_port_i.tag_valid | state_q == REPLAY_READ) begin
|
||||
// the tag is only latched in READ; REPLAY_READ reuses the tag saved earlier
save_tag = (state_q != REPLAY_READ);
|
||||
// replay if a cache line write is signalled in this cycle, or if the
// read was not acked in the previous cycle (rd_ack_q)
if (wr_cl_vld_i || !rd_ack_q) begin
|
||||
state_d = REPLAY_REQ;
|
||||
// we've got a hit
|
||||
end else if ((|rd_hit_oh_i) && cache_en_i) begin
|
||||
state_d = IDLE;
|
||||
req_port_o.data_rvalid = 1'b1;
|
||||
// we can handle another request
|
||||
if (rd_ack_i && req_port_i.data_req) begin
|
||||
state_d = READ;
|
||||
req_port_o.data_gnt = 1'b1;
|
||||
end
|
||||
// we've got a miss
|
||||
end else begin
|
||||
state_d = MISS_REQ;
|
||||
end
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// issue request
|
||||
MISS_REQ: begin
|
||||
miss_req_o = 1'b1;
|
||||
|
||||
// on a kill, rvalid is raised immediately; the next state depends on
// whether the miss unit acked the request in this same cycle
if (req_port_i.kill_req) begin
|
||||
req_port_o.data_rvalid = 1'b1;
|
||||
if (miss_ack_i) begin
|
||||
state_d = KILL_MISS;
|
||||
end else begin
|
||||
state_d = KILL_MISS_ACK;
|
||||
end
|
||||
end else if (miss_replay_i) begin
|
||||
state_d = REPLAY_REQ;
|
||||
end else if (miss_ack_i) begin
|
||||
state_d = MISS_WAIT;
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// wait until the memory transaction
|
||||
// returns.
|
||||
MISS_WAIT: begin
|
||||
if (req_port_i.kill_req) begin
|
||||
req_port_o.data_rvalid = 1'b1;
|
||||
if (miss_rtrn_vld_i) begin
|
||||
state_d = IDLE;
|
||||
end else begin
|
||||
state_d = KILL_MISS;
|
||||
end
|
||||
end else if (miss_rtrn_vld_i) begin
|
||||
state_d = IDLE;
|
||||
req_port_o.data_rvalid = 1'b1;
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// replay read request
|
||||
REPLAY_REQ: begin
|
||||
rd_req_o = 1'b1;
|
||||
if (req_port_i.kill_req) begin
|
||||
req_port_o.data_rvalid = 1'b1;
|
||||
state_d = IDLE;
|
||||
end else if (rd_ack_i) begin
|
||||
state_d = REPLAY_READ;
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// killed before the miss request was acked: keep requesting until the
// miss unit answers with either a replay or an ack
KILL_MISS_ACK: begin
|
||||
miss_req_o = 1'b1;
|
||||
// in this case the miss handler did not issue
|
||||
// a transaction and we can safely go to idle
|
||||
if (miss_replay_i) begin
|
||||
state_d = IDLE;
|
||||
end else if (miss_ack_i) begin
|
||||
state_d = KILL_MISS;
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// killed miss,
|
||||
// wait until miss unit responds and
|
||||
// go back to idle
|
||||
KILL_MISS: begin
|
||||
if (miss_rtrn_vld_i) begin
|
||||
state_d = IDLE;
|
||||
end
|
||||
end
|
||||
default: begin
|
||||
// we should never get here
|
||||
state_d = IDLE;
|
||||
end
|
||||
endcase // state_q
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// ff's
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// State registers of the read controller.
// Asynchronous active-low reset (rst_ni) puts the FSM into IDLE and clears all
// captured request fields; otherwise every *_q takes its *_d value on the rising clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
|
||||
if (!rst_ni) begin
|
||||
state_q <= IDLE;
|
||||
address_tag_q <= '0;
|
||||
address_idx_q <= '0;
|
||||
address_off_q <= '0;
|
||||
id_q <= '0;
|
||||
vld_data_q <= '0;
|
||||
data_size_q <= '0;
|
||||
rd_req_q <= '0;
|
||||
rd_ack_q <= '0;
|
||||
end else begin
|
||||
state_q <= state_d;
|
||||
address_tag_q <= address_tag_d;
|
||||
address_idx_q <= address_idx_d;
|
||||
address_off_q <= address_off_d;
|
||||
id_q <= id_d;
|
||||
vld_data_q <= vld_data_d;
|
||||
data_size_q <= data_size_d;
|
||||
// rd_req_q / rd_ack_q are rd_req_o / rd_ack_i delayed by one cycle
rd_req_q <= rd_req_d;
|
||||
rd_ack_q <= rd_ack_d;
|
||||
end
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// assertions
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
|
||||
// Simulation-only check on the hit vector: in the cycle after rd_ack_i was low,
// and only while cache_en_i is set, at most one bit of rd_hit_oh_i may be high
// ($onehot0 also accepts the all-zero vector). A violation ends the simulation via $fatal.
// NOTE(review): the message says "hot1" although the check is $onehot0, and the
// antecedent is !rd_ack_i rather than rd_ack_i - confirm both are intended.
hot1 :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) (!rd_ack_i) |=> cache_en_i |-> $onehot0(
|
||||
rd_hit_oh_i
|
||||
))
|
||||
else $fatal(1, "[l1 dcache ctrl] rd_hit_oh_i signal must be hot1");
|
||||
|
||||
initial begin
|
||||
// assert wrong parameterizations
|
||||
assert (DCACHE_INDEX_WIDTH <= 12)
|
||||
else
|
||||
$fatal(1, "[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages");
|
||||
end
|
||||
`endif
|
||||
//pragma translate_on
|
||||
|
||||
endmodule // wt_dcache_ctrl
|
||||
|
|
@ -0,0 +1,428 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 13.09.2018
|
||||
// Description: Memory arrays, arbiter and tag comparison for WT dcache.
|
||||
//
|
||||
//
|
||||
// Notes: 1) all ports can trigger a readout of all ways, and the way where the tag hits is selected
|
||||
//
|
||||
// 2) only port0 can write full cache lines. higher ports are read only. also, port0 can only read the tag array,
|
||||
// and does not trigger a cache line readout.
|
||||
//
|
||||
// 3) the single word write port is a separate port without access to the tag memory.
|
||||
// these single word writes can interleave with read operations if they go to different
|
||||
// cacheline offsets, since each word offset is placed into a different SRAM bank.
|
||||
//
|
||||
// 4) Read ports with the same priority are RR arbitrated, but high prio ports (rd_prio_i[port_nr] = 1'b1) will stall
|
||||
// low prio ports (rd_prio_i[port_nr] = 1'b0)
|
||||
|
||||
|
||||
module wt_dcache_mem
|
||||
import ariane_pkg::*;
|
||||
import wt_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter int unsigned NumPorts = 3
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
// ports
|
||||
input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later
|
||||
input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i,
|
||||
input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i,
|
||||
input logic [NumPorts-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways
|
||||
input logic [NumPorts-1:0] rd_tag_only_i, // only do a tag/valid lookup, no access to data arrays
|
||||
input logic [NumPorts-1:0] rd_prio_i, // 0: low prio, 1: high prio
|
||||
output logic [NumPorts-1:0] rd_ack_o,
|
||||
output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o,
|
||||
output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o,
|
||||
output riscv::xlen_t rd_data_o,
|
||||
output logic [DCACHE_USER_WIDTH-1:0] rd_user_o,
|
||||
|
||||
// only available on port 0, uses address signals of port 0
|
||||
input logic wr_cl_vld_i,
|
||||
input logic wr_cl_nc_i, // noncacheable access
|
||||
input logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // writes a full cacheline
|
||||
input logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i,
|
||||
input logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
|
||||
input logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i,
|
||||
input logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data_i,
|
||||
input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i,
|
||||
input logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i,
|
||||
input logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits_i,
|
||||
|
||||
// separate port for single word write, no tag access
|
||||
input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3]
|
||||
output logic wr_ack_o,
|
||||
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i,
|
||||
input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i,
|
||||
input riscv::xlen_t wr_data_i,
|
||||
input logic [DCACHE_USER_WIDTH-1:0] wr_user_i,
|
||||
input logic [(riscv::XLEN/8)-1:0] wr_data_be_i,
|
||||
|
||||
// forwarded wbuffer
|
||||
input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i
|
||||
);
|
||||
|
||||
// functions
|
||||
function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh(
|
||||
input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in);
|
||||
logic [DCACHE_NUM_BANKS-1:0] out;
|
||||
out = '0;
|
||||
out[in] = 1'b1;
|
||||
return out;
|
||||
endfunction
|
||||
|
||||
// number of bits needed to address AXI data. If AxiDataWidth equals XLEN this parameter
|
||||
// is not needed. Therefore, increment it by one to avoid reverse range select during elaboration.
|
||||
localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? $clog2(
|
||||
CVA6Cfg.AxiDataWidth / 8
|
||||
) + 1 : $clog2(
|
||||
CVA6Cfg.AxiDataWidth / 8
|
||||
);
|
||||
|
||||
logic [DCACHE_NUM_BANKS-1:0] bank_req;
|
||||
logic [DCACHE_NUM_BANKS-1:0] bank_we;
|
||||
logic [DCACHE_NUM_BANKS-1:0][ DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be;
|
||||
logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx;
|
||||
logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q;
|
||||
logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q;
|
||||
|
||||
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata; //
|
||||
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata; //
|
||||
logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl; // selected word from each cacheline
|
||||
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser; //
|
||||
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser; //
|
||||
logic [DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] ruser_cl; // selected word from each cacheline
|
||||
|
||||
logic [DCACHE_TAG_WIDTH-1:0] rd_tag;
|
||||
logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
|
||||
logic vld_we; // valid bits write enable
|
||||
logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
|
||||
logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem
|
||||
logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
|
||||
|
||||
logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q;
|
||||
|
||||
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh;
|
||||
logic [ (riscv::XLEN/8)-1:0] wbuffer_be;
|
||||
riscv::xlen_t wbuffer_rdata, rdata;
|
||||
logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser;
|
||||
logic [riscv::PLEN-1:0] wbuffer_cmp_addr;
|
||||
|
||||
logic cmp_en_d, cmp_en_q;
|
||||
logic rd_acked;
|
||||
logic [NumPorts-1:0] bank_collision, rd_req_masked, rd_req_prio;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// arbiter
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// Priority is highest for lowest read port index
|
||||
//
|
||||
// SRAM bank mapping:
|
||||
//
|
||||
// Bank 0 Bank 2
|
||||
// [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] ..
|
||||
|
||||
// byte enable mapping
|
||||
for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_bank
|
||||
for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : gen_bank_way
|
||||
assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*(riscv::XLEN/8) +: (riscv::XLEN/8)] :
|
||||
(wr_req_i[j] & wr_ack_o) ? wr_data_be_i :
|
||||
'0;
|
||||
assign bank_wdata[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_i[k*riscv::XLEN +: riscv::XLEN] :
|
||||
wr_data_i;
|
||||
assign bank_wuser[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_user_i[k*DCACHE_USER_WIDTH +: DCACHE_USER_WIDTH] :
|
||||
wr_user_i;
|
||||
end
|
||||
end
|
||||
|
||||
assign vld_wdata = wr_vld_bits_i;
|
||||
assign vld_addr = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
|
||||
assign rd_tag = rd_tag_i[vld_sel_q]; //delayed by one cycle
|
||||
assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d];
|
||||
assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
|
||||
assign vld_req = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0;
|
||||
|
||||
|
||||
// priority masking
|
||||
// disable low prio requests when any of the high prio reqs is present
|
||||
assign rd_req_prio = rd_req_i & rd_prio_i;
|
||||
assign rd_req_masked = (|rd_req_prio) ? rd_req_prio : rd_req_i;
|
||||
|
||||
logic rd_req;
|
||||
rr_arb_tree #(
|
||||
.NumIn (NumPorts),
|
||||
.DataWidth(1)
|
||||
) i_rr_arb_tree (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i('0),
|
||||
.rr_i ('0),
|
||||
.req_i (rd_req_masked),
|
||||
.gnt_o (rd_ack_o),
|
||||
.data_i ('0),
|
||||
.gnt_i (~wr_cl_vld_i),
|
||||
.req_o (rd_req),
|
||||
.data_o (),
|
||||
.idx_o (vld_sel_d)
|
||||
);
|
||||
|
||||
assign rd_acked = rd_req & ~wr_cl_vld_i;
|
||||
|
||||
always_comb begin : p_bank_req
|
||||
vld_we = wr_cl_vld_i;
|
||||
bank_req = '0;
|
||||
wr_ack_o = '0;
|
||||
bank_we = '0;
|
||||
bank_idx = '{default: wr_idx_i};
|
||||
|
||||
for (int k = 0; k < NumPorts; k++) begin
|
||||
bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] == wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
|
||||
end
|
||||
|
||||
if (wr_cl_vld_i & |wr_cl_we_i) begin
|
||||
bank_req = '1;
|
||||
bank_we = '1;
|
||||
bank_idx = '{default: wr_cl_idx_i};
|
||||
end else begin
|
||||
if (rd_acked) begin
|
||||
if (!rd_tag_only_i[vld_sel_d]) begin
|
||||
bank_req =
|
||||
dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
|
||||
bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d];
|
||||
end
|
||||
end
|
||||
|
||||
if (|wr_req_i) begin
|
||||
if (rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin
|
||||
wr_ack_o = 1'b1;
|
||||
bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
|
||||
bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// tag comparison, hit generation, readout muxes
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off;
|
||||
logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off;
|
||||
logic [ $clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx;
|
||||
logic [ $clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx;
|
||||
|
||||
assign cmp_en_d = (|vld_req) & ~vld_we;
|
||||
|
||||
// word tag comparison in write buffer
|
||||
assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} :
|
||||
{rd_tag, bank_idx_q, bank_off_q};
|
||||
// hit generation
|
||||
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
|
||||
// tag comparison for way i (all ways, including way 0)
|
||||
assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q;
|
||||
// word offset mux for way i (all ways, including way 0)
|
||||
assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
|
||||
assign ruser_cl[i] = bank_ruser[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
|
||||
end
|
||||
|
||||
for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit
|
||||
assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & ({{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_data_i[k].wtag} == (wbuffer_cmp_addr >> riscv::XLEN_ALIGN_BYTES));
|
||||
end
|
||||
|
||||
lzc #(
|
||||
.WIDTH(DCACHE_WBUF_DEPTH)
|
||||
) i_lzc_wbuffer_hit (
|
||||
.in_i (wbuffer_hit_oh),
|
||||
.cnt_o (wbuffer_hit_idx),
|
||||
.empty_o()
|
||||
);
|
||||
|
||||
lzc #(
|
||||
.WIDTH(DCACHE_SET_ASSOC)
|
||||
) i_lzc_rd_hit (
|
||||
.in_i (rd_hit_oh_o),
|
||||
.cnt_o (rd_hit_idx),
|
||||
.empty_o()
|
||||
);
|
||||
|
||||
assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data;
|
||||
assign wbuffer_ruser = wbuffer_data_i[wbuffer_hit_idx].user;
|
||||
assign wbuffer_be = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0;
|
||||
|
||||
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
|
||||
// In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read
|
||||
assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? '0 :
|
||||
{{DCACHE_OFFSET_WIDTH-AXI_OFFSET_WIDTH{1'b0}}, wr_cl_off_i[AXI_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]} :
|
||||
wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
|
||||
end else begin : gen_piton_offset
|
||||
assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3];
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
if (wr_cl_vld_i) begin
|
||||
rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN+:riscv::XLEN];
|
||||
ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH+:DCACHE_USER_WIDTH];
|
||||
end else begin
|
||||
rdata = rdata_cl[rd_hit_idx];
|
||||
ruser = ruser_cl[rd_hit_idx];
|
||||
end
|
||||
end
|
||||
|
||||
// overlay bytes that hit in the write buffer
|
||||
for (genvar k = 0; k < (riscv::XLEN / 8); k++) begin : gen_rd_data
|
||||
assign rd_data_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k+:8] : rdata[8*k+:8];
|
||||
end
|
||||
for (genvar k = 0; k < DCACHE_USER_WIDTH / 8; k++) begin : gen_rd_user
|
||||
assign rd_user_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_ruser[8*k+:8] : ruser[8*k+:8];
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// memory arrays and regs
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata[DCACHE_SET_ASSOC-1:0];
|
||||
|
||||
for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_data_banks
|
||||
// Data RAM
|
||||
sram #(
|
||||
.USER_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH),
|
||||
.DATA_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN),
|
||||
.USER_EN (ariane_pkg::DATA_USER_EN),
|
||||
.NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
|
||||
) i_data_sram (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.req_i (bank_req[k]),
|
||||
.we_i (bank_we[k]),
|
||||
.addr_i (bank_idx[k]),
|
||||
.wuser_i(bank_wuser[k]),
|
||||
.wdata_i(bank_wdata[k]),
|
||||
.be_i (bank_be[k]),
|
||||
.ruser_o(bank_ruser[k]),
|
||||
.rdata_o(bank_rdata[k])
|
||||
);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_srams
|
||||
|
||||
assign tag_rdata[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH-1:0];
|
||||
assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH];
|
||||
|
||||
// Tag RAM
|
||||
sram #(
|
||||
// tag + valid bit
|
||||
.DATA_WIDTH(ariane_pkg::DCACHE_TAG_WIDTH + 1),
|
||||
.NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
|
||||
) i_tag_sram (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.req_i (vld_req[i]),
|
||||
.we_i (vld_we),
|
||||
.addr_i (vld_addr),
|
||||
.wuser_i('0),
|
||||
.wdata_i({vld_wdata[i], wr_cl_tag_i}),
|
||||
.be_i ('1),
|
||||
.ruser_o(),
|
||||
.rdata_o(vld_tag_rdata[i])
|
||||
);
|
||||
end
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
|
||||
if (!rst_ni) begin
|
||||
bank_idx_q <= '0;
|
||||
bank_off_q <= '0;
|
||||
vld_sel_q <= '0;
|
||||
cmp_en_q <= '0;
|
||||
end else begin
|
||||
bank_idx_q <= bank_idx_d;
|
||||
bank_off_q <= bank_off_d;
|
||||
vld_sel_q <= vld_sel_d;
|
||||
cmp_en_q <= cmp_en_d;
|
||||
end
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// assertions
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
initial begin
|
||||
cach_line_width_axi :
|
||||
assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth)
|
||||
else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width");
|
||||
end
|
||||
|
||||
initial begin
|
||||
axi_xlen :
|
||||
assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN)
|
||||
else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal XLEN");
|
||||
end
|
||||
|
||||
initial begin
|
||||
cach_line_width_xlen :
|
||||
assert (DCACHE_LINE_WIDTH > riscv::XLEN)
|
||||
else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN");
|
||||
end
|
||||
|
||||
hit_hot1 :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
|
||||
rd_hit_oh_o
|
||||
))
|
||||
else $fatal(1, "[l1 dcache] rd_hit_oh_o signal must be hot1");
|
||||
|
||||
word_write_hot1 :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
|
||||
else $fatal(1, "[l1 dcache] wr_req_i signal must be hot1");
|
||||
|
||||
wbuffer_hit_hot1 :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
|
||||
wbuffer_hit_oh
|
||||
))
|
||||
else $fatal(1, "[l1 dcache] wbuffer_hit_oh signal must be hot1");
|
||||
|
||||
// this is only used for verification!
|
||||
logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
|
||||
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
|
||||
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
|
||||
if (!rst_ni) begin
|
||||
vld_mirror <= '{default: '0};
|
||||
tag_mirror <= '{default: '0};
|
||||
end else begin
|
||||
for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin
|
||||
if (vld_req[i] & vld_we) begin
|
||||
vld_mirror[vld_addr][i] <= vld_wdata[i];
|
||||
tag_mirror[vld_addr][i] <= wr_cl_tag_i;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test
|
||||
assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata);
|
||||
end
|
||||
|
||||
tag_write_duplicate :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
|
||||
else $fatal(1, "[l1 dcache] cannot allocate a CL that is already present in the cache");
|
||||
|
||||
`endif
|
||||
//pragma translate_on
|
||||
|
||||
endmodule // wt_dcache_mem
|
||||
|
|
@ -0,0 +1,645 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 13.09.2018
|
||||
// Description: miss controller for WT dcache. Note that the current assumption
|
||||
// is that the port with the highest index issues writes instead of reads.
|
||||
|
||||
|
||||
module wt_dcache_missunit
|
||||
import ariane_pkg::*;
|
||||
import wt_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs
|
||||
parameter int unsigned NumPorts = 4 // number of miss ports
|
||||
) (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
// cache management, signals from/to core
|
||||
input logic enable_i, // from CSR
|
||||
input logic flush_i, // flush request, this waits for pending tx (write, read) to finish and will clear the cache
|
||||
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
|
||||
output logic miss_o, // we missed on a ld/st
|
||||
// local cache management signals
|
||||
input logic wbuffer_empty_i,
|
||||
output logic cache_en_o, // local cache enable signal
|
||||
// AMO interface
|
||||
input amo_req_t amo_req_i,
|
||||
output amo_resp_t amo_resp_o,
|
||||
// miss handling interface (ld, ptw, wbuffer)
|
||||
input logic [NumPorts-1:0] miss_req_i,
|
||||
output logic [NumPorts-1:0] miss_ack_o,
|
||||
input logic [NumPorts-1:0] miss_nc_i,
|
||||
input logic [NumPorts-1:0] miss_we_i,
|
||||
input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i,
|
||||
input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i,
|
||||
input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i,
|
||||
input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
|
||||
input logic [NumPorts-1:0][2:0] miss_size_i,
|
||||
input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID
|
||||
// signals that the request collided with a pending read
|
||||
output logic [NumPorts-1:0] miss_replay_o,
|
||||
// signals response from memory
|
||||
output logic [NumPorts-1:0] miss_rtrn_vld_o,
|
||||
output logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set to zero for reads
|
||||
// from writebuffer
|
||||
input logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_i, // used to check for address collisions with read operations
|
||||
input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations
|
||||
// write interface to cache memory
|
||||
output logic wr_cl_vld_o, // writes a full cacheline
|
||||
output logic wr_cl_nc_o, // non-cacheable flag of the read tracked in the MSHR (mshr_q.nc)
|
||||
output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline
|
||||
output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o,
|
||||
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o,
|
||||
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o,
|
||||
output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o,
|
||||
output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o,
|
||||
output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o,
|
||||
output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o,
|
||||
// memory interface
|
||||
input logic mem_rtrn_vld_i,
|
||||
input dcache_rtrn_t mem_rtrn_i,
|
||||
output logic mem_data_req_o,
|
||||
input logic mem_data_ack_i,
|
||||
output dcache_req_t mem_data_o
|
||||
);
|
||||
|
||||
// functions
|
||||
// Binary-to-one-hot conversion of a cache way index: the returned mask has
|
||||
// bit `in` set and every other bit cleared.
|
||||
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh(
|
||||
    input logic [L1D_WAY_WIDTH-1:0] in);
|
||||
  logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] way_oh;
|
||||
  // start from an all-zero mask, then raise the selected way
|
||||
  way_oh = '0;
|
||||
  way_oh[in] = 1'b1;
|
||||
  return way_oh;
|
||||
endfunction : dcache_way_bin2oh
|
||||
|
||||
// align the physical address to the specified size:
|
||||
// 000: bytes
|
||||
// 001: hword
|
||||
// 010: word
|
||||
// 011: dword
|
||||
// 111: DCACHE line
|
||||
// Returns `paddr` with its low-order bits cleared so that the address is
|
||||
// naturally aligned to the access size encoded in `size`. Size encodings
|
||||
// without an entry below (e.g. 3'b000, byte accesses) return `paddr` unchanged.
|
||||
function automatic logic [riscv::PLEN-1:0] paddrSizeAlign(input logic [riscv::PLEN-1:0] paddr,
|
||||
                                                          input logic [2:0] size);
|
||||
  logic [riscv::PLEN-1:0] aligned;
|
||||
  aligned = paddr;
|
||||
  unique case (size)
|
||||
    3'b001: aligned[0] = 1'b0;  // half word
|
||||
    3'b010: aligned[1:0] = 2'b00;  // word
|
||||
    3'b011: aligned[2:0] = 3'b000;  // double word
|
||||
    3'b111: aligned[DCACHE_OFFSET_WIDTH-1:0] = '0;  // full cache line
|
||||
    default: ;  // keep the address as is
|
||||
  endcase
|
||||
  return aligned;
|
||||
endfunction : paddrSizeAlign
|
||||
|
||||
// controller FSM
|
||||
// States of the miss controller FSM (see p_fsm). The FSM leaves reset in FLUSH.
typedef enum logic [2:0] {
|
||||
IDLE, // wait for flush / AMO / miss requests
|
||||
DRAIN, // only pass stores; wait until the MSHR is cleared and the write buffer is empty
|
||||
AMO, // send out the AMO request
|
||||
FLUSH, // flush the cache (flush_en asserted until flush_done)
|
||||
STORE_WAIT, // keep the store request asserted until it is acked
|
||||
LOAD_WAIT, // keep the load request asserted until it is acked
|
||||
AMO_WAIT // block until the AMO response returns
|
||||
} state_e;
|
||||
state_e state_d, state_q; // next state (p_fsm) / registered state (p_regs)
|
||||
|
||||
// MSHR for reads
|
||||
// Single miss status holding register entry; captured from the arbitrated
// miss port when mshr_allocate is set.
typedef struct packed {
|
||||
logic [riscv::PLEN-1:0] paddr; // physical address of the missing read
|
||||
logic [2:0] size; // access size as given on miss_size_i
|
||||
logic [DCACHE_SET_ASSOC-1:0] vld_bits; // per-way valid bits sampled from miss_vld_bits_i
|
||||
logic [CACHE_ID_WIDTH-1:0] id; // transaction ID, checked against the response TID by the read_tid assertion
|
||||
logic nc; // request was non-cacheable; such responses are not written into the cache
|
||||
logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way; // way chosen for the refill (an invalid way, or a random way if all are valid)
|
||||
logic [$clog2(NumPorts)-1:0] miss_port_idx; // port that issued the miss; receives miss_rtrn_vld_o on the load ack
|
||||
} mshr_t;
|
||||
|
||||
mshr_t mshr_d, mshr_q;
|
||||
logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way, inv_way, rnd_way;
|
||||
logic mshr_vld_d, mshr_vld_q, mshr_vld_q1;
|
||||
logic mshr_allocate;
|
||||
logic update_lfsr, all_ways_valid;
|
||||
|
||||
logic enable_d, enable_q;
|
||||
logic flush_ack_d, flush_ack_q;
|
||||
logic flush_en, flush_done;
|
||||
logic mask_reads, lock_reqs;
|
||||
logic amo_sel, miss_is_write;
|
||||
logic amo_req_d, amo_req_q;
|
||||
logic [63:0] amo_rtrn_mux;
|
||||
riscv::xlen_t amo_data, amo_data_a, amo_data_b;
|
||||
riscv::xlen_t amo_user; //DCACHE USER ? DATA_USER_WIDTH
|
||||
logic [riscv::PLEN-1:0] tmp_paddr;
|
||||
logic [$clog2(NumPorts)-1:0] miss_port_idx;
|
||||
logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q;
|
||||
logic [NumPorts-1:0] miss_req_masked_d, miss_req_masked_q;
|
||||
|
||||
logic inv_vld, inv_vld_all, cl_write_en;
|
||||
logic load_ack, store_ack, amo_ack;
|
||||
|
||||
logic [NumPorts-1:0] mshr_rdrd_collision_d, mshr_rdrd_collision_q;
|
||||
logic [NumPorts-1:0] mshr_rdrd_collision;
|
||||
logic tx_rdwr_collision, mshr_rdwr_collision;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// input arbitration and general control sigs
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
assign cache_en_o = enable_q;
|
||||
assign cnt_d = (flush_en) ? cnt_q + 1 : '0;
|
||||
assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS - 1);
|
||||
|
||||
assign miss_req_masked_d = (lock_reqs) ? miss_req_masked_q :
|
||||
(mask_reads) ? miss_we_i & miss_req_i : miss_req_i;
|
||||
assign miss_is_write = miss_we_i[miss_port_idx];
|
||||
|
||||
// read port arbiter
|
||||
lzc #(
|
||||
.WIDTH(NumPorts)
|
||||
) i_lzc_reqs (
|
||||
.in_i (miss_req_masked_d),
|
||||
.cnt_o (miss_port_idx),
|
||||
.empty_o()
|
||||
);
|
||||
|
||||
// Acknowledge the arbitrated miss port in the cycle the memory interface
|
||||
// accepts the request. No port is acknowledged while an AMO is selected.
|
||||
always_comb begin : p_ack
|
||||
  miss_ack_o = '0;
|
||||
  if (!amo_sel) miss_ack_o[miss_port_idx] = mem_data_req_o & mem_data_ack_i;
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// MSHR and way replacement logic (only for read ops)
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// find invalid cache line
|
||||
lzc #(
|
||||
.WIDTH(ariane_pkg::DCACHE_SET_ASSOC)
|
||||
) i_lzc_inv (
|
||||
.in_i (~miss_vld_bits_i[miss_port_idx]),
|
||||
.cnt_o (inv_way),
|
||||
.empty_o(all_ways_valid)
|
||||
);
|
||||
|
||||
// generate random cacheline index
|
||||
lfsr #(
|
||||
.LfsrWidth(8),
|
||||
.OutWidth ($clog2(ariane_pkg::DCACHE_SET_ASSOC))
|
||||
) i_lfsr_inv (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
.en_i (update_lfsr),
|
||||
.out_o (rnd_way)
|
||||
);
|
||||
|
||||
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
|
||||
|
||||
assign mshr_d.size = (mshr_allocate) ? miss_size_i[miss_port_idx] : mshr_q.size;
|
||||
assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i[miss_port_idx] : mshr_q.paddr;
|
||||
assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
|
||||
assign mshr_d.id = (mshr_allocate) ? miss_id_i[miss_port_idx] : mshr_q.id;
|
||||
assign mshr_d.nc = (mshr_allocate) ? miss_nc_i[miss_port_idx] : mshr_q.nc;
|
||||
assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way;
|
||||
assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx;
|
||||
|
||||
// currently we only have one outstanding read TX, hence an incoming load clears the MSHR
|
||||
assign mshr_vld_d = (mshr_allocate) ? 1'b1 : (load_ack) ? 1'b0 : mshr_vld_q;
|
||||
|
||||
assign miss_o = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;
|
||||
|
||||
|
||||
for (genvar k = 0; k < NumPorts; k++) begin : gen_rdrd_collision
|
||||
assign mshr_rdrd_collision[k] = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1);
|
||||
assign mshr_rdrd_collision_d[k] = (!miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k];
|
||||
end
|
||||
|
||||
// read/write collision, stalls the corresponding request
|
||||
// write port[NumPorts-1] collides with MSHR_Q
|
||||
assign mshr_rdwr_collision = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[NumPorts-1][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && mshr_vld_q;
|
||||
|
||||
// read collides with inflight TX
|
||||
// Compare the cache line address of the arbitrated miss request against all
|
||||
// valid write-buffer transactions and flag a collision if any of them match.
|
||||
always_comb begin : p_tx_coll
|
||||
  tx_rdwr_collision = 1'b0;
|
||||
  for (int tx = 0; tx < DCACHE_MAX_TX; tx++) begin
|
||||
    tx_rdwr_collision = tx_rdwr_collision | (tx_vld_i[tx] && (tx_paddr_i[tx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[miss_port_idx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]));
|
||||
  end
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// to memory
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// if size = 32bit word, select appropriate offset, replicate for openpiton...
|
||||
|
||||
if (CVA6Cfg.RVA) begin
|
||||
if (riscv::IS_XLEN64) begin : gen_amo_64b_data
|
||||
assign amo_data_a = {amo_req_i.operand_b[0+:32], amo_req_i.operand_b[0+:32]};
|
||||
assign amo_data_b = amo_req_i.operand_b;
|
||||
end else begin : gen_amo_32b_data
|
||||
assign amo_data_a = amo_req_i.operand_b[0+:32];
|
||||
end
|
||||
end
|
||||
|
||||
// Select the AMO store data and user bits that are forwarded to memory while
|
||||
// amo_sel is set.
|
||||
always_comb begin
|
||||
  // Default assignments: without them neither signal is driven when the
|
||||
  // A extension is disabled (CVA6Cfg.RVA == 0). Both are only consumed
|
||||
  // under (CVA6Cfg.RVA && amo_sel), so the defaults never reach memory.
|
||||
  amo_data = '0;
|
||||
  amo_user = '0;
|
||||
  if (CVA6Cfg.RVA) begin
|
||||
    if (riscv::IS_XLEN64) begin
|
||||
      if (amo_req_i.size == 2'b10) begin
|
||||
        // 32bit AMO: lower operand word replicated into both halves
|
||||
        amo_data = amo_data_a;
|
||||
      end else begin
|
||||
        // 64bit AMO: operand passed through unmodified
|
||||
        amo_data = amo_data_b;
|
||||
      end
|
||||
    end else begin
|
||||
      amo_data = amo_data_a;
|
||||
    end
|
||||
    // user bits mirror the data when the user field is enabled
|
||||
    if (ariane_pkg::DATA_USER_EN) begin
|
||||
      amo_user = amo_data;
|
||||
    end else begin
|
||||
      amo_user = '0;
|
||||
    end
|
||||
  end
|
||||
end
|
||||
|
||||
if (CVA6Cfg.RVA) begin
|
||||
// note: openpiton returns a full cacheline!
|
||||
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_rtrn_mux
|
||||
if (CVA6Cfg.AxiDataWidth > 64) begin
|
||||
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[$clog2(
|
||||
CVA6Cfg.AxiDataWidth/8
|
||||
)-1:3]*64+:64];
|
||||
end else begin
|
||||
assign amo_rtrn_mux = mem_rtrn_i.data[0+:64];
|
||||
end
|
||||
end else begin : gen_piton_rtrn_mux
|
||||
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64+:64];
|
||||
end
|
||||
|
||||
// always sign extend 32bit values
|
||||
assign amo_resp_o.result = (amo_req_i.size==2'b10) ? {{32{amo_rtrn_mux[amo_req_i.operand_a[2]*32 + 31]}},amo_rtrn_mux[amo_req_i.operand_a[2]*32 +: 32]} :
|
||||
amo_rtrn_mux ;
|
||||
assign amo_req_d = amo_req_i.req;
|
||||
end
|
||||
|
||||
// outgoing memory requests (AMOs are always uncached)
|
||||
assign mem_data_o.tid = (CVA6Cfg.RVA && amo_sel) ? AmoTxId : miss_id_i[miss_port_idx];
|
||||
assign mem_data_o.nc = (CVA6Cfg.RVA && amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx];
|
||||
assign mem_data_o.way = (CVA6Cfg.RVA && amo_sel) ? '0 : repl_way;
|
||||
assign mem_data_o.data = (CVA6Cfg.RVA && amo_sel) ? amo_data : miss_wdata_i[miss_port_idx];
|
||||
assign mem_data_o.user = (CVA6Cfg.RVA && amo_sel) ? amo_user : miss_wuser_i[miss_port_idx];
|
||||
assign mem_data_o.size = (CVA6Cfg.RVA && amo_sel) ? {1'b0, amo_req_i.size} : miss_size_i [miss_port_idx];
|
||||
assign mem_data_o.amo_op = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.amo_op : AMO_NONE;
|
||||
|
||||
assign tmp_paddr = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx];
|
||||
assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// back-off mechanism for LR/SC completion guarantee
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
logic sc_fail, sc_pass, sc_backoff_over;
|
||||
exp_backoff #(
|
||||
.Seed (3),
|
||||
.MaxExp(16)
|
||||
) i_exp_backoff (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.set_i (sc_fail),
|
||||
.clr_i (sc_pass),
|
||||
.is_zero_o(sc_backoff_over)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// responses from memory
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// keep track of pending stores
|
||||
logic store_sent;
|
||||
logic [$clog2(wt_cache_pkg::DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q;
|
||||
assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ);
|
||||
|
||||
assign stores_inflight_d = (store_ack && store_sent) ? stores_inflight_q :
|
||||
(store_ack) ? stores_inflight_q - 1 :
|
||||
(store_sent) ? stores_inflight_q + 1 :
|
||||
stores_inflight_q;
|
||||
|
||||
// incoming responses
|
||||
// Decodes a valid memory response (mem_rtrn_i.rtype) into the internal
// ack / invalidation strobes and the per-port return-valid signals.
// All strobes default to zero and are only raised while mem_rtrn_vld_i is set.
always_comb begin : p_rtrn_logic
|
||||
load_ack = 1'b0;
|
||||
store_ack = 1'b0;
|
||||
amo_ack = 1'b0;
|
||||
inv_vld = 1'b0;
|
||||
inv_vld_all = 1'b0;
|
||||
sc_fail = 1'b0;
|
||||
sc_pass = 1'b0;
|
||||
miss_rtrn_vld_o = '0;
|
||||
if (mem_rtrn_vld_i) begin
|
||||
unique case (mem_rtrn_i.rtype)
|
||||
DCACHE_LOAD_ACK: begin
|
||||
// load responses are only accepted while the MSHR holds a pending read
|
||||
if (mshr_vld_q) begin
|
||||
load_ack = 1'b1;
|
||||
// notify the port that allocated the MSHR
|
||||
miss_rtrn_vld_o[mshr_q.miss_port_idx] = 1'b1;
|
||||
end
|
||||
end
|
||||
DCACHE_STORE_ACK: begin
|
||||
// store responses are only accepted while stores are counted as in flight
|
||||
if (stores_inflight_q > 0) begin
|
||||
store_ack = 1'b1;
|
||||
// the port with the highest index is the write port
|
||||
miss_rtrn_vld_o[NumPorts-1] = 1'b1;
|
||||
end
|
||||
end
|
||||
DCACHE_ATOMIC_ACK: begin
|
||||
if (CVA6Cfg.RVA) begin
|
||||
if (amo_req_q) begin
|
||||
amo_ack = 1'b1;
|
||||
// need to set SC backoff counter if
|
||||
// this op failed
|
||||
if (amo_req_i.amo_op == AMO_SC) begin
|
||||
// a non-zero result is treated as a failed SC (sets the backoff), zero clears it
|
||||
if (amo_resp_o.result > 0) begin
|
||||
sc_fail = 1'b1;
|
||||
end else begin
|
||||
sc_pass = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
DCACHE_INV_REQ: begin
|
||||
// an "invalidate all" request also raises inv_vld
|
||||
inv_vld = mem_rtrn_i.inv.vld | mem_rtrn_i.inv.all;
|
||||
inv_vld_all = mem_rtrn_i.inv.all;
|
||||
end
|
||||
// TODO:
|
||||
// DCACHE_INT_REQ: begin
|
||||
// end
|
||||
default: begin
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// to write buffer
|
||||
assign miss_rtrn_id_o = mem_rtrn_i.tid;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// writes to cache memory
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// cacheline write port
|
||||
assign wr_cl_nc_o = mshr_q.nc;
|
||||
assign wr_cl_vld_o = load_ack | (|wr_cl_we_o);
|
||||
|
||||
assign wr_cl_we_o = (flush_en) ? '1 : (inv_vld_all) ? '1 : (inv_vld) ? dcache_way_bin2oh(
|
||||
mem_rtrn_i.inv.way
|
||||
) : (cl_write_en) ? dcache_way_bin2oh(
|
||||
mshr_q.repl_way
|
||||
) : '0;
|
||||
|
||||
assign wr_vld_bits_o = (flush_en) ? '0 : (inv_vld) ? '0 : (cl_write_en) ? dcache_way_bin2oh(
|
||||
mshr_q.repl_way
|
||||
) : '0;
|
||||
|
||||
assign wr_cl_idx_o = (flush_en) ? cnt_q :
|
||||
(inv_vld) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] :
|
||||
mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
|
||||
|
||||
assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
|
||||
assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
|
||||
assign wr_cl_data_o = mem_rtrn_i.data;
|
||||
assign wr_cl_user_o = mem_rtrn_i.user;
|
||||
assign wr_cl_data_be_o = (cl_write_en) ? '1 : '0;// we only write complete cachelines into the memory
|
||||
|
||||
// only non-NC responses write to the cache
|
||||
assign cl_write_en = load_ack & ~mshr_q.nc;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// main control logic for generating tx
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// Main controller FSM. Computes the next state and drives the memory request
// port (mem_data_req_o / mem_data_o.rtype), the flush and AMO handshakes, the
// replay strobes and the internal control signals (flush_en, amo_sel,
// update_lfsr, mshr_allocate, lock_reqs, mask_reads). All outputs are given a
// default first; each state only overrides what it needs.
always_comb begin : p_fsm
|
||||
// default assignment
|
||||
state_d = state_q;
|
||||
|
||||
flush_ack_o = 1'b0;
|
||||
mem_data_o.rtype = DCACHE_LOAD_REQ;
|
||||
mem_data_req_o = 1'b0;
|
||||
amo_resp_o.ack = 1'b0;
|
||||
miss_replay_o = '0;
|
||||
|
||||
// disabling cache is possible anytime, enabling goes via flush
|
||||
enable_d = enable_q & enable_i;
|
||||
flush_ack_d = flush_ack_q;
|
||||
flush_en = 1'b0;
|
||||
amo_sel = 1'b0;
|
||||
update_lfsr = 1'b0;
|
||||
mshr_allocate = 1'b0;
|
||||
lock_reqs = 1'b0;
|
||||
mask_reads = mshr_vld_q; // while the MSHR is occupied only write requests pass the request mask
|
||||
|
||||
// interfaces
|
||||
unique case (state_q)
|
||||
//////////////////////////////////
|
||||
// wait for misses / amo ops
|
||||
IDLE: begin
|
||||
// priority: flush / cache enable, then AMO, then regular misses
|
||||
if (flush_i || (enable_i && !enable_q)) begin
|
||||
if (wbuffer_empty_i && !mshr_vld_q) begin
|
||||
// remember whether this flush has to be acknowledged (only if requested via flush_i)
|
||||
flush_ack_d = flush_i;
|
||||
state_d = FLUSH;
|
||||
end else begin
|
||||
state_d = DRAIN;
|
||||
end
|
||||
end else if (CVA6Cfg.RVA && amo_req_i.req) begin
|
||||
if (wbuffer_empty_i && !mshr_vld_q) begin
|
||||
state_d = AMO;
|
||||
end else begin
|
||||
state_d = DRAIN;
|
||||
end
|
||||
// we've got a miss to handle
|
||||
end else if (|miss_req_masked_d) begin
|
||||
// this is a write miss, just pass through (but check whether write collides with MSHR)
|
||||
if (miss_is_write) begin
|
||||
// stall in case this write collides with the MSHR address
|
||||
if (!mshr_rdwr_collision) begin
|
||||
mem_data_req_o = 1'b1;
|
||||
mem_data_o.rtype = DCACHE_STORE_REQ;
|
||||
if (!mem_data_ack_i) begin
|
||||
state_d = STORE_WAIT;
|
||||
end
|
||||
end
|
||||
// this is a read miss, can only allocate 1 MSHR
|
||||
// in case of a load_ack we can accept a new miss, since the MSHR is being cleared
|
||||
end else if (!mshr_vld_q || load_ack) begin
|
||||
// replay the read request in case the address has collided with MSHR during the time the request was pending
|
||||
// i.e., the cache state may have been updated in the mean time due to a refill at the same CL address
|
||||
if (mshr_rdrd_collision_d[miss_port_idx]) begin
|
||||
miss_replay_o[miss_port_idx] = 1'b1;
|
||||
// stall in case this CL address overlaps with a write TX that is in flight
|
||||
end else if (!tx_rdwr_collision) begin
|
||||
mem_data_req_o = 1'b1;
|
||||
mem_data_o.rtype = DCACHE_LOAD_REQ;
|
||||
update_lfsr = all_ways_valid & mem_data_ack_i; // need to evict a random way
|
||||
mshr_allocate = mem_data_ack_i;
|
||||
if (!mem_data_ack_i) begin
|
||||
state_d = LOAD_WAIT;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// wait until this request is acked
|
||||
STORE_WAIT: begin
|
||||
lock_reqs = 1'b1; // hold the registered request mask while the request is pending
|
||||
mem_data_req_o = 1'b1;
|
||||
mem_data_o.rtype = DCACHE_STORE_REQ;
|
||||
if (mem_data_ack_i) begin
|
||||
state_d = IDLE;
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// wait until this request is acked
|
||||
LOAD_WAIT: begin
|
||||
lock_reqs = 1'b1; // hold the registered request mask while the request is pending
|
||||
mem_data_req_o = 1'b1;
|
||||
mem_data_o.rtype = DCACHE_LOAD_REQ;
|
||||
if (mem_data_ack_i) begin
|
||||
update_lfsr = all_ways_valid; // need to evict a random way
|
||||
mshr_allocate = 1'b1;
|
||||
state_d = IDLE;
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// only handle stores, do not accept new read requests
|
||||
// wait until MSHR is cleared and wbuffer is empty
|
||||
DRAIN: begin
|
||||
mask_reads = 1'b1;
|
||||
// these are writes, check whether they collide with MSHR
|
||||
if (|miss_req_masked_d && !mshr_rdwr_collision) begin
|
||||
mem_data_req_o = 1'b1;
|
||||
mem_data_o.rtype = DCACHE_STORE_REQ;
|
||||
end
|
||||
|
||||
// return to IDLE, where the pending flush / AMO request is evaluated again
|
||||
if (wbuffer_empty_i && !mshr_vld_q) begin
|
||||
state_d = IDLE;
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// flush the cache
|
||||
FLUSH: begin
|
||||
// internal flush signal
|
||||
flush_en = 1'b1;
|
||||
if (flush_done) begin
|
||||
state_d = IDLE;
|
||||
flush_ack_o = flush_ack_q; // acknowledge only if the flush was requested via flush_i
|
||||
flush_ack_d = 1'b0;
|
||||
enable_d = enable_i; // the cache enable is taken over once the flush has completed
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// send out amo op request
|
||||
AMO: begin
|
||||
if (CVA6Cfg.RVA) begin
|
||||
mem_data_o.rtype = DCACHE_ATOMIC_REQ;
|
||||
amo_sel = 1'b1;
|
||||
// if this is an LR, we need to consult the backoff counter
|
||||
if ((amo_req_i.amo_op != AMO_LR) || sc_backoff_over) begin
|
||||
mem_data_req_o = 1'b1;
|
||||
if (mem_data_ack_i) begin
|
||||
state_d = AMO_WAIT;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
// block and wait until AMO OP returns
|
||||
AMO_WAIT: begin
|
||||
if (CVA6Cfg.RVA) begin
|
||||
amo_sel = 1'b1;
|
||||
if (amo_ack) begin
|
||||
amo_resp_o.ack = 1'b1;
|
||||
state_d = IDLE;
|
||||
end
|
||||
end
|
||||
end
|
||||
//////////////////////////////////
|
||||
default: begin
|
||||
// we should never get here
|
||||
state_d = IDLE;
|
||||
end
|
||||
endcase // state_q
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// ff's
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// State registers of the miss unit (asynchronous, active-low reset).
|
||||
// The FSM leaves reset in FLUSH with the cache disabled.
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
|
||||
  if (!rst_ni) begin
|
||||
    // controller state, flush counter and cache enable
|
||||
    state_q <= FLUSH;
|
||||
    cnt_q <= '0;
|
||||
    flush_ack_q <= '0;
|
||||
    enable_q <= '0;
|
||||
    // MSHR and read/read collision tracking
|
||||
    mshr_q <= '0;
|
||||
    mshr_vld_q <= '0;
|
||||
    mshr_vld_q1 <= '0;
|
||||
    mshr_rdrd_collision_q <= '0;
|
||||
    // request arbitration, AMO and store bookkeeping
|
||||
    miss_req_masked_q <= '0;
|
||||
    amo_req_q <= '0;
|
||||
    stores_inflight_q <= '0;
|
||||
  end else begin
|
||||
    // controller state, flush counter and cache enable
|
||||
    state_q <= state_d;
|
||||
    cnt_q <= cnt_d;
|
||||
    flush_ack_q <= flush_ack_d;
|
||||
    enable_q <= enable_d;
|
||||
    // MSHR and read/read collision tracking
|
||||
    mshr_q <= mshr_d;
|
||||
    mshr_vld_q <= mshr_vld_d;
|
||||
    mshr_vld_q1 <= mshr_vld_q;  // MSHR valid delayed by one cycle
|
||||
    mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
|
||||
    // request arbitration, AMO and store bookkeeping
|
||||
    miss_req_masked_q <= miss_req_masked_d;
|
||||
    amo_req_q <= amo_req_d;
|
||||
    stores_inflight_q <= stores_inflight_d;
|
||||
  end
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// assertions
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
|
||||
// An accepted load response must carry the transaction ID stored in the MSHR.
read_tid :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (!rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
|
||||
else $fatal(1, "[l1 dcache missunit] TID of load response doesn't match");
|
||||
|
||||
// Ports [NumPorts-2:0] are read ports: none of them may set its write enable while any of them requests.
read_ports :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (!rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0)
|
||||
else $fatal(1, "[l1 dcache missunit] only last port can issue write requests");
|
||||
|
||||
// The port with the highest index is the write port: every request on it must be a write.
write_port :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (!rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1])
|
||||
else $fatal(1, "[l1 dcache missunit] last port can only issue write requests");
|
||||
|
||||
// Elaboration-time parameter check.
initial begin
|
||||
// assert wrong parameterizations
|
||||
assert (NumPorts >= 2)
|
||||
else
|
||||
$fatal(
|
||||
1, "[l1 dcache missunit] at least two ports are required (one read port, one write port)"
|
||||
);
|
||||
end
|
||||
`endif
|
||||
//pragma translate_on
|
||||
|
||||
endmodule // wt_dcache_missunit
|
||||
|
|
@ -0,0 +1,635 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
|
||||
// Date: 13.09.2018
|
||||
// Description: coalescing write buffer for WT dcache
|
||||
//
|
||||
// A couple of notes:
|
||||
//
|
||||
// 1) the write buffer behaves as a fully-associative cache, and is therefore coalescing.
|
||||
// this cache is used by the cache readout logic to forward data to the load unit.
|
||||
//
|
||||
// each byte can be in the following states (valid/dirty/txblock):
|
||||
//
|
||||
// 0/0/0: invalid -> free entry in the buffer
|
||||
// 1/1/0: valid and dirty, Byte is hence not part of TX in-flight
|
||||
// 1/0/1: valid and not dirty, Byte is part of a TX in-flight
|
||||
// 1/1/1: valid, part of a TX in-flight, and dirty. this means that the byte has been
|
||||
// overwritten while in TX and needs to be retransmitted once the write of that byte returns.
|
||||
// 1/0/0: this would represent a clean state, but is never reached in the wbuffer in the current implementation.
|
||||
// this is because when a TX returns, and the byte is in state [1/0/1], it is written to cache if needed and
|
||||
// its state is immediately cleared to 0/x/x.
|
||||
//
|
||||
// this state is used to distinguish between bytes that have been written and not
|
||||
// yet sent to the memory subsystem, and bytes that are part of a transaction.
|
||||
//
|
||||
// 2) further, each word in the write buffer has a cache state (checked, hit_oh)
|
||||
//
|
||||
// checked == 0: unknown cache state
|
||||
// checked == 1: cache state has been looked up, valid way is stored in "hit_oh"
|
||||
//
|
||||
// cache invalidations/refills affecting a particular word will clear its word state to 0,
|
||||
// so another lookup has to be done. note that these lookups are triggered as soon as there is
|
||||
// a valid word with checked == 0 in the write buffer.
|
||||
//
|
||||
// 3) returning write ACKs trigger a cache update if the word is present in the cache, and evict that
|
||||
// word from the write buffer. if the word is not allocated to the cache, it is just evicted from the write buffer.
|
||||
// if the word cache state is VOID, the pipeline is stalled until it is clear whether that word is in the cache or not.
|
||||
//
|
||||
// 4) we handle NC writes using the writebuffer circuitry. upon an NC request, the writebuffer will first be drained.
|
||||
// then, only the NC word is written into the write buffer and no further write requests are acknowledged until that
|
||||
// word has been evicted from the write buffer.
|
||||
|
||||
|
||||
module wt_dcache_wbuffer
|
||||
import ariane_pkg::*;
|
||||
import wt_cache_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
|
||||
input logic cache_en_i, // writes are treated as NC if disabled
|
||||
output logic empty_o, // asserted if no data is present in write buffer
|
||||
output logic not_ni_o, // asserted if no ni data is present in write buffer
|
||||
// core request ports
|
||||
input dcache_req_i_t req_port_i,
|
||||
output dcache_req_o_t req_port_o,
|
||||
// interface to miss handler
|
||||
input logic miss_ack_i,
|
||||
output logic [riscv::PLEN-1:0] miss_paddr_o,
|
||||
output logic miss_req_o,
|
||||
output logic miss_we_o, // always 1 here
|
||||
output riscv::xlen_t miss_wdata_o,
|
||||
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o,
|
||||
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0)
|
||||
output logic miss_nc_o, // request to I/O space
|
||||
output logic [2:0] miss_size_o, //
|
||||
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
|
||||
// write responses from memory
|
||||
input logic miss_rtrn_vld_i,
|
||||
input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to clear
|
||||
// cache read interface
|
||||
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
|
||||
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
|
||||
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
|
||||
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
|
||||
output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays
|
||||
input logic rd_ack_i,
|
||||
input riscv::xlen_t rd_data_i, // unused
|
||||
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused
|
||||
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i,
|
||||
// cacheline writes
|
||||
input logic wr_cl_vld_i,
|
||||
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
|
||||
// cache word write interface
|
||||
output logic [DCACHE_SET_ASSOC-1:0] wr_req_o,
|
||||
input logic wr_ack_i,
|
||||
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
|
||||
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
|
||||
output riscv::xlen_t wr_data_o,
|
||||
output logic [(riscv::XLEN/8)-1:0] wr_data_be_o,
|
||||
output logic [DCACHE_USER_WIDTH-1:0] wr_user_o,
|
||||
// to forwarding logic and miss unit
|
||||
output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o,
|
||||
output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_o, // used to check for address collisions with read operations
|
||||
output logic [DCACHE_MAX_TX-1:0] tx_vld_o
|
||||
);
|
||||
|
||||
tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q;
|
||||
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q;
|
||||
logic [DCACHE_WBUF_DEPTH-1:0] valid;
|
||||
logic [DCACHE_WBUF_DEPTH-1:0] dirty;
|
||||
logic [DCACHE_WBUF_DEPTH-1:0] tocheck;
|
||||
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit;
|
||||
//logic [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty;
|
||||
logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty;
|
||||
|
||||
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0]
|
||||
next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
|
||||
logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id;
|
||||
|
||||
logic [riscv::XLEN_ALIGN_BYTES-1:0] bdirty_off;
|
||||
logic [(riscv::XLEN/8)-1:0] tx_be;
|
||||
logic [riscv::PLEN-1:0] wr_paddr, rd_paddr, extract_tag;
|
||||
logic [DCACHE_TAG_WIDTH-1:0] rd_tag_d, rd_tag_q;
|
||||
logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_d, rd_hit_oh_q;
|
||||
logic check_en_d, check_en_q, check_en_q1;
|
||||
logic full, dirty_rd_en, rdy;
|
||||
logic rtrn_empty, evict;
|
||||
logic [DCACHE_WBUF_DEPTH-1:0] ni_pending_d, ni_pending_q;
|
||||
logic wbuffer_wren;
|
||||
logic free_tx_slots;
|
||||
|
||||
logic wr_cl_vld_q, wr_cl_vld_d;
|
||||
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d;
|
||||
|
||||
logic [riscv::PLEN-1:0] debug_paddr[DCACHE_WBUF_DEPTH-1:0];
|
||||
|
||||
wbuffer_t wbuffer_check_mux, wbuffer_dirty_mux;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// misc
|
||||
///////////////////////////////////////////////////////
|
||||
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] miss_tag;
|
||||
logic is_nc_miss;
|
||||
logic is_ni;
|
||||
assign miss_tag = miss_paddr_o[ariane_pkg::DCACHE_INDEX_WIDTH+:ariane_pkg::DCACHE_TAG_WIDTH];
|
||||
assign is_nc_miss = !config_pkg::is_inside_cacheable_regions(
|
||||
CVA6Cfg,
|
||||
{
|
||||
{64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}}
|
||||
}
|
||||
);
|
||||
assign miss_nc_o = !cache_en_i || is_nc_miss;
|
||||
// Non-idempotent if request goes to NI region
|
||||
assign is_ni = config_pkg::is_inside_nonidempotent_regions(
|
||||
CVA6Cfg,
|
||||
{
|
||||
{64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}},
|
||||
req_port_i.address_tag,
|
||||
{DCACHE_INDEX_WIDTH{1'b0}}
|
||||
}
|
||||
);
|
||||
|
||||
assign miss_we_o = 1'b1;
|
||||
assign miss_vld_bits_o = '0;
|
||||
assign wbuffer_data_o = wbuffer_q;
|
||||
|
||||
// Per TX slot k: export the slot's valid flag and the address of the write
// buffer word that the slot refers to.
for (genvar k = 0; k < DCACHE_MAX_TX; k++) begin : gen_tx_vld
|
||||
// slot k currently tracks an outstanding transaction
assign tx_vld_o[k] = tx_stat_q[k].vld;
|
||||
// address rebuilt from the word tag of the entry selected by tx_stat_q[k].ptr;
// the left shift by XLEN_ALIGN_BYTES leaves the byte-offset bits at zero
assign tx_paddr_o[k] = {
|
||||
{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[tx_stat_q[k].ptr].wtag << riscv::XLEN_ALIGN_BYTES
|
||||
};
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// openpiton does not understand byte enable sigs
|
||||
// need to convert to the four cases:
|
||||
// 00: byte
|
||||
// 01: halfword
|
||||
// 10: word
|
||||
// 11: dword
|
||||
// non-contiguous writes need to be serialized!
|
||||
// e.g. merged dwords with BE like this: 8'b01001100
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// get byte offset
|
||||
lzc #(
|
||||
.WIDTH(riscv::XLEN / 8)
|
||||
) i_vld_bdirty (
|
||||
.in_i (bdirty[dirty_ptr]),
|
||||
.cnt_o (bdirty_off),
|
||||
.empty_o()
|
||||
);
|
||||
|
||||
// add the offset to the physical base address of this buffer entry
|
||||
assign miss_paddr_o = {wbuffer_dirty_mux.wtag, bdirty_off};
|
||||
assign miss_id_o = tx_id;
|
||||
|
||||
// is there any dirty word to be transmitted, and is there a free TX slot?
|
||||
assign miss_req_o = (|dirty) && free_tx_slots;
|
||||
|
||||
// get size of aligned words, and the corresponding byte enables
|
||||
// note: openpiton can only handle aligned offsets + size, and hence
|
||||
// we have to split unaligned data into multiple transfers (see toSize64)
|
||||
// e.g. if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000
|
||||
if (riscv::IS_XLEN64) begin : gen_size_64b
|
||||
assign miss_size_o = {1'b0, toSize64(bdirty[dirty_ptr])};
|
||||
end else begin : gen_size_32b
|
||||
assign miss_size_o = {1'b0, toSize32(bdirty[dirty_ptr])};
|
||||
end
|
||||
|
||||
// replicate transfers shorter than a dword
|
||||
assign miss_wdata_o = riscv::IS_XLEN64 ? repData64(
|
||||
wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
|
||||
) : repData32(
|
||||
wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
|
||||
);
|
||||
if (ariane_pkg::DATA_USER_EN) begin
|
||||
assign miss_wuser_o = riscv::IS_XLEN64 ? repData64(
|
||||
wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
|
||||
) : repData32(
|
||||
wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
|
||||
);
|
||||
end else begin
|
||||
assign miss_wuser_o = '0;
|
||||
end
|
||||
|
||||
assign tx_be = riscv::IS_XLEN64 ? to_byte_enable8(
|
||||
bdirty_off, miss_size_o[1:0]
|
||||
) : to_byte_enable4(
|
||||
bdirty_off, miss_size_o[1:0]
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// TX status registers and ID counters
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// TODO: make this fall through if timing permits it
|
||||
fifo_v3 #(
|
||||
.FALL_THROUGH(1'b0),
|
||||
.DATA_WIDTH ($clog2(DCACHE_MAX_TX)),
|
||||
.DEPTH (DCACHE_MAX_TX)
|
||||
) i_rtrn_id_fifo (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (1'b0),
|
||||
.testmode_i(1'b0),
|
||||
.full_o (),
|
||||
.empty_o (rtrn_empty),
|
||||
.usage_o (),
|
||||
.data_i (miss_rtrn_id_i),
|
||||
.push_i (miss_rtrn_vld_i),
|
||||
.data_o (rtrn_id),
|
||||
.pop_i (evict)
|
||||
);
|
||||
|
||||
// Next-state logic for the TX status table (tx_stat_d), plus generation of the
// evict strobe and the cache word write request (wr_req_o).
// Statement order matters: the allocation at the bottom is assigned after the
// clear above it, so it would win if tx_id and rtrn_id were equal in the same
// cycle (the tx_status assertion in this module flags that case).
always_comb begin : p_tx_stat
|
||||
// defaults: hold the table, no eviction, no cache write request
tx_stat_d = tx_stat_q;
|
||||
evict = 1'b0;
|
||||
wr_req_o = '0;
|
||||
|
||||
// clear entry if it is clear whether it can be pushed to the cache or not
|
||||
// a returned ID is pending (rtrn_empty low) and the referenced buffer word has
// a known cache state (checked set); otherwise nothing is done this cycle
if ((!rtrn_empty) && wbuffer_q[rtrn_ptr].checked) begin
|
||||
// check if data is clean and can be written, otherwise skip
|
||||
// check if CL is present, otherwise skip
|
||||
if ((|wr_data_be_o) && (|wbuffer_q[rtrn_ptr].hit_oh)) begin
|
||||
// request a write into the way(s) flagged in hit_oh
wr_req_o = wbuffer_q[rtrn_ptr].hit_oh;
|
||||
// the slot is only released once the cache acknowledges the write
if (wr_ack_i) begin
|
||||
evict = 1'b1;
|
||||
tx_stat_d[rtrn_id].vld = 1'b0;
|
||||
end
|
||||
end else begin
|
||||
// nothing to write to the cache: release the slot right away
evict = 1'b1;
|
||||
tx_stat_d[rtrn_id].vld = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
// allocate a new entry
|
||||
// dirty_rd_en is raised in p_buffer when a miss request is acknowledged;
// the slot records the buffer entry and the byte enables that were sent
if (dirty_rd_en) begin
|
||||
tx_stat_d[tx_id].vld = 1'b1;
|
||||
tx_stat_d[tx_id].ptr = dirty_ptr;
|
||||
tx_stat_d[tx_id].be = tx_be;
|
||||
end
|
||||
end
|
||||
|
||||
assign free_tx_slots = |(~tx_vld_o);
|
||||
|
||||
// next free TX slot ID to allocate:
// the request vector is the set of currently invalid (free) slots (~tx_vld_o),
// the selected index is tx_id, and the grant input is dirty_rd_en, i.e. the
// cycle in which a dirty word is actually handed to the miss unit.
// only the index output is used; data/req/gnt outputs are left unconnected.
|
||||
rr_arb_tree #(
|
||||
.NumIn (DCACHE_MAX_TX),
|
||||
.LockIn (1'b1),
|
||||
.DataWidth(1)
|
||||
) i_tx_id_rr (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i('0),
|
||||
.rr_i ('0),
|
||||
.req_i (~tx_vld_o),
|
||||
.gnt_o (),
|
||||
.data_i ('0),
|
||||
.gnt_i (dirty_rd_en),
|
||||
.req_o (),
|
||||
.data_o (),
|
||||
.idx_o (tx_id)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// cache readout & update
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
assign extract_tag = rd_paddr >> DCACHE_INDEX_WIDTH;
|
||||
assign rd_tag_d = extract_tag[DCACHE_TAG_WIDTH-1:0];
|
||||
|
||||
// trigger TAG readout in cache
|
||||
assign rd_tag_only_o = 1'b1;
|
||||
assign rd_paddr = {
|
||||
{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_check_mux.wtag << riscv::XLEN_ALIGN_BYTES
|
||||
};
|
||||
assign rd_req_o = |tocheck;
|
||||
assign rd_tag_o = rd_tag_q; //delay by one cycle
|
||||
assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
|
||||
assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0];
|
||||
assign check_en_d = rd_req_o & rd_ack_i;
|
||||
|
||||
// cache update port
|
||||
assign rtrn_ptr = tx_stat_q[rtrn_id].ptr;
|
||||
// if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache
|
||||
// when the TX returns
|
||||
assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty);
|
||||
assign wr_paddr = {
|
||||
{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[rtrn_ptr].wtag << riscv::XLEN_ALIGN_BYTES
|
||||
};
|
||||
assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
|
||||
assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
|
||||
assign wr_data_o = wbuffer_q[rtrn_ptr].data;
|
||||
assign wr_user_o = wbuffer_q[rtrn_ptr].user;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// readout of status bits, index calculation
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
logic [DCACHE_WBUF_DEPTH-1:0][DCACHE_CL_IDX_WIDTH-1:0] wtag_comp;
|
||||
|
||||
assign wr_cl_vld_d = wr_cl_vld_i;
|
||||
assign wr_cl_idx_d = wr_cl_idx_i;
|
||||
|
||||
// Per write buffer entry k: derive the status flags (bdirty, dirty, valid,
// wbuffer_hit_oh, inval_hit, tocheck) from the registered entry wbuffer_q[k].
for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_flags
|
||||
// only for debug, will be pruned
|
||||
// (debug_paddr is only driven when CVA6Cfg.DebugEn is set)
if (CVA6Cfg.DebugEn) begin
|
||||
assign debug_paddr[k] = {
|
||||
{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[k].wtag << riscv::XLEN_ALIGN_BYTES
|
||||
};
|
||||
end
|
||||
|
||||
// dirty bytes that are ready for transmission.
|
||||
// note that we cannot retransmit a word that is already in-flight
|
||||
// since the multiple transactions might overtake each other in the memory system!
|
||||
// hence the whole word reports no dirty bytes while any of its txblock bits is set
assign bdirty[k] = (|wbuffer_q[k].txblock) ? '0 : wbuffer_q[k].dirty & wbuffer_q[k].valid;
|
||||
|
||||
|
||||
// word-level summaries: at least one transmittable dirty byte / one valid byte
assign dirty[k] = |bdirty[k];
|
||||
assign valid[k] = |wbuffer_q[k].valid;
|
||||
// the incoming request address (tag + index bits above the byte offset) matches
// the word tag of this valid entry
assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]});
|
||||
|
||||
// checks if an invalidation/cache refill hits a particular word
|
||||
// note: an invalidation can hit multiple words!
|
||||
// need to respect previous cycle, too, since we add a cycle of latency to the rd_hit_oh_i signal...
|
||||
// wtag_comp is the cache line index portion of the word tag; it is compared
// against the written line index of this cycle (_d) and of the previous cycle (_q)
assign wtag_comp[k] = wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES-1:DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES];
|
||||
assign inval_hit[k] = (wr_cl_vld_d & valid[k] & (wtag_comp[k] == wr_cl_idx_d)) |
|
||||
(wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q));
|
||||
|
||||
// these words have to be looked up in the cache
|
||||
assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k];
|
||||
end
|
||||
|
||||
assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr;
|
||||
assign rdy = (|wbuffer_hit_oh) | (~full);
|
||||
|
||||
// next free entry in the buffer
|
||||
lzc #(
|
||||
.WIDTH(DCACHE_WBUF_DEPTH)
|
||||
) i_vld_lzc (
|
||||
.in_i (~valid),
|
||||
.cnt_o (next_ptr),
|
||||
.empty_o(full)
|
||||
);
|
||||
|
||||
// get index of hit
|
||||
lzc #(
|
||||
.WIDTH(DCACHE_WBUF_DEPTH)
|
||||
) i_hit_lzc (
|
||||
.in_i (wbuffer_hit_oh),
|
||||
.cnt_o (hit_ptr),
|
||||
.empty_o()
|
||||
);
|
||||
|
||||
// next dirty word to serve
|
||||
rr_arb_tree #(
|
||||
.NumIn (DCACHE_WBUF_DEPTH),
|
||||
.LockIn (1'b1),
|
||||
.DataType(wbuffer_t)
|
||||
) i_dirty_rr (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i('0),
|
||||
.rr_i ('0),
|
||||
.req_i (dirty),
|
||||
.gnt_o (),
|
||||
.data_i (wbuffer_q),
|
||||
.gnt_i (dirty_rd_en),
|
||||
.req_o (),
|
||||
.data_o (wbuffer_dirty_mux),
|
||||
.idx_o (dirty_ptr)
|
||||
);
|
||||
|
||||
// next word to lookup in the cache
|
||||
rr_arb_tree #(
|
||||
.NumIn (DCACHE_WBUF_DEPTH),
|
||||
.DataType(wbuffer_t)
|
||||
) i_clean_rr (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i('0),
|
||||
.rr_i ('0),
|
||||
.req_i (tocheck),
|
||||
.gnt_o (),
|
||||
.data_i (wbuffer_q),
|
||||
.gnt_i (check_en_d),
|
||||
.req_o (),
|
||||
.data_o (wbuffer_check_mux),
|
||||
.idx_o (check_ptr_d)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// update logic
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
assign req_port_o.data_rvalid = '0;
|
||||
assign req_port_o.data_rdata = '0;
|
||||
assign req_port_o.data_ruser = '0;
|
||||
assign req_port_o.data_rid = '0;
|
||||
|
||||
assign rd_hit_oh_d = rd_hit_oh_i;
|
||||
|
||||
logic ni_inside, ni_conflict;
|
||||
assign ni_inside = |ni_pending_q;
|
||||
assign ni_conflict = CVA6Cfg.NonIdemPotenceEn && is_ni && ni_inside;
|
||||
assign not_ni_o = !ni_inside;
|
||||
assign empty_o = !(|valid);
|
||||
|
||||
// TODO: rewrite and separate into MUXES and write strobe logic
|
||||
// Next-state logic for the write buffer entries (wbuffer_d) and the
// non-idempotent pending flags (ni_pending_d); also drives dirty_rd_en,
// req_port_o.data_gnt and wbuffer_wren.
// The steps below are applied in program order with blocking assignments, so
// when two steps write the same field in the same cycle the later one wins.
always_comb begin : p_buffer
|
||||
// defaults: hold state, no transmit, no grant, no write strobe
wbuffer_d = wbuffer_q;
|
||||
ni_pending_d = ni_pending_q;
|
||||
dirty_rd_en = 1'b0;
|
||||
req_port_o.data_gnt = 1'b0;
|
||||
wbuffer_wren = 1'b0;
|
||||
|
||||
// TAG lookup returns, mark corresponding word
|
||||
// check_en_q1 / check_ptr_q1 are the lookup enable and pointer delayed by two
// register stages, rd_hit_oh_q is rd_hit_oh_i delayed by one (see p_regs)
if (check_en_q1) begin
|
||||
// only record the result if the entry still holds valid bytes
if (|wbuffer_q[check_ptr_q1].valid) begin
|
||||
wbuffer_d[check_ptr_q1].checked = 1'b1;
|
||||
wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q;
|
||||
end
|
||||
end
|
||||
|
||||
// if an invalidation or cache line refill comes in and hits on the write buffer,
|
||||
// we have to discard our knowledge of the corresponding cacheline state
|
||||
// (placed after the lookup update, so it overrides a checked flag set above)
for (int k = 0; k < DCACHE_WBUF_DEPTH; k++) begin
|
||||
if (inval_hit[k]) begin
|
||||
wbuffer_d[k].checked = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
// once TX write response came back, we can clear the TX block. if it was not dirty, we
|
||||
// can completely evict it - otherwise we have to leave it there for retransmission
|
||||
if (evict) begin
|
||||
// only the bytes that were part of the returning transaction are touched
for (int k = 0; k < (riscv::XLEN / 8); k++) begin
|
||||
if (tx_stat_q[rtrn_id].be[k]) begin
|
||||
wbuffer_d[rtrn_ptr].txblock[k] = 1'b0;
|
||||
// the dirty test reads the registered state (wbuffer_q), not wbuffer_d
if (!wbuffer_q[rtrn_ptr].dirty[k]) begin
|
||||
wbuffer_d[rtrn_ptr].valid[k] = 1'b0;
|
||||
|
||||
// NOTE: this is not strictly needed, but makes it much
|
||||
// easier to debug, since no invalid data remains in the buffer
|
||||
// wbuffer_d[rtrn_ptr].data[k*8 +:8] = '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
// if all bytes are evicted, clear the cache status flag
|
||||
// (this test reads wbuffer_d, i.e. the valid bits as updated by the loop above)
if (wbuffer_d[rtrn_ptr].valid == 0) begin
|
||||
wbuffer_d[rtrn_ptr].checked = 1'b0;
|
||||
ni_pending_d[rtrn_ptr] = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
// mark bytes sent out to the memory system
|
||||
// request accepted by the miss unit: pulse dirty_rd_en and move the bytes
// selected by tx_be from "dirty" to "txblock" in the entry at dirty_ptr
if (miss_req_o && miss_ack_i) begin
|
||||
dirty_rd_en = 1'b1;
|
||||
for (int k = 0; k < (riscv::XLEN / 8); k++) begin
|
||||
if (tx_be[k]) begin
|
||||
wbuffer_d[dirty_ptr].dirty[k] = 1'b0;
|
||||
wbuffer_d[dirty_ptr].txblock[k] = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// write new word into the buffer
|
||||
// rdy is set when the request hits an existing entry or the buffer is not full;
// wr_ptr is the hit entry if there is one, otherwise the next free entry
if (req_port_i.data_req && rdy) begin
|
||||
// in case we have an NI address, need to drain the buffer first
|
||||
// in case we are serving an NI address, we block until it is written to memory
|
||||
// the grant is withheld only when ni_conflict is set, i.e. a request to a
// non-idempotent region arrives while some entry is still flagged in ni_pending_q
if (!ni_conflict) begin //empty of NI operations
|
||||
wbuffer_wren = 1'b1;
|
||||
|
||||
req_port_o.data_gnt = 1'b1;
|
||||
ni_pending_d[wr_ptr] = is_ni;
|
||||
|
||||
// a written entry always needs a (new) tag lookup
wbuffer_d[wr_ptr].checked = 1'b0;
|
||||
wbuffer_d[wr_ptr].wtag = {
|
||||
req_port_i.address_tag,
|
||||
req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]
|
||||
};
|
||||
|
||||
// mark bytes as dirty
|
||||
// txblock is left untouched here, so a byte that is rewritten while in flight
// ends up valid, dirty and txblock at the same time
for (int k = 0; k < (riscv::XLEN / 8); k++) begin
|
||||
if (req_port_i.data_be[k]) begin
|
||||
wbuffer_d[wr_ptr].valid[k] = 1'b1;
|
||||
wbuffer_d[wr_ptr].dirty[k] = 1'b1;
|
||||
wbuffer_d[wr_ptr].data[k*8+:8] = req_port_i.data_wdata[k*8+:8];
|
||||
// user bits are stored only when DATA_USER_EN is set, otherwise zeroed
if (ariane_pkg::DATA_USER_EN) begin
|
||||
wbuffer_d[wr_ptr].user[k*8+:8] = req_port_i.data_wuser[k*8+:8];
|
||||
end else begin
|
||||
wbuffer_d[wr_ptr].user[k*8+:8] = '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// ff's
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// State registers of the write buffer. All registers are cleared to zero by the
// asynchronous active-low reset rst_ni and otherwise load their *_d value on the
// rising edge of clk_i.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
|
||||
if (!rst_ni) begin
|
||||
wbuffer_q <= '{default: '0};
|
||||
tx_stat_q <= '{default: '0};
|
||||
ni_pending_q <= '0;
|
||||
check_ptr_q <= '0;
|
||||
check_ptr_q1 <= '0;
|
||||
check_en_q <= '0;
|
||||
check_en_q1 <= '0;
|
||||
rd_tag_q <= '0;
|
||||
rd_hit_oh_q <= '0;
|
||||
wr_cl_vld_q <= '0;
|
||||
wr_cl_idx_q <= '0;
|
||||
end else begin
|
||||
wbuffer_q <= wbuffer_d;
|
||||
tx_stat_q <= tx_stat_d;
|
||||
ni_pending_q <= ni_pending_d;
|
||||
// two-stage delay line for the tag lookup pointer: _d -> _q -> _q1
check_ptr_q <= check_ptr_d;
|
||||
check_ptr_q1 <= check_ptr_q;
|
||||
// matching two-stage delay line for the tag lookup enable
check_en_q <= check_en_d;
|
||||
check_en_q1 <= check_en_q;
|
||||
// rd_tag_q drives rd_tag_o, i.e. the tag is presented one cycle after rd_tag_d
rd_tag_q <= rd_tag_d;
|
||||
rd_hit_oh_q <= rd_hit_oh_d;
|
||||
// previous-cycle copy of the cache line write strobe/index, used by inval_hit
wr_cl_vld_q <= wr_cl_vld_d;
|
||||
wr_cl_idx_q <= wr_cl_idx_d;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// assertions
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
|
||||
hot1 :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> $onehot0(
|
||||
wbuffer_hit_oh
|
||||
))
|
||||
else $fatal(1, "[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1");
|
||||
|
||||
tx_status :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (!rst_ni) evict && miss_ack_i && miss_req_o |-> (tx_id != rtrn_id))
|
||||
else $fatal(1, "[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle");
|
||||
|
||||
tx_valid0 :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> tx_stat_q[rtrn_id].vld)
|
||||
else $fatal(1, "[l1 dcache wbuffer] evicting invalid transaction slot");
|
||||
|
||||
tx_valid1 :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid)
|
||||
else $fatal(1, "[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid");
|
||||
|
||||
write_full :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> req_port_o.data_gnt |-> ((!full) || (|wbuffer_hit_oh)))
|
||||
else $fatal(1, "[l1 dcache wbuffer] cannot write if full or no hit");
|
||||
|
||||
unused0 :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.tag_valid)
|
||||
else $fatal(1, "[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted");
|
||||
|
||||
unused1 :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.kill_req)
|
||||
else $fatal(1, "[l1 dcache wbuffer] req_port_i.kill_req should not be asserted");
|
||||
|
||||
for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_assert1
|
||||
for (genvar j = 0; j < (riscv::XLEN / 8); j++) begin : gen_assert2
|
||||
byteStates :
|
||||
assert property (
|
||||
@(posedge clk_i) disable iff (!rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} )
|
||||
else
|
||||
$fatal(
|
||||
1,
|
||||
"[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b",
|
||||
j,
|
||||
k,
|
||||
wbuffer_q[k].valid[j],
|
||||
wbuffer_q[k].dirty[j],
|
||||
wbuffer_q[k].txblock[j]
|
||||
);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
//pragma translate_on
|
||||
|
||||
endmodule // wt_dcache_wbuffer
|
||||
|
|
@ -0,0 +1,298 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 15.04.2017
|
||||
// Description: Commits to the architectural state resulting from the scoreboard.
|
||||
|
||||
|
||||
module commit_stage
|
||||
import ariane_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic halt_i, // request to halt the core
|
||||
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
|
||||
output exception_t exception_o, // take exception to controller
|
||||
output logic dirty_fp_state_o, // mark the F state as dirty
|
||||
input logic single_step_i, // we are in single step debug mode
|
||||
// from scoreboard
|
||||
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit
|
||||
output logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_o, // acknowledge that we are indeed committing
|
||||
// to register file
|
||||
output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o, // register file write address
|
||||
output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o, // register file write data
|
||||
output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o, // register file write enable
|
||||
output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o, // floating point register enable
|
||||
// Atomic memory operations
|
||||
input amo_resp_t amo_resp_i, // result of AMO operation
|
||||
// to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
|
||||
output logic [riscv::VLEN-1:0] pc_o,
|
||||
// to/from CSR file
|
||||
output fu_op csr_op_o, // decoded CSR operation
|
||||
output riscv::xlen_t csr_wdata_o, // data to write to CSR
|
||||
input riscv::xlen_t csr_rdata_i, // data to read from CSR
|
||||
input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit)
|
||||
output logic csr_write_fflags_o, // write the fflags CSR
|
||||
// commit signals to ex
|
||||
output logic commit_lsu_o, // commit the pending store
|
||||
input logic commit_lsu_ready_i, // commit buffer of LSU is ready
|
||||
output logic [TRANS_ID_BITS-1:0] commit_tran_id_o, // transaction id of first commit port
|
||||
output logic amo_valid_commit_o, // valid AMO in commit stage
|
||||
input logic no_st_pending_i, // there is no store pending
|
||||
output logic commit_csr_o, // commit the pending CSR instruction
|
||||
output logic fence_i_o, // flush I$ and pipeline
|
||||
output logic fence_o, // flush D$ and pipeline
|
||||
output logic flush_commit_o, // request a pipeline flush
|
||||
output logic sfence_vma_o // flush TLBs and pipeline
|
||||
);
|
||||
|
||||
// ila_0 i_ila_commit (
|
||||
// .clk(clk_i), // input wire clk
|
||||
// .probe0(commit_instr_i[0].pc), // input wire [63:0] probe0
|
||||
// .probe1(commit_instr_i[1].pc), // input wire [63:0] probe1
|
||||
// .probe2(commit_instr_i[0].valid), // input wire [0:0] probe2
|
||||
// .probe3(commit_instr_i[1].valid), // input wire [0:0] probe3
|
||||
// .probe4(commit_ack_o[0]), // input wire [0:0] probe4
|
||||
// .probe5(commit_ack_o[0]), // input wire [0:0] probe5
|
||||
// .probe6(1'b0), // input wire [0:0] probe6
|
||||
// .probe7(1'b0), // input wire [0:0] probe7
|
||||
// .probe8(1'b0), // input wire [0:0] probe8
|
||||
// .probe9(1'b0) // input wire [0:0] probe9
|
||||
// );
|
||||
|
||||
for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_waddr
|
||||
assign waddr_o[i] = commit_instr_i[i].rd[4:0];
|
||||
end
|
||||
|
||||
assign pc_o = commit_instr_i[0].pc;
|
||||
// Dirty the FP state if we are committing anything related to the FPU
|
||||
always_comb begin : dirty_fp_state
|
||||
dirty_fp_state_o = 1'b0;
|
||||
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
|
||||
dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
commit_instr_i[i].op
|
||||
)));
|
||||
// Check if we issued a vector floating-point instruction to the accelerator
|
||||
dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp;
|
||||
end
|
||||
end
|
||||
|
||||
assign commit_tran_id_o = commit_instr_i[0].trans_id;
|
||||
|
||||
logic instr_0_is_amo;
|
||||
assign instr_0_is_amo = is_amo(commit_instr_i[0].op);
|
||||
// -------------------
|
||||
// Commit Instruction
|
||||
// -------------------
|
||||
// write register file or commit instruction in LSU or CSR Buffer
|
||||
always_comb begin : commit
|
||||
// default assignments
|
||||
commit_ack_o[0] = 1'b0;
|
||||
|
||||
amo_valid_commit_o = 1'b0;
|
||||
|
||||
we_gpr_o[0] = 1'b0;
|
||||
we_fpr_o = '{default: 1'b0};
|
||||
commit_lsu_o = 1'b0;
|
||||
commit_csr_o = 1'b0;
|
||||
// amos will commit on port 0
|
||||
wdata_o[0] = (CVA6Cfg.RVA && amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result;
|
||||
csr_op_o = ADD; // this corresponds to a CSR NOP
|
||||
csr_wdata_o = {riscv::XLEN{1'b0}};
|
||||
fence_i_o = 1'b0;
|
||||
fence_o = 1'b0;
|
||||
sfence_vma_o = 1'b0;
|
||||
csr_write_fflags_o = 1'b0;
|
||||
flush_commit_o = 1'b0;
|
||||
|
||||
// we will not commit the instruction if we took an exception
|
||||
// and we do not commit the instruction if we requested a halt
|
||||
if (commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i) begin
|
||||
// we can definitely write the register file
|
||||
// if the instruction is not committing anything the destination
|
||||
commit_ack_o[0] = 1'b1;
|
||||
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[0].op)) begin
|
||||
we_fpr_o[0] = 1'b1;
|
||||
end else begin
|
||||
we_gpr_o[0] = 1'b1;
|
||||
end
|
||||
// check whether the instruction we retire was a store
|
||||
if ((!CVA6Cfg.RVA && commit_instr_i[0].fu == STORE) || (CVA6Cfg.RVA && commit_instr_i[0].fu == STORE && !instr_0_is_amo)) begin
|
||||
// check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
|
||||
if (commit_lsu_ready_i) begin
|
||||
commit_ack_o[0] = 1'b1;
|
||||
commit_lsu_o = 1'b1;
|
||||
// stall in case the store buffer is not able to accept anymore instructions
|
||||
end else begin
|
||||
commit_ack_o[0] = 1'b0;
|
||||
end
|
||||
end
|
||||
// ---------
|
||||
// FPU Flags
|
||||
// ---------
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
|
||||
// write the CSR with potential exception flags from retiring floating point instruction
|
||||
csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
|
||||
csr_write_fflags_o = 1'b1;
|
||||
commit_ack_o[0] = 1'b1;
|
||||
end
|
||||
end
|
||||
// ---------
|
||||
// CSR Logic
|
||||
// ---------
|
||||
// check whether the instruction we retire was a CSR instruction and it did not
|
||||
// throw an exception
|
||||
if (commit_instr_i[0].fu == CSR) begin
|
||||
// write the CSR file
|
||||
csr_op_o = commit_instr_i[0].op;
|
||||
csr_wdata_o = commit_instr_i[0].result;
|
||||
if (!csr_exception_i.valid) begin
|
||||
commit_csr_o = 1'b1;
|
||||
wdata_o[0] = csr_rdata_i;
|
||||
commit_ack_o[0] = 1'b1;
|
||||
end else begin
|
||||
commit_ack_o[0] = 1'b0;
|
||||
we_gpr_o[0] = 1'b0;
|
||||
end
|
||||
end
|
||||
// ------------------
|
||||
// SFENCE.VMA Logic
|
||||
// ------------------
|
||||
// sfence.vma is idempotent so we can safely re-execute it after returning
|
||||
// from interrupt service routine
|
||||
// check if this instruction was a SFENCE_VMA
|
||||
if (CVA6Cfg.RVS && commit_instr_i[0].op == SFENCE_VMA) begin
|
||||
// no store pending so we can flush the TLBs and pipeline
|
||||
sfence_vma_o = no_st_pending_i;
|
||||
// wait for the store buffer to drain until flushing the pipeline
|
||||
commit_ack_o[0] = no_st_pending_i;
|
||||
end
|
||||
// ------------------
|
||||
// FENCE.I Logic
|
||||
// ------------------
|
||||
// fence.i is idempotent so we can safely re-execute it after returning
|
||||
// from interrupt service routine
|
||||
// Fence synchronizes data and instruction streams. That means that we need to flush the private icache
|
||||
// and the private dcache. This is the most expensive instruction.
|
||||
if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && DCACHE_TYPE == int'(config_pkg::WB) && commit_instr_i[0].fu != STORE)) begin
|
||||
commit_ack_o[0] = no_st_pending_i;
|
||||
// tell the controller to flush the I$
|
||||
fence_i_o = no_st_pending_i;
|
||||
end
|
||||
// ------------------
|
||||
// FENCE Logic
|
||||
// ------------------
|
||||
// fence is idempotent so we can safely re-execute it after returning
|
||||
// from interrupt service routine
|
||||
if (commit_instr_i[0].op == FENCE) begin
|
||||
commit_ack_o[0] = no_st_pending_i;
|
||||
// tell the controller to flush the D$
|
||||
fence_o = no_st_pending_i;
|
||||
end
|
||||
// ------------------
|
||||
// AMO
|
||||
// ------------------
|
||||
if (CVA6Cfg.RVA && instr_0_is_amo) begin
|
||||
// AMO finished
|
||||
commit_ack_o[0] = amo_resp_i.ack;
|
||||
// flush the pipeline
|
||||
flush_commit_o = amo_resp_i.ack;
|
||||
amo_valid_commit_o = 1'b1;
|
||||
we_gpr_o[0] = amo_resp_i.ack;
|
||||
end
|
||||
end
|
||||
|
||||
if (CVA6Cfg.NrCommitPorts > 1) begin
|
||||
|
||||
commit_ack_o[1] = 1'b0;
|
||||
we_gpr_o[1] = 1'b0;
|
||||
wdata_o[1] = commit_instr_i[1].result;
|
||||
|
||||
// -----------------
|
||||
// Commit Port 2
|
||||
// -----------------
|
||||
// check if the second instruction can be committed as well and the first wasn't a CSR instruction
|
||||
// also if we are in single step mode don't retire the second instruction
|
||||
if (commit_ack_o[0] && commit_instr_i[1].valid
|
||||
&& !halt_i
|
||||
&& !(commit_instr_i[0].fu inside {CSR})
|
||||
&& !flush_dcache_i
|
||||
&& !instr_0_is_amo
|
||||
&& !single_step_i) begin
|
||||
// only if the first instruction didn't throw an exception and this instruction won't throw an exception
|
||||
// and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC
|
||||
if (!exception_o.valid && !commit_instr_i[1].ex.valid
|
||||
&& (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin
|
||||
|
||||
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[1].op)) we_fpr_o[1] = 1'b1;
|
||||
else we_gpr_o[1] = 1'b1;
|
||||
|
||||
commit_ack_o[1] = 1'b1;
|
||||
|
||||
// additionally check if we are retiring an FPU instruction because we need to make sure that we write all
|
||||
// exception flags
|
||||
if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
|
||||
if (csr_write_fflags_o)
|
||||
csr_wdata_o = {
|
||||
{riscv::XLEN - 5{1'b0}},
|
||||
(commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])
|
||||
};
|
||||
else csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[1].ex.cause[4:0]};
|
||||
|
||||
csr_write_fflags_o = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// -----------------------------
|
||||
// Exception & Interrupt Logic
|
||||
// -----------------------------
|
||||
// here we know for sure that we are taking the exception
|
||||
always_comb begin : exception_handling
  // Selects which exception (if any) is reported for the instruction on
  // commit port 0. Interrupt prioritisation is done in the CSR register file
  // (see the privileged spec note on trap priority); only the choice between
  // the exception sources visible here is made in this block.

  // Default: no exception is reported.
  exception_o.valid = 1'b0;
  exception_o.cause = '0;
  exception_o.tval  = '0;

  // Nothing can be raised unless commit port 0 holds a valid instruction.
  if (commit_instr_i[0].valid) begin
    if (commit_instr_i[0].ex.valid) begin
      // An exception already attached to the instruction (e.g. an instruction
      // page fault) wins over anything the CSR unit reports.
      exception_o = commit_instr_i[0].ex;
    end else if (csr_exception_i.valid) begin
      // Exception raised by the CSR access itself. The tval field is taken
      // from the instruction's exception record; per the original author's
      // note this still carries the instruction bits from the ID stage when
      // no earlier exception occurred.
      exception_o      = csr_exception_i;
      exception_o.tval = commit_instr_i[0].ex.tval;
    end
  end

  // While the core is halted no exception is signalled.
  if (halt_i) begin
    exception_o.valid = 1'b0;
  end
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,935 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License. //
|
||||
//
|
||||
// Author: Florian Zaruba - zarubaf@iis.ee.ethz.ch
|
||||
// Engineer: Sven Stucki - svstucki@student.ethz.ch
|
||||
//
|
||||
// Design Name: Compressed instruction decoder
|
||||
// Project Name: zero-riscy
|
||||
// Language: SystemVerilog
|
||||
//
|
||||
// Description: Decodes RISC-V compressed instructions into their RV32
|
||||
// equivalent. This module is fully combinatorial.
|
||||
|
||||
|
||||
module compressed_decoder #(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
input logic [31:0] instr_i,
|
||||
output logic [31:0] instr_o,
|
||||
output logic illegal_instr_o,
|
||||
output logic is_compressed_o
|
||||
);
|
||||
|
||||
// -------------------
|
||||
// Compressed Decoder
|
||||
// -------------------
|
||||
always_comb begin
|
||||
illegal_instr_o = 1'b0;
|
||||
instr_o = '0;
|
||||
is_compressed_o = 1'b1;
|
||||
instr_o = instr_i;
|
||||
|
||||
// I: | imm[11:0] | rs1 | funct3 | rd | opcode |
|
||||
// S: | imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode |
|
||||
unique case (instr_i[1:0])
|
||||
// C0
|
||||
riscv::OpcodeC0: begin
|
||||
unique case (instr_i[15:13])
|
||||
riscv::OpcodeC0Addi4spn: begin
|
||||
// c.addi4spn -> addi rd', x2, imm
|
||||
instr_o = {
|
||||
2'b0,
|
||||
instr_i[10:7],
|
||||
instr_i[12:11],
|
||||
instr_i[5],
|
||||
instr_i[6],
|
||||
2'b00,
|
||||
5'h02,
|
||||
3'b000,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
riscv::OpcodeOpImm
|
||||
};
|
||||
if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1;
|
||||
end
|
||||
|
||||
riscv::OpcodeC0Fld: begin
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
// c.fld -> fld rd', imm(rs1')
|
||||
// CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
|
||||
instr_o = {
|
||||
4'b0,
|
||||
instr_i[6:5],
|
||||
instr_i[12:10],
|
||||
3'b000,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b011,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
riscv::OpcodeLoadFp
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
riscv::OpcodeC0Lw: begin
|
||||
// c.lw -> lw rd', imm(rs1')
|
||||
instr_o = {
|
||||
5'b0,
|
||||
instr_i[5],
|
||||
instr_i[12:10],
|
||||
instr_i[6],
|
||||
2'b00,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b010,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
riscv::OpcodeLoad
|
||||
};
|
||||
end
|
||||
|
||||
riscv::OpcodeC0Ld: begin
|
||||
// RV64
|
||||
// c.ld -> ld rd', imm(rs1')
|
||||
// RV32
|
||||
// c.flw -> flw fprd', imm(rs1')
|
||||
if (riscv::IS_XLEN64) begin
|
||||
// CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
|
||||
instr_o = {
|
||||
4'b0,
|
||||
instr_i[6:5],
|
||||
instr_i[12:10],
|
||||
3'b000,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b011,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
riscv::OpcodeLoad
|
||||
};
|
||||
end else begin
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
// CFLW: | funct3 (change to LW) | imm[5:3] | rs1' | imm[2|6] | rd' | C0 |
|
||||
instr_o = {
|
||||
5'b0,
|
||||
instr_i[5],
|
||||
instr_i[12:10],
|
||||
instr_i[6],
|
||||
2'b00,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b010,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
riscv::OpcodeLoadFp
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
riscv::OpcodeC0Zcb: begin
|
||||
if (CVA6Cfg.RVZCB) begin
|
||||
unique case (instr_i[12:10])
|
||||
3'b000: begin
|
||||
// c.lbu -> lbu rd', uimm(rs1')
|
||||
instr_o = {
|
||||
10'b0,
|
||||
instr_i[5],
|
||||
instr_i[6],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b100,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
riscv::OpcodeLoad
|
||||
};
|
||||
end
|
||||
|
||||
3'b001: begin
|
||||
if (instr_i[6]) begin
|
||||
// c.lh -> lh rd', uimm(rs1')
|
||||
instr_o = {
|
||||
10'b0,
|
||||
instr_i[5],
|
||||
1'b0,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b001,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
riscv::OpcodeLoad
|
||||
};
|
||||
end else begin
|
||||
// c.lhu -> lhu rd', uimm(rs1')
|
||||
instr_o = {
|
||||
10'b0,
|
||||
instr_i[5],
|
||||
1'b0,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b101,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
riscv::OpcodeLoad
|
||||
};
|
||||
end
|
||||
end
|
||||
|
||||
3'b010: begin
|
||||
// c.sb -> sb rs2', uimm(rs1')
|
||||
instr_o = {
|
||||
7'b0,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b000,
|
||||
3'b0,
|
||||
instr_i[5],
|
||||
instr_i[6],
|
||||
riscv::OpcodeStore
|
||||
};
|
||||
end
|
||||
|
||||
3'b011: begin
  // c.sh -> sh rs2', uimm(rs1')
  // CSH: | funct6 | rs1' | 0 | uimm[1] | rs2' | C0 |
  instr_o = {
    7'b0,
    2'b01,
    instr_i[4:2],
    2'b01,
    instr_i[9:7],
    3'b001,
    3'b0,
    instr_i[5],
    1'b0,
    riscv::OpcodeStore
  };
  // Bit 6 is a fixed zero in the c.sh encoding; the pattern with bit 6 set
  // is reserved and must not be decoded as a store.
  if (instr_i[6]) illegal_instr_o = 1'b1;
end
|
||||
|
||||
default: begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
endcase
|
||||
|
||||
end else begin
|
||||
instr_o = instr_i;
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
riscv::OpcodeC0Fsd: begin
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
// c.fsd -> fsd rs2', imm(rs1')
|
||||
instr_o = {
|
||||
4'b0,
|
||||
instr_i[6:5],
|
||||
instr_i[12],
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b011,
|
||||
instr_i[11:10],
|
||||
3'b000,
|
||||
riscv::OpcodeStoreFp
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
riscv::OpcodeC0Sw: begin
|
||||
// c.sw -> sw rs2', imm(rs1')
|
||||
instr_o = {
|
||||
5'b0,
|
||||
instr_i[5],
|
||||
instr_i[12],
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b010,
|
||||
instr_i[11:10],
|
||||
instr_i[6],
|
||||
2'b00,
|
||||
riscv::OpcodeStore
|
||||
};
|
||||
end
|
||||
|
||||
riscv::OpcodeC0Sd: begin
|
||||
// RV64
|
||||
// c.sd -> sd rs2', imm(rs1')
|
||||
// RV32
|
||||
// c.fsw -> fsw fprs2', imm(rs1')
|
||||
if (riscv::IS_XLEN64) begin
|
||||
instr_o = {
|
||||
4'b0,
|
||||
instr_i[6:5],
|
||||
instr_i[12],
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b011,
|
||||
instr_i[11:10],
|
||||
3'b000,
|
||||
riscv::OpcodeStore
|
||||
};
|
||||
end else begin
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
instr_o = {
|
||||
5'b0,
|
||||
instr_i[5],
|
||||
instr_i[12],
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b010,
|
||||
instr_i[11:10],
|
||||
instr_i[6],
|
||||
2'b00,
|
||||
riscv::OpcodeStoreFp
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// C1
|
||||
riscv::OpcodeC1: begin
|
||||
unique case (instr_i[15:13])
|
||||
riscv::OpcodeC1Addi: begin
|
||||
// c.addi -> addi rd, rd, nzimm
|
||||
// c.nop -> addi 0, 0, 0
|
||||
instr_o = {
|
||||
{6{instr_i[12]}},
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
instr_i[11:7],
|
||||
3'b0,
|
||||
instr_i[11:7],
|
||||
riscv::OpcodeOpImm
|
||||
};
|
||||
end
|
||||
|
||||
|
||||
riscv::OpcodeC1Addiw: begin // or riscv::OpcodeC1Jal for RV32IC
|
||||
if (riscv::IS_XLEN64) begin
|
||||
// c.addiw -> addiw rd, rd, nzimm for RV64IC
|
||||
if (instr_i[11:7] != 5'h0) begin // only valid if the destination is not r0
|
||||
instr_o = {
|
||||
{6{instr_i[12]}},
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
instr_i[11:7],
|
||||
3'b0,
|
||||
instr_i[11:7],
|
||||
riscv::OpcodeOpImm32
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end else begin
|
||||
// c.jal -> jal x1, imm for RV32IC only
|
||||
instr_o = {
|
||||
instr_i[12],
|
||||
instr_i[8],
|
||||
instr_i[10:9],
|
||||
instr_i[6],
|
||||
instr_i[7],
|
||||
instr_i[2],
|
||||
instr_i[11],
|
||||
instr_i[5:3],
|
||||
{9{instr_i[12]}},
|
||||
5'b1,
|
||||
riscv::OpcodeJal
|
||||
};
|
||||
|
||||
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
riscv::OpcodeC1Li: begin
|
||||
// c.li -> addi rd, x0, nzimm
|
||||
instr_o = {
|
||||
{6{instr_i[12]}},
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
5'b0,
|
||||
3'b0,
|
||||
instr_i[11:7],
|
||||
riscv::OpcodeOpImm
|
||||
};
|
||||
end
|
||||
|
||||
riscv::OpcodeC1LuiAddi16sp: begin
|
||||
// c.lui -> lui rd, imm
|
||||
instr_o = {{15{instr_i[12]}}, instr_i[6:2], instr_i[11:7], riscv::OpcodeLui};
|
||||
|
||||
if (instr_i[11:7] == 5'h02) begin
|
||||
// c.addi16sp -> addi x2, x2, nzimm
|
||||
instr_o = {
|
||||
{3{instr_i[12]}},
|
||||
instr_i[4:3],
|
||||
instr_i[5],
|
||||
instr_i[2],
|
||||
instr_i[6],
|
||||
4'b0,
|
||||
5'h02,
|
||||
3'b000,
|
||||
5'h02,
|
||||
riscv::OpcodeOpImm
|
||||
};
|
||||
end
|
||||
|
||||
if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
|
||||
end
|
||||
|
||||
riscv::OpcodeC1MiscAlu: begin
|
||||
unique case (instr_i[11:10])
|
||||
2'b00, 2'b01: begin
|
||||
// 00: c.srli -> srli rd, rd, shamt
|
||||
// 01: c.srai -> srai rd, rd, shamt
|
||||
instr_o = {
|
||||
1'b0,
|
||||
instr_i[10],
|
||||
4'b0,
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b101,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
riscv::OpcodeOpImm
|
||||
};
|
||||
end
|
||||
|
||||
2'b10: begin
|
||||
// c.andi -> andi rd, rd, imm
|
||||
instr_o = {
|
||||
{6{instr_i[12]}},
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b111,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
riscv::OpcodeOpImm
|
||||
};
|
||||
end
|
||||
|
||||
2'b11: begin
|
||||
unique case ({
|
||||
instr_i[12], instr_i[6:5]
|
||||
})
|
||||
3'b000: begin
|
||||
// c.sub -> sub rd', rd', rs2'
|
||||
instr_o = {
|
||||
2'b01,
|
||||
5'b0,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b000,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
riscv::OpcodeOp
|
||||
};
|
||||
end
|
||||
|
||||
3'b001: begin
|
||||
// c.xor -> xor rd', rd', rs2'
|
||||
instr_o = {
|
||||
7'b0,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b100,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
riscv::OpcodeOp
|
||||
};
|
||||
end
|
||||
|
||||
3'b010: begin
|
||||
// c.or -> or rd', rd', rs2'
|
||||
instr_o = {
|
||||
7'b0,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b110,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
riscv::OpcodeOp
|
||||
};
|
||||
end
|
||||
|
||||
3'b011: begin
|
||||
// c.and -> and rd', rd', rs2'
|
||||
instr_o = {
|
||||
7'b0,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b111,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
riscv::OpcodeOp
|
||||
};
|
||||
end
|
||||
|
||||
3'b100: begin
|
||||
if (riscv::IS_XLEN64) begin
|
||||
// c.subw -> subw rd', rd', rs2'
|
||||
instr_o = {
|
||||
2'b01,
|
||||
5'b0,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b000,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
riscv::OpcodeOp32
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
3'b101: begin
|
||||
if (riscv::IS_XLEN64) begin
|
||||
// c.addw -> addw rd', rd', rs2'
|
||||
instr_o = {
|
||||
2'b00,
|
||||
5'b0,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b000,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
riscv::OpcodeOp32
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
3'b110: begin
|
||||
if (CVA6Cfg.RVZCB) begin
|
||||
// c.mul -> mul rd', rd', rs2'
|
||||
instr_o = {
|
||||
6'b0,
|
||||
1'b1,
|
||||
2'b01,
|
||||
instr_i[4:2],
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
3'b000,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
riscv::OpcodeOp
|
||||
};
|
||||
end else begin
|
||||
instr_o = instr_i;
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
3'b111: begin
  // {instr_i[12], instr_i[6:5]} == 3'b111: Zcb unary operations, selected by
  // instr_i[4:2]. All of them operate in place on rd' = instr_i[9:7].
  if (CVA6Cfg.RVZCB) begin

    unique case (instr_i[4:2])
      3'b000: begin
        // c.zext.b -> andi rd', rd', 0xff
        instr_o = {
          4'b0,
          8'hFF,
          2'b01,
          instr_i[9:7],
          3'b111,
          2'b01,
          instr_i[9:7],
          riscv::OpcodeOpImm
        };
      end

      3'b001: begin
        if (CVA6Cfg.RVB) begin
          // c.sext.b -> sext.b rd', rd'
          instr_o = {
            7'h30,
            5'h4,
            2'b01,
            instr_i[9:7],
            3'b001,
            2'b01,
            instr_i[9:7],
            riscv::OpcodeOpImm
          };
        end else illegal_instr_o = 1'b1;
      end

      3'b010: begin
        if (CVA6Cfg.RVB) begin
          // c.zext.h -> zext.h rd', rd'
          // (the expansion uses a different major opcode on RV64 and RV32)
          if (riscv::IS_XLEN64) begin
            instr_o = {
              7'h4,
              5'h0,
              2'b01,
              instr_i[9:7],
              3'b100,
              2'b01,
              instr_i[9:7],
              riscv::OpcodeOp32
            };
          end else begin
            instr_o = {
              7'h4,
              5'h0,
              2'b01,
              instr_i[9:7],
              3'b100,
              2'b01,
              instr_i[9:7],
              riscv::OpcodeOp
            };
          end
        end else illegal_instr_o = 1'b1;
      end

      3'b011: begin
        if (CVA6Cfg.RVB) begin
          // c.sext.h -> sext.h rd', rd'
          instr_o = {
            7'h30,
            5'h5,
            2'b01,
            instr_i[9:7],
            3'b001,
            2'b01,
            instr_i[9:7],
            riscv::OpcodeOpImm
          };
        end else illegal_instr_o = 1'b1;
      end

      3'b100: begin
        if (CVA6Cfg.RVB) begin
          // c.zext.w -> add.uw (RV64 only)
          if (riscv::IS_XLEN64) begin
            instr_o = {
              7'h4,
              5'h0,
              2'b01,
              instr_i[9:7],
              3'b000,
              2'b01,
              instr_i[9:7],
              riscv::OpcodeOp32
            };
          end else begin
            illegal_instr_o = 1'b1;
          end
        end else illegal_instr_o = 1'b1;
      end

      3'b101: begin
        // c.not -> xori rd', rd', -1
        instr_o = {
          12'hFFF,
          2'b01,
          instr_i[9:7],
          3'b100,
          2'b01,
          instr_i[9:7],
          riscv::OpcodeOpImm
        };
      end

      default: begin
        instr_o = instr_i;
        illegal_instr_o = 1'b1;
      end
    endcase
  end else begin
    // Without Zcb none of these encodings is defined: flag them as illegal
    // instead of letting the raw compressed word through as if it had been
    // decoded (same handling as the c.mul branch).
    instr_o = instr_i;
    illegal_instr_o = 1'b1;
  end
end
|
||||
endcase
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
riscv::OpcodeC1J: begin
|
||||
// 101: c.j -> jal x0, imm
|
||||
instr_o = {
|
||||
instr_i[12],
|
||||
instr_i[8],
|
||||
instr_i[10:9],
|
||||
instr_i[6],
|
||||
instr_i[7],
|
||||
instr_i[2],
|
||||
instr_i[11],
|
||||
instr_i[5:3],
|
||||
{9{instr_i[12]}},
|
||||
4'b0,
|
||||
~instr_i[15],
|
||||
riscv::OpcodeJal
|
||||
};
|
||||
end
|
||||
|
||||
riscv::OpcodeC1Beqz, riscv::OpcodeC1Bnez: begin
|
||||
// 0: c.beqz -> beq rs1', x0, imm
|
||||
// 1: c.bnez -> bne rs1', x0, imm
|
||||
instr_o = {
|
||||
{4{instr_i[12]}},
|
||||
instr_i[6:5],
|
||||
instr_i[2],
|
||||
5'b0,
|
||||
2'b01,
|
||||
instr_i[9:7],
|
||||
2'b00,
|
||||
instr_i[13],
|
||||
instr_i[11:10],
|
||||
instr_i[4:3],
|
||||
instr_i[12],
|
||||
riscv::OpcodeBranch
|
||||
};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// C2
|
||||
riscv::OpcodeC2: begin
|
||||
unique case (instr_i[15:13])
|
||||
riscv::OpcodeC2Slli: begin
|
||||
// c.slli -> slli rd, rd, shamt
|
||||
instr_o = {
|
||||
6'b0,
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
instr_i[11:7],
|
||||
3'b001,
|
||||
instr_i[11:7],
|
||||
riscv::OpcodeOpImm
|
||||
};
|
||||
end
|
||||
|
||||
riscv::OpcodeC2Fldsp: begin
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
// c.fldsp -> fld rd, imm(x2)
|
||||
instr_o = {
|
||||
3'b0,
|
||||
instr_i[4:2],
|
||||
instr_i[12],
|
||||
instr_i[6:5],
|
||||
3'b000,
|
||||
5'h02,
|
||||
3'b011,
|
||||
instr_i[11:7],
|
||||
riscv::OpcodeLoadFp
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
riscv::OpcodeC2Lwsp: begin
|
||||
// c.lwsp -> lw rd, imm(x2)
|
||||
instr_o = {
|
||||
4'b0,
|
||||
instr_i[3:2],
|
||||
instr_i[12],
|
||||
instr_i[6:4],
|
||||
2'b00,
|
||||
5'h02,
|
||||
3'b010,
|
||||
instr_i[11:7],
|
||||
riscv::OpcodeLoad
|
||||
};
|
||||
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
|
||||
end
|
||||
|
||||
riscv::OpcodeC2Ldsp: begin
|
||||
// RV64
|
||||
// c.ldsp -> ld rd, imm(x2)
|
||||
// RV32
|
||||
// c.flwsp -> flw fprd, imm(x2)
|
||||
if (riscv::IS_XLEN64) begin
|
||||
instr_o = {
|
||||
3'b0,
|
||||
instr_i[4:2],
|
||||
instr_i[12],
|
||||
instr_i[6:5],
|
||||
3'b000,
|
||||
5'h02,
|
||||
3'b011,
|
||||
instr_i[11:7],
|
||||
riscv::OpcodeLoad
|
||||
};
|
||||
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
|
||||
end else begin
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
instr_o = {
|
||||
4'b0,
|
||||
instr_i[3:2],
|
||||
instr_i[12],
|
||||
instr_i[6:4],
|
||||
2'b00,
|
||||
5'h02,
|
||||
3'b010,
|
||||
instr_i[11:7],
|
||||
riscv::OpcodeLoadFp
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
riscv::OpcodeC2JalrMvAdd: begin
|
||||
if (instr_i[12] == 1'b0) begin
|
||||
// c.mv -> add rd/rs1, x0, rs2
|
||||
instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOp};
|
||||
|
||||
if (instr_i[6:2] == 5'b0) begin
|
||||
// c.jr -> jalr x0, rd/rs1, 0
|
||||
instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, riscv::OpcodeJalr};
|
||||
// rs1 != 0
|
||||
illegal_instr_o = (instr_i[11:7] != '0) ? 1'b0 : 1'b1;
|
||||
end
|
||||
end else begin
|
||||
// c.add -> add rd, rd, rs2
|
||||
instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOp};
|
||||
|
||||
if (instr_i[6:2] == 5'b0) begin
|
||||
if (instr_i[11:7] == 5'b0) begin
|
||||
// c.ebreak -> ebreak
|
||||
instr_o = {32'h00_10_00_73};
|
||||
end else begin
|
||||
// c.jalr -> jalr x1, rs1, 0
|
||||
instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, riscv::OpcodeJalr};
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
riscv::OpcodeC2Fsdsp: begin
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
// c.fsdsp -> fsd rs2, imm(x2)
|
||||
instr_o = {
|
||||
3'b0,
|
||||
instr_i[9:7],
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
5'h02,
|
||||
3'b011,
|
||||
instr_i[11:10],
|
||||
3'b000,
|
||||
riscv::OpcodeStoreFp
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
riscv::OpcodeC2Swsp: begin
|
||||
// c.swsp -> sw rs2, imm(x2)
|
||||
instr_o = {
|
||||
4'b0,
|
||||
instr_i[8:7],
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
5'h02,
|
||||
3'b010,
|
||||
instr_i[11:9],
|
||||
2'b00,
|
||||
riscv::OpcodeStore
|
||||
};
|
||||
end
|
||||
|
||||
riscv::OpcodeC2Sdsp: begin
|
||||
// RV64
|
||||
// c.sdsp -> sd rs2, imm(x2)
|
||||
// RV32
|
||||
// c.fswsp -> fsw fprs2, imm(x2)
|
||||
if (riscv::IS_XLEN64) begin
|
||||
instr_o = {
|
||||
3'b0,
|
||||
instr_i[9:7],
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
5'h02,
|
||||
3'b011,
|
||||
instr_i[11:10],
|
||||
3'b000,
|
||||
riscv::OpcodeStore
|
||||
};
|
||||
end else begin
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
instr_o = {
|
||||
4'b0,
|
||||
instr_i[8:7],
|
||||
instr_i[12],
|
||||
instr_i[6:2],
|
||||
5'h02,
|
||||
3'b010,
|
||||
instr_i[11:9],
|
||||
2'b00,
|
||||
riscv::OpcodeStoreFp
|
||||
};
|
||||
end else begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
illegal_instr_o = 1'b1;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// normal instruction
|
||||
default: is_compressed_o = 1'b0;
|
||||
endcase
|
||||
|
||||
// Check if the instruction was illegal; if it was, output the offending instruction unmodified (instr_i is passed through as-is)
|
||||
if (illegal_instr_o) begin
|
||||
instr_o = instr_i;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,194 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 08.05.2017
|
||||
// Description: Flush controller
|
||||
|
||||
|
||||
module controller
|
||||
import ariane_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
output logic set_pc_commit_o, // Set PC om PC Gen
|
||||
output logic flush_if_o, // Flush the IF stage
|
||||
output logic flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard
|
||||
output logic flush_id_o, // Flush ID stage
|
||||
output logic flush_ex_o, // Flush EX stage
|
||||
output logic flush_bp_o, // Flush branch predictors
|
||||
output logic flush_icache_o, // Flush ICache
|
||||
output logic flush_dcache_o, // Flush DCache
|
||||
input logic flush_dcache_ack_i, // Acknowledge the whole DCache Flush
|
||||
output logic flush_tlb_o, // Flush TLBs
|
||||
|
||||
input logic halt_csr_i, // Halt request from CSR (WFI instruction)
|
||||
input logic halt_acc_i, // Halt request from accelerator dispatcher
|
||||
output logic halt_o, // Halt signal to commit stage
|
||||
input logic eret_i, // Return from exception
|
||||
input logic ex_valid_i, // We got an exception, flush the pipeline
|
||||
input logic set_debug_pc_i, // set the debug pc from CSR
|
||||
input bp_resolve_t resolved_branch_i, // We got a resolved branch, check if we need to flush the front-end
|
||||
input logic flush_csr_i, // We got an instruction which altered the CSR, flush the pipeline
|
||||
input logic fence_i_i, // fence.i in
|
||||
input logic fence_i, // fence in
|
||||
input logic sfence_vma_i, // We got an instruction to flush the TLBs and pipeline
|
||||
input logic flush_commit_i, // Flush request from commit stage
|
||||
input logic flush_acc_i // Flush request from accelerator
|
||||
);
|
||||
|
||||
// active fence - high if we are currently flushing the dcache
|
||||
logic fence_active_d, fence_active_q;
|
||||
logic flush_dcache;
|
||||
|
||||
// ------------
|
||||
// Flush CTRL
|
||||
// ------------
|
||||
always_comb begin : flush_ctrl
|
||||
fence_active_d = fence_active_q;
|
||||
set_pc_commit_o = 1'b0;
|
||||
flush_if_o = 1'b0;
|
||||
flush_unissued_instr_o = 1'b0;
|
||||
flush_id_o = 1'b0;
|
||||
flush_ex_o = 1'b0;
|
||||
flush_dcache = 1'b0;
|
||||
flush_icache_o = 1'b0;
|
||||
flush_tlb_o = 1'b0;
|
||||
flush_bp_o = 1'b0;
|
||||
// ------------
|
||||
// Mis-predict
|
||||
// ------------
|
||||
// flush on mispredict
|
||||
if (resolved_branch_i.is_mispredict) begin
|
||||
// flush only un-issued instructions
|
||||
flush_unissued_instr_o = 1'b1;
|
||||
// and if stage
|
||||
flush_if_o = 1'b1;
|
||||
end
|
||||
|
||||
// ---------------------------------
|
||||
// FENCE
|
||||
// ---------------------------------
|
||||
if (fence_i) begin
|
||||
// this can be seen as a CSR instruction with side-effect
|
||||
set_pc_commit_o = 1'b1;
|
||||
flush_if_o = 1'b1;
|
||||
flush_unissued_instr_o = 1'b1;
|
||||
flush_id_o = 1'b1;
|
||||
flush_ex_o = 1'b1;
|
||||
// this is not needed in the case since we
|
||||
// have a write-through cache in this case
|
||||
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
|
||||
flush_dcache = 1'b1;
|
||||
fence_active_d = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// ---------------------------------
|
||||
// FENCE.I
|
||||
// ---------------------------------
|
||||
if (fence_i_i) begin
|
||||
set_pc_commit_o = 1'b1;
|
||||
flush_if_o = 1'b1;
|
||||
flush_unissued_instr_o = 1'b1;
|
||||
flush_id_o = 1'b1;
|
||||
flush_ex_o = 1'b1;
|
||||
flush_icache_o = 1'b1;
|
||||
// this is not needed in the case since we
|
||||
// have a write-through cache in this case
|
||||
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
|
||||
flush_dcache = 1'b1;
|
||||
fence_active_d = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// this is not needed in the case since we
|
||||
// have a write-through cache in this case
|
||||
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
|
||||
// wait for the acknowledge here
|
||||
if (flush_dcache_ack_i && fence_active_q) begin
|
||||
fence_active_d = 1'b0;
|
||||
// keep the flush dcache signal high as long as we didn't get the acknowledge from the cache
|
||||
end else if (fence_active_q) begin
|
||||
flush_dcache = 1'b1;
|
||||
end
|
||||
end
|
||||
// ---------------------------------
|
||||
// SFENCE.VMA
|
||||
// ---------------------------------
|
||||
if (CVA6Cfg.RVS && sfence_vma_i) begin
|
||||
set_pc_commit_o = 1'b1;
|
||||
flush_if_o = 1'b1;
|
||||
flush_unissued_instr_o = 1'b1;
|
||||
flush_id_o = 1'b1;
|
||||
flush_ex_o = 1'b1;
|
||||
|
||||
flush_tlb_o = 1'b1;
|
||||
end
|
||||
|
||||
// Set PC to commit stage and flush pipeline
|
||||
if (flush_csr_i || flush_acc_i) begin
|
||||
set_pc_commit_o = 1'b1;
|
||||
flush_if_o = 1'b1;
|
||||
flush_unissued_instr_o = 1'b1;
|
||||
flush_id_o = 1'b1;
|
||||
flush_ex_o = 1'b1;
|
||||
end else if (CVA6Cfg.RVA && flush_commit_i) begin
|
||||
set_pc_commit_o = 1'b1;
|
||||
flush_if_o = 1'b1;
|
||||
flush_unissued_instr_o = 1'b1;
|
||||
flush_id_o = 1'b1;
|
||||
flush_ex_o = 1'b1;
|
||||
end
|
||||
|
||||
// ---------------------------------
|
||||
// 1. Exception
|
||||
// 2. Return from exception
|
||||
// ---------------------------------
|
||||
if (ex_valid_i || eret_i || (CVA6Cfg.DebugEn && set_debug_pc_i)) begin
|
||||
// don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal
|
||||
// for the PC Gen stage but instead tells it to take the PC we gave it
|
||||
set_pc_commit_o = 1'b0;
|
||||
flush_if_o = 1'b1;
|
||||
flush_unissued_instr_o = 1'b1;
|
||||
flush_id_o = 1'b1;
|
||||
flush_ex_o = 1'b1;
|
||||
// this potentially reduces performance, but is needed
|
||||
// to suppress speculative fetches to virtual memory from
|
||||
// machine mode. TODO: remove when PMA checkers have been
|
||||
// added to the system
|
||||
flush_bp_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// ----------------------
|
||||
// Halt Logic
|
||||
// ----------------------
|
||||
// halt_o is raised when either halt input (halt_csr_i, halt_acc_i) is set,
// or while fence_active_q is set and DCACHE_TYPE equals config_pkg::WB.
always_comb begin
  halt_o = (fence_active_q && (DCACHE_TYPE == int'(config_pkg::WB))) || halt_acc_i || halt_csr_i;
end
|
||||
|
||||
// ----------------------
|
||||
// Registers
|
||||
// ----------------------
|
||||
// State registers: both flops are cleared by the asynchronous, active-low reset.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    flush_dcache_o <= 1'b0;
    fence_active_q <= 1'b0;
  end else begin
    // the flush request is registered before leaving the module,
    // as this signal might be timing critical
    flush_dcache_o <= flush_dcache;
    fence_active_q <= fence_active_d;
  end
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 05.05.2017
|
||||
// Description: Buffer to hold CSR address, this acts like a functional unit
|
||||
// to the scoreboard.
|
||||
|
||||
|
||||
module csr_buffer
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic                clk_i,         // Clock
    input  logic                rst_ni,        // Asynchronous reset, active low
    input  logic                flush_i,       // Clears the valid bit of the buffered entry

    input  fu_data_t            fu_data_i,     // operand_a is forwarded as result, operand_b[11:0] is the CSR address

    output logic                csr_ready_o,   // FU is ready, i.e. not busy
    input  logic                csr_valid_i,   // Input is valid
    output riscv::xlen_t        csr_result_o,  // Combinational copy of fu_data_i.operand_a
    input  logic                csr_commit_i,  // Commit the pending CSR operation to the CSR file
    output logic         [11:0] csr_addr_o     // CSR address to commit stage
);

  // Single-entry store buffer holding the CSR address that the commit stage
  // is going to need, together with a valid bit.
  struct packed {
    logic [11:0] csr_address;
    logic        valid;
  } csr_reg_n, csr_reg_q;

  // Scoreboard-facing outputs
  assign csr_addr_o   = csr_reg_q.csr_address;
  assign csr_result_o = fu_data_i.operand_a;

  // Next-state and ready computation
  always_comb begin : write
    csr_reg_n   = csr_reg_q;

    // Ready by default. Not ready while a valid, uncommitted CSR request is
    // buffered, or one is arriving right now, and no commit is seen.
    csr_ready_o = 1'b1;
    if (!csr_commit_i && (csr_valid_i || csr_reg_q.valid)) begin
      csr_ready_o = 1'b0;
    end

    // A valid request from the scoreboard: latch the CSR address
    if (csr_valid_i) begin
      csr_reg_n.valid       = 1'b1;
      csr_reg_n.csr_address = fu_data_i.operand_b[11:0];
    end

    // A commit without a simultaneous new request empties the buffer
    if (!csr_valid_i && csr_commit_i) begin
      csr_reg_n.valid = 1'b0;
    end

    // A flush has the last word and always invalidates the entry
    if (flush_i) begin
      csr_reg_n.valid = 1'b0;
    end
  end

  // Buffer register, cleared by the asynchronous reset
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      csr_reg_q <= '{default: 0};
    end else begin
      csr_reg_q <= csr_reg_n;
    end
  end

endmodule
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright 2023 ETH Zurich and University of Bologna.
|
||||
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Nils Wistoff <nwistoff@iis.ee.ethz.ch>
|
||||
|
||||
// Module stub for the cva6_accel_first_pass_decoder. Replace this with your accelerator's
|
||||
// first pass decoder.
|
||||
|
||||
module cva6_accel_first_pass_decoder
  import ariane_pkg::*;
(
    input  logic [31:0]       instruction_i,           // instruction from IF
    input  riscv::xs_t        fs_i,                    // floating point extension status
    input  riscv::xs_t        vs_i,                    // vector extension status
    output logic              is_accel_o,              // is an accelerator instruction
    output scoreboard_entry_t instruction_o,           // predecoded instruction
    output logic              illegal_instr_o,         // is an illegal instruction
    output logic              is_control_flow_instr_o  // is a control flow instruction
);

  // Stub: no input is inspected, every output is driven to constant zero.
  assign instruction_o           = '0;
  assign is_accel_o              = 1'b0;
  assign is_control_flow_instr_o = 1'b0;
  assign illegal_instr_o         = 1'b0;

  // Elaboration-time error: reported as soon as this stub is instantiated.
  // The continuation lines start at column 0 on purpose, any leading
  // whitespace would become part of the message.
  $error("cva6_accel_first_pass_decoder: instantiated non-functional module stub.\
Please replace this with your accelerator's first pass decoder \
(or unset ENABLE_ACCELERATOR).");

endmodule : cva6_accel_first_pass_decoder
|
||||
|
|
@ -0,0 +1,294 @@
|
|||
// Copyright 2024 Thales DIS France SAS
|
||||
//
|
||||
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Original Author: Yannick Casamatta - Thales
|
||||
// Date: 09/01/2024
|
||||
|
||||
|
||||
module cva6_rvfi
|
||||
import ariane_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
|
||||
parameter type rvfi_instr_t = logic,
|
||||
parameter type rvfi_probes_t = logic
|
||||
) (
|
||||
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
input rvfi_probes_t rvfi_probes_i,
|
||||
output rvfi_instr_t [CVA6Cfg.NrCommitPorts-1:0] rvfi_o
|
||||
|
||||
);
|
||||
|
||||
// ------------------------------------------
|
||||
// CVA6 configuration
|
||||
// ------------------------------------------
|
||||
// Extended config
|
||||
localparam bit RVF = (riscv::IS_XLEN64 | riscv::IS_XLEN32) & CVA6Cfg.FpuEn;
|
||||
localparam bit RVD = (riscv::IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn;
|
||||
localparam bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8;
|
||||
localparam bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec; // Are non-standard extensions present?
|
||||
localparam int unsigned FLen = RVD ? 64 : // D ext.
|
||||
RVF ? 32 : // F ext.
|
||||
CVA6Cfg.XF16 ? 16 : // Xf16 ext.
|
||||
CVA6Cfg.XF16ALT ? 16 : // Xf16alt ext.
|
||||
CVA6Cfg.XF8 ? 8 : // Xf8 ext.
|
||||
1; // Unused in case of no FP
|
||||
|
||||
// Transprecision floating-point extensions configuration
|
||||
localparam bit RVFVec = RVF & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled
|
||||
localparam bit XF16Vec = CVA6Cfg.XF16 & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled
|
||||
localparam bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled
|
||||
localparam bit XF8Vec = CVA6Cfg.XF8 & CVA6Cfg.XFVec & FLen>8; // FP8 vectors available if vectors and larger fmt enabled
|
||||
|
||||
localparam bit EnableAccelerator = CVA6Cfg.RVV; // Currently only used by V extension (Ara)
|
||||
localparam int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 5 : 4;
|
||||
|
||||
localparam NrRgprPorts = 2;
|
||||
|
||||
localparam bit NonIdemPotenceEn = CVA6Cfg.NrNonIdempotentRules && CVA6Cfg.NonIdempotentLength; // Set when both NrNonIdempotentRules and NonIdempotentLength are non-zero
|
||||
|
||||
localparam config_pkg::cva6_cfg_t CVA6ExtendCfg = {
|
||||
CVA6Cfg.NrCommitPorts,
|
||||
CVA6Cfg.AxiAddrWidth,
|
||||
CVA6Cfg.AxiDataWidth,
|
||||
CVA6Cfg.AxiIdWidth,
|
||||
CVA6Cfg.AxiUserWidth,
|
||||
CVA6Cfg.NrLoadBufEntries,
|
||||
CVA6Cfg.FpuEn,
|
||||
CVA6Cfg.XF16,
|
||||
CVA6Cfg.XF16ALT,
|
||||
CVA6Cfg.XF8,
|
||||
CVA6Cfg.RVA,
|
||||
CVA6Cfg.RVB,
|
||||
CVA6Cfg.RVV,
|
||||
CVA6Cfg.RVC,
|
||||
CVA6Cfg.RVZCB,
|
||||
CVA6Cfg.XFVec,
|
||||
CVA6Cfg.CvxifEn,
|
||||
CVA6Cfg.ZiCondExtEn,
|
||||
// Extended
|
||||
bit'(RVF),
|
||||
bit'(RVD),
|
||||
bit'(FpPresent),
|
||||
bit'(NSX),
|
||||
unsigned'(FLen),
|
||||
bit'(RVFVec),
|
||||
bit'(XF16Vec),
|
||||
bit'(XF16ALTVec),
|
||||
bit'(XF8Vec),
|
||||
unsigned'(NrRgprPorts),
|
||||
unsigned'(NrWbPorts),
|
||||
bit'(EnableAccelerator),
|
||||
CVA6Cfg.RVS,
|
||||
CVA6Cfg.RVU,
|
||||
CVA6Cfg.HaltAddress,
|
||||
CVA6Cfg.ExceptionAddress,
|
||||
CVA6Cfg.RASDepth,
|
||||
CVA6Cfg.BTBEntries,
|
||||
CVA6Cfg.BHTEntries,
|
||||
CVA6Cfg.DmBaseAddress,
|
||||
CVA6Cfg.NrPMPEntries,
|
||||
CVA6Cfg.PMPCfgRstVal,
|
||||
CVA6Cfg.PMPAddrRstVal,
|
||||
CVA6Cfg.PMPEntryReadOnly,
|
||||
CVA6Cfg.NOCType,
|
||||
CVA6Cfg.NrNonIdempotentRules,
|
||||
CVA6Cfg.NonIdempotentAddrBase,
|
||||
CVA6Cfg.NonIdempotentLength,
|
||||
CVA6Cfg.NrExecuteRegionRules,
|
||||
CVA6Cfg.ExecuteRegionAddrBase,
|
||||
CVA6Cfg.ExecuteRegionLength,
|
||||
CVA6Cfg.NrCachedRegionRules,
|
||||
CVA6Cfg.CachedRegionAddrBase,
|
||||
CVA6Cfg.CachedRegionLength,
|
||||
CVA6Cfg.MaxOutstandingStores,
|
||||
CVA6Cfg.DebugEn,
|
||||
NonIdemPotenceEn,
|
||||
CVA6Cfg.AxiBurstWriteEn
|
||||
};
|
||||
|
||||
logic flush;
|
||||
logic issue_instr_ack;
|
||||
logic fetch_entry_valid;
|
||||
logic [ 31:0] instruction;
|
||||
logic is_compressed;
|
||||
|
||||
logic [ TRANS_ID_BITS-1:0] issue_pointer;
|
||||
logic [CVA6ExtendCfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer;
|
||||
|
||||
logic flush_unissued_instr;
|
||||
logic decoded_instr_valid;
|
||||
logic decoded_instr_ack;
|
||||
|
||||
riscv::xlen_t rs1_forwarding;
|
||||
riscv::xlen_t rs2_forwarding;
|
||||
|
||||
scoreboard_entry_t [CVA6ExtendCfg.NrCommitPorts-1:0] commit_instr;
|
||||
exception_t ex_commit;
|
||||
riscv::priv_lvl_t priv_lvl;
|
||||
|
||||
lsu_ctrl_t lsu_ctrl;
|
||||
logic [ CVA6ExtendCfg.NrWbPorts-1:0][ riscv::XLEN-1:0] wbdata;
|
||||
logic [CVA6ExtendCfg.NrCommitPorts-1:0] commit_ack;
|
||||
logic [ riscv::PLEN-1:0] mem_paddr;
|
||||
logic debug_mode;
|
||||
logic [CVA6ExtendCfg.NrCommitPorts-1:0][ riscv::XLEN-1:0] wdata;
|
||||
|
||||
logic [ riscv::VLEN-1:0] lsu_addr;
|
||||
logic [ (riscv::XLEN/8)-1:0] lsu_rmask;
|
||||
logic [ (riscv::XLEN/8)-1:0] lsu_wmask;
|
||||
logic [ TRANS_ID_BITS-1:0] lsu_addr_trans_id;
|
||||
|
||||
assign flush = rvfi_probes_i.flush;
|
||||
assign issue_instr_ack = rvfi_probes_i.issue_instr_ack;
|
||||
assign fetch_entry_valid = rvfi_probes_i.fetch_entry_valid;
|
||||
assign instruction = rvfi_probes_i.instruction;
|
||||
assign is_compressed = rvfi_probes_i.is_compressed;
|
||||
|
||||
assign issue_pointer = rvfi_probes_i.issue_pointer;
|
||||
assign commit_pointer = rvfi_probes_i.commit_pointer;
|
||||
|
||||
assign flush_unissued_instr = rvfi_probes_i.flush_unissued_instr;
|
||||
assign decoded_instr_valid = rvfi_probes_i.decoded_instr_valid;
|
||||
assign decoded_instr_ack = rvfi_probes_i.decoded_instr_ack;
|
||||
|
||||
assign rs1_forwarding = rvfi_probes_i.rs1_forwarding;
|
||||
assign rs2_forwarding = rvfi_probes_i.rs2_forwarding;
|
||||
|
||||
assign commit_instr = rvfi_probes_i.commit_instr;
|
||||
assign ex_commit = rvfi_probes_i.ex_commit;
|
||||
assign priv_lvl = rvfi_probes_i.priv_lvl;
|
||||
|
||||
assign lsu_ctrl = rvfi_probes_i.lsu_ctrl;
|
||||
assign wbdata = rvfi_probes_i.wbdata;
|
||||
assign commit_ack = rvfi_probes_i.commit_ack;
|
||||
assign mem_paddr = rvfi_probes_i.mem_paddr;
|
||||
assign debug_mode = rvfi_probes_i.debug_mode;
|
||||
assign wdata = rvfi_probes_i.wdata;
|
||||
|
||||
assign lsu_addr = lsu_ctrl.vaddr;
|
||||
assign lsu_rmask = lsu_ctrl.fu == LOAD ? lsu_ctrl.be : '0;
|
||||
assign lsu_wmask = lsu_ctrl.fu == STORE ? lsu_ctrl.be : '0;
|
||||
assign lsu_addr_trans_id = lsu_ctrl.trans_id;
|
||||
|
||||
|
||||
//ID STAGE
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [31:0] instr;
|
||||
} issue_struct_t;
|
||||
issue_struct_t issue_n, issue_q;
|
||||
|
||||
// Next state of the single-entry instruction buffer (issue_q).
always_comb begin
  issue_n = issue_q;

  // the buffered instruction has been acknowledged: free the slot
  if (issue_instr_ack) begin
    issue_n.valid = 1'b0;
  end

  // capture a new fetch entry when the slot is free or is being freed this cycle;
  // for compressed instructions only the lower 16 bits are kept, zero-extended
  if (fetch_entry_valid && (issue_instr_ack || !issue_q.valid)) begin
    issue_n.instr = is_compressed ? {16'b0, instruction[15:0]} : instruction;
    issue_n.valid = 1'b1;
  end

  // a flush overrides everything above
  if (flush) begin
    issue_n.valid = 1'b0;
  end
end
|
||||
|
||||
// issue_q register: zeroed by the asynchronous, active-low reset.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) issue_q <= '0;
  else         issue_q <= issue_n;
end
|
||||
|
||||
//ISSUE STAGE
|
||||
|
||||
// this is the FIFO struct of the issue queue
|
||||
typedef struct packed {
|
||||
riscv::xlen_t rs1_rdata;
|
||||
riscv::xlen_t rs2_rdata;
|
||||
logic [riscv::VLEN-1:0] lsu_addr;
|
||||
logic [(riscv::XLEN/8)-1:0] lsu_rmask;
|
||||
logic [(riscv::XLEN/8)-1:0] lsu_wmask;
|
||||
riscv::xlen_t lsu_wdata;
|
||||
logic [31:0] instr;
|
||||
} sb_mem_t;
|
||||
sb_mem_t [NR_SB_ENTRIES-1:0] mem_q, mem_n;
|
||||
|
||||
// Next state of the per-transaction shadow memory (mem_q), indexed by
// scoreboard transaction ID.
always_comb begin : issue_fifo
  mem_n = mem_q;

  // A decoded instruction is accepted and not flushed: (re)initialise the
  // entry at issue_pointer with the forwarded operands and the buffered
  // instruction word; all LSU fields start at zero.
  if (decoded_instr_valid && decoded_instr_ack && !flush_unissued_instr) begin
    mem_n[issue_pointer] = '{
        rs1_rdata: rs1_forwarding,
        rs2_rdata: rs2_forwarding,
        instr:     issue_q.instr,
        default:   '0
    };
  end

  // Record memory access information in the entry of the accessing
  // transaction. A non-zero read mask takes priority over the write mask.
  if (lsu_rmask != 0) begin
    mem_n[lsu_addr_trans_id].lsu_rmask = lsu_rmask;
    mem_n[lsu_addr_trans_id].lsu_addr  = lsu_addr;
  end else if (lsu_wmask != 0) begin
    mem_n[lsu_addr_trans_id].lsu_wmask = lsu_wmask;
    mem_n[lsu_addr_trans_id].lsu_addr  = lsu_addr;
    // store data is taken from write-back port 1
    mem_n[lsu_addr_trans_id].lsu_wdata = wbdata[1];
  end
end
|
||||
|
||||
// mem_q register: every entry is zeroed by the asynchronous, active-low reset.
always_ff @(posedge clk_i or negedge rst_ni) begin : regs
  if (!rst_ni) mem_q <= '{default: sb_mem_t'(0)};
  else         mem_q <= mem_n;
end
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------
|
||||
// PACK
|
||||
//----------------------------------------------------------------------------------------------------------
|
||||
|
||||
// Assemble one RVFI record per commit port from the commit-stage probes and
// the shadow memory entry selected by that port's commit pointer.
always_comb begin
  for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin
    logic    exception;
    logic    env_call;
    sb_mem_t sb_entry;

    // shadow entry of the instruction committing on this port
    sb_entry  = mem_q[commit_pointer[i]];
    // the committing instruction is valid and an exception is flagged
    exception = commit_instr[i].valid && ex_commit.valid;
    // the exception cause is one of the three environment-call causes
    env_call  = ex_commit.cause == riscv::ENV_CALL_MMODE ||
                ex_commit.cause == riscv::ENV_CALL_SMODE ||
                ex_commit.cause == riscv::ENV_CALL_UMODE;

    // valid on an acknowledged commit without exception, or on an
    // environment-call exception
    rvfi_o[i].valid     = (commit_ack[i] && !ex_commit.valid) || (exception && env_call);
    // when trap, the instruction is not executed
    rvfi_o[i].trap      = exception;
    rvfi_o[i].cause     = ex_commit.cause;
    // 2'b10 is reported while in debug mode (if debug is enabled), else the privilege level
    rvfi_o[i].mode      = (CVA6ExtendCfg.DebugEn && debug_mode) ? 2'b10 : priv_lvl;
    rvfi_o[i].ixl       = riscv::XLEN == 64 ? 2 : 1;

    // instruction word, program counter and register addresses
    rvfi_o[i].insn      = sb_entry.instr;
    rvfi_o[i].pc_rdata  = commit_instr[i].pc;
    rvfi_o[i].rs1_addr  = commit_instr[i].rs1[4:0];
    rvfi_o[i].rs2_addr  = commit_instr[i].rs2[4:0];
    rvfi_o[i].rd_addr   = commit_instr[i].rd[4:0];

    // register data: sources come from the shadow entry; the destination
    // value is the scoreboard result for FP destinations, else wdata
    rvfi_o[i].rs1_rdata = sb_entry.rs1_rdata;
    rvfi_o[i].rs2_rdata = sb_entry.rs2_rdata;
    rvfi_o[i].rd_wdata  = (CVA6ExtendCfg.FpPresent && is_rd_fpr(commit_instr[i].op)) ?
        commit_instr[i].result : wdata[i];

    // memory access information
    rvfi_o[i].mem_addr  = sb_entry.lsu_addr;
    // So far, only write paddr is reported. TODO: read paddr
    rvfi_o[i].mem_paddr = mem_paddr;
    rvfi_o[i].mem_wmask = sb_entry.lsu_wmask;
    rvfi_o[i].mem_wdata = sb_entry.lsu_wdata;
    rvfi_o[i].mem_rmask = sb_entry.lsu_rmask;
    rvfi_o[i].mem_rdata = commit_instr[i].result;
  end
end
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2024 Thales DIS France SAS
|
||||
//
|
||||
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Original Author: Yannick Casamatta - Thales
|
||||
// Date: 09/01/2024
|
||||
|
||||
|
||||
// Purely combinational packer: bundles the individual probe inputs into a
// single rvfi_probes_t record. rvfi_probes_t must provide one field per
// input, named like the input without its "_i" suffix.
module cva6_rvfi_probes
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg       = config_pkg::cva6_cfg_empty,
    parameter type                   rvfi_probes_t = logic
) (
    // instruction buffer probes
    input logic        flush_i,
    input logic        issue_instr_ack_i,
    input logic        fetch_entry_valid_i,
    input logic [31:0] instruction_i,
    input logic        is_compressed_i,

    // scoreboard pointers
    input logic [TRANS_ID_BITS-1:0]                            issue_pointer_i,
    input logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer_i,

    // decode handshake
    input logic flush_unissued_instr_i,
    input logic decoded_instr_valid_i,
    input logic decoded_instr_ack_i,

    // forwarded source operands
    input riscv::xlen_t rs1_forwarding_i,
    input riscv::xlen_t rs2_forwarding_i,

    // commit stage
    input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
    input exception_t                                    ex_commit_i,
    input riscv::priv_lvl_t                              priv_lvl_i,

    // load/store unit, write-back and miscellaneous
    input lsu_ctrl_t                                         lsu_ctrl_i,
    input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0]     wbdata_i,
    input logic [CVA6Cfg.NrCommitPorts-1:0]                  commit_ack_i,
    input logic [riscv::PLEN-1:0]                            mem_paddr_i,
    input logic                                              debug_mode_i,
    input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i,

    output rvfi_probes_t rvfi_probes_o
);

  always_comb begin
    // start from all zeros so that any field not driven below reads as 0
    rvfi_probes_o = '0;

    // commit stage
    rvfi_probes_o.commit_instr         = commit_instr_i;
    rvfi_probes_o.commit_pointer       = commit_pointer_i;
    rvfi_probes_o.commit_ack           = commit_ack_i;
    rvfi_probes_o.ex_commit            = ex_commit_i;
    rvfi_probes_o.priv_lvl             = priv_lvl_i;
    rvfi_probes_o.debug_mode           = debug_mode_i;
    rvfi_probes_o.wdata                = wdata_i;

    // issue / decode
    rvfi_probes_o.issue_pointer        = issue_pointer_i;
    rvfi_probes_o.issue_instr_ack      = issue_instr_ack_i;
    rvfi_probes_o.decoded_instr_valid  = decoded_instr_valid_i;
    rvfi_probes_o.decoded_instr_ack    = decoded_instr_ack_i;
    rvfi_probes_o.flush_unissued_instr = flush_unissued_instr_i;
    rvfi_probes_o.rs1_forwarding       = rs1_forwarding_i;
    rvfi_probes_o.rs2_forwarding       = rs2_forwarding_i;

    // instruction buffer
    rvfi_probes_o.flush                = flush_i;
    rvfi_probes_o.fetch_entry_valid    = fetch_entry_valid_i;
    rvfi_probes_o.instruction          = instruction_i;
    rvfi_probes_o.is_compressed        = is_compressed_i;

    // load/store unit and write-back
    rvfi_probes_o.lsu_ctrl             = lsu_ctrl_i;
    rvfi_probes_o.wbdata               = wbdata_i;
    rvfi_probes_o.mem_paddr            = mem_paddr_i;
  end

endmodule
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
// Copyright 2021 Thales DIS design services SAS
|
||||
//
|
||||
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
|
||||
// Example coprocessor adds rs1,rs2(,rs3) together and gives back the result to the CPU via the CoreV-X-Interface.
|
||||
// Coprocessor delays the sending of the result depending on result least significant bits.
|
||||
|
||||
module cvxif_example_coprocessor
|
||||
import cvxif_pkg::*;
|
||||
import cvxif_instr_pkg::*;
|
||||
(
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input cvxif_req_t cvxif_req_i,
|
||||
output cvxif_resp_t cvxif_resp_o
|
||||
);
|
||||
|
||||
//Compressed interface
|
||||
logic x_compressed_valid_i;
|
||||
logic x_compressed_ready_o;
|
||||
x_compressed_req_t x_compressed_req_i;
|
||||
x_compressed_resp_t x_compressed_resp_o;
|
||||
//Issue interface
|
||||
logic x_issue_valid_i;
|
||||
logic x_issue_ready_o;
|
||||
x_issue_req_t x_issue_req_i;
|
||||
x_issue_resp_t x_issue_resp_o;
|
||||
//Commit interface
|
||||
logic x_commit_valid_i;
|
||||
x_commit_t x_commit_i;
|
||||
//Memory interface
|
||||
logic x_mem_valid_o;
|
||||
logic x_mem_ready_i;
|
||||
x_mem_req_t x_mem_req_o;
|
||||
x_mem_resp_t x_mem_resp_i;
|
||||
//Memory result interface
|
||||
logic x_mem_result_valid_i;
|
||||
x_mem_result_t x_mem_result_i;
|
||||
//Result interface
|
||||
logic x_result_valid_o;
|
||||
logic x_result_ready_i;
|
||||
x_result_t x_result_o;
|
||||
|
||||
assign x_compressed_valid_i = cvxif_req_i.x_compressed_valid;
|
||||
assign x_compressed_req_i = cvxif_req_i.x_compressed_req;
|
||||
assign x_issue_valid_i = cvxif_req_i.x_issue_valid;
|
||||
assign x_issue_req_i = cvxif_req_i.x_issue_req;
|
||||
assign x_commit_valid_i = cvxif_req_i.x_commit_valid;
|
||||
assign x_commit_i = cvxif_req_i.x_commit;
|
||||
assign x_mem_ready_i = cvxif_req_i.x_mem_ready;
|
||||
assign x_mem_resp_i = cvxif_req_i.x_mem_resp;
|
||||
assign x_mem_result_valid_i = cvxif_req_i.x_mem_result_valid;
|
||||
assign x_mem_result_i = cvxif_req_i.x_mem_result;
|
||||
assign x_result_ready_i = cvxif_req_i.x_result_ready;
|
||||
|
||||
assign cvxif_resp_o.x_compressed_ready = x_compressed_ready_o;
|
||||
assign cvxif_resp_o.x_compressed_resp = x_compressed_resp_o;
|
||||
assign cvxif_resp_o.x_issue_ready = x_issue_ready_o;
|
||||
assign cvxif_resp_o.x_issue_resp = x_issue_resp_o;
|
||||
assign cvxif_resp_o.x_mem_valid = x_mem_valid_o;
|
||||
assign cvxif_resp_o.x_mem_req = x_mem_req_o;
|
||||
assign cvxif_resp_o.x_result_valid = x_result_valid_o;
|
||||
assign cvxif_resp_o.x_result = x_result_o;
|
||||
|
||||
//Compressed interface
|
||||
assign x_compressed_ready_o = '0;
|
||||
assign x_compressed_resp_o.instr = '0;
|
||||
assign x_compressed_resp_o.accept = '0;
|
||||
|
||||
instr_decoder #(
|
||||
.NbInstr (cvxif_instr_pkg::NbInstr),
|
||||
.CoproInstr(cvxif_instr_pkg::CoproInstr)
|
||||
) instr_decoder_i (
|
||||
.clk_i (clk_i),
|
||||
.x_issue_req_i (x_issue_req_i),
|
||||
.x_issue_resp_o(x_issue_resp_o)
|
||||
);
|
||||
|
||||
typedef struct packed {
|
||||
x_issue_req_t req;
|
||||
x_issue_resp_t resp;
|
||||
} x_issue_t;
|
||||
|
||||
logic fifo_full, fifo_empty;
|
||||
logic x_issue_ready_q;
|
||||
logic instr_push, instr_pop;
|
||||
x_issue_t req_i;
|
||||
x_issue_t req_o;
|
||||
|
||||
|
||||
|
||||
assign instr_push = x_issue_resp_o.accept ? 1 : 0;
|
||||
assign instr_pop = (x_commit_i.x_commit_kill && x_commit_valid_i) || x_result_valid_o;
|
||||
assign x_issue_ready_q = ~fifo_full; // if something is in the fifo, the instruction is being processed
|
||||
// so we can't receive anything else
|
||||
assign req_i.req = x_issue_req_i;
|
||||
assign req_i.resp = x_issue_resp_o;
|
||||
|
||||
// x_issue_ready_o is a registered copy of x_issue_ready_q; it comes out of
// reset asserted.
always_ff @(posedge clk_i or negedge rst_ni) begin : regs
  if (!rst_ni) x_issue_ready_o <= 1'b1;
  else         x_issue_ready_o <= x_issue_ready_q;
end
|
||||
|
||||
fifo_v3 #(
|
||||
.FALL_THROUGH(1), //data_o ready and pop in the same cycle
|
||||
.DATA_WIDTH (64),
|
||||
.DEPTH (8),
|
||||
.dtype (x_issue_t)
|
||||
) fifo_commit_i (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (1'b0),
|
||||
.testmode_i(1'b0),
|
||||
.full_o (fifo_full),
|
||||
.empty_o (fifo_empty),
|
||||
.usage_o (),
|
||||
.data_i (req_i),
|
||||
.push_i (instr_push),
|
||||
.data_o (req_o),
|
||||
.pop_i (instr_pop)
|
||||
);
|
||||
|
||||
// 4-bit counter that is always enabled and is synchronously cleared whenever
// a commit that is not a kill is signalled. Its value `c` is compared against
// the low bits of the result to decide when the result becomes valid.
//
// The load/down/data inputs are not used by this example. They are tied off
// explicitly instead of being left unconnected, so that they do not float
// ('z) in simulation and do not trigger unconnected-input lint/synthesis
// warnings.
logic [3:0] c;
counter #(
    .WIDTH(4)
) counter_i (
    .clk_i     (clk_i),
    .rst_ni    (rst_ni),
    .clear_i   (~x_commit_i.x_commit_kill && x_commit_valid_i),
    .en_i      (1'b1),
    .load_i    (1'b0),  // never load
    .down_i    (1'b0),  // down-count request permanently deasserted
    .d_i       ('0),    // load value, unused because load_i is tied low
    .q_o       (c),
    .overflow_o()       // intentionally unused
);
|
||||
|
||||
// Result generation for the request at the head of the FIFO (req_o).
always_comb begin
  // fields copied straight from the buffered request; no exception is ever raised
  x_result_o.id      = req_o.req.id;
  x_result_o.rd      = req_o.req.instr[11:7];
  x_result_o.exc     = 0;
  x_result_o.exccode = 0;

  // the result is the sum of the source operands (rs[2] only when X_NUM_RS == 3)
  x_result_o.data = req_o.req.rs[0] + req_o.req.rs[1] + (X_NUM_RS == 3 ? req_o.req.rs[2] : 0);

  // the result is presented once the counter equals the low four result
  // bits, provided the FIFO actually holds a request
  x_result_valid_o = (c == x_result_o.data[3:0]) && !fifo_empty;

  // write back only for instructions that requested it, and only while valid
  x_result_o.we = req_o.resp.writeback & x_result_valid_o;
end
|
||||
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
// Copyright 2021 Thales DIS design services SAS
|
||||
//
|
||||
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
|
||||
|
||||
package cvxif_instr_pkg;

  // One decode-table entry. The decoder matches an offloaded instruction
  // against `instr` after AND-ing it with `mask`; `resp` is the issue
  // response to return for a match.
  typedef struct packed {
    logic [31:0]              instr;
    logic [31:0]              mask;
    cvxif_pkg::x_issue_resp_t resp;
  } copro_issue_resp_t;

  // 2 Possible RISCV instructions for Coprocessor
  parameter int unsigned NbInstr = 2;

  // Both entries compare only the 7 least significant bits.
  // They differ in the writeback flag of the response.
  parameter copro_issue_resp_t CoproInstr[NbInstr] = '{
      // custom1 opcode: accepted, no register write-back
      '{
          instr: 32'b00000_00_00000_00000_0_00_00000_0101011,
          mask:  32'b00000_00_00000_00000_0_00_00000_1111111,
          resp:  '{
              accept:    1'b1,
              writeback: 1'b0,
              dualwrite: 1'b0,
              dualread:  1'b0,
              loadstore: 1'b0,
              exc:       1'b0
          }
      },
      // custom2 opcode: accepted, with register write-back
      '{
          instr: 32'b00000_00_00000_00000_0_00_00000_1011011,
          mask:  32'b00000_00_00000_00000_0_00_00000_1111111,
          resp:  '{
              accept:    1'b1,
              writeback: 1'b1,
              dualwrite: 1'b0,
              dualread:  1'b0,
              loadstore: 1'b0,
              exc:       1'b0
          }
      }
  };

endpackage
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
// Copyright 2021 Thales DIS design services SAS
|
||||
//
|
||||
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
|
||||
|
||||
// Combinational issue decoder: compares the offloaded instruction against a
// table of {instr, mask, resp} entries and returns the response of the
// matching entry, or an all-zero response (not accepted) when nothing matches.
module instr_decoder
  import cvxif_pkg::*;
#(
    parameter int                                 NbInstr             = 1,
    parameter cvxif_instr_pkg::copro_issue_resp_t CoproInstr[NbInstr] = {0}
) (
    input  logic          clk_i,          // only used to clock the assertion
    input  x_issue_req_t  x_issue_req_i,
    output x_issue_resp_t x_issue_resp_o
);

  // one match flag per table entry
  logic [NbInstr-1:0] sel;

  for (genvar i = 0; i < NbInstr; i++) begin : gen_predecoder_selector
    assign sel[i] = (CoproInstr[i].instr == (x_issue_req_i.instr & CoproInstr[i].mask));
  end

  // The response struct consists of exactly the fields accept, writeback,
  // dualwrite, dualread, loadstore and exc, so it is cleared and copied as a
  // whole. If several entries match, the one with the highest index wins.
  always_comb begin
    x_issue_resp_o = '0;
    for (int unsigned i = 0; i < NbInstr; i++) begin
      if (sel[i]) begin
        x_issue_resp_o = CoproInstr[i].resp;
      end
    end
  end

  // at most one table entry may match a given instruction
  assert property (@(posedge clk_i) $onehot0(sel))
  else $warning("This offloaded instruction is valid for multiple coprocessor instructions !");

endmodule
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
// Copyright 2021 Thales DIS design services SAS
|
||||
//
|
||||
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Original Author: Guillaume CHAUVON (guillaume.chauvon@thalesgroup.com)
|
||||
|
||||
// Functional Unit for the logic of the CoreV-X-Interface
|
||||
|
||||
|
||||
module cvxif_fu
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic                     clk_i,
    input  logic                     rst_ni,
    input  fu_data_t                 fu_data_i,
    input  riscv::priv_lvl_t         priv_lvl_i,
    // from issue
    input  logic                     x_valid_i,
    output logic                     x_ready_o,
    input  logic [31:0]              x_off_instr_i,
    // to writeback
    output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
    output exception_t               x_exception_o,
    output riscv::xlen_t             x_result_o,
    output logic                     x_valid_o,
    output logic                     x_we_o,
    // to coprocessor
    output cvxif_pkg::cvxif_req_t    cvxif_req_o,
    input  cvxif_pkg::cvxif_resp_t   cvxif_resp_i
);
  localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS;

  // Bookkeeping for one offloaded instruction the coprocessor did not accept:
  // pending flag, transaction ID and instruction word.
  logic                     illegal_n, illegal_q;
  logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q;
  logic [31:0]              illegal_instr_n, illegal_instr_q;
  logic [X_NUM_RS-1:0]      rs_valid;

  // all source operands are always flagged as valid
  if (cvxif_pkg::X_NUM_RS == 3) begin : gen_third_operand
    assign rs_valid = 3'b111;
  end else begin : gen_no_third_operand
    assign rs_valid = 2'b11;
  end

  // Request towards the coprocessor. Issue and commit are raised in the same
  // cycle and the commit is never a kill.
  always_comb begin
    cvxif_req_o                = '0;
    cvxif_req_o.x_result_ready = 1'b1;  // results are always accepted
    x_ready_o                  = cvxif_resp_i.x_issue_ready;
    if (x_valid_i) begin
      // issue interface
      cvxif_req_o.x_issue_valid     = 1'b1;
      cvxif_req_o.x_issue_req.instr = x_off_instr_i;
      cvxif_req_o.x_issue_req.mode  = priv_lvl_i;
      cvxif_req_o.x_issue_req.id    = fu_data_i.trans_id;
      cvxif_req_o.x_issue_req.rs[0] = fu_data_i.operand_a;
      cvxif_req_o.x_issue_req.rs[1] = fu_data_i.operand_b;
      if (cvxif_pkg::X_NUM_RS == 3) begin
        // the third operand travels in the immediate field
        cvxif_req_o.x_issue_req.rs[2] = fu_data_i.imm;
      end
      cvxif_req_o.x_issue_req.rs_valid = rs_valid;
      // commit interface
      cvxif_req_o.x_commit_valid         = 1'b1;
      cvxif_req_o.x_commit.id            = fu_data_i.trans_id;
      cvxif_req_o.x_commit.x_commit_kill = 1'b0;
    end
  end

  // Write-back path and illegal-instruction tracking
  always_comb begin
    illegal_n       = illegal_q;
    illegal_id_n    = illegal_id_q;
    illegal_instr_n = illegal_instr_q;

    // Default: forward the coprocessor result, everything zeroed while no
    // result is valid.
    x_valid_o           = cvxif_resp_i.x_result_valid;  // Read result only when CVXIF is enabled
    x_trans_id_o        = x_valid_o ? cvxif_resp_i.x_result.id : '0;
    x_result_o          = x_valid_o ? cvxif_resp_i.x_result.data : '0;
    x_we_o              = x_valid_o ? cvxif_resp_i.x_result.we : '0;
    x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0;
    x_exception_o.cause = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0;
    x_exception_o.tval  = '0;

    // An issued instruction was handshaked but not accepted: remember it,
    // unless one is already pending.
    if (!illegal_n && cvxif_req_o.x_issue_valid && cvxif_resp_i.x_issue_ready &&
        !cvxif_resp_i.x_issue_resp.accept) begin
      illegal_n       = 1'b1;
      illegal_id_n    = cvxif_req_o.x_issue_req.id;
      illegal_instr_n = cvxif_req_o.x_issue_req.instr;
    end

    // A pending (possibly just-recorded) illegal instruction is reported as an
    // illegal-instruction exception in the first cycle in which the
    // coprocessor does not present a result.
    if (illegal_n && !x_valid_o) begin
      x_valid_o           = 1'b1;
      x_trans_id_o        = illegal_id_n;
      x_result_o          = '0;
      x_we_o              = '0;
      x_exception_o.valid = 1'b1;
      x_exception_o.cause = riscv::ILLEGAL_INSTR;
      x_exception_o.tval  = illegal_instr_n;
      // Reset flag for illegal instr. illegal_id and illegal instr values are
      // a don't care, no need to reset them.
      illegal_n           = '0;
    end
  end

  // State registers, cleared by the asynchronous, active-low reset
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      illegal_q       <= 1'b0;
      illegal_id_q    <= '0;
      illegal_instr_q <= '0;
    end else begin
      illegal_q       <= illegal_n;
      illegal_id_q    <= illegal_id_n;
      illegal_instr_q <= illegal_instr_n;
    end
  end

endmodule
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,413 @@
|
|||
|
||||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 19.04.2017
|
||||
// Description: Instantiation of all functional units residing in the execute stage
|
||||
|
||||
|
||||
// Execute stage: instantiates the functional units and multiplexes the
// results of the fixed-latency units onto a single write-back port.
//
// Units instantiated here:
//   - alu, branch_unit, csr_buffer, mult  (share the "FLU" result port)
//   - fpu_wrap                            (only if CVA6Cfg.FpPresent)
//   - load_store_unit
//   - cvxif_fu                            (only if CVA6Cfg.CvxifEn)
// If CVA6Cfg.RVS is set, the rs1/rs2 values of an SFENCE_VMA are additionally
// captured and handed to the load/store unit for the TLB flush.
//
// The same fu_data_i is offered to every unit; a unit whose *_valid_i is low
// gets an all-zero copy instead ("silencing"), except where noted.
module ex_stage
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg    = config_pkg::cva6_cfg_empty,
    parameter int unsigned           ASID_WIDTH = 1
) (
    input logic clk_i,  // Clock
    input logic rst_ni,  // Asynchronous reset active low
    input logic flush_i,
    input logic debug_mode_i,

    input logic [riscv::VLEN-1:0] rs1_forwarding_i,
    input logic [riscv::VLEN-1:0] rs2_forwarding_i,
    input fu_data_t fu_data_i,
    input logic [riscv::VLEN-1:0] pc_i,  // PC of current instruction
    input logic is_compressed_instr_i,  // we need to know if this was a compressed instruction
                                        // in order to calculate the next PC on a mis-predict
    // Fixed latency unit(s)
    output riscv::xlen_t flu_result_o,
    output logic [TRANS_ID_BITS-1:0] flu_trans_id_o,  // ID of scoreboard entry at which to write back
    output exception_t flu_exception_o,  // driven by the branch unit only
    output logic flu_ready_o,  // FLU is ready
    output logic flu_valid_o,  // FLU result is valid
    // Branches and Jumps
    // ALU 1
    input logic alu_valid_i,  // fu_data_i is valid for the ALU
    // Branch Unit
    input logic branch_valid_i,  // we are using the branch unit
    input branchpredict_sbe_t branch_predict_i,
    output bp_resolve_t resolved_branch_o,  // the branch engine uses the write back from the ALU
    output logic resolve_branch_o,  // to ID signaling that we resolved the branch
    // CSR
    input logic csr_valid_i,
    output logic [11:0] csr_addr_o,
    input logic csr_commit_i,
    // MULT
    input logic mult_valid_i,  // fu_data_i is valid for the multiplier
    // LSU
    output logic lsu_ready_o,  // FU is ready
    input logic lsu_valid_i,  // Input is valid

    output logic load_valid_o,
    output riscv::xlen_t load_result_o,
    output logic [TRANS_ID_BITS-1:0] load_trans_id_o,
    output exception_t load_exception_o,
    output logic store_valid_o,
    output riscv::xlen_t store_result_o,
    output logic [TRANS_ID_BITS-1:0] store_trans_id_o,
    output exception_t store_exception_o,

    input logic lsu_commit_i,
    output logic lsu_commit_ready_o,  // commit queue is ready to accept another commit request
    input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
    input logic stall_st_pending_i,
    output logic no_st_pending_o,
    input logic amo_valid_commit_i,
    // FPU
    output logic fpu_ready_o,  // FU is ready
    input logic fpu_valid_i,  // fu_data_i is valid for the FPU
    input logic [1:0] fpu_fmt_i,  // FP format
    input logic [2:0] fpu_rm_i,  // FP rm
    input logic [2:0] fpu_frm_i,  // FP frm csr
    input logic [6:0] fpu_prec_i,  // FP precision control
    output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
    output riscv::xlen_t fpu_result_o,
    output logic fpu_valid_o,
    output exception_t fpu_exception_o,
    // CoreV-X-Interface
    input logic x_valid_i,
    output logic x_ready_o,
    input logic [31:0] x_off_instr_i,
    output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
    output exception_t x_exception_o,
    output riscv::xlen_t x_result_o,
    output logic x_valid_o,
    output logic x_we_o,
    output cvxif_pkg::cvxif_req_t cvxif_req_o,
    input cvxif_pkg::cvxif_resp_t cvxif_resp_i,
    input logic acc_valid_i,  // in this module only used for the branch unit's fu_valid_i
    // Memory Management
    input logic enable_translation_i,
    input logic en_ld_st_translation_i,
    input logic flush_tlb_i,

    input riscv::priv_lvl_t priv_lvl_i,
    input riscv::priv_lvl_t ld_st_priv_lvl_i,
    input logic sum_i,
    input logic mxr_i,
    input logic [riscv::PPNW-1:0] satp_ppn_i,
    input logic [ASID_WIDTH-1:0] asid_i,
    // icache translation requests
    input icache_arsp_t icache_areq_i,
    output icache_areq_t icache_areq_o,

    // interface to dcache
    input dcache_req_o_t [2:0] dcache_req_ports_i,
    output dcache_req_i_t [2:0] dcache_req_ports_o,
    input logic dcache_wbuffer_empty_i,
    input logic dcache_wbuffer_not_ni_i,
    output amo_req_t amo_req_o,  // request to cache subsystem
    input amo_resp_t amo_resp_i,  // response from cache subsystem
    // Performance counters
    output logic itlb_miss_o,
    output logic dtlb_miss_o,
    // PMPs
    input riscv::pmpcfg_t [15:0] pmpcfg_i,
    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,

    // RVFI
    output lsu_ctrl_t rvfi_lsu_ctrl_o,
    output [riscv::PLEN-1:0] rvfi_mem_paddr_o
);

  // -------------------------
  // Fixed Latency Units
  // -------------------------
  // all fixed latency units share a single issue port and a single write
  // port into the scoreboard. At the moment those are:
  // 1. ALU - all operations are single cycle
  // 2. Branch unit: operation is single cycle, the ALU is needed
  //    for comparison
  // 3. CSR: This is a small buffer which saves the address of the CSR.
  //    The value is then re-fetched once the instruction retires. The buffer
  //    is only a single entry deep, hence this operation will block all
  //    other operations once this buffer is full. This should not be a major
  //    concern though as CSRs are infrequent.
  // 4. Multiplier/Divider: The multiplier has a fixed latency of 1 cycle.
  //                        The issue logic will take care of not issuing
  //                        another instruction if it will collide on the
  //                        output port. Divisions are arbitrary in length
  //                        they will simply block the issue of all other
  //                        instructions.


  logic current_instruction_is_sfence_vma;
  // These two registers store the rs1 and rs2 parameters in case of `SFENCE_VMA`
  // instruction to be used for TLB flush in the next clock cycle.
  logic [ASID_WIDTH-1:0] asid_to_be_flushed;
  logic [riscv::VLEN-1:0] vaddr_to_be_flushed;

  // from ALU to branch unit
  logic alu_branch_res;  // branch comparison result
  riscv::xlen_t alu_result, csr_result, mult_result;
  logic [riscv::VLEN-1:0] branch_result;
  logic csr_ready, mult_ready;
  logic [TRANS_ID_BITS-1:0] mult_trans_id;
  logic mult_valid;

  // 1. ALU (combinatorial)
  // data silence operation: the ALU sees all-zero inputs unless it is needed
  // either for an ALU instruction or for a branch comparison
  fu_data_t alu_data;
  assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0;

  alu #(
      .CVA6Cfg(CVA6Cfg)
  ) alu_i (
      .clk_i,
      .rst_ni,
      .fu_data_i       (alu_data),
      .result_o        (alu_result),
      .alu_branch_res_o(alu_branch_res)
  );

  // 2. Branch Unit (combinatorial)
  // we don't silence the branch unit as this is already critical and we do
  // not want to add another layer of logic
  branch_unit #(
      .CVA6Cfg(CVA6Cfg)
  ) branch_unit_i (
      .clk_i,
      .rst_ni,
      .debug_mode_i,
      .fu_data_i,
      .pc_i,
      .is_compressed_instr_i,
      // any functional unit is valid, check that there is no accidental mis-predict
      .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) ,
      .branch_valid_i,
      .branch_comp_res_i(alu_branch_res),
      .branch_result_o(branch_result),
      .branch_predict_i,
      .resolved_branch_o,
      .resolve_branch_o,
      .branch_exception_o(flu_exception_o)
  );

  // 3. CSR (sequential)
  csr_buffer #(
      .CVA6Cfg(CVA6Cfg)
  ) csr_buffer_i (
      .clk_i,
      .rst_ni,
      .flush_i,
      .fu_data_i,
      .csr_valid_i,
      .csr_ready_o (csr_ready),
      .csr_result_o(csr_result),
      .csr_commit_i,
      .csr_addr_o
  );

  // The ALU, branch and CSR paths report a result in the cycle their input is
  // valid; the multiplier reports through its own mult_valid output.
  assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid;

  // result MUX
  // Priority: ALU, then CSR, then multiplier; the branch result is the
  // fall-through. Only the multiplier path supplies its own transaction id,
  // all other paths use the id of the instruction currently on fu_data_i.
  always_comb begin
    // Branch result as default case (zero-extended from VLEN to XLEN)
    flu_result_o   = {{riscv::XLEN - riscv::VLEN{1'b0}}, branch_result};
    flu_trans_id_o = fu_data_i.trans_id;
    // ALU result
    if (alu_valid_i) begin
      flu_result_o = alu_result;
      // CSR result
    end else if (csr_valid_i) begin
      flu_result_o = csr_result;
      // Multiplier result
    end else if (mult_valid) begin
      flu_result_o   = mult_result;
      flu_trans_id_o = mult_trans_id;
    end
  end

  // ready flags for FLU: the shared port is ready only if both the CSR buffer
  // and the multiplier are ready
  always_comb begin
    flu_ready_o = csr_ready & mult_ready;
  end

  // 4. Multiplication (Sequential)
  fu_data_t mult_data;
  // input silencing of multiplier
  assign mult_data = mult_valid_i ? fu_data_i : '0;

  mult #(
      .CVA6Cfg(CVA6Cfg)
  ) i_mult (
      .clk_i,
      .rst_ni,
      .flush_i,
      .mult_valid_i,
      .fu_data_i      (mult_data),
      .result_o       (mult_result),
      .mult_valid_o   (mult_valid),
      .mult_ready_o   (mult_ready),
      .mult_trans_id_o(mult_trans_id)
  );

  // ----------------
  // FPU
  // ----------------
  generate
    if (CVA6Cfg.FpPresent) begin : fpu_gen
      fu_data_t fpu_data;
      // input silencing of the FPU
      assign fpu_data = fpu_valid_i ? fu_data_i : '0;

      fpu_wrap #(
          .CVA6Cfg(CVA6Cfg)
      ) fpu_i (
          .clk_i,
          .rst_ni,
          .flush_i,
          .fpu_valid_i,
          .fpu_ready_o,
          .fu_data_i(fpu_data),
          .fpu_fmt_i,
          .fpu_rm_i,
          .fpu_frm_i,
          .fpu_prec_i,
          .fpu_trans_id_o,
          .result_o (fpu_result_o),
          .fpu_valid_o,
          .fpu_exception_o
      );
    end else begin : no_fpu_gen
      // no FPU configured: tie off every FPU output
      assign fpu_ready_o     = '0;
      assign fpu_trans_id_o  = '0;
      assign fpu_result_o    = '0;
      assign fpu_valid_o     = '0;
      assign fpu_exception_o = '0;
    end
  endgenerate

  // ----------------
  // Load-Store Unit
  // ----------------
  fu_data_t lsu_data;

  // input silencing of the LSU
  assign lsu_data = lsu_valid_i ? fu_data_i : '0;

  load_store_unit #(
      .CVA6Cfg   (CVA6Cfg),
      .ASID_WIDTH(ASID_WIDTH)
  ) lsu_i (
      .clk_i,
      .rst_ni,
      .flush_i,
      .stall_st_pending_i,
      .no_st_pending_o,
      .fu_data_i            (lsu_data),
      .lsu_ready_o,
      .lsu_valid_i,
      .load_trans_id_o,
      .load_result_o,
      .load_valid_o,
      .load_exception_o,
      .store_trans_id_o,
      .store_result_o,
      .store_valid_o,
      .store_exception_o,
      .commit_i             (lsu_commit_i),
      .commit_ready_o       (lsu_commit_ready_o),
      .commit_tran_id_i,
      .enable_translation_i,
      .en_ld_st_translation_i,
      .icache_areq_i,
      .icache_areq_o,
      .priv_lvl_i,
      .ld_st_priv_lvl_i,
      .sum_i,
      .mxr_i,
      .satp_ppn_i,
      .asid_i,
      .asid_to_be_flushed_i (asid_to_be_flushed),
      .vaddr_to_be_flushed_i(vaddr_to_be_flushed),
      .flush_tlb_i,
      .itlb_miss_o,
      .dtlb_miss_o,
      .dcache_req_ports_i,
      .dcache_req_ports_o,
      .dcache_wbuffer_empty_i,
      .dcache_wbuffer_not_ni_i,
      .amo_valid_commit_i,
      .amo_req_o,
      .amo_resp_i,
      .pmpcfg_i,
      .pmpaddr_i,
      .rvfi_lsu_ctrl_o,
      .rvfi_mem_paddr_o
  );

  // ----------------
  // CoreV-X-Interface
  // ----------------
  if (CVA6Cfg.CvxifEn) begin : gen_cvxif
    // NOTE(review): cvxif_data is computed but not connected - the implicit
    // `.fu_data_i` port connection below passes the unsilenced fu_data_i.
    // Left as-is to preserve behavior; confirm whether silencing was intended.
    fu_data_t cvxif_data;
    assign cvxif_data = x_valid_i ? fu_data_i : '0;
    cvxif_fu #(
        .CVA6Cfg(CVA6Cfg)
    ) cvxif_fu_i (
        .clk_i,
        .rst_ni,
        .fu_data_i,
        .priv_lvl_i(ld_st_priv_lvl_i),
        .x_valid_i,
        .x_ready_o,
        .x_off_instr_i,
        .x_trans_id_o,
        .x_exception_o,
        .x_result_o,
        .x_valid_o,
        .x_we_o,
        .cvxif_req_o,
        .cvxif_resp_i
    );
  end else begin : gen_no_cvxif
    // no coprocessor interface configured: tie off every CVXIF output.
    // x_ready_o and x_we_o are tied off as well so that no output of this
    // module is left undriven; '0 mirrors the fpu_ready_o tie-off above.
    assign cvxif_req_o   = '0;
    assign x_ready_o     = '0;
    assign x_trans_id_o  = '0;
    assign x_exception_o = '0;
    assign x_result_o    = '0;
    assign x_valid_o     = '0;
    assign x_we_o        = '0;
  end

  // ----------------
  // SFENCE.VMA operand capture
  // ----------------
  if (CVA6Cfg.RVS) begin
    // Flag: set once an SFENCE_VMA is presented on the CSR path, cleared only
    // by a flush (flush has priority over setting).
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (~rst_ni) begin
        current_instruction_is_sfence_vma <= 1'b0;
      end else begin
        if (flush_i) begin
          current_instruction_is_sfence_vma <= 1'b0;
        end else if ((fu_data_i.operation == SFENCE_VMA) && csr_valid_i) begin
          current_instruction_is_sfence_vma <= 1'b1;
        end
      end
    end

    // This process stores the rs1 and rs2 parameters of a SFENCE_VMA instruction.
    // The registers follow the forwarding inputs every cycle and freeze as soon
    // as an SFENCE_VMA is presented (or the flag above is already set).
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (~rst_ni) begin
        asid_to_be_flushed  <= '0;
        vaddr_to_be_flushed <= '0;
        // if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen
      end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin
        vaddr_to_be_flushed <= rs1_forwarding_i;
        asid_to_be_flushed  <= rs2_forwarding_i[ASID_WIDTH-1:0];
      end
    end
  end else begin
    // CVA6Cfg.RVS not set: nothing to capture
    assign current_instruction_is_sfence_vma = 1'b0;
    assign asid_to_be_flushed                = '0;
    assign vaddr_to_be_flushed               = '0;
  end

endmodule
|
||||
|
|
@ -0,0 +1,568 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Stefan Mach, ETH Zurich
|
||||
// Date: 12.04.2018
|
||||
// Description: Wrapper for the floating-point unit
|
||||
|
||||
|
||||
module fpu_wrap
|
||||
import ariane_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic flush_i,
|
||||
input logic fpu_valid_i,
|
||||
output logic fpu_ready_o,
|
||||
input fu_data_t fu_data_i,
|
||||
|
||||
input logic [ 1:0] fpu_fmt_i,
|
||||
input logic [ 2:0] fpu_rm_i,
|
||||
input logic [ 2:0] fpu_frm_i,
|
||||
input logic [ 6:0] fpu_prec_i,
|
||||
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
|
||||
output logic [ CVA6Cfg.FLen-1:0] result_o,
|
||||
output logic fpu_valid_o,
|
||||
output exception_t fpu_exception_o
|
||||
);
|
||||
|
||||
// this is a workaround
|
||||
// otherwise compilation might issue an error if FLEN=0
|
||||
enum logic {
|
||||
READY,
|
||||
STALL
|
||||
}
|
||||
state_q, state_d;
|
||||
if (CVA6Cfg.FpPresent) begin : fpu_gen
|
||||
logic [CVA6Cfg.FLen-1:0] operand_a_i;
|
||||
logic [CVA6Cfg.FLen-1:0] operand_b_i;
|
||||
logic [CVA6Cfg.FLen-1:0] operand_c_i;
|
||||
assign operand_a_i = fu_data_i.operand_a[CVA6Cfg.FLen-1:0];
|
||||
assign operand_b_i = fu_data_i.operand_b[CVA6Cfg.FLen-1:0];
|
||||
assign operand_c_i = fu_data_i.imm[CVA6Cfg.FLen-1:0];
|
||||
|
||||
//-----------------------------------
|
||||
// FPnew config from FPnew package
|
||||
//-----------------------------------
|
||||
localparam OPBITS = fpnew_pkg::OP_BITS;
|
||||
localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
|
||||
localparam IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
|
||||
|
||||
// Features (enabled formats, vectors etc.)
|
||||
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
|
||||
Width: unsigned'(riscv::XLEN), // parameterized using XLEN
|
||||
EnableVectors: CVA6Cfg.XFVec,
|
||||
EnableNanBox: 1'b1,
|
||||
FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT},
|
||||
IntFmtMask: {
|
||||
CVA6Cfg.XFVec && CVA6Cfg.XF8,
|
||||
CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT),
|
||||
1'b1,
|
||||
1'b1
|
||||
}
|
||||
};
|
||||
|
||||
// Implementation (number of registers etc)
|
||||
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
|
||||
PipeRegs: '{ // FP32, FP64, FP16, FP8, FP16alt
|
||||
'{
|
||||
unsigned'(LAT_COMP_FP32),
|
||||
unsigned'(LAT_COMP_FP64),
|
||||
unsigned'(LAT_COMP_FP16),
|
||||
unsigned'(LAT_COMP_FP8),
|
||||
unsigned'(LAT_COMP_FP16ALT)
|
||||
}, // ADDMUL
|
||||
'{default: unsigned'(LAT_DIVSQRT)}, // DIVSQRT
|
||||
'{default: unsigned'(LAT_NONCOMP)}, // NONCOMP
|
||||
'{default: unsigned'(LAT_CONV)}
|
||||
}, // CONV
|
||||
UnitTypes: '{
|
||||
'{default: fpnew_pkg::PARALLEL}, // ADDMUL
|
||||
'{default: fpnew_pkg::MERGED}, // DIVSQRT
|
||||
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
|
||||
'{default: fpnew_pkg::MERGED}
|
||||
}, // CONV
|
||||
PipeConfig: fpnew_pkg::DISTRIBUTED
|
||||
};
|
||||
|
||||
//-------------------------------------------------
|
||||
// Inputs to the FPU and protocol inversion buffer
|
||||
//-------------------------------------------------
|
||||
logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a;
|
||||
logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b;
|
||||
logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c;
|
||||
logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
|
||||
logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
|
||||
logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt;
|
||||
logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt;
|
||||
logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
|
||||
logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
|
||||
logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;
|
||||
|
||||
logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag;
|
||||
|
||||
logic fpu_in_ready, fpu_in_valid;
|
||||
logic fpu_out_ready, fpu_out_valid;
|
||||
|
||||
logic [4:0] fpu_status;
|
||||
|
||||
// FSM to handle protocol inversion
|
||||
logic hold_inputs;
|
||||
logic use_hold;
|
||||
|
||||
//-----------------------------
|
||||
// Translate inputs
|
||||
//-----------------------------
|
||||
|
||||
// Translates the issued instruction (fu_data_i plus the fmt / rm side-band
// inputs) into the FPnew request fields: operation, op modifier, source /
// destination / integer formats, rounding mode, vector flag, tag and the
// three (optionally replicated) operands.
always_comb begin : input_translation

  automatic logic honor_replication;  // replication flag is honored for this operation
  automatic logic replicate_into_c;  // replicate operand C instead of B (for ADD/SUB)
  automatic logic ah_in_rm_msb;  // scalar op carries the AH (FP16ALT) selector in rm_i[2]

  // ---------------- defaults ----------------
  operand_a_d       = operand_a_i;
  operand_b_d       = operand_b_i;  // immediates come through this port unless used as operand
  operand_c_d       = operand_c_i;  // immediates come through this port unless used as operand
  fpu_tag_d         = fu_data_i.trans_id;
  fpu_vec_op_d      = fu_data_i.fu == FPU_VEC;
  fpu_op_d          = fpnew_pkg::SGNJ;  // sign injection by default
  fpu_op_mod_d      = 1'b0;
  fpu_ifmt_d        = fpnew_pkg::INT32;
  fpu_dstfmt_d      = fpnew_pkg::FP32;
  fpu_rm_d          = fpu_rm_i;
  honor_replication = fpu_rm_i[0];  // replication bit is sent via rm field
  replicate_into_c  = 1'b0;
  ah_in_rm_msb      = 1'b0;

  // ---------------- rounding mode ----------------
  // FRM is consulted for every vectorial op, and for scalar ops whose rm
  // field lies outside 000..100 (some ops reuse rm as an encoding field).
  if (fpu_vec_op_d || !(fpu_rm_i inside {[3'b000 : 3'b100]})) fpu_rm_d = fpu_frm_i;

  // ---------------- destination format ----------------
  unique case (fpu_fmt_i)
    // FP32
    2'b00: fpu_dstfmt_d = fpnew_pkg::FP32;
    // FP64 (scalar) or FP16ALT (vectorial)
    2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64;
    // FP16, or FP16ALT for scalar ops with rm == 101
    2'b10: begin
      if (!fpu_vec_op_d && fpu_rm_i == 3'b101) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
      else fpu_dstfmt_d = fpnew_pkg::FP16;
    end
    // FP8
    default: fpu_dstfmt_d = fpnew_pkg::FP8;
  endcase

  // Source format follows the destination format unless a cast overrides it.
  fpu_srcfmt_d = fpu_dstfmt_d;

  // ---------------- operation ----------------
  // (this can modify the rounding mode field and format!)
  unique case (fu_data_i.operation)
    // Addition: second operand is in C
    FADD: begin
      fpu_op_d         = fpnew_pkg::ADD;
      replicate_into_c = 1'b1;
    end
    // Subtraction is a modified ADD: second operand is in C
    FSUB: begin
      fpu_op_d         = fpnew_pkg::ADD;
      fpu_op_mod_d     = 1'b1;
      replicate_into_c = 1'b1;
    end
    // Multiplication
    FMUL: fpu_op_d = fpnew_pkg::MUL;
    // Division
    FDIV: fpu_op_d = fpnew_pkg::DIV;
    // Min/Max - OP is encoded in rm (000-001)
    FMIN_MAX: begin
      fpu_op_d     = fpnew_pkg::MINMAX;
      fpu_rm_d     = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
      ah_in_rm_msb = 1'b1;
    end
    // Square Root
    FSQRT: fpu_op_d = fpnew_pkg::SQRT;
    // Fused Multiply Add
    FMADD: fpu_op_d = fpnew_pkg::FMADD;
    // Fused Multiply Subtract is a modified FMADD
    FMSUB: begin
      fpu_op_d     = fpnew_pkg::FMADD;
      fpu_op_mod_d = 1'b1;
    end
    // Fused Negated Multiply Subtract
    FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB;
    // Fused Negated Multiply Add is a modified FNMSUB
    FNMADD: begin
      fpu_op_d     = fpnew_pkg::FNMSUB;
      fpu_op_mod_d = 1'b1;
    end
    // Float <-> Int casts: both directions share the same modifier and
    // integer-format decoding and differ only in the FPnew operation.
    FCVT_F2I, FCVT_I2F: begin
      fpu_op_d = (fu_data_i.operation == FCVT_F2I) ? fpnew_pkg::F2I : fpnew_pkg::I2F;
      if (fpu_vec_op_d) begin
        // Vectorial: modifier comes from the R bit, so no replication;
        // the integer width is derived from the FP format field.
        fpu_op_mod_d      = fpu_rm_i[0];
        honor_replication = 1'b0;
        unique case (fpu_fmt_i)
          2'b00:        fpu_ifmt_d = fpnew_pkg::INT32;
          2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
          2'b11:        fpu_ifmt_d = fpnew_pkg::INT8;
        endcase
      end else begin
        // Scalar: modifier in imm[0], 64-bit integer selected by imm[1]
        fpu_op_mod_d = operand_c_i[0];
        if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
        else fpu_ifmt_d = fpnew_pkg::INT32;
      end
    end
    // Float to Float Cast - source format encoded in the lowest imm bits
    FCVT_F2F: begin
      fpu_op_d = fpnew_pkg::F2F;
      if (fpu_vec_op_d) begin
        // Vectorial: two imm bits, no replication for casts (not needed)
        honor_replication = 1'b0;
        unique case (operand_c_i[1:0])
          2'b00: fpu_srcfmt_d = fpnew_pkg::FP32;
          2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
          2'b10: fpu_srcfmt_d = fpnew_pkg::FP16;
          2'b11: fpu_srcfmt_d = fpnew_pkg::FP8;
        endcase
      end else begin
        // Scalar: three imm bits; other encodings keep src = dst
        unique case (operand_c_i[2:0])
          3'b000:  fpu_srcfmt_d = fpnew_pkg::FP32;
          3'b001:  fpu_srcfmt_d = fpnew_pkg::FP64;
          3'b010:  fpu_srcfmt_d = fpnew_pkg::FP16;
          3'b110:  fpu_srcfmt_d = fpnew_pkg::FP16ALT;
          3'b011:  fpu_srcfmt_d = fpnew_pkg::FP8;
          default: ;  // Do nothing
        endcase
      end
    end
    // Scalar Sign Injection - op encoded in rm (000-010)
    FSGNJ: begin
      fpu_op_d     = fpnew_pkg::SGNJ;
      fpu_rm_d     = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
      ah_in_rm_msb = 1'b1;
    end
    // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding
    FMV_F2X: begin
      fpu_op_d          = fpnew_pkg::SGNJ;
      fpu_rm_d          = 3'b011;  // passthrough without checking nan-box
      fpu_op_mod_d      = 1'b1;  // no NaN-Boxing
      ah_in_rm_msb      = 1'b1;
      honor_replication = 1'b0;  // no replication, we set second operand
    end
    // Move from GPR to FPR - mapped to NOP since no recoding
    FMV_X2F: begin
      fpu_op_d          = fpnew_pkg::SGNJ;
      fpu_rm_d          = 3'b011;  // passthrough without checking nan-box
      ah_in_rm_msb      = 1'b1;
      honor_replication = 1'b0;  // no replication, we set second operand
    end
    // Scalar Comparisons - op encoded in rm (000-010)
    FCMP: begin
      fpu_op_d     = fpnew_pkg::CMP;
      fpu_rm_d     = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
      ah_in_rm_msb = 1'b1;
    end
    // Classification
    FCLASS: begin
      fpu_op_d     = fpnew_pkg::CLASSIFY;
      // mask out AH encoding bit - CLASS doesn't care anyways
      fpu_rm_d     = {1'b0, fpu_rm_i[1:0]};
      ah_in_rm_msb = 1'b1;
    end
    // Vectorial ops below: the scalar rm encoding of the operation is
    // generated here instead of being taken from the instruction.
    // Vectorial Minimum
    VFMIN: begin
      fpu_op_d = fpnew_pkg::MINMAX;
      fpu_rm_d = 3'b000;  // min
    end
    // Vectorial Maximum
    VFMAX: begin
      fpu_op_d = fpnew_pkg::MINMAX;
      fpu_rm_d = 3'b001;  // max
    end
    // Vectorial Sign Injection
    VFSGNJ: begin
      fpu_op_d = fpnew_pkg::SGNJ;
      fpu_rm_d = 3'b000;  // sgnj
    end
    // Vectorial Negated Sign Injection
    VFSGNJN: begin
      fpu_op_d = fpnew_pkg::SGNJ;
      fpu_rm_d = 3'b001;  // sgnjn
    end
    // Vectorial Xored Sign Injection
    VFSGNJX: begin
      fpu_op_d = fpnew_pkg::SGNJ;
      fpu_rm_d = 3'b010;  // sgnjx
    end
    // Vectorial Equals
    VFEQ: begin
      fpu_op_d = fpnew_pkg::CMP;
      fpu_rm_d = 3'b010;  // eq
    end
    // Vectorial Not Equals
    VFNE: begin
      fpu_op_d     = fpnew_pkg::CMP;
      fpu_op_mod_d = 1'b1;  // invert output
      fpu_rm_d     = 3'b010;  // eq
    end
    // Vectorial Less Than
    VFLT: begin
      fpu_op_d = fpnew_pkg::CMP;
      fpu_rm_d = 3'b001;  // lt
    end
    // Vectorial Greater or Equal
    VFGE: begin
      fpu_op_d     = fpnew_pkg::CMP;
      fpu_op_mod_d = 1'b1;  // invert output
      fpu_rm_d     = 3'b001;  // lt
    end
    // Vectorial Less or Equal
    VFLE: begin
      fpu_op_d = fpnew_pkg::CMP;
      fpu_rm_d = 3'b000;  // le
    end
    // Vectorial Greater Than
    VFGT: begin
      fpu_op_d     = fpnew_pkg::CMP;
      fpu_op_mod_d = 1'b1;  // invert output
      fpu_rm_d     = 3'b000;  // le
    end
    // Vectorial Convert-and-Pack from FP32, lower 4 entries
    VFCPKAB_S: begin
      fpu_op_d          = fpnew_pkg::CPKAB;
      fpu_srcfmt_d      = fpnew_pkg::FP32;  // Cast from FP32
      fpu_op_mod_d      = fpu_rm_i[0];  // A/B selection from R bit
      honor_replication = 1'b0;  // no replication, R bit used for op
    end
    // Vectorial Convert-and-Pack from FP32, upper 4 entries
    VFCPKCD_S: begin
      fpu_op_d          = fpnew_pkg::CPKCD;
      fpu_srcfmt_d      = fpnew_pkg::FP32;  // Cast from FP32
      fpu_op_mod_d      = fpu_rm_i[0];  // C/D selection from R bit
      honor_replication = 1'b0;  // no replication, R bit used for op
    end
    // Vectorial Convert-and-Pack from FP64, lower 4 entries
    VFCPKAB_D: begin
      fpu_op_d          = fpnew_pkg::CPKAB;
      fpu_srcfmt_d      = fpnew_pkg::FP64;  // Cast from FP64
      fpu_op_mod_d      = fpu_rm_i[0];  // A/B selection from R bit
      honor_replication = 1'b0;  // no replication, R bit used for op
    end
    // Vectorial Convert-and-Pack from FP64, upper 4 entries
    VFCPKCD_D: begin
      fpu_op_d          = fpnew_pkg::CPKCD;
      fpu_srcfmt_d      = fpnew_pkg::FP64;  // Cast from FP64
      fpu_op_mod_d      = fpu_rm_i[0];  // C/D selection from R bit
      honor_replication = 1'b0;  // no replication, R bit used for op
    end
    // Any other operation keeps the defaults
    default: ;  //nothing
  endcase

  // ---------------- scalar AH encoding fix-up ----------------
  // For the scalar ops flagged above, rm_i[2] selects FP16ALT as destination.
  if (!fpu_vec_op_d && ah_in_rm_msb && fpu_rm_i[2]) fpu_dstfmt_d = fpnew_pkg::FP16ALT;

  // ---------------- operand replication ----------------
  // Vectorial ops with the replication flag honored get the lowest element
  // of operand C (ADD/SUB) or operand B (everything else) copied into every
  // lane; the lane count depends on whether RVD is configured.
  if (fpu_vec_op_d && honor_replication) begin
    unique case (fpu_dstfmt_d)
      fpnew_pkg::FP32: begin
        if (replicate_into_c) operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
        else operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
      end
      fpnew_pkg::FP16, fpnew_pkg::FP16ALT: begin
        if (replicate_into_c)
          operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
        else operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
      end
      fpnew_pkg::FP8: begin
        if (replicate_into_c)
          operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
        else operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
      end
      default: ;  // Do nothing
    endcase  // fpu_dstfmt_d
  end
end
|
||||
|
||||
|
||||
//---------------------------------------------------------
|
||||
// Upstream protocol inversion: InValid depends on InReady
|
||||
//---------------------------------------------------------
|
||||
|
||||
// Input-side handshake FSM (protocol inversion).
// READY: the request is forwarded straight to the FPU. If the FPU cannot
//        take it in the same cycle, the inputs are captured in the hold
//        register, ready is withdrawn and the FSM moves to STALL.
// STALL: the held request is presented to the FPU until it is consumed;
//        in that cycle ready is raised again and the FSM returns to READY.
// A flush always forces the next state to READY.
always_comb begin : p_inputFSM
  // Defaults: not ready, nothing offered to the FPU, hold register idle,
  // state unchanged.
  state_d      = state_q;
  fpu_ready_o  = 1'b0;
  fpu_in_valid = 1'b0;
  use_hold     = 1'b0;  // inputs go directly to unit
  hold_inputs  = 1'b0;  // hold register disabled

  unique case (state_q)
    READY: begin
      fpu_in_valid = fpu_valid_i;  // Forward input valid to FPU
      if (fpu_valid_i && !fpu_in_ready) begin
        // There is a transaction but the FPU can't handle it: no token is
        // given to issue, the inputs are saved and further requests stall.
        hold_inputs = 1'b1;
        state_d     = STALL;
      end else begin
        fpu_ready_o = 1'b1;  // Act as if FPU ready
      end
    end
    STALL: begin
      use_hold     = 1'b1;  // the data comes from the hold reg
      fpu_in_valid = 1'b1;  // we have data for the FPU
      // Wait until it's consumed
      if (fpu_in_ready) begin
        state_d     = READY;  // accept future requests
        fpu_ready_o = 1'b1;  // Give a token to issue
      end
    end
    // Any other state value: emit the default values
    default: ;
  endcase

  // Flushing will override issue and go back to idle
  if (flush_i) begin
    state_d = READY;
  end

end
|
||||
|
||||
// Buffer register and FSM state holding
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg
  if (~rst_ni) begin
    // Asynchronous active-low reset: FSM back to READY, hold register cleared
    state_q      <= READY;
    // operands
    operand_a_q  <= '0;
    operand_b_q  <= '0;
    operand_c_q  <= '0;
    // operation and formats
    fpu_op_q     <= '0;
    fpu_op_mod_q <= '0;
    fpu_srcfmt_q <= '0;
    fpu_dstfmt_q <= '0;
    fpu_ifmt_q   <= '0;
    fpu_rm_q     <= '0;
    fpu_vec_op_q <= '0;
    // transaction tag
    fpu_tag_q    <= '0;
  end else begin
    // The FSM state advances every cycle
    state_q <= state_d;

    // The hold register only captures the current request when the input FSM
    // asserts hold_inputs; otherwise it keeps its previous contents.
    if (hold_inputs) begin
      // operands
      operand_a_q  <= operand_a_d;
      operand_b_q  <= operand_b_d;
      operand_c_q  <= operand_c_d;
      // operation and formats
      fpu_op_q     <= fpu_op_d;
      fpu_op_mod_q <= fpu_op_mod_d;
      fpu_srcfmt_q <= fpu_srcfmt_d;
      fpu_dstfmt_q <= fpu_dstfmt_d;
      fpu_ifmt_q   <= fpu_ifmt_d;
      fpu_rm_q     <= fpu_rm_d;
      fpu_vec_op_q <= fpu_vec_op_d;
      // transaction tag
      fpu_tag_q    <= fpu_tag_d;
    end
  end
end
|
||||
|
||||
// Select FPU input data: from register if valid data in register, else directly from input
|
||||
assign operand_a = use_hold ? operand_a_q : operand_a_d;
|
||||
assign operand_b = use_hold ? operand_b_q : operand_b_d;
|
||||
assign operand_c = use_hold ? operand_c_q : operand_c_d;
|
||||
assign fpu_op = use_hold ? fpu_op_q : fpu_op_d;
|
||||
assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d;
|
||||
assign fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d;
|
||||
assign fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d;
|
||||
assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d;
|
||||
assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d;
|
||||
assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d;
|
||||
assign fpu_tag = use_hold ? fpu_tag_q : fpu_tag_d;
|
||||
|
||||
// Consolidate operands
|
||||
logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands;
|
||||
|
||||
assign fpu_operands[0] = operand_a;
|
||||
assign fpu_operands[1] = operand_b;
|
||||
assign fpu_operands[2] = operand_c;
|
||||
|
||||
//---------------
|
||||
// FPU instance
|
||||
//---------------
|
||||
|
||||
fpnew_top #(
|
||||
.Features (FPU_FEATURES),
|
||||
.Implementation(FPU_IMPLEMENTATION),
|
||||
.TagType (logic [TRANS_ID_BITS-1:0])
|
||||
) i_fpnew_bulk (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.operands_i (fpu_operands),
|
||||
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rm)),
|
||||
.op_i (fpnew_pkg::operation_e'(fpu_op)),
|
||||
.op_mod_i (fpu_op_mod),
|
||||
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_srcfmt)),
|
||||
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dstfmt)),
|
||||
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_ifmt)),
|
||||
.vectorial_op_i(fpu_vec_op),
|
||||
.tag_i (fpu_tag),
|
||||
.simd_mask_i (1'b1),
|
||||
.in_valid_i (fpu_in_valid),
|
||||
.in_ready_o (fpu_in_ready),
|
||||
.flush_i,
|
||||
.result_o,
|
||||
.status_o (fpu_status),
|
||||
.tag_o (fpu_trans_id_o),
|
||||
.out_valid_o (fpu_out_valid),
|
||||
.out_ready_i (fpu_out_ready),
|
||||
.busy_o ( /* unused */)
|
||||
);
|
||||
|
||||
// Pack status flag into exception cause, tval ignored in wb, exception is always invalid
|
||||
assign fpu_exception_o.cause = {59'h0, fpu_status};
|
||||
assign fpu_exception_o.valid = 1'b0;
|
||||
|
||||
// Downstream write port is dedicated to FPU and always ready
|
||||
assign fpu_out_ready = 1'b1;
|
||||
|
||||
// Downstream valid from unit
|
||||
assign fpu_valid_o = fpu_out_valid;
|
||||
|
||||
end
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,215 @@
|
|||
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
|
||||
// Copyright 2023 - Thales for additional contribution.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 2.0 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 08.02.2018
|
||||
// Migrated: Luis Vitorio Cargnini, IEEE
|
||||
// Date: 09.06.2018
|
||||
// FPGA optimization: Sebastien Jacq, Thales
|
||||
// Date: 2023-01-30
|
||||
|
||||
// branch history table - 2 bit saturation counter
|
||||
|
||||
// Branch history table (BHT).
//
// Each entry holds a valid bit and a 2-bit saturating counter. The table is
// organised as NR_ROWS rows of INSTR_PER_FETCH entries. The row is selected by
// the PC bits above the in-row offset and the entry by the low PC bits.
//
// Two implementations are generated, chosen by ariane_pkg::FPGA_EN:
//   * gen_asic_bht: flip-flop array with asynchronous reset; flush_i
//                   invalidates every entry and sets its counter to 2'b10.
//   * gen_fpga_bht: one AsyncThreePortRam per fetch slot; flush_i is not used
//                   by this variant.
module bht #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned NR_ENTRIES = 1024
) (
    input logic clk_i,
    input logic rst_ni,
    input logic flush_i,
    input logic debug_mode_i,
    input logic [riscv::VLEN-1:0] vpc_i,
    input ariane_pkg::bht_update_t bht_update_i,
    // we potentially need INSTR_PER_FETCH predictions/cycle
    output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o
);
  // the last bit is always zero, we don't need it for indexing
  localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
  // re-shape the branch history table
  localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
  // number of bits needed to index the row
  localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
  localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
  // number of bits we should use for prediction
  localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
  // we are not interested in all bits of the address
  unread i_unread (.d_i(|vpc_i));

  struct packed {
    logic       valid;
    logic [1:0] saturation_counter;
  }
      bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
      bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];

  // row selected by the fetch PC / by the update PC
  logic [$clog2(NR_ROWS)-1:0] index, update_pc;
  // entry within the row selected by the update PC
  logic [ROW_INDEX_BITS-1:0] update_row_index;

  assign index     = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
  assign update_pc = bht_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
  if (CVA6Cfg.RVC) begin : gen_update_row_index
    assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
  end else begin
    assign update_row_index = '0;
  end

  if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht  // ASIC TARGET

    logic [1:0] saturation_counter;
    // prediction assignment: every slot of the selected row is read out,
    // the counter MSB is the taken/not-taken prediction
    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output
      assign bht_prediction_o[i].valid = bht_q[index][i].valid;
      assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1;
    end

    always_comb begin : update_bht
      bht_d = bht_q;
      saturation_counter = bht_q[update_pc][update_row_index].saturation_counter;

      // updates are ignored while in debug mode (only when debug support is configured)
      if ((bht_update_i.valid && CVA6Cfg.DebugEn && !debug_mode_i) || (bht_update_i.valid && !CVA6Cfg.DebugEn)) begin
        bht_d[update_pc][update_row_index].valid = 1'b1;

        if (saturation_counter == 2'b11) begin
          // we can safely decrease it
          if (!bht_update_i.taken)
            bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
          // then check if it saturated in the negative regime e.g.: branch not taken
        end else if (saturation_counter == 2'b00) begin
          // we can safely increase it
          if (bht_update_i.taken)
            bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
        end else begin  // otherwise we are not in any boundaries and can decrease or increase it
          if (bht_update_i.taken)
            bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
          else bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
        end
      end
    end

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        for (int unsigned i = 0; i < NR_ROWS; i++) begin
          for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
            bht_q[i][j] <= '0;
          end
        end
      end else begin
        // evict all entries
        if (flush_i) begin
          for (int i = 0; i < NR_ROWS; i++) begin
            for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
              bht_q[i][j].valid <= 1'b0;
              bht_q[i][j].saturation_counter <= 2'b10;
            end
          end
        end else begin
          bht_q <= bht_d;
        end
      end
    end

  end else begin : gen_fpga_bht  //FPGA TARGETS

    // number of bits per word in the bram
    // NOTE(review): the bit indexing below assumes bht_t is
    // {valid, saturation_counter[1:0]} (3 bits) - confirm against ariane_pkg
    localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t);
    logic [                             ROW_INDEX_BITS-1:0] row_index;
    logic [                ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address;
    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata;
    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0;
    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1;

    ariane_pkg::bht_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht;
    ariane_pkg::bht_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated;

    if (CVA6Cfg.RVC) begin : gen_row_index
      assign row_index = vpc_i[ROW_ADDR_BITS+OFFSET-1:OFFSET];
    end else begin
      assign row_index = '0;
    end

    // -------------------------
    // prediction assignment & update Branch History Table
    // -------------------------
    always_comb begin : prediction_update_bht
      bht_ram_we             = '0;
      bht_ram_read_address_0 = '0;
      bht_ram_read_address_1 = '0;
      bht_ram_write_address  = '0;
      bht_ram_wdata          = '0;
      bht_updated            = '0;
      bht                    = '0;
      // Default every prediction slot to "no valid prediction". Only the slot
      // matching row_index is overwritten below; without this default the
      // remaining slots would infer latches and keep stale predictions.
      bht_prediction_o       = '0;

      // read port 0: prediction for the slot addressed by the fetch PC
      for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
        if (row_index == i) begin
          bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
          // word bit 2 is the valid bit, bit 1 the MSB of the saturation counter
          bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2];
          bht_prediction_o[i].taken = bht_ram_rdata_0[i*BRAM_WORD_BITS+1];
        end
      end

      // read port 1 + write port: read-modify-write of the updated entry
      if (bht_update_i.valid && !debug_mode_i) begin
        for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
          if (update_row_index == i) begin
            bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
            bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS+:2];

            if (bht[i].saturation_counter == 2'b11) begin
              // we can safely decrease it
              if (!bht_update_i.taken)
                bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
              else bht_updated[i].saturation_counter = 2'b11;
              // then check if it saturated in the negative regime e.g.: branch not taken
            end else if (bht[i].saturation_counter == 2'b00) begin
              // we can safely increase it
              if (bht_update_i.taken)
                bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
              else bht_updated[i].saturation_counter = 2'b00;
            end else begin  // otherwise we are not in any boundaries and can decrease or increase it
              if (bht_update_i.taken)
                bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
              else bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
            end

            bht_updated[i].valid = 1'b1;
            bht_ram_we[i] = 1'b1;
            bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
            //bht_ram_wdata[(i+1)*BRAM_WORD_BITS-1] = 1'b1; //valid
            bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
              bht_updated[i].valid, bht_updated[i].saturation_counter
            };

          end
        end
      end
    end

    // one three-port RAM (1 write, 2 read) per fetch slot
    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram
      AsyncThreePortRam #(
          .ADDR_WIDTH($clog2(NR_ROWS)),
          .DATA_DEPTH(NR_ROWS),
          .DATA_WIDTH(BRAM_WORD_BITS)
      ) i_bht_ram (
          .Clk_CI     (clk_i),
          .WrEn_SI    (bht_ram_we[i]),
          .WrAddr_DI  (bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrData_DI  (bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdAddr_DI_0(bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .RdAddr_DI_1(bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .RdData_DO_0(bht_ram_rdata_0[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdData_DO_1(bht_ram_rdata_1[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
      );
    end

  end
endmodule
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 2.0 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 08.02.2018
|
||||
// Migrated: Luis Vitorio Cargnini, IEEE
|
||||
// Date: 09.06.2018
|
||||
//
|
||||
// Additional contributions by:
|
||||
// Sebastien Jacq, Thales - sjthales on github.com
|
||||
// Date: 2022-12-01
|
||||
//
|
||||
// Description: This module is an adaptation of the BTB (Branch Target Buffer)
|
||||
// module for both FPGA and ASIC targets.
|
||||
// Prediction target address is stored in BRAM on FPGA while for
|
||||
// original module, target address is stored in D flip-flop.
|
||||
// For FPGA flushing is not supported because the frontend module
|
||||
// flushing signal is not connected.
|
||||
//
|
||||
// branch target buffer
|
||||
// Branch target buffer (BTB).
//
// Stores one predicted target address (plus a valid bit) per entry. The table
// is organised as NR_ROWS rows of INSTR_PER_FETCH entries; the row is selected
// by the PC bits above the in-row offset, the entry by the low PC bits.
//
// Two implementations are generated, chosen by ariane_pkg::FPGA_EN:
//   * gen_fpga_btb: one SyncDpRam per fetch slot (port A = update, port B =
//                   prediction read); flush_i is not used by this variant.
//   * gen_asic_btb: flip-flop array; flush_i clears all valid bits.
module btb #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int NR_ENTRIES = 8
) (
    input logic clk_i,  // Clock
    input logic rst_ni,  // Asynchronous reset active low
    input logic flush_i,  // flush the btb
    input logic debug_mode_i,

    input logic [riscv::VLEN-1:0] vpc_i,  // virtual PC from IF stage
    input ariane_pkg::btb_update_t btb_update_i,  // update btb with this information
    output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb
);
  // the last bit is always zero, we don't need it for indexing
  localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
  // re-shape the branch target buffer into rows of INSTR_PER_FETCH entries
  localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
  // number of bits needed to index the row
  localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
  localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
  // number of bits we should use for prediction
  localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
  // prevent aliasing to degrade performance
  localparam ANTIALIAS_BITS = 8;
  // number of bits per word in the bram
  localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t);
  // we are not interested in all bits of the address
  unread i_unread (.d_i(|vpc_i));


  // row selected by the fetch PC / by the update PC
  logic [$clog2(NR_ROWS)-1:0] index, update_pc;
  // entry within the row selected by the update PC
  logic [ROW_INDEX_BITS-1:0] update_row_index;

  assign index     = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
  assign update_pc = btb_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
  if (CVA6Cfg.RVC) begin : gen_update_row_index
    assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
  end else begin
    assign update_row_index = '0;
  end

  if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb  //FPGA TARGETS
    logic [                ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
    logic [                ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;

    logic [                ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update;
    logic [                ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;

    // The prediction port never writes. The write data is tied off once for
    // the whole vector, outside the generate loop, so the variable has a
    // single continuous driver.
    assign btb_ram_wdata_prediction = '0;

    // output matching prediction
    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
      assign btb_ram_csel_prediction[i] = 1'b1;
      assign btb_ram_we_prediction[i] = 1'b0;
      assign btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
      assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS];
    end

    // -------------------------
    // Update Branch Prediction
    // -------------------------
    // update on a mis-predict
    always_comb begin : update_branch_predict
      btb_ram_csel_update = '0;
      btb_ram_we_update = '0;
      btb_ram_addr_update = '0;
      btb_ram_wdata_update = '0;

      // updates are ignored while in debug mode
      if (btb_update_i.valid && !debug_mode_i) begin
        for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
          if (update_row_index == i) begin
            btb_ram_csel_update[i] = 1'b1;
            btb_ram_we_update[i] = 1'b1;
            btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
            // written word: leading 1'b1 followed by the new target address
            btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
              1'b1, btb_update_i.target_address
            };
          end
        end
      end
    end

    // one dual-port RAM per fetch slot: port A = update, port B = prediction
    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram
      SyncDpRam #(
          .ADDR_WIDTH($clog2(NR_ROWS)),
          .DATA_DEPTH(NR_ROWS),
          .DATA_WIDTH(BRAM_WORD_BITS),
          .OUT_REGS  (0),
          .SIM_INIT  (1)
      ) i_btb_ram (
          .Clk_CI    (clk_i),
          .Rst_RBI   (rst_ni),
          //----------------------------
          .CSelA_SI  (btb_ram_csel_update[i]),
          .WrEnA_SI  (btb_ram_we_update[i]),
          .AddrA_DI  (btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrDataA_DI(btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdDataA_DO(),
          //-----------------------------
          .CSelB_SI  (btb_ram_csel_prediction[i]),
          .WrEnB_SI  (btb_ram_we_prediction[i]),
          .AddrB_DI  (btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrDataB_DI(btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdDataB_DO(btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
      );
    end

  end else begin : gen_asic_btb  // ASIC TARGET

    // typedef for all branch target entries
    // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
    ariane_pkg::btb_prediction_t
        btb_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
        btb_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];

    // output matching prediction
    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
      assign btb_prediction_o[i] = btb_q[index][i];  // workaround
    end

    // -------------------------
    // Update Branch Prediction
    // -------------------------
    // update on a mis-predict
    always_comb begin : update_branch_predict
      btb_d = btb_q;

      // updates are ignored while in debug mode
      if (btb_update_i.valid && !debug_mode_i) begin
        btb_d[update_pc][update_row_index].valid = 1'b1;
        // the target address is simply updated
        btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
      end
    end

    // sequential process
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        // reset: clear every entry (valid bit and target address)
        for (int i = 0; i < NR_ROWS; i++) btb_q[i] <= '{default: 0};
      end else begin
        // evict all entries
        if (flush_i) begin
          for (int i = 0; i < NR_ROWS; i++) begin
            for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
              btb_q[i][j].valid <= 1'b0;
            end
          end
        end else begin
          btb_q <= btb_d;
        end
      end
    end
  end
endmodule
|
||||
|
|
@ -0,0 +1,516 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 08.02.2018
|
||||
// Description: Ariane Instruction Fetch Frontend
|
||||
//
|
||||
// This module interfaces with the instruction cache, handles control
|
||||
// change request from the back-end and does branch prediction.
|
||||
|
||||
module frontend
|
||||
import ariane_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic flush_i, // flush request for PCGEN
|
||||
input logic flush_bp_i, // flush branch prediction
|
||||
input logic halt_i, // halt commit stage
|
||||
input logic debug_mode_i,
|
||||
// global input
|
||||
input logic [riscv::VLEN-1:0] boot_addr_i,
|
||||
// Set a new PC
|
||||
// mispredict
|
||||
input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB
|
||||
// from commit, when flushing the whole pipeline
|
||||
input logic set_pc_commit_i, // Take the PC from commit stage
|
||||
input logic [riscv::VLEN-1:0] pc_commit_i, // PC of instruction in commit stage
|
||||
// CSR input
|
||||
input logic [riscv::VLEN-1:0] epc_i, // exception PC which we need to return to
|
||||
input logic eret_i, // return from exception
|
||||
input logic [riscv::VLEN-1:0] trap_vector_base_i, // base of trap vector
|
||||
input logic ex_valid_i, // exception is valid - from commit
|
||||
input logic set_debug_pc_i, // jump to debug address
|
||||
// Instruction Fetch
|
||||
output icache_dreq_t icache_dreq_o,
|
||||
input icache_drsp_t icache_dreq_i,
|
||||
// instruction output port -> to processor back-end
|
||||
output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
|
||||
output logic fetch_entry_valid_o, // instruction in IF is valid
|
||||
input logic fetch_entry_ready_i // ID acknowledged this instruction
|
||||
);
|
||||
// Instruction Cache Registers, from I$
|
||||
logic [ FETCH_WIDTH-1:0] icache_data_q;
|
||||
logic icache_valid_q;
|
||||
ariane_pkg::frontend_exception_t icache_ex_valid_q;
|
||||
logic [ riscv::VLEN-1:0] icache_vaddr_q;
|
||||
logic instr_queue_ready;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed;
|
||||
// upper-most branch-prediction from last cycle
|
||||
btb_prediction_t btb_q;
|
||||
bht_prediction_t bht_q;
|
||||
// instruction fetch is ready
|
||||
logic if_ready;
|
||||
logic [riscv::VLEN-1:0] npc_d, npc_q; // next PC
|
||||
|
||||
// indicates whether we come out of reset (then we need to load boot_addr_i)
|
||||
logic npc_rst_load_q;
|
||||
|
||||
logic replay;
|
||||
logic [ riscv::VLEN-1:0] replay_addr;
|
||||
|
||||
// shift amount
|
||||
logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt;
|
||||
// address will always be 16 bit aligned, make this explicit here
|
||||
if (CVA6Cfg.RVC) begin : gen_shamt
|
||||
assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1];
|
||||
end else begin
|
||||
assign shamt = 1'b0;
|
||||
end
|
||||
|
||||
// -----------------------
|
||||
// Ctrl Flow Speculation
|
||||
// -----------------------
|
||||
// RVI ctrl flow prediction
|
||||
logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump;
|
||||
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm;
|
||||
// RVC branching
|
||||
logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call;
|
||||
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm;
|
||||
// re-aligned instruction and address (coming from cache - combinationally)
|
||||
logic [INSTR_PER_FETCH-1:0][ 31:0] instr;
|
||||
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr;
|
||||
logic [INSTR_PER_FETCH-1:0] instruction_valid;
|
||||
// BHT, BTB and RAS prediction
|
||||
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction;
|
||||
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction;
|
||||
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
|
||||
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
|
||||
ras_t ras_predict;
|
||||
logic [ riscv::VLEN-1:0] vpc_btb;
|
||||
|
||||
// branch-predict update
|
||||
logic is_mispredict;
|
||||
logic ras_push, ras_pop;
|
||||
logic [ riscv::VLEN-1:0] ras_update;
|
||||
|
||||
// Instruction FIFO
|
||||
logic [ riscv::VLEN-1:0] predict_address;
|
||||
cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvi_cf;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf;
|
||||
|
||||
logic serving_unaligned;
|
||||
// Re-align instructions
|
||||
instr_realign #(
|
||||
.CVA6Cfg(CVA6Cfg)
|
||||
) i_instr_realign (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (icache_dreq_o.kill_s2),
|
||||
.valid_i (icache_valid_q),
|
||||
.serving_unaligned_o(serving_unaligned),
|
||||
.address_i (icache_vaddr_q),
|
||||
.data_i (icache_data_q),
|
||||
.valid_o (instruction_valid),
|
||||
.addr_o (addr),
|
||||
.instr_o (instr)
|
||||
);
|
||||
|
||||
// --------------------
|
||||
// Branch Prediction
|
||||
// --------------------
|
||||
// select the right branch prediction result
|
||||
// in case we are serving an unaligned instruction in instr[0] we need to take
|
||||
// the prediction we saved from the previous fetch
|
||||
if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted
|
||||
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2(
|
||||
INSTR_PER_FETCH
|
||||
):1]];
|
||||
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2(
|
||||
INSTR_PER_FETCH
|
||||
):1]];
|
||||
|
||||
// for all other predictions we can use the generated address to index
|
||||
// into the branch prediction data structures
|
||||
for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address
|
||||
assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
|
||||
assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
|
||||
end
|
||||
end else begin
|
||||
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]];
|
||||
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][1]];
|
||||
end
|
||||
;
|
||||
|
||||
// for the return address stack it doesn't matter as we have the
|
||||
// address of the call/return already
|
||||
logic bp_valid;
|
||||
|
||||
logic [INSTR_PER_FETCH-1:0] is_branch;
|
||||
logic [INSTR_PER_FETCH-1:0] is_call;
|
||||
logic [INSTR_PER_FETCH-1:0] is_jump;
|
||||
logic [INSTR_PER_FETCH-1:0] is_return;
|
||||
logic [INSTR_PER_FETCH-1:0] is_jalr;
|
||||
|
||||
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
|
||||
// branch history table -> BHT
|
||||
assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]);
|
||||
// function calls -> RAS
|
||||
assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]);
|
||||
// function return -> RAS
|
||||
assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]);
|
||||
// unconditional jumps with known target -> immediately resolved
|
||||
assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]);
|
||||
// unconditional jumps with unknown target -> BTB
|
||||
assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
|
||||
end
|
||||
|
||||
// taken/not taken
|
||||
always_comb begin
  // Control-flow prediction for the fetched instructions.
  // Outputs driven here: cf_type, taken_rvi_cf, taken_rvc_cf, predict_address,
  // ras_push, ras_pop, ras_update.

  // Defaults: no control flow detected, RAS untouched
  ras_update      = '0;
  ras_pop         = 1'b0;
  ras_push        = 1'b0;
  predict_address = '0;
  taken_rvc_cf    = '0;
  taken_rvi_cf    = '0;
  for (int slot = 0; slot < INSTR_PER_FETCH; slot++) begin
    cf_type[slot] = ariane_pkg::NoCF;
  end

  // Walk from the highest slot down to slot 0 so that later (lower-slot)
  // assignments override earlier ones: the lower most prediction wins.
  for (int slot = INSTR_PER_FETCH - 1; slot >= 0; slot--) begin
    unique case ({
      is_branch[slot], is_return[slot], is_jump[slot], is_jalr[slot]
    })
      // plain instruction, no control flow
      4'b0000: ;

      // indirect jump: the target can only come from the BTB
      4'b0001: begin
        ras_push = 1'b0;
        ras_pop  = 1'b0;
        if (CVA6Cfg.BTBEntries && btb_prediction_shifted[slot].valid) begin
          cf_type[slot]   = ariane_pkg::JumpR;
          predict_address = btb_prediction_shifted[slot].target_address;
        end
      end

      // unconditional jump to an immediate: always taken
      4'b0010: begin
        ras_push           = 1'b0;
        ras_pop            = 1'b0;
        cf_type[slot]      = ariane_pkg::Jump;
        taken_rvc_cf[slot] = rvc_jump[slot];
        taken_rvi_cf[slot] = rvi_jump[slot];
      end

      // return: target taken from the return address stack
      4'b0100: begin
        ras_push        = 1'b0;
        // only pop the RAS if the instruction was actually consumed
        ras_pop         = ras_predict.valid & instr_queue_consumed[slot];
        cf_type[slot]   = ariane_pkg::Return;
        predict_address = ras_predict.ra;
      end

      // conditional branch
      4'b1000: begin
        ras_push = 1'b0;
        ras_pop  = 1'b0;
        if (bht_prediction_shifted[slot].valid) begin
          // dynamic prediction from the BHT
          taken_rvc_cf[slot] = rvc_branch[slot] & bht_prediction_shifted[slot].taken;
          taken_rvi_cf[slot] = rvi_branch[slot] & bht_prediction_shifted[slot].taken;
        end else begin
          // static prediction: taken if the immediate is negative
          taken_rvc_cf[slot] = rvc_branch[slot] & rvc_imm[slot][riscv::VLEN-1];
          taken_rvi_cf[slot] = rvi_branch[slot] & rvi_imm[slot][riscv::VLEN-1];
        end
        if (taken_rvi_cf[slot] || taken_rvc_cf[slot]) begin
          cf_type[slot] = ariane_pkg::Branch;
        end
      end

      // more than one control-flow flag set: leave the values untouched
      default: ;
      // default: $error("Decoded more than one control flow");
    endcase

    // A call additionally pushes its return address (next sequential PC),
    // but only if the instruction was actually consumed.
    if (is_call[slot]) begin
      ras_update = addr[slot] + (rvc_call[slot] ? 2 : 4);
      ras_push   = instr_queue_consumed[slot];
    end

    // PC-relative target for taken immediate jumps and branches
    if (taken_rvc_cf[slot] || taken_rvi_cf[slot]) begin
      predict_address = addr[slot] + (taken_rvc_cf[slot] ? rvc_imm[slot] : rvi_imm[slot]);
    end
  end
end
|
||||
// or reduce struct
|
||||
always_comb begin
  // OR-reduce the per-slot control-flow types into one "prediction valid" flag.
  // A slot contributes when its type is neither NoCF nor Return; a Return slot
  // only contributes while the RAS reports a valid prediction.
  bp_valid = 1'b0;
  for (int slot = 0; slot < INSTR_PER_FETCH; slot++) begin
    bp_valid = bp_valid
             | ((cf_type[slot] == Return) & ras_predict.valid)
             | ((cf_type[slot] != NoCF) & (cf_type[slot] != Return));
  end
end
|
||||
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
|
||||
|
||||
// Cache interface
|
||||
assign icache_dreq_o.req = instr_queue_ready;
|
||||
assign if_ready = icache_dreq_i.ready & instr_queue_ready;
|
||||
// We need to flush the cache pipeline if:
|
||||
// 1. We mispredicted
|
||||
// 2. Want to flush the whole processor front-end
|
||||
// 3. Need to replay an instruction because the fetch-fifo was full
|
||||
assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay;
|
||||
// if we have a valid branch-prediction we need to only kill the last cache request
|
||||
// also if we killed the first stage we also need to kill the second stage (inclusive flush)
|
||||
assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
|
||||
|
||||
// Update Control Flow Predictions
|
||||
bht_update_t bht_update;
|
||||
btb_update_t btb_update;
|
||||
|
||||
// assert on branch, deassert when resolved
|
||||
logic speculative_q, speculative_d;
|
||||
assign speculative_d = (speculative_q && !resolved_branch_i.valid || |is_branch || |is_return || |is_jalr) && !flush_i;
|
||||
assign icache_dreq_o.spec = speculative_d;
|
||||
|
||||
assign bht_update.valid = resolved_branch_i.valid
|
||||
& (resolved_branch_i.cf_type == ariane_pkg::Branch);
|
||||
assign bht_update.pc = resolved_branch_i.pc;
|
||||
assign bht_update.taken = resolved_branch_i.is_taken;
|
||||
// only update mispredicted branches e.g. no returns from the RAS
|
||||
assign btb_update.valid = resolved_branch_i.valid
|
||||
& resolved_branch_i.is_mispredict
|
||||
& (resolved_branch_i.cf_type == ariane_pkg::JumpR);
|
||||
assign btb_update.pc = resolved_branch_i.pc;
|
||||
assign btb_update.target_address = resolved_branch_i.target_address;
|
||||
|
||||
// -------------------
|
||||
// Next PC
|
||||
// -------------------
|
||||
// next PC (NPC) can come from (lowest to highest precedence, later entries override earlier ones):
|
||||
// 0. Branch Predict taken
|
||||
// 1. Default assignment (next sequential fetch address)
|
||||
// 2. Replay instruction fetch
|
||||
// 3. Control flow change request (misprediction)
|
||||
// 4. Return from environment call
|
||||
// 5. Exception/Interrupt
|
||||
// 6. Pipeline Flush because of CSR side effects
|
||||
// 7. Debug request
|
||||
// Mis-predict handling is a little bit different
|
||||
// select PC a.k.a PC Gen
|
||||
always_comb begin : npc_select
  // Address presented to the instruction cache in this cycle.
  automatic logic [riscv::VLEN-1:0] fetch_vaddr;

  // Base fetch address. While npc_rst_load_q is set the boot address is used
  // instead of npc_q. This is a workaround: some tools have issues with
  // boot_addr_i appearing in the asynchronous reset assignment of npc_q, even
  // though boot_addr_i is assigned a constant on the top-level.
  if (npc_rst_load_q) begin
    fetch_vaddr = boot_addr_i;
  end else begin
    fetch_vaddr = npc_q;
  end

  // 0. Branch Prediction: fetch from the predicted address instead
  if (bp_valid) begin
    fetch_vaddr = predict_address;
  end
  icache_dreq_o.vaddr = fetch_vaddr;

  // Everything below only affects npc_d. The statements are ordered from
  // lowest to highest precedence: a later assignment overrides earlier ones.

  // Without any other request the next PC simply tracks the fetch address.
  npc_d = fetch_vaddr;

  // 1. Default assignment: advance to the next 4-byte aligned word
  if (if_ready) npc_d = {fetch_vaddr[riscv::VLEN-1:2], 2'b0} + 'h4;

  // 2. Replay instruction fetch
  if (replay) npc_d = replay_addr;

  // 3. Control flow change request
  if (is_mispredict) npc_d = resolved_branch_i.target_address;

  // 4. Return from environment call
  if (eret_i) npc_d = epc_i;

  // 5. Exception/Interrupt
  if (ex_valid_i) npc_d = trap_vector_base_i;

  // 6. Pipeline Flush because of CSR side effects
  // On a pipeline flush start fetching from the next address of the
  // instruction in the commit stage. We either came here from a flush request
  // of a CSR instruction or AMO; as those do not exist in a compressed form we
  // can unconditionally do PC + 4 here. If the commit stage is halted, just
  // take the current pc of the instruction in the commit stage.
  // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
  if (set_pc_commit_i) npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN - 3{1'b0}}, 3'b100});

  // 7. Debug: enter debug on a hard-coded base-address
  if (CVA6Cfg.DebugEn && set_debug_pc_i) begin
    npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0];
  end
end
|
||||
|
||||
logic [FETCH_WIDTH-1:0] icache_data;
|
||||
// re-align the cache line
|
||||
assign icache_data = icache_dreq_i.data >> {shamt, 4'b0};
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    // PC generation state; npc_rst_load_q is set so that the boot address is
    // picked up on the first cycle after reset
    npc_rst_load_q    <= 1'b1;
    npc_q             <= '0;
    speculative_q     <= '0;
    // registered I$ response
    icache_valid_q    <= 1'b0;
    icache_data_q     <= '0;
    icache_vaddr_q    <= 'b0;
    icache_ex_valid_q <= ariane_pkg::FE_NONE;
    // saved predictions
    btb_q             <= '0;
    bht_q             <= '0;
  end else begin
    // updated every cycle
    npc_rst_load_q <= 1'b0;
    npc_q          <= npc_d;
    speculative_q  <= speculative_d;
    icache_valid_q <= icache_dreq_i.valid;

    // the payload registers are only updated on a valid I$ response
    if (icache_dreq_i.valid) begin
      icache_vaddr_q <= icache_dreq_i.vaddr;
      icache_data_q  <= icache_data;

      // save the uppermost prediction
      bht_q <= bht_prediction[INSTR_PER_FETCH-1];
      btb_q <= btb_prediction[INSTR_PER_FETCH-1];

      // Map the exception causes which can occur in the frontend to the
      // compact frontend exception enum
      if (ariane_pkg::MMU_PRESENT && icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin
        icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT;
      end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin
        icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT;
      end else begin
        icache_ex_valid_q <= ariane_pkg::FE_NONE;
      end
    end
  end
end
|
||||
|
||||
if (CVA6Cfg.RASDepth == 0) begin
|
||||
assign ras_predict = '0;
|
||||
end else begin : ras_gen
|
||||
ras #(
|
||||
.CVA6Cfg(CVA6Cfg),
|
||||
.DEPTH (CVA6Cfg.RASDepth)
|
||||
) i_ras (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i(flush_bp_i),
|
||||
.push_i (ras_push),
|
||||
.pop_i (ras_pop),
|
||||
.data_i (ras_update),
|
||||
.data_o (ras_predict)
|
||||
);
|
||||
end
|
||||
|
||||
//For FPGA, BTB is implemented in read synchronous BRAM
|
||||
//while for ASIC, BTB is implemented in D flip-flop
|
||||
//and can be read at the same cycle.
|
||||
assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;
|
||||
|
||||
if (CVA6Cfg.BTBEntries == 0) begin
|
||||
assign btb_prediction = '0;
|
||||
end else begin : btb_gen
|
||||
btb #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.NR_ENTRIES(CVA6Cfg.BTBEntries)
|
||||
) i_btb (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i (flush_bp_i),
|
||||
.debug_mode_i,
|
||||
.vpc_i (vpc_btb),
|
||||
.btb_update_i (btb_update),
|
||||
.btb_prediction_o(btb_prediction)
|
||||
);
|
||||
end
|
||||
|
||||
if (CVA6Cfg.BHTEntries == 0) begin
|
||||
assign bht_prediction = '0;
|
||||
end else begin : bht_gen
|
||||
bht #(
|
||||
.CVA6Cfg (CVA6Cfg),
|
||||
.NR_ENTRIES(CVA6Cfg.BHTEntries)
|
||||
) i_bht (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i (flush_bp_i),
|
||||
.debug_mode_i,
|
||||
.vpc_i (icache_vaddr_q),
|
||||
.bht_update_i (bht_update),
|
||||
.bht_prediction_o(bht_prediction)
|
||||
);
|
||||
end
|
||||
|
||||
// we need to inspect up to INSTR_PER_FETCH instructions for branches
|
||||
// and jumps
|
||||
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan
|
||||
instr_scan #(
|
||||
.CVA6Cfg(CVA6Cfg)
|
||||
) i_instr_scan (
|
||||
.instr_i (instr[i]),
|
||||
.rvi_return_o(rvi_return[i]),
|
||||
.rvi_call_o (rvi_call[i]),
|
||||
.rvi_branch_o(rvi_branch[i]),
|
||||
.rvi_jalr_o (rvi_jalr[i]),
|
||||
.rvi_jump_o (rvi_jump[i]),
|
||||
.rvi_imm_o (rvi_imm[i]),
|
||||
.rvc_branch_o(rvc_branch[i]),
|
||||
.rvc_jump_o (rvc_jump[i]),
|
||||
.rvc_jr_o (rvc_jr[i]),
|
||||
.rvc_return_o(rvc_return[i]),
|
||||
.rvc_jalr_o (rvc_jalr[i]),
|
||||
.rvc_call_o (rvc_call[i]),
|
||||
.rvc_imm_o (rvc_imm[i])
|
||||
);
|
||||
end
|
||||
|
||||
instr_queue #(
|
||||
.CVA6Cfg(CVA6Cfg)
|
||||
) i_instr_queue (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (flush_i),
|
||||
.instr_i (instr), // from re-aligner
|
||||
.addr_i (addr), // from re-aligner
|
||||
.exception_i (icache_ex_valid_q), // from I$
|
||||
.exception_addr_i (icache_vaddr_q),
|
||||
.predict_address_i (predict_address),
|
||||
.cf_type_i (cf_type),
|
||||
.valid_i (instruction_valid), // from re-aligner
|
||||
.consumed_o (instr_queue_consumed),
|
||||
.ready_o (instr_queue_ready),
|
||||
.replay_o (replay),
|
||||
.replay_addr_o (replay_addr),
|
||||
.fetch_entry_o (fetch_entry_o), // to back-end
|
||||
.fetch_entry_valid_o(fetch_entry_valid_o), // to back-end
|
||||
.fetch_entry_ready_i(fetch_entry_ready_i) // to back-end
|
||||
);
|
||||
|
||||
// pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
initial begin
|
||||
assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64)
|
||||
else $fatal(1, "[frontend] fetch width != not supported");
|
||||
end
|
||||
`endif
|
||||
// pragma translate_on
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,459 @@
|
|||
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 26.10.2018
|
||||
|
||||
// Description: Instruction Queue, separates instruction front-end from processor
|
||||
// back-end.
|
||||
//
|
||||
// This is an optimized instruction queue which supports the handling of
|
||||
// compressed instructions (16 bit instructions). Internally it is organized as
|
||||
// FETCH_ENTRY x 32 bit queues which are filled in a consecutive manner. Two pointers
|
||||
// (`idx_is_q` and `idx_ds_q`) point into the fill port and the read port. The read port
|
||||
// is designed so that it will easily allow for multiple issue implementation.
|
||||
// The input supports arbitrary power of two instruction fetch widths.
|
||||
//
|
||||
// The queue supports handling of branch prediction and will take care of
|
||||
// only saving a valid instruction stream.
|
||||
//
|
||||
// Furthermore it contains a replay interface in case the instruction queue
|
||||
// is already full. As instructions are in general easily replayed this should
|
||||
// increase the efficiency as I$ misses are potentially hidden. This stands in
|
||||
// contrast to pessimistic actions (early stalling) or credit based approaches.
|
||||
// Credit based systems might be difficult to implement with the current system
|
||||
// as we do not exactly know how much space we are going to need in the fifos
|
||||
// as each instruction can take either one or two slots.
|
||||
//
|
||||
// So the consumed/valid interface degenerates to an `information` interface. If the
|
||||
// upstream circuit keeps pushing, the queue will discard the information
|
||||
// and start replaying from the point where it could last manage to accept instructions.
|
||||
//
|
||||
// The instruction front-end will stop issuing instructions as soon as the
|
||||
// fifo is full. This will gate the logic if the processor is e.g.: halted
|
||||
//
|
||||
// TODO(zarubaf): The instruction queues can be reduced to 16 bit. Potentially
|
||||
// the replay mechanism gets more complicated as it can be that a 32 bit instruction
|
||||
// can not be pushed at once.
|
||||
|
||||
module instr_queue
|
||||
import ariane_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic flush_i,
|
||||
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i,
|
||||
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i,
|
||||
input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i,
|
||||
output logic ready_o,
|
||||
output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o,
|
||||
// we've encountered an exception, at this point the only possible exceptions are page-table faults
|
||||
input ariane_pkg::frontend_exception_t exception_i,
|
||||
input logic [riscv::VLEN-1:0] exception_addr_i,
|
||||
// branch predict
|
||||
input logic [riscv::VLEN-1:0] predict_address_i,
|
||||
input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i,
|
||||
// replay instruction because one of the FIFO was already full
|
||||
output logic replay_o,
|
||||
output logic [riscv::VLEN-1:0] replay_addr_o, // address at which to replay this instruction
|
||||
// to processor backend
|
||||
output ariane_pkg::fetch_entry_t fetch_entry_o,
|
||||
output logic fetch_entry_valid_o,
|
||||
input logic fetch_entry_ready_i
|
||||
);
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] instr; // instruction word
|
||||
ariane_pkg::cf_t cf; // branch was taken
|
||||
ariane_pkg::frontend_exception_t ex; // exception happened
|
||||
logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception
|
||||
} instr_data_t;
|
||||
|
||||
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] branch_index;
|
||||
// instruction queues
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0][$clog2(
|
||||
ariane_pkg::FETCH_FIFO_DEPTH
|
||||
)-1:0] instr_queue_usage;
|
||||
instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo;
|
||||
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr;
|
||||
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full;
|
||||
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty;
|
||||
logic instr_overflow;
|
||||
// address queue
|
||||
logic [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage;
|
||||
logic [ riscv::VLEN-1:0] address_out;
|
||||
logic pop_address;
|
||||
logic push_address;
|
||||
logic full_address;
|
||||
logic empty_address;
|
||||
logic address_overflow;
|
||||
// input stream counter
|
||||
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q;
|
||||
// Registers
|
||||
// output FIFO select, one-hot
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q;
|
||||
logic [riscv::VLEN-1:0] pc_d, pc_q; // current PC
|
||||
logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush
|
||||
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask;
|
||||
logic branch_empty;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken;
|
||||
// shift amount, e.g.: instructions we want to retire
|
||||
logic [ariane_pkg::LOG2_INSTR_PER_FETCH:0] popcount;
|
||||
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] shamt;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] consumed_extended;
|
||||
// FIFO mask
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] fifo_pos_extended;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_pos;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*2-1:0][31:0] instr;
|
||||
ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH*2-1:0] cf;
|
||||
// replay interface
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_overflow_fifo;
|
||||
|
||||
assign ready_o = ~(|instr_queue_full) & ~full_address;
|
||||
|
||||
if (ariane_pkg::RVC) begin : gen_multiple_instr_per_fetch_with_C
|
||||
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken
|
||||
assign taken[i] = cf_type_i[i] != ariane_pkg::NoCF;
|
||||
end
|
||||
|
||||
// calculate a branch mask, e.g.: get the first taken branch
|
||||
lzc #(
|
||||
.WIDTH(ariane_pkg::INSTR_PER_FETCH),
|
||||
.MODE (0) // count trailing zeros
|
||||
) i_lzc_branch_index (
|
||||
.in_i (taken), // we want to count trailing zeros
|
||||
.cnt_o (branch_index), // first branch on branch_index
|
||||
.empty_o(branch_empty)
|
||||
);
|
||||
|
||||
|
||||
// the first index is for sure valid
|
||||
// for example (64 bit fetch):
|
||||
// taken mask: 0 1 1 0
|
||||
// leading zero count = 1
|
||||
// 0 0 0 1, 1 1 1 << 1 = 0 0 1 1, 1 1 0
|
||||
// take the upper 4 bits: 0 0 1 1
|
||||
assign branch_mask_extended = {{{ariane_pkg::INSTR_PER_FETCH-1}{1'b0}}, {{ariane_pkg::INSTR_PER_FETCH}{1'b1}}} << branch_index;
|
||||
assign branch_mask = branch_mask_extended[ariane_pkg::INSTR_PER_FETCH * 2 - 2:ariane_pkg::INSTR_PER_FETCH - 1];
|
||||
|
||||
// mask with taken branches to get the actual amount of instructions we want to push
|
||||
assign valid = valid_i & branch_mask;
|
||||
// rotate right again
|
||||
assign consumed_extended = {push_instr_fifo, push_instr_fifo} >> idx_is_q;
|
||||
assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0];
|
||||
// count the numbers of valid instructions we've pushed from this package
|
||||
popcount #(
|
||||
.INPUT_WIDTH(ariane_pkg::INSTR_PER_FETCH)
|
||||
) i_popcount (
|
||||
.data_i (push_instr_fifo),
|
||||
.popcount_o(popcount)
|
||||
);
|
||||
assign shamt = popcount[$bits(shamt)-1:0];
|
||||
|
||||
// save the shift amount for next cycle
|
||||
assign idx_is_d = idx_is_q + shamt;
|
||||
|
||||
// ----------------------
|
||||
// Input interface
|
||||
// ----------------------
|
||||
// rotate left by the current position
|
||||
assign fifo_pos_extended = {valid, valid} << idx_is_q;
|
||||
// we just care about the upper bits
|
||||
assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH];
|
||||
// the fifo_position signal can directly be used to guide the push signal of each FIFO
|
||||
// make sure it is not full
|
||||
assign push_instr = fifo_pos & ~instr_queue_full;
|
||||
|
||||
// duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input
|
||||
assign instr[i] = instr_i[i];
|
||||
assign instr[i+ariane_pkg::INSTR_PER_FETCH] = instr_i[i];
|
||||
assign cf[i] = cf_type_i[i];
|
||||
assign cf[i+ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i];
|
||||
end
|
||||
|
||||
// shift the inputs
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_fifo_input_select
|
||||
/* verilator lint_off WIDTH */
|
||||
assign instr_data_in[i].instr = instr[i+idx_is_q];
|
||||
assign instr_data_in[i].cf = cf[i+idx_is_q];
|
||||
assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet
|
||||
assign instr_data_in[i].ex_vaddr = exception_addr_i;
|
||||
/* verilator lint_on WIDTH */
|
||||
end
|
||||
end else begin : gen_multiple_instr_per_fetch_without_C
|
||||
|
||||
assign taken = '0;
|
||||
assign branch_empty = '0;
|
||||
assign branch_index = '0;
|
||||
assign branch_mask_extended = '0;
|
||||
assign branch_mask = '0;
|
||||
assign consumed_extended = '0;
|
||||
assign fifo_pos_extended = '0;
|
||||
assign fifo_pos = '0;
|
||||
assign instr = '0;
|
||||
assign popcount = '0;
|
||||
assign shamt = '0;
|
||||
assign valid = '0;
|
||||
|
||||
|
||||
assign consumed_o = push_instr_fifo[0];
|
||||
// ----------------------
|
||||
// Input interface
|
||||
// ----------------------
|
||||
assign push_instr = valid_i & ~instr_queue_full;
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
assign instr_data_in[0].instr = instr_i[0];
|
||||
assign instr_data_in[0].cf = cf_type_i[0];
|
||||
assign instr_data_in[0].ex = exception_i; // exceptions hold for the whole fetch packet
|
||||
assign instr_data_in[0].ex_vaddr = exception_addr_i;
|
||||
/* verilator lint_on WIDTH */
|
||||
end
|
||||
|
||||
// ----------------------
|
||||
// Replay Logic
|
||||
// ----------------------
|
||||
// We need to replay a instruction fetch iff:
|
||||
// 1. One of the instruction data FIFOs was full and we needed it
|
||||
// (e.g.: we pushed and it was full)
|
||||
// 2. The address/branch predict FIFO was full
|
||||
// if one of the FIFOs was full we need to replay the faulting instruction
|
||||
if (ariane_pkg::RVC == 1'b1) begin : gen_instr_overflow_fifo_with_C
|
||||
assign instr_overflow_fifo = instr_queue_full & fifo_pos;
|
||||
end else begin : gen_instr_overflow_fifo_without_C
|
||||
assign instr_overflow_fifo = instr_queue_full & valid_i;
|
||||
end
|
||||
assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed
|
||||
assign address_overflow = full_address & push_address;
|
||||
assign replay_o = instr_overflow | address_overflow;
|
||||
|
||||
if (ariane_pkg::RVC) begin : gen_replay_addr_o_with_c
|
||||
// select the address, in the case of an address fifo overflow just
|
||||
// use the base of this package
|
||||
// if we successfully pushed some instructions we can output the next instruction
|
||||
// which we didn't manage to push
|
||||
assign replay_addr_o = (address_overflow) ? addr_i[0] : addr_i[shamt];
|
||||
end else begin : gen_replay_addr_o_without_C
|
||||
assign replay_addr_o = addr_i[0];
|
||||
end
|
||||
|
||||
// ----------------------
|
||||
// Downstream interface
|
||||
// ----------------------
|
||||
// as long as there is at least one queue which can take the value we have a valid instruction
|
||||
assign fetch_entry_valid_o = ~(&instr_queue_empty);
|
||||
|
||||
if (ariane_pkg::RVC) begin : gen_downstream_itf_with_c
|
||||
always_comb begin
  // Output side of the queue: idx_ds_q is a one-hot select of the FIFO whose
  // head entry is presented on fetch_entry_o.

  // rotate the one-hot read pointer left whenever the consumer is ready
  idx_ds_d = idx_ds_q;
  if (fetch_entry_ready_i) begin
    idx_ds_d = {
      idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1]
    };
  end

  // defaults, used when no bit of idx_ds_q is set
  pop_instr                                    = '0;
  fetch_entry_o.address                        = pc_q;
  fetch_entry_o.instruction                    = '0;
  fetch_entry_o.branch_predict.cf              = ariane_pkg::NoCF;
  fetch_entry_o.branch_predict.predict_address = address_out;
  fetch_entry_o.ex.valid                       = 1'b0;
  fetch_entry_o.ex.tval                        = '0;
  fetch_entry_o.ex.cause                       = '0;

  // output mux: assemble the fetch entry from the selected FIFO
  for (int unsigned sel = 0; sel < ariane_pkg::INSTR_PER_FETCH; sel++) begin
    if (idx_ds_q[sel]) begin
      // only pop on a successful handshake
      pop_instr[sel] = fetch_entry_valid_o & fetch_entry_ready_i;

      fetch_entry_o.instruction       = instr_data_out[sel].instr;
      fetch_entry_o.branch_predict.cf = instr_data_out[sel].cf;

      // expand the stored frontend exception into the exception struct
      fetch_entry_o.ex.valid = instr_data_out[sel].ex != ariane_pkg::FE_NONE;
      fetch_entry_o.ex.tval = {
        {(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[sel].ex_vaddr
      };
      // the cause defaults to a page fault unless an access fault was recorded
      if (instr_data_out[sel].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin
        fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT;
      end else begin
        fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
      end
    end
  end
end
|
||||
end else begin : gen_downstream_itf_without_c
|
||||
always_comb begin
  // Output side of the queue for the configuration without compressed
  // instructions: only FIFO 0 is read and popped, so both stream pointers are
  // tied to zero.
  idx_ds_d = '0;
  idx_is_d = '0;
  fetch_entry_o.instruction = instr_data_out[0].instr;
  fetch_entry_o.address = pc_q;

  // expand the stored frontend exception into the exception struct
  fetch_entry_o.ex.valid = instr_data_out[0].ex != ariane_pkg::FE_NONE;
  // the cause defaults to a page fault unless an access fault was recorded
  if (instr_data_out[0].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin
    fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT;
  end else begin
    fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
  end
  // Zero-extend the faulting virtual address to XLEN bits. This uses
  // riscv::XLEN (as the RVC variant of this interface does) instead of a
  // hard-coded 64 so that the width is also correct when XLEN != 64.
  fetch_entry_o.ex.tval = {{(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[0].ex_vaddr};

  fetch_entry_o.branch_predict.predict_address = address_out;
  fetch_entry_o.branch_predict.cf = instr_data_out[0].cf;

  // only pop on a successful handshake
  pop_instr[0] = fetch_entry_valid_o & fetch_entry_ready_i;
end
|
||||
end
|
||||
|
||||
// TODO(zarubaf): This needs to change for dual-issue
|
||||
// if the handshaking is successful and we had a prediction pop one address entry
|
||||
assign pop_address = ((fetch_entry_o.branch_predict.cf != ariane_pkg::NoCF) & |pop_instr);
|
||||
|
||||
// ----------------------
|
||||
// Calculate (Next) PC
|
||||
// ----------------------
|
||||
always_comb begin
  // Next value of the PC that accompanies the instruction on fetch_entry_o.
  // Later assignments to pc_d take precedence over earlier ones.

  // a flush arms the address reset; it stays armed until it is served below
  reset_address_d = flush_i | reset_address_q;
  pc_d = pc_q;

  if (fetch_entry_ready_i) begin
    // TODO(zarubaf): This needs to change for a dual issue implementation
    // advance the PC
    if (ariane_pkg::RVC == 1'b1) begin : gen_pc_with_c_extension
      // instructions whose two LSBs are 2'b11 are 4 bytes wide, all others 2 bytes
      pc_d = pc_q + ((fetch_entry_o.instruction[1:0] == 2'b11) ? 'd4 : 'd2);
    end else begin : gen_pc_without_c_extension
      pc_d = pc_q + 'd4;
    end
  end

  // an address entry is popped: continue at the stored predicted address
  if (pop_address) begin
    pc_d = address_out;
  end

  // we previously flushed so we need to reset the address
  if (valid_i[0] && reset_address_q) begin
    // this is the base of the first instruction
    reset_address_d = 1'b0;
    pc_d = addr_i[0];
  end
end
|
||||
|
||||
// FIFOs
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_instr_fifo
|
||||
// Make sure we don't save any instructions if we couldn't save the address
|
||||
assign push_instr_fifo[i] = push_instr[i] & ~address_overflow;
|
||||
fifo_v3 #(
|
||||
.DEPTH(ariane_pkg::FETCH_FIFO_DEPTH),
|
||||
.dtype(instr_data_t)
|
||||
) i_fifo_instr_data (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (flush_i),
|
||||
.testmode_i(1'b0),
|
||||
.full_o (instr_queue_full[i]),
|
||||
.empty_o (instr_queue_empty[i]),
|
||||
.usage_o (instr_queue_usage[i]),
|
||||
.data_i (instr_data_in[i]),
|
||||
.push_i (push_instr_fifo[i]),
|
||||
.data_o (instr_data_out[i]),
|
||||
.pop_i (pop_instr[i])
|
||||
);
|
||||
end
|
||||
// or reduce and check whether we are retiring a taken branch (might be that the corresponding)
|
||||
// fifo is full.
|
||||
always_comb begin
  // OR-reduce over all instruction FIFOs: the predicted address has to be
  // saved as soon as any entry that is being pushed carries a control flow
  // change.
  push_address = 1'b0;
  for (int slot = 0; slot < ariane_pkg::INSTR_PER_FETCH; slot++) begin
    push_address = push_address
                 | ((instr_data_in[slot].cf != ariane_pkg::NoCF) & push_instr[slot]);
  end
end
|
||||
|
||||
fifo_v3 #(
|
||||
.DEPTH (ariane_pkg::FETCH_FIFO_DEPTH), // TODO(zarubaf): Fork out to separate param
|
||||
.DATA_WIDTH(riscv::VLEN)
|
||||
) i_fifo_address (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i (flush_i),
|
||||
.testmode_i(1'b0),
|
||||
.full_o (full_address),
|
||||
.empty_o (empty_address),
|
||||
.usage_o (address_queue_usage),
|
||||
.data_i (predict_address_i),
|
||||
.push_i (push_address & ~full_address),
|
||||
.data_o (address_out),
|
||||
.pop_i (pop_address)
|
||||
);
|
||||
|
||||
unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage}));
|
||||
unread i_unread_branch_mask (.d_i(|branch_mask_extended));
|
||||
unread i_unread_lzc (.d_i(|{branch_empty}));
|
||||
unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals
|
||||
unread i_unread_instr_fifo (.d_i(|instr_queue_usage));
|
||||
|
||||
if (ariane_pkg::RVC) begin : gen_pc_q_with_c
  // State registers of the queue when compressed instructions are supported.
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      pc_q            <= '0;
      reset_address_q <= 1'b1;
      // binary encoded
      idx_is_q        <= '0;
      // one-hot encoded
      idx_ds_q        <= 'b1;
    end else begin
      pc_q <= pc_d;
      if (flush_i) begin
        // a flush re-arms the address reset and rewinds both stream pointers
        reset_address_q <= 1'b1;
        // binary encoded
        idx_is_q        <= '0;
        // one-hot encoded
        idx_ds_q        <= 'b1;
      end else begin
        reset_address_q <= reset_address_d;
        idx_is_q        <= idx_is_d;
        idx_ds_q        <= idx_ds_d;
      end
    end
  end
end else begin : gen_pc_q_without_C
  // Without compressed instructions both stream pointers are constant zero.
  assign idx_ds_q = '0;
  assign idx_is_q = '0;
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      pc_q            <= '0;
      reset_address_q <= 1'b1;
    end else begin
      pc_q <= pc_d;
      if (flush_i) begin
        // a flush re-arms the address reset
        reset_address_q <= 1'b1;
      end else begin
        reset_address_q <= reset_address_d;
      end
    end
  end
end
|
||||
|
||||
// pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
replay_address_fifo :
|
||||
assert property (@(posedge clk_i) disable iff (!rst_ni) replay_o |-> !i_fifo_address.push_i)
|
||||
else $fatal(1, "[instr_queue] Pushing address although replay asserted");
|
||||
|
||||
output_select_onehot :
|
||||
assert property (@(posedge clk_i) $onehot0(idx_ds_q))
|
||||
else begin
|
||||
$error("Output select should be one-hot encoded");
|
||||
$stop();
|
||||
end
|
||||
`endif
|
||||
// pragma translate_on
|
||||
endmodule
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 2.0 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 08.02.2018
|
||||
// Migrated: Luis Vitorio Cargnini, IEEE
|
||||
// Date: 09.06.2018
|
||||
|
||||
// ------------------------------
|
||||
// Instruction Scanner
|
||||
// ------------------------------
|
||||
// Combinational pre-decoder: classifies a single instruction word as a
// branch / jump / call / return, separately for the uncompressed (rvi_*) and
// the compressed (rvc_*) interpretation, and extracts the matching immediate.
module instr_scan #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic [             31:0] instr_i,       // expect aligned instruction, compressed or not
    output logic                     rvi_return_o,
    output logic                     rvi_call_o,
    output logic                     rvi_branch_o,
    output logic                     rvi_jalr_o,
    output logic                     rvi_jump_o,
    output logic [riscv::VLEN-1:0]   rvi_imm_o,
    output logic                     rvc_branch_o,
    output logic                     rvc_jump_o,
    output logic                     rvc_jr_o,
    output logic                     rvc_return_o,
    output logic                     rvc_jalr_o,
    output logic                     rvc_call_o,
    output logic [riscv::VLEN-1:0]   rvc_imm_o
);

  // ------------------------------
  // Shared field decodes
  // ------------------------------
  // anything whose two LSBs are not 2'b11 is treated as compressed
  logic is_rvc;
  assign is_rvc = (instr_i[1:0] != 2'b11);

  // compressed instruction whose two LSBs select OpcodeC1
  logic is_c1;
  assign is_c1 = is_rvc & (instr_i[1:0] == riscv::OpcodeC1);

  // bits [11:7] / [19:15] name one of the link registers x1 or x5
  logic rd_is_link, rs1_is_link;
  assign rd_is_link  = (instr_i[11:7] == 5'd1) | (instr_i[11:7] == 5'd5);
  assign rs1_is_link = (instr_i[19:15] == 5'd1) | (instr_i[19:15] == 5'd5);

  // C.JAL is only decoded when XLEN is 32
  logic rv32_rvc_jal;
  assign rv32_rvc_jal = (riscv::XLEN == 32) & (instr_i[15:13] == riscv::OpcodeC1Jal) & is_c1;

  // matches on bits [31:30] and [28:0]; bit 29 is left unconstrained
  // NOTE(review): presumably this covers the SRET/MRET encodings - confirm against the ISA manual
  logic is_xret;
  assign is_xret = logic'(instr_i[31:30] == 2'b00)
                 & logic'(instr_i[28:0] == 29'b10000001000000000000001110011);

  // ------------------------------
  // Uncompressed (RVI) classification
  // ------------------------------
  assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch);
  assign rvi_jalr_o   = (instr_i[6:0] == riscv::OpcodeJalr);
  // an xRET is reported as a jump as well
  assign rvi_jump_o   = logic'(instr_i[6:0] == riscv::OpcodeJal) | is_xret;

  // return: JALR whose rs1 is either x1 or x5 and whose rd differs from rs1
  assign rvi_return_o = rvi_jalr_o & rs1_is_link & (instr_i[19:15] != instr_i[11:7]);
  // call: opcode is JAL[R] and the destination register is either x1 or x5
  assign rvi_call_o   = (rvi_jalr_o | rvi_jump_o) & rd_is_link;

  // bit 3 differentiates between the JAL and BRANCH opcode (JALR comes from BHT);
  // an xRET carries no immediate
  assign rvi_imm_o = is_xret ? '0
                   : instr_i[3] ? ariane_pkg::uj_imm(instr_i)
                                : ariane_pkg::sb_imm(instr_i);

  // ------------------------------
  // Compressed (RVC) classification
  // ------------------------------
  // C.J, plus C.JAL on 32-bit configurations
  assign rvc_jump_o = ((instr_i[15:13] == riscv::OpcodeC1J) & is_c1) | rv32_rvc_jal;

  // C.JR / C.JALR share this encoding (rs2 field is zero); bit 12 tells them apart
  logic is_jal_r;
  assign is_jal_r = is_rvc
                  & (instr_i[1:0] == riscv::OpcodeC2)
                  & (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd)
                  & (instr_i[6:2] == 5'b00000);

  // always links to register 0
  assign rvc_jr_o   = is_jal_r & ~instr_i[12];
  // always links to register 1 e.g.: it is a jump
  assign rvc_jalr_o = is_jal_r & instr_i[12];
  assign rvc_call_o = rvc_jalr_o | rv32_rvc_jal;

  assign rvc_branch_o = is_c1 & ( (instr_i[15:13] == riscv::OpcodeC1Beqz)
                                | (instr_i[15:13] == riscv::OpcodeC1Bnez));

  // return: C.JR whose rs1 (bits [11:7]) is x1 or x5
  assign rvc_return_o = rd_is_link & rvc_jr_o;

  // sign-extended immediates of the two compressed formats, both VLEN wide
  logic [riscv::VLEN-1:0] rvc_branch_imm, rvc_jump_imm;
  assign rvc_branch_imm = {
    {(riscv::VLEN - 8) {instr_i[12]}},
    instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0
  };
  assign rvc_jump_imm = {
    {(riscv::VLEN - 11) {instr_i[12]}},
    instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0
  };

  // bit 14 differentiates between the branch and the jump format
  assign rvc_imm_o = instr_i[14] ? rvc_branch_imm : rvc_jump_imm;

endmodule
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
//Copyright (C) 2018 to present,
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 2.0 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 08.02.2018
|
||||
// Migrated: Luis Vitorio Cargnini, IEEE
|
||||
// Date: 09.06.2018
|
||||
|
||||
// return address stack
|
||||
// Return address stack: a DEPTH-entry shift structure whose entry 0 is the
// top of the stack and is always presented on data_o.
module ras #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned DEPTH = 2
) (
    input  logic                   clk_i,
    input  logic                   rst_ni,   // asynchronous reset, active low
    input  logic                   flush_i,  // clear every entry
    input  logic                   push_i,   // push data_i onto the stack
    input  logic                   pop_i,    // drop the top entry
    input  logic [riscv::VLEN-1:0] data_i,
    output ariane_pkg::ras_t       data_o
);

  ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q;

  // the top of the stack is always visible
  assign data_o = stack_q[0];

  // Next-state logic, highest priority first:
  //   flush > simultaneous push+pop > pop > push > hold
  always_comb begin
    // hold by default
    stack_d = stack_q;

    if (flush_i) begin
      stack_d = '0;
    end else if (pop_i && push_i) begin
      // leave everything untouched and just replace the top of the stack
      // with the latest value
      stack_d[0].ra    = data_i;
      stack_d[0].valid = 1'b1;
    end else if (pop_i) begin
      // every entry moves one slot towards the top
      stack_d[DEPTH-2:0]     = stack_q[DEPTH-1:1];
      // we popped the value so invalidate the end of the stack
      stack_d[DEPTH-1].valid = 1'b0;
      stack_d[DEPTH-1].ra    = 'b0;
    end else if (push_i) begin
      // every entry moves one slot away from the top, the oldest one drops out
      stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0];
      // the new return address becomes the (valid) top entry
      stack_d[0].ra      = data_i;
      stack_d[0].valid   = 1'b1;
    end
  end

  // state register
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) stack_q <= '0;
    else stack_q <= stack_d;
  end

endmodule
|
||||
|
|
@ -0,0 +1,143 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 15.04.2017
|
||||
// Description: Instruction decode, contains the logic for decode,
|
||||
// issue and read operands.
|
||||
|
||||
// Instruction decode stage: optionally expands compressed instructions,
// decodes the fetch entry into a scoreboard entry and holds it in a single
// pipeline register until the issue stage acknowledges it.
module id_stage #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic                          clk_i,
    input  logic                          rst_ni,                // asynchronous reset, active low

    input  logic                          flush_i,               // invalidates the pipeline register
    input  logic                          debug_req_i,
    // from IF
    input  ariane_pkg::fetch_entry_t      fetch_entry_i,
    input  logic                          fetch_entry_valid_i,
    output logic                          fetch_entry_ready_o,   // acknowledge the instruction (fetch entry)
    // to issue stage
    output ariane_pkg::scoreboard_entry_t issue_entry_o,         // a decoded instruction
    output logic                          issue_entry_valid_o,   // issue entry is valid
    output logic                          is_ctrl_flow_o,        // the instruction we issue is a ctrl flow instruction
    input  logic                          issue_instr_ack_i,     // issue stage acknowledged sampling of instructions
    output logic                          rvfi_is_compressed_o,
    // from CSR file
    input  riscv::priv_lvl_t              priv_lvl_i,            // current privilege level
    input  riscv::xs_t                    fs_i,                  // floating point extension status
    input  logic [2:0]                    frm_i,                 // floating-point dynamic rounding mode
    input  riscv::xs_t                    vs_i,                  // vector extension status
    input  logic [1:0]                    irq_i,
    input  ariane_pkg::irq_ctrl_t         irq_ctrl_i,
    input  logic                          debug_mode_i,          // we are in debug mode
    input  logic                          tvm_i,
    input  logic                          tw_i,
    input  logic                          tsr_i
);

  // contents of the ID/ISSUE pipeline register
  typedef struct packed {
    logic                          valid;
    ariane_pkg::scoreboard_entry_t sbe;
    logic                          is_ctrl_flow;
  } issue_struct_t;

  issue_struct_t issue_n, issue_q;

  ariane_pkg::scoreboard_entry_t decoded_instruction;
  logic                          is_control_flow_instr;

  logic [31:0] instruction;
  logic        is_illegal;
  logic        is_compressed;

  // ---------------------------------------------------------
  // 1. Check if they are compressed and expand in case they are
  // ---------------------------------------------------------
  if (CVA6Cfg.RVC) begin
    compressed_decoder #(
        .CVA6Cfg(CVA6Cfg)
    ) compressed_decoder_i (
        .instr_i        (fetch_entry_i.instruction),
        .instr_o        (instruction),
        .illegal_instr_o(is_illegal),
        .is_compressed_o(is_compressed)
    );
  end else begin
    // without the C extension the fetched word is passed on unchanged
    assign instruction   = fetch_entry_i.instruction;
    assign is_illegal    = 1'b0;
    assign is_compressed = 1'b0;
  end

  assign rvfi_is_compressed_o = is_compressed;

  // ---------------------------------------------------------
  // 2. Decode and emit instruction to issue stage
  // ---------------------------------------------------------
  decoder #(
      .CVA6Cfg(CVA6Cfg)
  ) decoder_i (
      .debug_req_i            (debug_req_i),
      .irq_ctrl_i             (irq_ctrl_i),
      .irq_i                  (irq_i),
      .pc_i                   (fetch_entry_i.address),
      .is_compressed_i        (is_compressed),
      .is_illegal_i           (is_illegal),
      .instruction_i          (instruction),
      // the decoder also receives the lower half-word of the fetched instruction
      .compressed_instr_i     (fetch_entry_i.instruction[15:0]),
      .branch_predict_i       (fetch_entry_i.branch_predict),
      .ex_i                   (fetch_entry_i.ex),
      .priv_lvl_i             (priv_lvl_i),
      .debug_mode_i           (debug_mode_i),
      .fs_i                   (fs_i),
      .frm_i                  (frm_i),
      .vs_i                   (vs_i),
      .tvm_i                  (tvm_i),
      .tw_i                   (tw_i),
      .tsr_i                  (tsr_i),
      .instruction_o          (decoded_instruction),
      .is_control_flow_instr_o(is_control_flow_instr)
  );

  // ------------------
  // Pipeline Register
  // ------------------
  assign issue_entry_valid_o = issue_q.valid;
  assign issue_entry_o       = issue_q.sbe;
  assign is_ctrl_flow_o      = issue_q.is_ctrl_flow;

  always_comb begin
    // hold the register and do not accept anything by default
    issue_n             = issue_q;
    fetch_entry_ready_o = 1'b0;

    if ((issue_instr_ack_i || !issue_q.valid) && fetch_entry_valid_i) begin
      // the register is empty, or is being emptied by the issue stage in this
      // very cycle, and the fetch entry is valid: take the new instruction
      fetch_entry_ready_o = 1'b1;
      issue_n = '{
          valid: 1'b1,
          sbe: decoded_instruction,
          is_ctrl_flow: is_control_flow_instr
      };
    end else if (issue_instr_ack_i) begin
      // nothing to refill with: just clear the valid flag once issue has
      // acknowledged the instruction
      issue_n.valid = 1'b0;
    end

    // invalidate the pipeline register on a flush
    if (flush_i) issue_n.valid = 1'b0;
  end

  // -------------------------
  // Registers (ID <-> Issue)
  // -------------------------
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) issue_q <= '0;
    else issue_q <= issue_n;
  end

endmodule
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
// Copyright 2023 ETH Zurich and University of Bologna.
|
||||
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Authors: Matheus Cavalcante <matheusd@iis.ee.ethz.ch>
|
||||
// Nils Wistoff <nwistoff@iis.ee.ethz.ch>
|
||||
|
||||
// Package defining the accelerator interface as used by Ara + CVA6
|
||||
|
||||
// Types of the accelerator interface: one packed struct per direction.
// Both structs are packed, so the order of the fields below determines the
// bit layout and must not be changed.
package acc_pkg;

  // ----------------------
  // Accelerator Interface
  // ----------------------

  // Request-side bundle (carries req_valid / resp_ready handshake signals).
  typedef struct packed {
    logic                                 req_valid;
    logic                                 resp_ready;
    riscv::instruction_t                  insn;
    riscv::xlen_t                         rs1;
    riscv::xlen_t                         rs2;
    fpnew_pkg::roundmode_e                frm;
    logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id;
    logic                                 store_pending;
    // Invalidation interface
    logic                                 acc_cons_en;
    logic                                 inval_ready;
  } accelerator_req_t;

  // Response-side bundle (carries req_ready / resp_valid handshake signals).
  typedef struct packed {
    logic                                 req_ready;
    logic                                 resp_valid;
    riscv::xlen_t                         result;
    logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id;
    logic                                 error;
    // Metadata
    logic                                 store_pending;
    logic                                 store_complete;
    logic                                 load_complete;
    logic [4:0]                           fflags;
    logic                                 fflags_valid;
    // Invalidation interface
    logic                                 inval_valid;
    logic [63:0]                          inval_addr;
  } accelerator_resp_t;

endpackage
|
||||
|
|
@ -0,0 +1,994 @@
|
|||
/* Copyright 2018 ETH Zurich and University of Bologna.
|
||||
* Copyright and related rights are licensed under the Solderpad Hardware
|
||||
* License, Version 0.51 (the “License”); you may not use this file except in
|
||||
* compliance with the License. You may obtain a copy of the License at
|
||||
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
* or agreed to in writing, software, hardware and materials distributed under
|
||||
* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations under the License.
|
||||
*
|
||||
* File: ariane_pkg.sv
|
||||
* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
|
||||
* Date: 8.4.2017
|
||||
*
|
||||
* Description: Contains all the necessary defines for Ariane
|
||||
* in one package.
|
||||
*/
|
||||
|
||||
// this is needed to propagate the
|
||||
// configuration in case Ariane is
|
||||
// instantiated in OpenPiton
|
||||
`ifdef PITON_ARIANE
|
||||
`include "l15.tmp.h"
|
||||
`endif
|
||||
|
||||
/// This package contains `functions` and global defines for CVA6.
|
||||
/// *Note*: There are some parameters here as well which will eventually be
|
||||
/// moved out to favour a fully parameterizable core.
|
||||
package ariane_pkg;
|
||||
|
||||
// TODO: Slowly move those parameters to the new system.
|
||||
localparam NR_SB_ENTRIES = cva6_config_pkg::CVA6ConfigNrScoreboardEntries; // number of scoreboard entries
|
||||
localparam TRANS_ID_BITS = $clog2(
|
||||
NR_SB_ENTRIES
|
||||
); // depending on the number of scoreboard entries we need that many bits
|
||||
// to uniquely identify the entry in the scoreboard
|
||||
localparam ASID_WIDTH = (riscv::XLEN == 64) ? 16 : 1;
|
||||
localparam BITS_SATURATION_COUNTER = 2;
|
||||
|
||||
localparam ISSUE_WIDTH = 1;
|
||||
|
||||
// depth of store-buffers, this needs to be a power of two
|
||||
localparam logic [2:0] DEPTH_SPEC = 'd4;
|
||||
|
||||
localparam int unsigned DCACHE_TYPE = int'(cva6_config_pkg::CVA6ConfigDcacheType);
|
||||
// if DCACHE_TYPE = cva6_config_pkg::WT
|
||||
// we can use a small commit queue since we have a write buffer in the dcache
|
||||
// we could in principle do without the commit queue in this case, but the timing degrades if we do that due
|
||||
// to longer paths into the commit stage
|
||||
// if DCACHE_TYPE = cva6_config_pkg::WB
|
||||
// allocate more space for the commit buffer to be on the safe side, this needs to be a power of two
|
||||
localparam logic [2:0] DEPTH_COMMIT = 'd4;
|
||||
|
||||
localparam bit FPGA_EN = cva6_config_pkg::CVA6ConfigFPGAEn; // Is FPGA optimization of CV32A6
|
||||
|
||||
localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn; // Is C extension configuration
|
||||
|
||||
// Transprecision float unit
|
||||
localparam int unsigned LAT_COMP_FP32 = 'd2;
|
||||
localparam int unsigned LAT_COMP_FP64 = 'd3;
|
||||
localparam int unsigned LAT_COMP_FP16 = 'd1;
|
||||
localparam int unsigned LAT_COMP_FP16ALT = 'd1;
|
||||
localparam int unsigned LAT_COMP_FP8 = 'd1;
|
||||
localparam int unsigned LAT_DIVSQRT = 'd2;
|
||||
localparam int unsigned LAT_NONCOMP = 'd1;
|
||||
localparam int unsigned LAT_CONV = 'd2;
|
||||
|
||||
localparam riscv::xlen_t OPENHWGROUP_MVENDORID = {{riscv::XLEN - 32{1'b0}}, 32'h0602};
|
||||
localparam riscv::xlen_t ARIANE_MARCHID = {{riscv::XLEN - 32{1'b0}}, 32'd3};
|
||||
|
||||
// 32 registers
|
||||
localparam REG_ADDR_SIZE = 5;
|
||||
|
||||
// Read ports for general purpose register files
|
||||
localparam NR_RGPR_PORTS = 2;
|
||||
|
||||
// static debug hartinfo
|
||||
// debug causes
|
||||
localparam logic [2:0] CauseBreakpoint = 3'h1;
|
||||
localparam logic [2:0] CauseTrigger = 3'h2;
|
||||
localparam logic [2:0] CauseRequest = 3'h3;
|
||||
localparam logic [2:0] CauseSingleStep = 3'h4;
|
||||
// amount of data count registers implemented
|
||||
localparam logic [3:0] DataCount = 4'h2;
|
||||
|
||||
// address where data0-15 is shadowed or if shadowed in a CSR
|
||||
// address of the first CSR used for shadowing the data
|
||||
localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here
|
||||
typedef struct packed {
|
||||
logic [31:24] zero1;
|
||||
logic [23:20] nscratch;
|
||||
logic [19:17] zero0;
|
||||
logic dataaccess;
|
||||
logic [15:12] datasize;
|
||||
logic [11:0] dataaddr;
|
||||
} hartinfo_t;
|
||||
|
||||
localparam hartinfo_t DebugHartInfo = '{
|
||||
zero1: '0,
|
||||
nscratch: 2, // Debug module needs at least two scratch regs
|
||||
zero0: '0,
|
||||
dataaccess: 1'b1, // data registers are memory mapped in the debugger
|
||||
datasize: DataCount,
|
||||
dataaddr: DataAddr
|
||||
};
|
||||
|
||||
// enables a commit log which matches spikes commit log format for easier trace comparison
|
||||
localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1;
|
||||
|
||||
// ------------- Dangerous -------------
|
||||
// if set to zero a flush will not invalidate the cache-lines, in a single core environment
|
||||
// where coherence is not necessary this can improve performance. This needs to be switched on
|
||||
// when more than one core is in a system
|
||||
localparam logic INVALIDATE_ON_FLUSH = 1'b1;
|
||||
|
||||
`ifdef SPIKE_TANDEM
|
||||
// Spike still places 0 in TVAL for ENV_CALL_* exceptions.
|
||||
// This may eventually go away when Spike starts to handle TVAL for *all* exceptions.
|
||||
localparam bit ZERO_TVAL = 1'b1;
|
||||
`else
|
||||
localparam bit ZERO_TVAL = 1'b0;
|
||||
`endif
|
||||
// read mask for SSTATUS over MSTATUS
|
||||
// Mask built by OR-ing the riscv::SSTATUS_* field masks listed below.
// Each field is listed exactly once: the original expression OR-ed
// riscv::SSTATUS_SPIE in twice, which was redundant, so the resulting
// value is unchanged.
localparam logic [63:0] SMODE_STATUS_READ_MASK = riscv::SSTATUS_UIE
                                               | riscv::SSTATUS_SIE
                                               | riscv::SSTATUS_SPIE
                                               | riscv::SSTATUS_SPP
                                               | riscv::SSTATUS_FS
                                               | riscv::SSTATUS_XS
                                               | riscv::SSTATUS_SUM
                                               | riscv::SSTATUS_MXR
                                               | riscv::SSTATUS_UPIE
                                               | riscv::SSTATUS_UXL
                                               | riscv::SSTATUS_SD;
|
||||
|
||||
localparam logic [63:0] SMODE_STATUS_WRITE_MASK = riscv::SSTATUS_SIE
|
||||
| riscv::SSTATUS_SPIE
|
||||
| riscv::SSTATUS_SPP
|
||||
| riscv::SSTATUS_FS
|
||||
| riscv::SSTATUS_SUM
|
||||
| riscv::SSTATUS_MXR;
|
||||
// ---------------
|
||||
// AXI
|
||||
// ---------------
|
||||
|
||||
localparam FETCH_USER_WIDTH = cva6_config_pkg::CVA6ConfigFetchUserWidth;
|
||||
localparam DATA_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth;
|
||||
localparam AXI_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn | cva6_config_pkg::CVA6ConfigFetchUserEn;
|
||||
localparam AXI_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth;
|
||||
localparam DATA_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn;
|
||||
localparam FETCH_USER_EN = cva6_config_pkg::CVA6ConfigFetchUserEn;
|
||||
|
||||
typedef enum logic {
|
||||
SINGLE_REQ,
|
||||
CACHE_LINE_REQ
|
||||
} ad_req_t;
|
||||
|
||||
// ---------------
|
||||
// Fetch Stage
|
||||
// ---------------
|
||||
|
||||
// leave as is (fails with >8 entries and wider fetch width)
|
||||
localparam int unsigned FETCH_FIFO_DEPTH = 4;
|
||||
localparam int unsigned FETCH_WIDTH = 32;
|
||||
// maximum instructions we can fetch on one request (we support compressed instructions)
|
||||
localparam int unsigned INSTR_PER_FETCH = RVC == 1'b1 ? (FETCH_WIDTH / 16) : 1;
|
||||
localparam int unsigned LOG2_INSTR_PER_FETCH = RVC == 1'b1 ? $clog2(INSTR_PER_FETCH) : 1;
|
||||
|
||||
// Only use struct when signals have same direction
|
||||
// exception
|
||||
typedef struct packed {
|
||||
riscv::xlen_t cause; // cause of exception
|
||||
riscv::xlen_t tval; // additional information of causing exception (e.g.: instruction causing it),
|
||||
// address of LD/ST fault
|
||||
logic valid;
|
||||
} exception_t;
|
||||
|
||||
typedef enum logic [2:0] {
|
||||
NoCF, // No control flow prediction
|
||||
Branch, // Branch
|
||||
Jump, // Jump to address from immediate
|
||||
JumpR, // Jump to address from registers
|
||||
Return // Return Address Prediction
|
||||
} cf_t;
|
||||
|
||||
// branch-predict
|
||||
// this is the struct we get back from ex stage and we will use it to update
|
||||
// all the necessary data structures
|
||||
// bp_resolve_t
|
||||
typedef struct packed {
|
||||
logic valid; // prediction with all its values is valid
|
||||
logic [riscv::VLEN-1:0] pc; // PC of predict or mis-predict
|
||||
logic [riscv::VLEN-1:0] target_address; // target address at which to jump, or not
|
||||
logic is_mispredict; // set if this was a mis-predict
|
||||
logic is_taken; // branch is taken
|
||||
cf_t cf_type; // Type of control flow change
|
||||
} bp_resolve_t;
|
||||
|
||||
// branchpredict scoreboard entry
|
||||
// this is the struct which we will inject into the pipeline to guide the various
|
||||
// units towards the correct branch decision and resolve
|
||||
typedef struct packed {
|
||||
cf_t cf; // type of control flow prediction
|
||||
logic [riscv::VLEN-1:0] predict_address; // target address at which to jump, or not
|
||||
} branchpredict_sbe_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [riscv::VLEN-1:0] pc; // update at PC
|
||||
logic [riscv::VLEN-1:0] target_address;
|
||||
} btb_update_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [riscv::VLEN-1:0] target_address;
|
||||
} btb_prediction_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [riscv::VLEN-1:0] ra;
|
||||
} ras_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [riscv::VLEN-1:0] pc; // update at PC
|
||||
logic taken;
|
||||
} bht_update_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic taken;
|
||||
} bht_prediction_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [1:0] saturation_counter;
|
||||
} bht_t;
|
||||
|
||||
typedef enum logic [3:0] {
|
||||
NONE, // 0
|
||||
LOAD, // 1
|
||||
STORE, // 2
|
||||
ALU, // 3
|
||||
CTRL_FLOW, // 4
|
||||
MULT, // 5
|
||||
CSR, // 6
|
||||
FPU, // 7
|
||||
FPU_VEC, // 8
|
||||
CVXIF, // 9
|
||||
ACCEL // 10
|
||||
} fu_t;
|
||||
|
||||
localparam EXC_OFF_RST = 8'h80;
|
||||
|
||||
localparam SupervisorIrq = 1;
|
||||
localparam MachineIrq = 0;
|
||||
|
||||
// All information needed to determine whether we need to associate an interrupt
|
||||
// with the corresponding instruction or not.
|
||||
typedef struct packed {
|
||||
riscv::xlen_t mie;
|
||||
riscv::xlen_t mip;
|
||||
riscv::xlen_t mideleg;
|
||||
logic sie;
|
||||
logic global_enable;
|
||||
} irq_ctrl_t;
|
||||
|
||||
// ---------------
|
||||
// Cache config
|
||||
// ---------------
|
||||
|
||||
// for usage in OpenPiton we have to propagate the openpiton L15 configuration from l15.h
|
||||
`ifdef PITON_ARIANE
|
||||
|
||||
`ifndef CONFIG_L1I_CACHELINE_WIDTH
|
||||
`define CONFIG_L1I_CACHELINE_WIDTH 128
|
||||
`endif
|
||||
|
||||
`ifndef CONFIG_L1I_ASSOCIATIVITY
|
||||
`define CONFIG_L1I_ASSOCIATIVITY 4
|
||||
`endif
|
||||
|
||||
`ifndef CONFIG_L1I_SIZE
|
||||
`define CONFIG_L1I_SIZE 16*1024
|
||||
`endif
|
||||
|
||||
`ifndef CONFIG_L1D_CACHELINE_WIDTH
|
||||
`define CONFIG_L1D_CACHELINE_WIDTH 128
|
||||
`endif
|
||||
|
||||
`ifndef CONFIG_L1D_ASSOCIATIVITY
|
||||
`define CONFIG_L1D_ASSOCIATIVITY 8
|
||||
`endif
|
||||
|
||||
`ifndef CONFIG_L1D_SIZE
|
||||
`define CONFIG_L1D_SIZE 32*1024
|
||||
`endif
|
||||
|
||||
`ifndef L15_THREADID_WIDTH
|
||||
`define L15_THREADID_WIDTH 3
|
||||
`endif
|
||||
|
||||
// I$
|
||||
localparam int unsigned ICACHE_LINE_WIDTH = `CONFIG_L1I_CACHELINE_WIDTH;
|
||||
localparam int unsigned ICACHE_SET_ASSOC = `CONFIG_L1I_ASSOCIATIVITY;
|
||||
localparam int unsigned ICACHE_INDEX_WIDTH = $clog2(`CONFIG_L1I_SIZE / ICACHE_SET_ASSOC);
|
||||
localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH;
|
||||
localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : 128; // in bit
|
||||
// D$
|
||||
localparam int unsigned DCACHE_LINE_WIDTH = `CONFIG_L1D_CACHELINE_WIDTH;
|
||||
localparam int unsigned DCACHE_SET_ASSOC = `CONFIG_L1D_ASSOCIATIVITY;
|
||||
localparam int unsigned DCACHE_INDEX_WIDTH = $clog2(`CONFIG_L1D_SIZE / DCACHE_SET_ASSOC);
|
||||
localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH;
|
||||
localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : 128; // in bit
|
||||
localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH;
|
||||
|
||||
localparam int unsigned MEM_TID_WIDTH = `L15_THREADID_WIDTH;
|
||||
`else
|
||||
// I$
|
||||
localparam int unsigned CONFIG_L1I_SIZE = cva6_config_pkg::CVA6ConfigIcacheByteSize; // in byte
|
||||
localparam int unsigned ICACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigIcacheSetAssoc; // number of ways
|
||||
localparam int unsigned ICACHE_INDEX_WIDTH = $clog2(
|
||||
CONFIG_L1I_SIZE / ICACHE_SET_ASSOC
|
||||
); // in bit, contains also offset width
|
||||
localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH; // in bit
|
||||
localparam int unsigned ICACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit
|
||||
localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit
|
||||
// D$
|
||||
localparam int unsigned CONFIG_L1D_SIZE = cva6_config_pkg::CVA6ConfigDcacheByteSize; // in byte
|
||||
localparam int unsigned DCACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigDcacheSetAssoc; // number of ways
|
||||
localparam int unsigned DCACHE_INDEX_WIDTH = $clog2(
|
||||
CONFIG_L1D_SIZE / DCACHE_SET_ASSOC
|
||||
); // in bit, contains also offset width
|
||||
localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH; // in bit
|
||||
localparam int unsigned DCACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit
|
||||
localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit
|
||||
localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH;
|
||||
|
||||
localparam int unsigned MEM_TID_WIDTH = cva6_config_pkg::CVA6ConfigMemTidWidth;
|
||||
`endif
|
||||
|
||||
localparam int unsigned DCACHE_TID_WIDTH = cva6_config_pkg::CVA6ConfigDcacheIdWidth;
|
||||
|
||||
localparam int unsigned WT_DCACHE_WBUF_DEPTH = cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth;
|
||||
|
||||
// ---------------
|
||||
// EX Stage
|
||||
// ---------------
|
||||
|
||||
typedef enum logic [7:0] { // basic ALU op
|
||||
ADD,
|
||||
SUB,
|
||||
ADDW,
|
||||
SUBW,
|
||||
// logic operations
|
||||
XORL,
|
||||
ORL,
|
||||
ANDL,
|
||||
// shifts
|
||||
SRA,
|
||||
SRL,
|
||||
SLL,
|
||||
SRLW,
|
||||
SLLW,
|
||||
SRAW,
|
||||
// comparisons
|
||||
LTS,
|
||||
LTU,
|
||||
GES,
|
||||
GEU,
|
||||
EQ,
|
||||
NE,
|
||||
// jumps
|
||||
JALR,
|
||||
BRANCH,
|
||||
// set lower than operations
|
||||
SLTS,
|
||||
SLTU,
|
||||
// CSR functions
|
||||
MRET,
|
||||
SRET,
|
||||
DRET,
|
||||
ECALL,
|
||||
WFI,
|
||||
FENCE,
|
||||
FENCE_I,
|
||||
SFENCE_VMA,
|
||||
CSR_WRITE,
|
||||
CSR_READ,
|
||||
CSR_SET,
|
||||
CSR_CLEAR,
|
||||
// LSU functions
|
||||
LD,
|
||||
SD,
|
||||
LW,
|
||||
LWU,
|
||||
SW,
|
||||
LH,
|
||||
LHU,
|
||||
SH,
|
||||
LB,
|
||||
SB,
|
||||
LBU,
|
||||
// Atomic Memory Operations
|
||||
AMO_LRW,
|
||||
AMO_LRD,
|
||||
AMO_SCW,
|
||||
AMO_SCD,
|
||||
AMO_SWAPW,
|
||||
AMO_ADDW,
|
||||
AMO_ANDW,
|
||||
AMO_ORW,
|
||||
AMO_XORW,
|
||||
AMO_MAXW,
|
||||
AMO_MAXWU,
|
||||
AMO_MINW,
|
||||
AMO_MINWU,
|
||||
AMO_SWAPD,
|
||||
AMO_ADDD,
|
||||
AMO_ANDD,
|
||||
AMO_ORD,
|
||||
AMO_XORD,
|
||||
AMO_MAXD,
|
||||
AMO_MAXDU,
|
||||
AMO_MIND,
|
||||
AMO_MINDU,
|
||||
// Multiplications
|
||||
MUL,
|
||||
MULH,
|
||||
MULHU,
|
||||
MULHSU,
|
||||
MULW,
|
||||
// Divisions
|
||||
DIV,
|
||||
DIVU,
|
||||
DIVW,
|
||||
DIVUW,
|
||||
REM,
|
||||
REMU,
|
||||
REMW,
|
||||
REMUW,
|
||||
// Floating-Point Load and Store Instructions
|
||||
FLD,
|
||||
FLW,
|
||||
FLH,
|
||||
FLB,
|
||||
FSD,
|
||||
FSW,
|
||||
FSH,
|
||||
FSB,
|
||||
// Floating-Point Computational Instructions
|
||||
FADD,
|
||||
FSUB,
|
||||
FMUL,
|
||||
FDIV,
|
||||
FMIN_MAX,
|
||||
FSQRT,
|
||||
FMADD,
|
||||
FMSUB,
|
||||
FNMSUB,
|
||||
FNMADD,
|
||||
// Floating-Point Conversion and Move Instructions
|
||||
FCVT_F2I,
|
||||
FCVT_I2F,
|
||||
FCVT_F2F,
|
||||
FSGNJ,
|
||||
FMV_F2X,
|
||||
FMV_X2F,
|
||||
// Floating-Point Compare Instructions
|
||||
FCMP,
|
||||
// Floating-Point Classify Instruction
|
||||
FCLASS,
|
||||
// Vectorial Floating-Point Instructions that don't directly map onto the scalar ones
|
||||
VFMIN,
|
||||
VFMAX,
|
||||
VFSGNJ,
|
||||
VFSGNJN,
|
||||
VFSGNJX,
|
||||
VFEQ,
|
||||
VFNE,
|
||||
VFLT,
|
||||
VFGE,
|
||||
VFLE,
|
||||
VFGT,
|
||||
VFCPKAB_S,
|
||||
VFCPKCD_S,
|
||||
VFCPKAB_D,
|
||||
VFCPKCD_D,
|
||||
// Offload Instructions to be directed into cv_x_if
|
||||
OFFLOAD,
|
||||
// Or-Combine and REV8
|
||||
ORCB,
|
||||
REV8,
|
||||
// Bitwise Rotation
|
||||
ROL,
|
||||
ROLW,
|
||||
ROR,
|
||||
RORI,
|
||||
RORIW,
|
||||
RORW,
|
||||
// Sign and Zero Extend
|
||||
SEXTB,
|
||||
SEXTH,
|
||||
ZEXTH,
|
||||
// Count population
|
||||
CPOP,
|
||||
CPOPW,
|
||||
// Count Leading/Trailing Zeros
|
||||
CLZ,
|
||||
CLZW,
|
||||
CTZ,
|
||||
CTZW,
|
||||
// Carry less multiplication Op's
|
||||
CLMUL,
|
||||
CLMULH,
|
||||
CLMULR,
|
||||
// Single bit instructions Op's
|
||||
BCLR,
|
||||
BCLRI,
|
||||
BEXT,
|
||||
BEXTI,
|
||||
BINV,
|
||||
BINVI,
|
||||
BSET,
|
||||
BSETI,
|
||||
// Integer minimum/maximum
|
||||
MAX,
|
||||
MAXU,
|
||||
MIN,
|
||||
MINU,
|
||||
// Shift with Add Unsigned Word and Unsigned Word Op's (Bitmanip)
|
||||
SH1ADDUW,
|
||||
SH2ADDUW,
|
||||
SH3ADDUW,
|
||||
ADDUW,
|
||||
SLLIUW,
|
||||
// Shift with Add (Bitmanip)
|
||||
SH1ADD,
|
||||
SH2ADD,
|
||||
SH3ADD,
|
||||
// Bitmanip Logical with negate op (Bitmanip)
|
||||
ANDN,
|
||||
ORN,
|
||||
XNOR,
|
||||
// Accelerator operations
|
||||
ACCEL_OP,
|
||||
ACCEL_OP_FS1,
|
||||
ACCEL_OP_FD,
|
||||
ACCEL_OP_LOAD,
|
||||
ACCEL_OP_STORE,
|
||||
// Zicond instruction
|
||||
CZERO_EQZ,
|
||||
CZERO_NEZ
|
||||
} fu_op;
|
||||
|
||||
typedef struct packed {
|
||||
fu_t fu;
|
||||
fu_op operation;
|
||||
riscv::xlen_t operand_a;
|
||||
riscv::xlen_t operand_b;
|
||||
riscv::xlen_t imm;
|
||||
logic [TRANS_ID_BITS-1:0] trans_id;
|
||||
} fu_data_t;
|
||||
|
||||
// Returns 1'b1 when `op` is one of the comparison operations
// EQ, NE, LTS, GES, LTU or GEU, and 1'b0 for every other operation.
function automatic logic op_is_branch(input fu_op op);
  if (op inside {EQ, NE, LTS, GES, LTU, GEU}) begin
    return 1'b1;
  end
  // all other ops
  return 1'b0;
endfunction
|
||||
|
||||
// -------------------------------
|
||||
// Extract Src/Dst FP Reg from Op
|
||||
// -------------------------------
|
||||
// function used in instr_trace svh
|
||||
// is_rs1_fpr function is kept to allow cva6 compilation with instr_trace feature
|
||||
// Returns 1'b1 when the operation `op` reads rs1 from the floating-point
// register file, 1'b0 otherwise.
function automatic logic is_rs1_fpr(input fu_op op);
  if (op inside {
        [FMUL : FNMADD],       // Computational Operations (except ADD/SUB)
        FCVT_F2I,              // Float-Int Casts
        FCVT_F2F,              // Float-Float Casts
        FSGNJ,                 // Sign Injections
        FMV_F2X,               // FPR-GPR Moves
        FCMP,                  // Comparisons
        FCLASS,                // Classifications
        [VFMIN : VFCPKCD_D],   // Additional Vectorial FP ops
        ACCEL_OP_FS1           // Accelerator instructions
      }) begin
    return 1'b1;
  end
  // all other ops
  return 1'b0;
endfunction
|
||||
|
||||
// function used in instr_trace svh
|
||||
// is_rs2_fpr function is kept to allow cva6 compilation with instr_trace feature
|
||||
// Returns 1'b1 when the operation `op` reads rs2 from the floating-point
// register file, 1'b0 otherwise.
function automatic logic is_rs2_fpr(input fu_op op);
  if (op inside {
        [FSD : FSB],           // FP Stores
        [FADD : FMIN_MAX],     // Computational Operations (no sqrt)
        [FMADD : FNMADD],      // Fused Computational Operations
        FCVT_F2F,              // Vectorial F2F Conversions require target
        [FSGNJ : FMV_F2X],     // Sign Injections and moves mapped to SGNJ
        FCMP,                  // Comparisons
        [VFMIN : VFCPKCD_D]    // Additional Vectorial FP ops
      }) begin
    return 1'b1;
  end
  // all other ops
  return 1'b0;
endfunction
|
||||
|
||||
// function used in instr_trace svh
|
||||
// is_imm_fpr function is kept to allow cva6 compilation with instr_trace feature
|
||||
// ternary operations encode the rs3 address in the imm field, also add/sub
|
||||
// Returns 1'b1 when `op` is one of the operations listed in the case item
// below, 1'b0 otherwise. These are the operations for which the imm field
// designates a floating-point register (rs3 address, or the ADD/SUB operand).
function automatic logic is_imm_fpr(input fu_op op);
  logic imm_is_fp;
  imm_is_fp = 1'b0;  // all other ops
  unique case (op) inside
    [FADD : FSUB],             // ADD/SUB need inputs as Operand B/C
    [FMADD : FNMADD],          // Fused Computational Operations
    [VFCPKAB_S : VFCPKCD_D]:   // Vectorial FP cast and pack ops
    imm_is_fp = 1'b1;
    default: ;  // keep the 1'b0 preset
  endcase
  return imm_is_fp;
endfunction
|
||||
|
||||
// function used in instr_trace svh
|
||||
// is_rd_fpr function is kept to allow cva6 compilation with instr_trace feature
|
||||
// Returns 1'b1 when `op` is one of the operations listed in the case item
// below, 1'b0 otherwise. Per the section header, a match means the
// destination register of the operation is a floating-point register.
function automatic logic is_rd_fpr(input fu_op op);
  logic rd_is_fp;
  rd_is_fp = 1'b0;  // all other ops
  unique case (op) inside
    [FLD : FLB],               // FP Loads
    [FADD : FNMADD],           // Computational Operations
    FCVT_I2F,                  // Int-Float Casts
    FCVT_F2F,                  // Float-Float Casts
    FSGNJ,                     // Sign Injections
    FMV_X2F,                   // GPR-FPR Moves
    [VFMIN : VFSGNJX],         // Vectorial MIN/MAX and SGNJ
    [VFCPKAB_S : VFCPKCD_D],   // Vectorial FP cast and pack ops
    ACCEL_OP_FD:               // Accelerator instructions
    rd_is_fp = 1'b1;
    default: ;  // keep the 1'b0 preset
  endcase
  return rd_is_fp;
endfunction
|
||||
|
||||
// Returns 1'b1 when `op` lies in the enumeration range AMO_LRW .. AMO_MINDU,
// 1'b0 otherwise.
function automatic logic is_amo(fu_op op);
  logic amo_hit;
  amo_hit = 1'b0;
  case (op) inside
    [AMO_LRW : AMO_MINDU]: amo_hit = 1'b1;
    default: ;  // keep the 1'b0 preset
  endcase
  return amo_hit;
endfunction
|
||||
|
||||
// Packed record of load/store-unit control fields.
// NOTE(review): the per-field notes below are inferred from field names and
// widths only; confirm against the LSU implementation.
typedef struct packed {
|
||||
logic valid; // presumably: entry holds a valid request
|
||||
logic [riscv::VLEN-1:0] vaddr; // VLEN-wide address (virtual, judging by the name)
|
||||
logic overflow;
|
||||
riscv::xlen_t data; // XLEN-wide data word
|
||||
logic [(riscv::XLEN/8)-1:0] be; // one bit per byte of `data` (presumably byte enables)
|
||||
fu_t fu; // functional unit selector
|
||||
fu_op operation; // operation selector
|
||||
logic [TRANS_ID_BITS-1:0] trans_id; // transaction identifier
|
||||
} lsu_ctrl_t;
|
||||
|
||||
// ---------------
|
||||
// IF/ID Stage
|
||||
// ---------------
|
||||
// store the decompressed instruction
|
||||
// Fetch entry: instruction word together with its address, branch-prediction
// information and any exception recorded earlier (e.g. during fetch).
typedef struct packed {
|
||||
logic [riscv::VLEN-1:0] address; // the address of the instructions from below
|
||||
logic [31:0] instruction; // instruction word
|
||||
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
|
||||
exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions
|
||||
} fetch_entry_t;
|
||||
|
||||
// ---------------
|
||||
// ID/EX/WB Stage
|
||||
// ---------------
|
||||
|
||||
// RVFI mirrors the CVA6ConfigRvfiTrace setting of cva6_config_pkg.
localparam RVFI = cva6_config_pkg::CVA6ConfigRvfiTrace;
|
||||
|
||||
// Scoreboard entry: one in-flight instruction with its operands' register
// addresses, its (possibly not yet valid) result, exception and
// branch-prediction state.
typedef struct packed {
|
||||
logic [riscv::VLEN-1:0] pc; // PC of instruction
|
||||
logic [TRANS_ID_BITS-1:0] trans_id; // this can potentially be simplified, we could index the scoreboard entry
|
||||
// with the transaction id in any case make the width more generic
|
||||
fu_t fu; // functional unit to use
|
||||
fu_op op; // operation to perform in each functional unit
|
||||
logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1
|
||||
logic [REG_ADDR_SIZE-1:0] rs2; // register source address 2
|
||||
logic [REG_ADDR_SIZE-1:0] rd; // register destination address
|
||||
riscv::xlen_t result; // for unfinished instructions this field also holds the immediate,
|
||||
// for unfinished floating-point that are partly encoded in rs2, this field also holds rs2
|
||||
// for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB)
|
||||
// this field holds the address of the third operand from the floating-point register file
|
||||
logic valid; // is the result valid
|
||||
logic use_imm; // should we use the immediate as operand b?
|
||||
logic use_zimm; // use zimm as operand a
|
||||
logic use_pc; // set if we need to use the PC as operand a, PC from exception
|
||||
exception_t ex; // exception has occurred
|
||||
branchpredict_sbe_t bp; // branch predict scoreboard data structure
|
||||
logic is_compressed; // signals a compressed instruction, we need this information at the commit stage if
|
||||
// we want to jump accordingly e.g.: +4, +2
|
||||
logic vfp; // is this a vector floating-point instruction?
|
||||
} scoreboard_entry_t;
|
||||
|
||||
// ---------------
|
||||
// MMU instantiation
|
||||
// ---------------
|
||||
// MMU presence flag, taken from cva6_config_pkg.
localparam bit MMU_PRESENT = cva6_config_pkg::CVA6ConfigMmuPresent;
|
||||
|
||||
// Number of instruction / data TLB entries, taken from cva6_config_pkg.
localparam int unsigned INSTR_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigInstrTlbEntries;
|
||||
localparam int unsigned DATA_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigDataTlbEntries;
|
||||
|
||||
// -------------------
|
||||
// Performance counter
|
||||
// -------------------
|
||||
// Performance counter enable, taken from cva6_config_pkg.
localparam bit PERF_COUNTER_EN = cva6_config_pkg::CVA6ConfigPerfCounterEn;
|
||||
// Fixed at 6 here (presumably the number of MHPM counters, going by the name).
localparam int unsigned MHPMCounterNum = 6;
|
||||
|
||||
// --------------------
|
||||
// Atomics
|
||||
// --------------------
|
||||
// Atomic memory operation selector with an explicit 4-bit encoding
// (4'b0000 .. 4'b1101; 4'b1110 and 4'b1111 are not assigned).
typedef enum logic [3:0] {
|
||||
AMO_NONE = 4'b0000,
|
||||
AMO_LR = 4'b0001,
|
||||
AMO_SC = 4'b0010,
|
||||
AMO_SWAP = 4'b0011,
|
||||
AMO_ADD = 4'b0100,
|
||||
AMO_AND = 4'b0101,
|
||||
AMO_OR = 4'b0110,
|
||||
AMO_XOR = 4'b0111,
|
||||
AMO_MAX = 4'b1000,
|
||||
AMO_MAXU = 4'b1001,
|
||||
AMO_MIN = 4'b1010,
|
||||
AMO_MINU = 4'b1011,
|
||||
AMO_CAS1 = 4'b1100, // unused, not part of riscv spec, but provided in OpenPiton
|
||||
AMO_CAS2 = 4'b1101 // unused, not part of riscv spec, but provided in OpenPiton
|
||||
} amo_t;
|
||||
|
||||
// TLB update record with a 27-bit VPN (39-bit virtual addresses, 12-bit page
// offset), an ASID and the page table entry content.
typedef struct packed {
|
||||
logic valid; // valid flag
|
||||
logic is_2M; // NOTE(review): presumably marks a 2 MiB page - confirm
|
||||
logic is_1G; // NOTE(review): presumably marks a 1 GiB page - confirm
|
||||
logic [27-1:0] vpn; // VPN (39bits) = 27bits + 12bits offset
|
||||
logic [ASID_WIDTH-1:0] asid;
|
||||
riscv::pte_t content;
|
||||
} tlb_update_t;
|
||||
|
||||
// Bits required for representation of physical address space as 4K pages
|
||||
// (e.g. 2^27 pages of 4 KiB each span a 39-bit address space).
|
||||
localparam PPN4K_WIDTH = 38;
|
||||
|
||||
// Sv32 variant of the TLB update record: 20-bit VPN (32-bit virtual
// addresses, 12-bit page offset), 9-bit ASID and an Sv32 page table entry.
typedef struct packed {
|
||||
logic valid; // valid flag
|
||||
logic is_4M; // NOTE(review): presumably marks a 4 MiB page - confirm
|
||||
logic [20-1:0] vpn; //VPN (32bits) = 20bits + 12bits offset
|
||||
logic [9-1:0] asid; //ASID length = 9 for Sv32 mmu
|
||||
riscv::pte_sv32_t content;
|
||||
} tlb_update_sv32_t;
|
||||
|
||||
// Frontend exception kind, 2-bit enum with implicit encoding
// (FE_NONE = 0, FE_INSTR_ACCESS_FAULT = 1, FE_INSTR_PAGE_FAULT = 2).
typedef enum logic [1:0] {
|
||||
FE_NONE,
|
||||
FE_INSTR_ACCESS_FAULT,
|
||||
FE_INSTR_PAGE_FAULT
|
||||
} frontend_exception_t;
|
||||
|
||||
// ----------------------
|
||||
// cache request ports
|
||||
// ----------------------
|
||||
// I$ address translation requests
|
||||
// I$ address translation record carrying the translated (physical) fetch
// address, its valid flag and any exception raised during fetch.
typedef struct packed {
|
||||
logic fetch_valid; // address translation valid
|
||||
logic [riscv::PLEN-1:0] fetch_paddr; // physical address in
|
||||
exception_t fetch_exception; // exception occurred during fetch
|
||||
} icache_areq_t;
|
||||
|
||||
// I$ address translation record carrying the request flag and the virtual
// fetch address to translate.
typedef struct packed {
|
||||
logic fetch_req; // address translation request
|
||||
logic [riscv::VLEN-1:0] fetch_vaddr; // virtual address out
|
||||
} icache_arsp_t;
|
||||
|
||||
// I$ data requests
|
||||
// I$ data request: request/kill/speculation flags plus the virtual address
// of the word being fetched.
typedef struct packed {
|
||||
logic req; // we request a new word
|
||||
logic kill_s1; // kill the current request
|
||||
logic kill_s2; // kill the last request
|
||||
logic spec; // request is speculative
|
||||
logic [riscv::VLEN-1:0] vaddr; // 1st cycle: 12 bit index is taken for lookup
|
||||
} icache_dreq_t;
|
||||
|
||||
// I$ data response: ready/valid flags, FETCH_WIDTH bits of fetched data with
// their user bits, the virtual address and any exception encountered.
typedef struct packed {
|
||||
logic ready; // icache is ready
|
||||
logic valid; // signals a valid read
|
||||
logic [FETCH_WIDTH-1:0] data; // 2+ cycle out: tag
|
||||
logic [FETCH_USER_WIDTH-1:0] user; // User bits
|
||||
logic [riscv::VLEN-1:0] vaddr; // virtual address out
|
||||
exception_t ex; // we've encountered an exception
|
||||
} icache_drsp_t;
|
||||
|
||||
// AMO request going to cache. this request is unconditionally valid as soon
|
||||
// as request goes high.
|
||||
// Furthermore, those signals are kept stable until the response indicates
|
||||
// completion by asserting ack.
|
||||
// AMO request record: valid flag, operation, access size and the two 64-bit
// operands (address and data).
typedef struct packed {
|
||||
logic req; // this request is valid
|
||||
amo_t amo_op; // atomic memory operation to perform
|
||||
logic [1:0] size; // 2'b10 --> word operation, 2'b11 --> double word operation
|
||||
logic [63:0] operand_a; // address
|
||||
logic [63:0] operand_b; // data as laid out in the register
|
||||
} amo_req_t;
|
||||
|
||||
// AMO response coming from cache.
|
||||
// AMO response record: acknowledge flag plus the 64-bit result.
typedef struct packed {
|
||||
logic ack; // response is valid
|
||||
logic [63:0] result; // sign-extended, result
|
||||
} amo_resp_t;
|
||||
|
||||
// D$ data requests
|
||||
// D$ data request record.
// NOTE(review): the per-field notes below are inferred from field names and
// widths only; confirm against the cache interface.
typedef struct packed {
|
||||
logic [DCACHE_INDEX_WIDTH-1:0] address_index; // index part of the address
|
||||
logic [DCACHE_TAG_WIDTH-1:0] address_tag; // tag part of the address
|
||||
riscv::xlen_t data_wdata; // XLEN-wide write data
|
||||
logic [DCACHE_USER_WIDTH-1:0] data_wuser; // user bits accompanying the write data
|
||||
logic data_req;
|
||||
logic data_we;
|
||||
logic [(riscv::XLEN/8)-1:0] data_be; // one bit per byte of data_wdata
|
||||
logic [1:0] data_size;
|
||||
logic [DCACHE_TID_WIDTH-1:0] data_id; // transaction id
|
||||
logic kill_req;
|
||||
logic tag_valid;
|
||||
} dcache_req_i_t;
|
||||
|
||||
// D$ data response record.
// NOTE(review): the per-field notes below are inferred from field names and
// widths only; confirm against the cache interface.
typedef struct packed {
|
||||
logic data_gnt;
|
||||
logic data_rvalid;
|
||||
logic [DCACHE_TID_WIDTH-1:0] data_rid; // transaction id of the read data
|
||||
riscv::xlen_t data_rdata; // XLEN-wide read data
|
||||
logic [DCACHE_USER_WIDTH-1:0] data_ruser; // user bits accompanying the read data
|
||||
} dcache_req_o_t;
|
||||
|
||||
// ----------------------
|
||||
// Arithmetic Functions
|
||||
// ----------------------
|
||||
// Sign-extends the 32-bit `operand` to XLEN bits by replicating bit 31 into
// the upper XLEN-32 positions.
function automatic riscv::xlen_t sext32(logic [31:0] operand);
  logic sign_bit;
  sign_bit = operand[31];
  return {{(riscv::XLEN - 32) {sign_bit}}, operand[31:0]};
endfunction
|
||||
|
||||
// ----------------------
|
||||
// Immediate functions
|
||||
// ----------------------
|
||||
// Assembles the UJ-type immediate from `instruction_i`: the low 20 bits are
// {instr[19:12], instr[20], instr[30:21], 1'b0}; all VLEN-20 bits above are
// copies of instr[31].
function automatic logic [riscv::VLEN-1:0] uj_imm(logic [31:0] instruction_i);
  logic        sign_bit;
  logic [19:0] low_bits;
  sign_bit = instruction_i[31];
  low_bits = {instruction_i[19:12], instruction_i[20], instruction_i[30:21], 1'b0};
  // 44 + VLEN - 64 == VLEN - 20 sign copies
  return {{(riscv::VLEN - 20) {sign_bit}}, low_bits};
endfunction
|
||||
|
||||
// Assembles the I-type immediate from `instruction_i`: the low 12 bits are
// instr[31:20]; all VLEN-12 bits above are copies of instr[31].
function automatic logic [riscv::VLEN-1:0] i_imm(logic [31:0] instruction_i);
  logic        sign_bit;
  logic [11:0] low_bits;
  sign_bit = instruction_i[31];
  low_bits = instruction_i[31:20];
  // 52 + VLEN - 64 == VLEN - 12 sign copies
  return {{(riscv::VLEN - 12) {sign_bit}}, low_bits};
endfunction
|
||||
|
||||
// Assembles the SB-type immediate from `instruction_i`: the low 13 bits are
// {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0}; all VLEN-13 bits
// above are copies of instr[31].
function automatic logic [riscv::VLEN-1:0] sb_imm(logic [31:0] instruction_i);
  logic        sign_bit;
  logic [12:0] low_bits;
  sign_bit = instruction_i[31];
  low_bits = {
    instruction_i[31], instruction_i[7], instruction_i[30:25], instruction_i[11:8], 1'b0
  };
  // 51 + VLEN - 64 == VLEN - 13 sign copies
  return {{(riscv::VLEN - 13) {sign_bit}}, low_bits};
endfunction
|
||||
|
||||
// ----------------------
|
||||
// LSU Functions
|
||||
// ----------------------
|
||||
// align data to address e.g.: shift data to be naturally 64
|
||||
// Rotates `data` left by 8 * offset bits, where offset is `addr` with bit 2
// forced to zero unless riscv::IS_XLEN64 is set. Offsets 0..3 rotate within
// the low XLEN bits; offsets 4..7 rotate the full 64-bit word. The low XLEN
// bits of the rotated value are returned.
function automatic riscv::xlen_t data_align(logic [2:0] addr, logic [63:0] data);
  logic [ 2:0] byte_offset;
  logic [63:0] rotated;

  // Set addr[2] to 1'b0 when 32bits
  byte_offset = {(riscv::IS_XLEN64 && addr[2]), addr[1:0]};
  rotated     = '0;

  case (byte_offset)
    3'b000: rotated[riscv::XLEN-1:0] = data[riscv::XLEN-1:0];
    3'b001: rotated[riscv::XLEN-1:0] = {data[riscv::XLEN-9:0], data[riscv::XLEN-1:riscv::XLEN-8]};
    3'b010: rotated[riscv::XLEN-1:0] = {data[riscv::XLEN-17:0], data[riscv::XLEN-1:riscv::XLEN-16]};
    3'b011: rotated[riscv::XLEN-1:0] = {data[riscv::XLEN-25:0], data[riscv::XLEN-1:riscv::XLEN-24]};
    3'b100: rotated = {data[31:0], data[63:32]};
    3'b101: rotated = {data[23:0], data[63:24]};
    3'b110: rotated = {data[15:0], data[63:16]};
    3'b111: rotated = {data[7:0], data[63:8]};
  endcase

  return rotated[riscv::XLEN-1:0];
endfunction
|
||||
|
||||
// generate byte enable mask
|
||||
// Builds the 8-bit byte-enable mask for an access of 2**size bytes starting
// at byte offset `addr` within a 64-bit word.
//   size 2'b11 -> all eight bytes, regardless of addr
//   size 2'b10 -> four bytes, for addr 0..4
//   size 2'b01 -> two bytes,  for addr 0..6
//   size 2'b00 -> one byte,   for addr 0..7
// Any combination not listed (the access would not fit in the word, or an
// input is X/Z) yields 8'b0.
function automatic logic [7:0] be_gen(logic [2:0] addr, logic [1:0] size);
  logic [7:0] mask;
  mask = 8'b0;
  case (size)
    2'b11: mask = 8'b1111_1111;
    2'b10: begin
      case (addr)
        3'b000:  mask = 8'b0000_1111;
        3'b001:  mask = 8'b0001_1110;
        3'b010:  mask = 8'b0011_1100;
        3'b011:  mask = 8'b0111_1000;
        3'b100:  mask = 8'b1111_0000;
        default: ;  // keep the all-zero mask
      endcase
    end
    2'b01: begin
      case (addr)
        3'b000:  mask = 8'b0000_0011;
        3'b001:  mask = 8'b0000_0110;
        3'b010:  mask = 8'b0000_1100;
        3'b011:  mask = 8'b0001_1000;
        3'b100:  mask = 8'b0011_0000;
        3'b101:  mask = 8'b0110_0000;
        3'b110:  mask = 8'b1100_0000;
        default: ;  // keep the all-zero mask
      endcase
    end
    2'b00: begin
      case (addr)
        3'b000: mask = 8'b0000_0001;
        3'b001: mask = 8'b0000_0010;
        3'b010: mask = 8'b0000_0100;
        3'b011: mask = 8'b0000_1000;
        3'b100: mask = 8'b0001_0000;
        3'b101: mask = 8'b0010_0000;
        3'b110: mask = 8'b0100_0000;
        3'b111: mask = 8'b1000_0000;
      endcase
    end
  endcase
  return mask;
endfunction
|
||||
|
||||
// Builds the 4-bit byte-enable mask for an access of 2**size bytes starting
// at byte offset `addr` within a 32-bit word.
//   size 2'b10 -> all four bytes, regardless of addr
//   size 2'b01 -> two bytes, for addr 0..2
//   size 2'b00 -> one byte,  for addr 0..3
// Any other combination (including size 2'b11) yields 4'b0.
function automatic logic [3:0] be_gen_32(logic [1:0] addr, logic [1:0] size);
  logic [3:0] mask;
  mask = 4'b0;
  case (size)
    2'b10: mask = 4'b1111;
    2'b01: begin
      case (addr)
        2'b00:   mask = 4'b0011;
        2'b01:   mask = 4'b0110;
        2'b10:   mask = 4'b1100;
        default: ;  // keep the all-zero mask
      endcase
    end
    2'b00: begin
      case (addr)
        2'b00: mask = 4'b0001;
        2'b01: mask = 4'b0010;
        2'b10: mask = 4'b0100;
        2'b11: mask = 4'b1000;
      endcase
    end
    default: ;  // keep the all-zero mask
  endcase
  return mask;
endfunction
|
||||
|
||||
// ----------------------
|
||||
// Extract Bytes from Op
|
||||
// ----------------------
|
||||
// Maps a load/store/atomic operation to its 2-bit transfer size code:
// 2'b00 for the byte ops, 2'b01 for the half-word ops, 2'b10 for the word
// ops and 2'b11 for the double-word ops. Operations not listed also map to
// 2'b11.
function automatic logic [1:0] extract_transfer_size(fu_op op);
  logic [1:0] size_code;
  size_code = 2'b11;  // value for unlisted operations
  case (op)
    LB, LBU, SB, FLB, FSB: size_code = 2'b00;
    LH, LHU, SH, FLH, FSH: size_code = 2'b01;
    LW, LWU, SW, FLW, FSW,
    AMO_LRW, AMO_SCW,
    AMO_SWAPW, AMO_ADDW,
    AMO_ANDW, AMO_ORW,
    AMO_XORW, AMO_MAXW,
    AMO_MAXWU, AMO_MINW,
    AMO_MINWU:
    size_code = 2'b10;
    LD, SD, FLD, FSD,
    AMO_LRD, AMO_SCD,
    AMO_SWAPD, AMO_ADDD,
    AMO_ANDD, AMO_ORD,
    AMO_XORD, AMO_MAXD,
    AMO_MAXDU, AMO_MIND,
    AMO_MINDU:
    size_code = 2'b11;
    default: ;  // keep the 2'b11 preset
  endcase
  return size_code;
endfunction
|
||||
endpackage
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue