This commit is contained in:
Jonathan Kimmitt 2024-05-01 08:27:23 -04:00 committed by GitHub
commit 9b0133c8dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
250 changed files with 71133 additions and 0 deletions

View File

@ -0,0 +1,223 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 16.05.2017
// Description: Instruction Tracer Main Class
`ifndef VERILATOR
//pragma translate_off
`include "ex_trace_item.svh"
`include "instr_trace_item.svh"
// Simulation-only instruction tracer.
// Each time the clocking block tracer_if.pck fires, trace() samples the
// pipeline signals exposed by the interface, follows every instruction through
// the decode -> issue -> commit queues declared below and writes one line per
// committed instruction (and per reported exception) to a per-hart log file.
// Ports:
//   tracer_if - tracer interface; this module only reads its clocking block pck
//   hart_id_i - hart identifier, used only to build the log file names
module instr_tracer (
instr_tracer_if tracer_if,
input logic[riscv::XLEN-1:0] hart_id_i
);
// keep the decoded instructions in a queue
logic [31:0] decode_queue [$];
// keep the issued instructions in a queue
logic [31:0] issue_queue [$];
// issue scoreboard entries
// (pushed together with issue_queue, so both queues stay index-aligned)
ariane_pkg::scoreboard_entry_t issue_sbe_queue [$];
// scoreboard entry popped for the instruction currently being committed
ariane_pkg::scoreboard_entry_t issue_sbe;
// store resolved branches, get (mis-)predictions
ariane_pkg::bp_resolve_t bp [$];
// shadow copy of the register files
logic [63:0] gp_reg_file [32];
logic [63:0] fp_reg_file [32];
// 64 bit clock tick count
// (incremented once per sampled clock while reset is de-asserted)
longint unsigned clk_ticks;
// file descriptors returned by $fopen: f is the trace log, commit_log is the
// commit log (only opened when ariane_pkg::ENABLE_SPIKE_COMMIT_LOG is set)
int f, commit_log;
// address mapping
// The two queues hold the physical addresses sampled from st_paddr / ld_paddr;
// address_mapping is the entry popped for the load/store being committed.
logic [63:0] store_mapping[$], load_mapping[$], address_mapping;
// static uvm_cmdline_processor uvcl = uvm_cmdline_processor::get_inst();
// Open the output files for this hart.
//   hart_id - value inserted into the file names
// The trace log is always opened; the commit log only when
// ariane_pkg::ENABLE_SPIKE_COMMIT_LOG is set. The $fopen results are stored in
// f / commit_log and are not checked here.
function void create_file(logic [63:0] hart_id);
string fn, fn_commit_log;
$sformat(fn, "trace_hart_%0.0f.log", hart_id);
$sformat(fn_commit_log, "trace_hart_%0.0f_commit.log", hart_id);
$display("[TRACER] Output filename is: %s", fn);
f = $fopen(fn,"w");
if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG) commit_log = $fopen(fn_commit_log, "w");
endfunction : create_file
// Main tracing loop; never returns. One loop iteration corresponds to one
// event of the clocking block tracer_if.pck.
task trace();
automatic logic [31:0] decode_instruction, issue_instruction, issue_commit_instruction;
automatic ariane_pkg::scoreboard_entry_t commit_instruction;
// initialize all entries of both shadow register files to zero
gp_reg_file = '{default:0};
fp_reg_file = '{default:0};
forever begin
// re-initialized to zero on every iteration, so an instruction that is not
// CTRL_FLOW is printed with an all-zero branch resolution
automatic ariane_pkg::bp_resolve_t bp_instruction = '0;
// new cycle, we are only interested if reset is de-asserted
// (`!== 1'b1` also treats X/Z on rstn as "in reset"; all queues are flushed)
@(tracer_if.pck) if (tracer_if.pck.rstn !== 1'b1) begin
flush();
continue;
end
// increment clock tick
clk_ticks++;
// -------------------
// Instruction Decode
// -------------------
// we are decoding an instruction
if (tracer_if.pck.fetch_valid && tracer_if.pck.fetch_ack) begin
decode_instruction = tracer_if.pck.instruction;
decode_queue.push_back(decode_instruction);
end
// -------------------
// Instruction Issue
// -------------------
// we got a new issue ack, so put the element from the decode queue to
// the issue queue
if (tracer_if.pck.issue_ack && !tracer_if.pck.flush_unissued) begin
issue_instruction = decode_queue.pop_front();
issue_queue.push_back(issue_instruction);
// also save the scoreboard entry to a separate issue queue
issue_sbe_queue.push_back(ariane_pkg::scoreboard_entry_t'(tracer_if.pck.issue_sbe));
end
// --------------------
// Address Translation
// --------------------
// record physical addresses in arrival order; they are consumed at commit
if (tracer_if.pck.st_valid) begin
store_mapping.push_back(tracer_if.pck.st_paddr);
end
// killed loads are not recorded
if (tracer_if.pck.ld_valid && !tracer_if.pck.ld_kill) begin
load_mapping.push_back(tracer_if.pck.ld_paddr);
end
// ----------------------
// Store predictions
// ----------------------
if (tracer_if.pck.resolve_branch.valid) begin
bp.push_back(tracer_if.pck.resolve_branch);
end
// --------------
// Commit
// --------------
// we are committing an instruction
// (two commit ports, hard-coded; port 0 is handled before port 1)
for (int i = 0; i < 2; i++) begin
if (tracer_if.pck.commit_ack[i]) begin
commit_instruction = ariane_pkg::scoreboard_entry_t'(tracer_if.pck.commit_instr[i]);
// NOTE(review): the queues are popped without an emptiness check; this
// presumably relies on every committed instruction having been issued
// (and, for loads/stores/branches, translated/resolved) earlier - confirm
issue_commit_instruction = issue_queue.pop_front();
issue_sbe = issue_sbe_queue.pop_front();
// check if the instruction retiring is a load or store, get the physical address accordingly
// (for any other functional unit address_mapping keeps its previous value)
if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::LOAD)
address_mapping = load_mapping.pop_front();
else if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::STORE)
address_mapping = store_mapping.pop_front();
if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::CTRL_FLOW)
bp_instruction = bp.pop_front();
// the scoreboards issue entry still contains the immediate value as a result
// check if the write back is valid, if not we need to source the result from the register file
// as the most recent version of this register will be there.
if (tracer_if.pck.we_gpr[i] || tracer_if.pck.we_fpr[i]) begin
printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
end else if (ariane_pkg::is_rd_fpr(commit_instruction.op)) begin
printInstr(issue_sbe, issue_commit_instruction, fp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
end else begin
printInstr(issue_sbe, issue_commit_instruction, gp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
end
end
end
// --------------
// Exceptions
// --------------
// an exception with cause BREAKPOINT raised while in debug mode is not printed
if (tracer_if.pck.exception.valid && !(tracer_if.pck.debug_mode && tracer_if.pck.exception.cause == riscv::BREAKPOINT)) begin
// print exception
// (the reported pc is always taken from commit port 0)
printException(tracer_if.pck.commit_instr[0].pc, tracer_if.pck.exception.cause, tracer_if.pck.exception.tval);
end
// ----------------------
// Commit Registers
// ----------------------
// update shadow reg files here
// (done after the commit section above, so printInstr is called with the
// register file contents from before this cycle's write-back)
for (int i = 0; i < 2; i++) begin
// writes to general-purpose register 0 are ignored
if (tracer_if.pck.we_gpr[i] && tracer_if.pck.waddr[i] != 5'b0) begin
gp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
end else if (tracer_if.pck.we_fpr[i]) begin
fp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
end
end
// --------------
// Flush Signals
// --------------
// flush un-issued instructions
if (tracer_if.pck.flush_unissued) begin
flushDecode();
end
// flush whole pipeline
if (tracer_if.pck.flush) begin
flush();
end
end
endtask
// flush all decoded instructions
function void flushDecode ();
decode_queue = {};
endfunction
// flush everything, we took an exception/interrupt
// (also called from trace() while reset is asserted; the shadow register
// files and clk_ticks are not touched)
function void flush ();
flushDecode();
// clear all elements in the queue
issue_queue = {};
issue_sbe_queue = {};
// also clear mappings
store_mapping = {};
load_mapping = {};
bp = {};
endfunction
// Write one committed instruction to the trace log and, when
// ariane_pkg::ENABLE_SPIKE_COMMIT_LOG is set and the core is not in debug
// mode, also to the commit log.
//   sbe        - scoreboard entry captured at issue time
//   instr      - instruction word captured at decode time
//   result     - write-back data or shadow register value chosen by trace()
//   paddr      - physical address argument (the 64-bit address_mapping passed
//                by trace() is converted to PLEN bits)
//   priv_lvl   - privilege level sampled in the commit cycle
//   debug_mode - debug-mode flag sampled in the commit cycle
//   bp         - branch resolution for this instruction (all-zero unless
//                trace() popped one for a CTRL_FLOW instruction)
function void printInstr(ariane_pkg::scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [riscv::PLEN-1:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, ariane_pkg::bp_resolve_t bp);
// the trace item is handed the current shadow register files together with
// the instruction; the text formatting is done by instr_trace_item
automatic instr_trace_item iti = new ($time, clk_ticks, sbe, instr, gp_reg_file, fp_reg_file, result, paddr, priv_lvl, debug_mode, bp);
// formatted trace line; it is written to the trace log file below
automatic string print_instr = iti.printInstr();
if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG && !debug_mode) begin
$fwrite(commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result, ariane_pkg::is_rd_fpr(sbe.op)));
end
$fwrite(f, {print_instr, "\n"});
endfunction
// Write one exception record to the trace log.
//   pc    - program counter reported with the exception
//   cause - exception cause
//   tval  - exception trap value
function void printException(logic [riscv::VLEN-1:0] pc, logic [63:0] cause, logic [63:0] tval);
automatic ex_trace_item eti = new (pc, cause, tval);
automatic string print_ex = eti.printException();
$fwrite(f, {print_ex, "\n"});
endfunction
// Close whichever log files have a non-zero descriptor.
function void close();
if (f) $fclose(f);
if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG && commit_log) $fclose(commit_log);
endfunction
// Start-up: wait 15 ns, open the log files, then trace forever.
// NOTE(review): the delay presumably lets hart_id_i settle before it is used
// in the file names - confirm.
initial begin
#15ns;
create_file(hart_id_i);
trace();
end
// close the log files when the simulation finishes
final begin
close();
end
endmodule : instr_tracer
//pragma translate_on
`endif

View File

@ -0,0 +1,67 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 16.05.2017
// Description: Instruction Tracer Interface
`ifndef VERILATOR
`ifndef INSTR_TRACER_IF_SV
`define INSTR_TRACER_IF_SV
// Tracer interface: a bundle of pipeline signals plus a clocking block `pck`
// that samples every one of them on the rising edge of `clk`.
// The clocking block declares inputs only, so nothing is driven through it.
interface instr_tracer_if (
    input clk
);

  // ---------------------------------------------------------------------
  // Pipeline control
  // ---------------------------------------------------------------------
  logic rstn;
  logic flush_unissued;
  logic flush;

  // ---------------------------------------------------------------------
  // Decode
  // ---------------------------------------------------------------------
  logic [31:0] instruction;
  logic        fetch_valid;
  logic        fetch_ack;

  // ---------------------------------------------------------------------
  // Issue stage
  // ---------------------------------------------------------------------
  logic                          issue_ack;  // issue acknowledged
  ariane_pkg::scoreboard_entry_t issue_sbe;  // issue scoreboard entry

  // ---------------------------------------------------------------------
  // Write-back stage (two ports)
  // ---------------------------------------------------------------------
  logic [1:0][4:0]  waddr;
  logic [1:0][63:0] wdata;
  logic [1:0]       we_gpr;
  logic [1:0]       we_fpr;

  // ---------------------------------------------------------------------
  // Commit stage (two ports)
  // ---------------------------------------------------------------------
  ariane_pkg::scoreboard_entry_t [1:0] commit_instr;  // commit instruction
  logic [1:0]                          commit_ack;

  // ---------------------------------------------------------------------
  // Address translation
  // ---------------------------------------------------------------------
  // stores
  logic                   st_valid;
  logic [riscv::PLEN-1:0] st_paddr;
  // loads
  logic                   ld_valid;
  logic                   ld_kill;
  logic [riscv::PLEN-1:0] ld_paddr;

  // ---------------------------------------------------------------------
  // Branch resolution, exceptions and privilege state
  // ---------------------------------------------------------------------
  ariane_pkg::bp_resolve_t resolve_branch;  // misprediction
  ariane_pkg::exception_t  exception;       // exceptions
  riscv::priv_lvl_t        priv_lvl;        // current privilege level
  logic                    debug_mode;

  // The tracer is purely passive: every signal above is listed as an input
  // of the clocking block and nothing is declared as an output.
  //pragma translate_off
  clocking pck @(posedge clk);
    // pipeline control
    input rstn, flush_unissued, flush;
    // decode and issue
    input instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe;
    // write-back and commit
    input waddr, wdata, we_gpr, we_fpr, commit_instr, commit_ack;
    // address translation
    input st_valid, st_paddr, ld_valid, ld_kill, ld_paddr;
    // branch resolution, exceptions, privilege state
    input resolve_branch, exception, priv_lvl, debug_mode;
  endclocking
  //pragma translate_on
endinterface
`endif
`endif

View File

@ -0,0 +1,107 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: SRAM wrapper for FPGA (requires the fpga-support submodule)
//
// Note: the wrapped module contains two different implementations for
// ALTERA and XILINX tools, since these follow different coding styles for
// inferrable RAMs with byte enable. Define `FPGA_TARGET_XILINX or
// `FPGA_TARGET_ALTERA in your build environment (default is ALTERA).
// Single-port SRAM built from 64-bit wide tc_sram_wrapper slices.
//
// The data word is zero-padded to a multiple of 64 bits and split into
// (DATA_WIDTH+63)/64 slices, each served by its own tc_sram_wrapper instance
// with 8-bit bytes and a read latency of 1. When USER_EN > 0 a second memory
// of the same geometry stores the user bits, using the same request, address
// and byte-enable signals as the data memory.
//
// Parameters:
//   DATA_WIDTH - width of wdata_i / rdata_o
//   USER_WIDTH - width of wuser_i / ruser_o
//   USER_EN    - > 0 instantiates the user memory, otherwise ruser reads as 0
//   NUM_WORDS  - number of words per slice
//   SIM_INIT   - forwarded to tc_sram_wrapper as SimInit
//   OUT_REGS   - not referenced by this implementation (Latency is fixed to 1)
module sram #(
    parameter DATA_WIDTH = 64,
    parameter USER_WIDTH = 1,
    parameter USER_EN    = 0,
    parameter NUM_WORDS  = 1024,
    parameter SIM_INIT   = "none",
    parameter OUT_REGS   = 0  // enables output registers in FPGA macro (read lat = 2)
) (
    input  logic                         clk_i,
    input  logic                         rst_ni,
    input  logic                         req_i,
    input  logic                         we_i,
    input  logic [$clog2(NUM_WORDS)-1:0] addr_i,
    input  logic [       USER_WIDTH-1:0] wuser_i,
    input  logic [       DATA_WIDTH-1:0] wdata_i,
    input  logic [ (DATA_WIDTH+7)/8-1:0] be_i,
    output logic [       USER_WIDTH-1:0] ruser_o,
    output logic [       DATA_WIDTH-1:0] rdata_o
);

  // Widths rounded up to what the 64-bit slices below consume.
  localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH + 63) / 64) * 64;
  localparam USER_WIDTH_ALIGNED = DATA_WIDTH_ALIGNED;  // To be fine tuned to reduce memory size
  localparam BE_WIDTH_ALIGNED   = (((DATA_WIDTH + 7) / 8 + 7) / 8) * 8;

  // Write side (padded inputs)
  logic [DATA_WIDTH_ALIGNED-1:0] wdata_aligned;
  logic [USER_WIDTH_ALIGNED-1:0] wuser_aligned;
  logic [  BE_WIDTH_ALIGNED-1:0] be_aligned;
  // Read side (padded outputs of the slices)
  logic [DATA_WIDTH_ALIGNED-1:0] rdata_aligned;
  logic [USER_WIDTH_ALIGNED-1:0] ruser_aligned;

  // Zero-pad the write-side signals to the aligned widths. Each signal is
  // cleared first and then has the live bits placed on top of the zeros.
  always_comb begin : p_align
    wdata_aligned                    = '0;
    wdata_aligned[DATA_WIDTH-1:0]    = wdata_i;

    wuser_aligned                    = '0;
    wuser_aligned[USER_WIDTH-1:0]    = wuser_i;

    // be_i is narrower than or equal to the selected range; the assignment
    // zero-extends it.
    be_aligned                       = '0;
    be_aligned[BE_WIDTH_ALIGNED-1:0] = be_i;
  end

  // Read side: drop the padding bits again.
  assign rdata_o = rdata_aligned[DATA_WIDTH-1:0];
  assign ruser_o = ruser_aligned[USER_WIDTH-1:0];

  // One iteration per 64-bit slice of the aligned data word.
  for (genvar cut = 0; cut < DATA_WIDTH_ALIGNED / 64; cut++) begin : gen_cut

    // Data memory slice.
    // unused byte-enable segments (8bits) are culled by the tool
    tc_sram_wrapper #(
        .NumWords   (NUM_WORDS),  // Number of Words in data array
        .DataWidth  (64),         // Data signal width
        .ByteWidth  (32'd8),      // Width of a data byte
        .NumPorts   (32'd1),      // Number of read and write ports
        .Latency    (32'd1),      // Latency when the read data is available
        .SimInit    (SIM_INIT),   // Simulation initialization
        .PrintSimCfg(1'b0)        // Print configuration
    ) i_tc_sram_wrapper (
        .clk_i  (clk_i),
        .rst_ni (rst_ni),
        .req_i  (req_i),
        .we_i   (we_i),
        .be_i   (be_aligned[cut*8+:8]),
        .wdata_i(wdata_aligned[cut*64+:64]),
        .addr_i (addr_i),
        .rdata_o(rdata_aligned[cut*64+:64])
    );

    if (USER_EN > 0) begin : gen_mem_user
      // User memory slice: same geometry and control as the data slice,
      // including the data byte enables.
      tc_sram_wrapper #(
          .NumWords   (NUM_WORDS),  // Number of Words in data array
          .DataWidth  (64),         // Data signal width
          .ByteWidth  (32'd8),      // Width of a data byte
          .NumPorts   (32'd1),      // Number of read and write ports
          .Latency    (32'd1),      // Latency when the read data is available
          .SimInit    (SIM_INIT),   // Simulation initialization
          .PrintSimCfg(1'b0)        // Print configuration
      ) i_tc_sram_wrapper_user (
          .clk_i  (clk_i),
          .rst_ni (rst_ni),
          .req_i  (req_i),
          .we_i   (we_i),
          .be_i   (be_aligned[cut*8+:8]),
          .wdata_i(wuser_aligned[cut*64+:64]),
          .addr_i (addr_i),
          .rdata_o(ruser_aligned[cut*64+:64])
      );
    end else begin
      // No user memory: the user read data of this slice is tied to zero.
      assign ruser_aligned[cut*64+:64] = '0;
    end
  end

endmodule : sram

View File

@ -0,0 +1,60 @@
// Copyright 2022 Thales DIS design services SAS
//
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Original Author: Jean-Roch COULON - Thales
// Pass-through wrapper around tc_sram.
//
// All parameters and ports are forwarded unchanged to a single tc_sram
// instance. That instance sits between `synthesis translate_off/on` pragmas,
// so a tool that honours them sees this module without any contents.
module tc_sram_wrapper #(
    parameter int unsigned NumWords    = 32'd1024,  // Number of Words in data array
    parameter int unsigned DataWidth   = 32'd128,   // Data signal width
    parameter int unsigned ByteWidth   = 32'd8,     // Width of a data byte
    parameter int unsigned NumPorts    = 32'd2,     // Number of read and write ports
    parameter int unsigned Latency     = 32'd1,     // Latency when the read data is available
    parameter              SimInit     = "none",    // Simulation initialization
    parameter bit          PrintSimCfg = 1'b0,      // Print configuration
    // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
    // Address width: clog2(NumWords), but never less than one bit.
    parameter int unsigned AddrWidth   = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
    // Number of byte-enable bits: DataWidth / ByteWidth, rounded up.
    parameter int unsigned BeWidth     = (DataWidth + ByteWidth - 32'd1) / ByteWidth,  // ceil_div
    parameter type         addr_t      = logic [AddrWidth-1:0],
    parameter type         data_t      = logic [DataWidth-1:0],
    parameter type         be_t        = logic [BeWidth-1:0]
) (
    input  logic                clk_i,    // Clock
    input  logic                rst_ni,   // Asynchronous reset active low
    // input ports (one entry per memory port)
    input  logic  [NumPorts-1:0] req_i,   // request
    input  logic  [NumPorts-1:0] we_i,    // write enable
    input  addr_t [NumPorts-1:0] addr_i,  // request address
    input  data_t [NumPorts-1:0] wdata_i, // write data
    input  be_t   [NumPorts-1:0] be_i,    // write byte enable
    // output ports (one entry per memory port)
    output data_t [NumPorts-1:0] rdata_o  // read data
);

  // synthesis translate_off
  tc_sram #(
      .NumWords   (NumWords),
      .DataWidth  (DataWidth),
      .ByteWidth  (ByteWidth),
      .NumPorts   (NumPorts),
      .Latency    (Latency),
      .SimInit    (SimInit),
      .PrintSimCfg(PrintSimCfg)
  ) i_tc_sram (
      // clock and reset
      .clk_i  (clk_i),
      .rst_ni (rst_ni),
      // request channel
      .req_i  (req_i),
      .we_i   (we_i),
      .addr_i (addr_i),
      .wdata_i(wdata_i),
      .be_i   (be_i),
      // response channel
      .rdata_o(rdata_o)
  );
  // synthesis translate_on

endmodule

View File

@ -0,0 +1,423 @@
// Copyright 2020 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Authors: Matheus Cavalcante, ETH Zurich
// Nils Wistoff, ETH Zurich
// Date: 20.11.2020
// Description: Functional unit that dispatches CVA6 instructions to accelerators.
// Accelerator dispatcher.
// Accepts instructions issued to the ACCEL functional unit, buffers them in a
// small FIFO, forwards each one to the accelerator request interface once the
// commit stage shows that it has reached the head of the scoreboard, and
// unpacks the accelerator response back towards the core. It also counts
// accelerator loads/stores in flight to stall scalar memory instructions and
// halts the core on store barriers while accelerator stores are pending.
//
// Not referenced in this module body: the AccCfg parameter and the inputs
// ld_st_priv_lvl_i, sum_i, pmpcfg_i, pmpaddr_i, dcache_req_ports_i and
// acc_dcache_req_ports_i. The outputs acc_stall_st_pending_o,
// flush_pipeline_o and acc_dcache_req_ports_o are tied to zero at the end of
// the module.
module acc_dispatcher
import ariane_pkg::*;
import riscv::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type acc_req_t = acc_pkg::accelerator_req_t,
parameter type acc_resp_t = acc_pkg::accelerator_resp_t,
parameter type acc_cfg_t = logic,
parameter acc_cfg_t AccCfg = '0
) (
input logic clk_i,
input logic rst_ni,
// Interface with the CSR regfile
input logic acc_cons_en_i, // Accelerator memory consistent mode
output logic acc_fflags_valid_o,
output logic [4:0] acc_fflags_o,
// Interface with the CSRs
input priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][PLEN-3:0] pmpaddr_i,
input logic [2:0] fcsr_frm_i,
output logic dirty_v_state_o,
// Interface with the issue stage
input scoreboard_entry_t issue_instr_i,
input logic issue_instr_hs_i,
output logic issue_stall_o,
input fu_data_t fu_data_i,
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
output xlen_t acc_result_o,
output logic acc_valid_o,
output exception_t acc_exception_o,
// Interface with the execute stage
output logic acc_valid_ex_o, // FU executed
// Interface with the commit stage
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
input logic commit_st_barrier_i, // A store barrier was committed
// Interface with the load/store unit
output logic acc_stall_st_pending_o,
input logic acc_no_st_pending_i,
input dcache_req_i_t [2:0] dcache_req_ports_i,
// Interface with the controller
output logic ctrl_halt_o,
input logic flush_unissued_instr_i,
input logic flush_ex_i,
output logic flush_pipeline_o,
// Interface with cache subsystem
output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
input logic inval_ready_i,
output logic inval_valid_o,
output logic [63:0] inval_addr_o,
// Accelerator interface
output acc_req_t acc_req_o,
input acc_resp_t acc_resp_i
);
// The `FF register macro used below comes from this header (not shown here).
`include "common_cells/registers.svh"
import cf_math_pkg::idx_width;
/***********************
* Common signals *
***********************/
logic acc_ready;
logic acc_valid_d, acc_valid_q;
/**************************
* Accelerator issue *
**************************/
// Issue accelerator instructions
// acc_valid_q is acc_valid_d registered through `FF with reset value '0.
`FF(acc_valid_q, acc_valid_d, '0)
assign acc_valid_ex_o = acc_valid_q;
// An accelerator instruction is accepted when the issue handshake happens for
// an ACCEL instruction that carries no exception and is not being flushed.
assign acc_valid_d = ~issue_instr_i.ex.valid &
issue_instr_hs_i &
(issue_instr_i.fu == ACCEL) &
~flush_unissued_instr_i;
// Accelerator load/store pending signals
logic acc_no_ld_pending;
logic acc_no_st_pending;
// Stall issue stage in three cases:
always_comb begin : stall_issue
unique case (issue_instr_i.fu)
ACCEL:
// 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet
issue_stall_o = ~acc_ready;
LOAD:
// 2. We're issuing a scalar load but there is an inflight accelerator store.
issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
STORE:
// 3. We're issuing a scalar store but there is an inflight accelerator load or store.
issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending);
default: issue_stall_o = 1'b0;
endcase
end
/***********************
* Instruction queue *
***********************/
localparam InstructionQueueDepth = 3;
// acc_data is fu_data_i gated by acc_valid_ex_o; only its trans_id field is
// read below (insn_pending bookkeeping). The FIFO is fed fu_data_i directly.
fu_data_t acc_data;
fu_data_t acc_insn_queue_o;
logic acc_insn_queue_pop;
logic acc_insn_queue_empty;
logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage;
logic acc_commit;
logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id;
assign acc_data = acc_valid_ex_o ? fu_data_i : '0;
// Instruction FIFO: pushed one cycle after issue (acc_valid_q), flushed by
// flush_ex_i, popped by the request dispatcher further down.
fifo_v3 #(
.DEPTH (InstructionQueueDepth),
.FALL_THROUGH(1'b1),
.dtype (fu_data_t)
) i_acc_insn_queue (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_ex_i),
.testmode_i(1'b0),
.data_i (fu_data_i),
.push_i (acc_valid_q),
.full_o ( /* Unused */),
.data_o (acc_insn_queue_o),
.pop_i (acc_insn_queue_pop),
.empty_o (acc_insn_queue_empty),
.usage_o (acc_insn_queue_usage)
);
// We are ready if the instruction queue is able to accept at least one more entry.
// NOTE(review): the comparison is against Depth - 1, i.e. ready is dropped
// while one slot is still free; presumably this headroom covers the
// instruction already registered in acc_valid_q but not yet pushed - confirm.
assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
/**********************************
* Non-speculative instructions *
**********************************/
// Keep track of the instructions that were received by the dispatcher.
logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q;
`FF(insn_pending_q, insn_pending_d, '0)
// Only non-speculative instructions can be issued to the accelerators.
// The following block keeps track of which transaction IDs reached the
// top of the scoreboard, and are therefore no longer speculative.
logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
`FF(insn_ready_q, insn_ready_d, '0)
// Both vectors are indexed by transaction ID. The statements below are
// ordered: a later assignment to the same bit overrides an earlier one.
always_comb begin : p_non_speculative_ff
// Maintain state
insn_pending_d = insn_pending_q;
insn_ready_d = insn_ready_q;
// We received a new instruction
if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1;
// Flush all received instructions
// (only the pending vector is cleared here; insn_ready_d is left as is)
if (flush_ex_i) insn_pending_d = '0;
// An accelerator instruction is no longer speculative.
if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
insn_ready_d[acc_commit_trans_id] = 1'b1;
insn_pending_d[acc_commit_trans_id] = 1'b0;
end
// An accelerator instruction was issued.
if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
end : p_non_speculative_ff
/*************************
* Accelerator request *
*************************/
// acc_req / acc_req_valid / acc_req_ready: request produced by the dispatcher
// process below; acc_req_int: the same request at the output of the register.
acc_pkg::accelerator_req_t acc_req;
logic acc_req_valid;
logic acc_req_ready;
acc_pkg::accelerator_req_t acc_req_int;
fall_through_register #(
.T(acc_pkg::accelerator_req_t)
) i_accelerator_req_register (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clr_i (1'b0),
.testmode_i(1'b0),
.data_i (acc_req),
.valid_i (acc_req_valid),
.ready_o (acc_req_ready),
.data_o (acc_req_int),
.valid_o (acc_req_o.req_valid),
.ready_i (acc_resp_i.req_ready)
);
// Instruction fields come from the request register; the remaining fields
// are driven directly from this module's inputs.
assign acc_req_o.insn = acc_req_int.insn;
assign acc_req_o.rs1 = acc_req_int.rs1;
assign acc_req_o.rs2 = acc_req_int.rs2;
assign acc_req_o.frm = acc_req_int.frm;
assign acc_req_o.trans_id = acc_req_int.trans_id;
assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
assign acc_req_o.acc_cons_en = acc_cons_en_i;
assign acc_req_o.inval_ready = inval_ready_i;
always_comb begin : accelerator_req_dispatcher
// Do not fetch from the instruction queue
acc_insn_queue_pop = 1'b0;
// Default values
acc_req = '0;
acc_req_valid = 1'b0;
// Unpack fu_data_t into accelerator_req_t
if (!acc_insn_queue_empty) begin
acc_req = '{
// Instruction is forwarded from the decoder as an immediate
// -
// frm rounding information is up to date during a valid request to the accelerator
// The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes
// do not take place until the accelerator answers (Ariane commits in-order)
insn :
acc_insn_queue_o.imm[
31
:
0
],
rs1 : acc_insn_queue_o.operand_a,
rs2 : acc_insn_queue_o.operand_b,
frm : fpnew_pkg::roundmode_e'(fcsr_frm_i),
trans_id: acc_insn_queue_o.trans_id,
default: '0
};
// Wait until the instruction is no longer speculative.
// Valid either because the ready bit was set in an earlier cycle, or because
// a pending instruction is being marked non-speculative in this cycle.
acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] ||
(acc_commit && insn_pending_q[acc_commit_trans_id]);
// Pop only when the request is actually accepted by the request register.
acc_insn_queue_pop = acc_req_valid && acc_req_ready;
end
end
/**************************
* Accelerator response *
**************************/
logic acc_ld_disp;
logic acc_st_disp;
// Unpack the accelerator response
assign acc_trans_id_o = acc_resp_i.trans_id;
assign acc_result_o = acc_resp_i.result;
assign acc_valid_o = acc_resp_i.resp_valid;
// An accelerator error is reported to the core as an illegal instruction.
assign acc_exception_o = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
assign acc_fflags_o = acc_resp_i.fflags;
// Always ready to receive responses
assign acc_req_o.resp_ready = 1'b1;
// Signal dispatched load/store to issue stage
// (these use acc_req_valid, not the pop strobe, so they do not depend on
// acc_req_ready)
assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
// Cache invalidation
assign inval_valid_o = acc_resp_i.inval_valid;
assign inval_addr_o = acc_resp_i.inval_addr;
/**************************
* Accelerator commit *
**************************/
// Instruction can be issued to the (in-order) back-end if
// it reached the top of the scoreboard and it hasn't been
// issued yet
// acc_commit is raised when commit port 0 holds an ACCEL entry whose valid
// flag is clear, or when port 0 is valid and port 1 holds such an entry.
// NOTE(review): commit_instr_i[1] is accessed unconditionally, which assumes
// CVA6Cfg.NrCommitPorts >= 2 - confirm.
always_comb begin : accelerator_commit
acc_commit = 1'b0;
if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1;
if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)
acc_commit = 1'b1;
end
// Dirty the V state if we are committing anything related to the vector accelerator
always_comb begin : dirty_v_state
dirty_v_state_o = 1'b0;
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
dirty_v_state_o |= commit_ack_i[i] & (commit_instr_i[i].fu == ACCEL);
end
end
// Transaction ID matching the port selection made for acc_commit above:
// port 0 while its entry is not valid, otherwise port 1.
assign acc_commit_trans_id = !commit_instr_i[0].valid ? commit_instr_i[0].trans_id
: commit_instr_i[1].trans_id;
/**************************
* Accelerator barriers *
**************************/
// On a store barrier (i.e. any barrier that requires preceding stores to complete
// before continuing execution), halt execution while there are pending stores in
// the accelerator pipeline.
logic wait_acc_store_d, wait_acc_store_q;
`FF(wait_acc_store_q, wait_acc_store_d, '0)
// Set on store barrier. Clear when no store is pending.
assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending;
assign ctrl_halt_o = wait_acc_store_q;
/**************************
* Load/Store tracking *
**************************/
// Two 3-bit counters per direction: "spec" counts instructions accepted at
// issue but not yet dispatched, "disp" counts dispatched instructions that the
// accelerator has not reported complete.
// In each counter en_i is the XOR of the increment and decrement events, so
// it is low when both occur in the same cycle; down_i carries the decrement
// event. NOTE(review): the counter module is defined elsewhere; presumably
// en_i enables counting and down_i selects the direction - confirm.
// Loads
logic acc_spec_loads_overflow;
logic [2:0] acc_spec_loads_pending;
logic acc_disp_loads_overflow;
logic [2:0] acc_disp_loads_pending;
assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0);
// Count speculative loads. These can still be flushed.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_spec_loads (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (flush_ex_i),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),
.load_i (1'b0),
.down_i (acc_ld_disp),
.d_i ('0),
.q_o (acc_spec_loads_pending),
.overflow_o(acc_spec_loads_overflow)
);
// Count dispatched loads. These cannot be flushed anymore.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_disp_loads (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (1'b0),
.en_i (acc_ld_disp ^ acc_resp_i.load_complete),
.load_i (1'b0),
.down_i (acc_resp_i.load_complete),
.d_i ('0),
.q_o (acc_disp_loads_pending),
.overflow_o(acc_disp_loads_overflow)
);
// Simulation check: neither load counter may report an overflow.
acc_dispatcher_no_load_overflow :
assert property (
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
else $error("[acc_dispatcher] Too many pending loads.");
// Stores
logic acc_spec_stores_overflow;
logic [2:0] acc_spec_stores_pending;
logic acc_disp_stores_overflow;
logic [2:0] acc_disp_stores_pending;
assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0);
// Count speculative stores. These can still be flushed.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_spec_stores (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (flush_ex_i),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
.load_i (1'b0),
.down_i (acc_st_disp),
.d_i ('0),
.q_o (acc_spec_stores_pending),
.overflow_o(acc_spec_stores_overflow)
);
// Count dispatched stores. These cannot be flushed anymore.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_disp_stores (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (1'b0),
.en_i (acc_st_disp ^ acc_resp_i.store_complete),
.load_i (1'b0),
.down_i (acc_resp_i.store_complete),
.d_i ('0),
.q_o (acc_disp_stores_pending),
.overflow_o(acc_disp_stores_overflow)
);
// Simulation check: neither store counter may report an overflow.
acc_dispatcher_no_store_overflow :
assert property (
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
else $error("[acc_dispatcher] Too many pending stores.");
/**************************
* Tie Off Unused Signals *
**************************/
assign acc_stall_st_pending_o = 1'b0;
assign flush_pipeline_o = 1'b0;
assign acc_dcache_req_ports_o = '0;
endmodule : acc_dispatcher

View File

@ -0,0 +1,423 @@
// Copyright 2020 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Authors: Matheus Cavalcante, ETH Zurich
// Nils Wistoff, ETH Zurich
// Date: 20.11.2020
// Description: Functional unit that dispatches CVA6 instructions to accelerators.
module acc_dispatcher
import ariane_pkg::*;
import riscv::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type acc_req_t = acc_pkg::accelerator_req_t,
parameter type acc_resp_t = acc_pkg::accelerator_resp_t,
parameter type acc_cfg_t = logic,
parameter acc_cfg_t AccCfg = '0
) (
input logic clk_i,
input logic rst_ni,
// Interface with the CSR regfile
input logic acc_cons_en_i, // Accelerator memory consistent mode
output logic acc_fflags_valid_o,
output logic [4:0] acc_fflags_o,
// Interface with the CSRs
input priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][PLEN-3:0] pmpaddr_i,
input logic [2:0] fcsr_frm_i,
output logic dirty_v_state_o,
// Interface with the issue stage
input scoreboard_entry_t issue_instr_i,
input logic issue_instr_hs_i,
output logic issue_stall_o,
input fu_data_t fu_data_i,
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
output xlen_t acc_result_o,
output logic acc_valid_o,
output exception_t acc_exception_o,
// Interface with the execute stage
output logic acc_valid_ex_o, // FU executed
// Interface with the commit stage
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
input logic commit_st_barrier_i, // A store barrier was commited
// Interface with the load/store unit
output logic acc_stall_st_pending_o,
input logic acc_no_st_pending_i,
input dcache_req_i_t [2:0] dcache_req_ports_i,
// Interface with the controller
output logic ctrl_halt_o,
input logic flush_unissued_instr_i,
input logic flush_ex_i,
output logic flush_pipeline_o,
// Interface with cache subsystem
output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
input logic inval_ready_i,
output logic inval_valid_o,
output logic [63:0] inval_addr_o,
// Accelerator interface
output acc_pkg::accelerator_req_t acc_req_o,
input acc_pkg::accelerator_resp_t acc_resp_i
);
`include "common_cells/registers.svh"
import cf_math_pkg::idx_width;
/***********************
* Common signals *
***********************/
logic acc_ready;
logic acc_valid_d, acc_valid_q;
/**************************
* Accelerator issue *
**************************/
// Issue accelerator instructions
`FF(acc_valid_q, acc_valid_d, '0)
assign acc_valid_ex_o = acc_valid_q;
assign acc_valid_d = ~issue_instr_i.ex.valid &
issue_instr_hs_i &
(issue_instr_i.fu == ACCEL) &
~flush_unissued_instr_i;
// Accelerator load/store pending signals
logic acc_no_ld_pending;
logic acc_no_st_pending;
// Stall issue stage in three cases:
always_comb begin : stall_issue
unique case (issue_instr_i.fu)
ACCEL:
// 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet
issue_stall_o = ~acc_ready;
LOAD:
// 2. We're issuing a scalar load but there is an inflight accelerator store.
issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
STORE:
// 3. We're issuing a scalar store but there is an inflight accelerator load or store.
issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending);
default: issue_stall_o = 1'b0;
endcase
end
/***********************
* Instruction queue *
***********************/
localparam InstructionQueueDepth = 3;
fu_data_t acc_data;
fu_data_t acc_insn_queue_o;
logic acc_insn_queue_pop;
logic acc_insn_queue_empty;
logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage;
logic acc_commit;
logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id;
assign acc_data = acc_valid_ex_o ? fu_data_i : '0;
fifo_v3 #(
.DEPTH (InstructionQueueDepth),
.FALL_THROUGH(1'b1),
.dtype (fu_data_t)
) i_acc_insn_queue (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_ex_i),
.testmode_i(1'b0),
.data_i (fu_data_i),
.push_i (acc_valid_q),
.full_o ( /* Unused */),
.data_o (acc_insn_queue_o),
.pop_i (acc_insn_queue_pop),
.empty_o (acc_insn_queue_empty),
.usage_o (acc_insn_queue_usage)
);
// We are ready if the instruction queue is able to accept at least one more entry.
assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
/**********************************
* Non-speculative instructions *
**********************************/
// Keep track of the instructions that were received by the dispatcher.
logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q;
`FF(insn_pending_q, insn_pending_d, '0)
// Only non-speculative instructions can be issued to the accelerators.
// The following block keeps track of which transaction IDs reached the
// top of the scoreboard, and are therefore no longer speculative.
logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
`FF(insn_ready_q, insn_ready_d, '0)
always_comb begin : p_non_speculative_ff
// Maintain state
insn_pending_d = insn_pending_q;
insn_ready_d = insn_ready_q;
// We received a new instruction
if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1;
// Flush all received instructions
if (flush_ex_i) insn_pending_d = '0;
// An accelerator instruction is no longer speculative.
if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
insn_ready_d[acc_commit_trans_id] = 1'b1;
insn_pending_d[acc_commit_trans_id] = 1'b0;
end
// An accelerator instruction was issued.
if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
end : p_non_speculative_ff
/*************************
* Accelerator request *
*************************/
acc_pkg::accelerator_req_t acc_req;
logic acc_req_valid;
logic acc_req_ready;
acc_pkg::accelerator_req_t acc_req_int;
fall_through_register #(
.T(acc_pkg::accelerator_req_t)
) i_accelerator_req_register (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clr_i (1'b0),
.testmode_i(1'b0),
.data_i (acc_req),
.valid_i (acc_req_valid),
.ready_o (acc_req_ready),
.data_o (acc_req_int),
.valid_o (acc_req_o.req_valid),
.ready_i (acc_resp_i.req_ready)
);
assign acc_req_o.insn = acc_req_int.insn;
assign acc_req_o.rs1 = acc_req_int.rs1;
assign acc_req_o.rs2 = acc_req_int.rs2;
assign acc_req_o.frm = acc_req_int.frm;
assign acc_req_o.trans_id = acc_req_int.trans_id;
assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
assign acc_req_o.acc_cons_en = acc_cons_en_i;
assign acc_req_o.inval_ready = inval_ready_i;
always_comb begin : accelerator_req_dispatcher
// Do not fetch from the instruction queue
acc_insn_queue_pop = 1'b0;
// Default values
acc_req = '0;
acc_req_valid = 1'b0;
// Unpack fu_data_t into accelerator_req_t
if (!acc_insn_queue_empty) begin
acc_req = '{
// Instruction is forwarded from the decoder as an immediate
// -
// frm rounding information is up to date during a valid request to the accelerator
// The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes
// do not take place until the accelerator answers (Ariane commits in-order)
insn :
acc_insn_queue_o.imm[
31
:
0
],
rs1 : acc_insn_queue_o.operand_a,
rs2 : acc_insn_queue_o.operand_b,
frm : fpnew_pkg::roundmode_e'(fcsr_frm_i),
trans_id: acc_insn_queue_o.trans_id,
default: '0
};
// Wait until the instruction is no longer speculative.
acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] ||
(acc_commit && insn_pending_q[acc_commit_trans_id]);
acc_insn_queue_pop = acc_req_valid && acc_req_ready;
end
end
/**************************
* Accelerator response *
**************************/
logic acc_ld_disp;
logic acc_st_disp;
// Unpack the accelerator response
assign acc_trans_id_o = acc_resp_i.trans_id;
assign acc_result_o = acc_resp_i.result;
assign acc_valid_o = acc_resp_i.resp_valid;
assign acc_exception_o = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
assign acc_fflags_o = acc_resp_i.fflags;
// Always ready to receive responses
assign acc_req_o.resp_ready = 1'b1;
// Signal dispatched load/store to issue stage
assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
// Cache invalidation
assign inval_valid_o = acc_resp_i.inval_valid;
assign inval_addr_o = acc_resp_i.inval_addr;
/**************************
* Accelerator commit *
**************************/
// Instruction can be issued to the (in-order) back-end if
// it reached the top of the scoreboard and it hasn't been
// issued yet
always_comb begin : accelerator_commit
acc_commit = 1'b0;
if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1;
if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)
acc_commit = 1'b1;
end
// Dirty the V state if we are committing anything related to the vector accelerator
always_comb begin : dirty_v_state
dirty_v_state_o = 1'b0;
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
dirty_v_state_o |= commit_ack_i[i] & (commit_instr_i[i].fu == ACCEL);
end
end
assign acc_commit_trans_id = !commit_instr_i[0].valid ? commit_instr_i[0].trans_id
: commit_instr_i[1].trans_id;
/**************************
* Accelerator barriers *
**************************/
// On a store barrier (i.e. any barrier that requires preceding stores to complete
// before continuing execution), halt execution while there are pending stores in
// the accelerator pipeline.
logic wait_acc_store_d, wait_acc_store_q;
`FF(wait_acc_store_q, wait_acc_store_d, '0)
// Set on store barrier. Clear when no store is pending.
assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending;
assign ctrl_halt_o = wait_acc_store_q;
/**************************
* Load/Store tracking *
**************************/
// Loads
logic acc_spec_loads_overflow;
logic [2:0] acc_spec_loads_pending;
logic acc_disp_loads_overflow;
logic [2:0] acc_disp_loads_pending;
assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0);
// Count speculative loads. These can still be flushed.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_spec_loads (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (flush_ex_i),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),
.load_i (1'b0),
.down_i (acc_ld_disp),
.d_i ('0),
.q_o (acc_spec_loads_pending),
.overflow_o(acc_spec_loads_overflow)
);
// Count dispatched loads. These cannot be flushed anymore.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_disp_loads (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (1'b0),
.en_i (acc_ld_disp ^ acc_resp_i.load_complete),
.load_i (1'b0),
.down_i (acc_resp_i.load_complete),
.d_i ('0),
.q_o (acc_disp_loads_pending),
.overflow_o(acc_disp_loads_overflow)
);
acc_dispatcher_no_load_overflow :
assert property (
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
else $error("[acc_dispatcher] Too many pending loads.");
// Stores
logic acc_spec_stores_overflow;
logic [2:0] acc_spec_stores_pending;
logic acc_disp_stores_overflow;
logic [2:0] acc_disp_stores_pending;
assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0);
// Count speculative stores. These can still be flushed.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_spec_stores (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (flush_ex_i),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
.load_i (1'b0),
.down_i (acc_st_disp),
.d_i ('0),
.q_o (acc_spec_stores_pending),
.overflow_o(acc_spec_stores_overflow)
);
// Count dispatched stores. These cannot be flushed anymore.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_disp_stores (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (1'b0),
.en_i (acc_st_disp ^ acc_resp_i.store_complete),
.load_i (1'b0),
.down_i (acc_resp_i.store_complete),
.d_i ('0),
.q_o (acc_disp_stores_pending),
.overflow_o(acc_disp_stores_overflow)
);
acc_dispatcher_no_store_overflow :
assert property (
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
else $error("[acc_dispatcher] Too many pending stores.");
/**************************
* Tie Off Unused Signals *
**************************/
assign acc_stall_st_pending_o = 1'b0;
assign flush_pipeline_o = 1'b0;
assign acc_dcache_req_ports_o = '0;
endmodule : acc_dispatcher

359
test/type_param/core/alu.sv Normal file
View File

@ -0,0 +1,359 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Matthias Baer <baermatt@student.ethz.ch>
// Author: Igor Loi <igor.loi@unibo.it>
// Author: Andreas Traber <atraber@student.ethz.ch>
// Author: Lukas Mueller <lukasmue@student.ethz.ch>
// Author: Florian Zaruba <zaruabf@iis.ee.ethz.ch>
//
// Date: 19.03.2017
// Description: Ariane ALU based on RI5CY's ALU
// Combinational ALU.
//
// Decodes fu_data_i.operation and produces:
//   - result_o         : the arithmetic/logic/shift/compare result (plus the bit-manipulation
//                        results when CVA6Cfg.RVB is set and the conditional-zero results when
//                        CVA6Cfg.ZiCondExtEn is set); '0 for operations not handled here.
//   - alu_branch_res_o : the outcome of the branch comparison (EQ/NE/LTS/LTU/GES/GEU);
//                        1'b1 for every other operation.
//
// clk_i and rst_ni are part of the port list but are not referenced inside this module;
// all logic below is combinational.
module alu
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input fu_data_t fu_data_i,
output riscv::xlen_t result_o,
output logic alu_branch_res_o
);
riscv::xlen_t operand_a_rev;
logic [ 31:0] operand_a_rev32;
logic [ riscv::XLEN:0] operand_b_neg;
logic [riscv::XLEN+1:0] adder_result_ext_o;
logic less; // handles both signed and unsigned forms
logic [ 31:0] rolw; // Rotate Left Word
logic [ 31:0] rorw; // Rotate Right Word
logic [31:0] orcbw, rev8w;
logic [ $clog2(riscv::XLEN) : 0] cpop; // Count Population
logic [$clog2(riscv::XLEN)-1 : 0] lz_tz_count; // Count Leading Zeros
logic [ 4:0] lz_tz_wcount; // Count Leading Zeros Word
logic lz_tz_empty, lz_tz_wempty;
riscv::xlen_t orcbw_result, rev8w_result;
// bit reverse operand_a for left shifts and bit counting
// (operand_a_rev reverses all XLEN bits, operand_a_rev32 reverses only bits [31:0])
generate
genvar k;
for (k = 0; k < riscv::XLEN; k++)
assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k];
for (k = 0; k < 32; k++) assign operand_a_rev32[k] = fu_data_i.operand_a[31-k];
endgenerate
// ------
// Adder
// ------
logic adder_op_b_negate;
logic adder_z_flag;
logic [riscv::XLEN:0] adder_in_a, adder_in_b;
riscv::xlen_t adder_result;
logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx;
// Select the operations for which operand b is bitwise inverted. The same inverted
// operand (operand_b_neg) feeds both the adder and the ANDN/ORN/XNOR logic results.
always_comb begin
adder_op_b_negate = 1'b0;
unique case (fu_data_i.operation)
// ADDER OPS
EQ, NE, SUB, SUBW, ANDN, ORN, XNOR: adder_op_b_negate = 1'b1;
default: ;
endcase
end
// Pre-process operand a for the bit-manipulation operations. Without RVB (or for any
// operation not listed) operand_a_bitmanip is simply operand_a. The second case statement
// runs after the XLEN64-only one, so its assignments take precedence when both match.
always_comb begin
operand_a_bitmanip = fu_data_i.operand_a;
if (CVA6Cfg.RVB) begin
if (riscv::IS_XLEN64) begin
unique case (fu_data_i.operation)
SH1ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1;
SH2ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2;
SH3ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3;
CTZW: operand_a_bitmanip = operand_a_rev32;
ADDUW, CPOPW, CLZW: operand_a_bitmanip = fu_data_i.operand_a[31:0];
default: ;
endcase
end
unique case (fu_data_i.operation)
SH1ADD: operand_a_bitmanip = fu_data_i.operand_a << 1;
SH2ADD: operand_a_bitmanip = fu_data_i.operand_a << 2;
SH3ADD: operand_a_bitmanip = fu_data_i.operand_a << 3;
CTZ: operand_a_bitmanip = operand_a_rev;
default: ;
endcase
end
end
// Both adder inputs carry one extra LSB: operand a is extended with 1'b1, operand b with
// 1'b0 XORed with the negate flag. When negating, the two extra LSBs are both 1 and their
// sum carries into bit 1, which supplies the "+1" of the two's complement (a + ~b + 1).
// The extra LSB is dropped again when adder_result is sliced out of adder_result_ext_o.
// prepare operand a
assign adder_in_a = {operand_a_bitmanip, 1'b1};
// prepare operand b
assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN + 1{adder_op_b_negate}};
assign adder_in_b = operand_b_neg;
// actual adder
assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
assign adder_result = adder_result_ext_o[riscv::XLEN:1];
// zero flag: set when the adder result is all zeros (a == b for the subtracting EQ/NE ops)
assign adder_z_flag = ~|adder_result;
// get the right branch comparison result
always_comb begin : branch_resolve
// set comparison by default
alu_branch_res_o = 1'b1;
case (fu_data_i.operation)
EQ: alu_branch_res_o = adder_z_flag;
NE: alu_branch_res_o = ~adder_z_flag;
LTS, LTU: alu_branch_res_o = less;
GES, GEU: alu_branch_res_o = ~less;
default: alu_branch_res_o = 1'b1;
endcase
end
// ---------
// Shifts
// ---------
// Only right shifters are instantiated. A left shift is computed by bit-reversing the
// operand, shifting right, and bit-reversing the result again.
// TODO: this can probably optimized significantly
logic shift_left; // should we shift left
logic shift_arithmetic;
riscv::xlen_t shift_amt; // amount of shift, to the right
riscv::xlen_t shift_op_a; // input of the shifter
logic [ 31:0] shift_op_a32; // input to the 32 bit shift operation
riscv::xlen_t shift_result;
logic [ 31:0] shift_result32;
logic [riscv::XLEN:0] shift_right_result;
logic [ 32:0] shift_right_result32;
riscv::xlen_t shift_left_result;
logic [ 31:0] shift_left_result32;
assign shift_amt = fu_data_i.operand_b;
assign shift_left = (fu_data_i.operation == SLL) | (fu_data_i.operation == SLLW);
assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW);
// right shifts, we let the synthesizer optimize this
logic [riscv::XLEN:0] shift_op_a_64;
logic [32:0] shift_op_a_32;
// choose the bit reversed or the normal input for shift operand a
assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a;
assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
// Prepend one extra MSB: the operand's sign bit for arithmetic shifts, 0 otherwise, so that
// a single signed '>>>' implements both logical and arithmetic right shifts.
assign shift_op_a_64 = {shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a};
assign shift_op_a_32 = {shift_arithmetic & shift_op_a[31], shift_op_a32};
assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]);
assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]);
// bit reverse the shift_right_result for left shifts
genvar j;
generate
for (j = 0; j < riscv::XLEN; j++)
assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j];
for (j = 0; j < 32; j++) assign shift_left_result32[j] = shift_right_result32[31-j];
endgenerate
assign shift_result = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0];
assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0];
// ------------
// Comparisons
// ------------
// 'less' is a < b. Both operands are widened by one bit: for the signed operations listed
// below that bit is a copy of the operand's MSB (sign extension), otherwise it is 0, so one
// signed comparison covers the signed and the unsigned case.
always_comb begin
logic sgn;
sgn = 1'b0;
if ((fu_data_i.operation == SLTS) ||
(fu_data_i.operation == LTS) ||
(fu_data_i.operation == GES) ||
(fu_data_i.operation == MAX) ||
(fu_data_i.operation == MIN))
sgn = 1'b1;
less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) <
$signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b}));
end
// Bit-counting helpers, only instantiated when the bit-manipulation extension is enabled.
// They operate on operand_a_bitmanip, i.e. on the bit-reversed operand for CTZ/CTZW and on
// the lower word for the *W variants.
// NOTE(review): popcount and lzc are defined outside this file; MODE(1) presumably selects
// leading-zero counting - confirm against the lzc implementation.
if (CVA6Cfg.RVB) begin : gen_bitmanip
// Count Population + Count population Word
popcount #(
.INPUT_WIDTH(riscv::XLEN)
) i_cpop_count (
.data_i (operand_a_bitmanip),
.popcount_o(cpop)
);
// Count Leading/Trailing Zeros
// 64b
lzc #(
.WIDTH(riscv::XLEN),
.MODE (1)
) i_clz_64b (
.in_i(operand_a_bitmanip),
.cnt_o(lz_tz_count),
.empty_o(lz_tz_empty)
);
if (riscv::IS_XLEN64) begin
//32b
lzc #(
.WIDTH(32),
.MODE (1)
) i_clz_32b (
.in_i(operand_a_bitmanip[31:0]),
.cnt_o(lz_tz_wcount),
.empty_o(lz_tz_wempty)
);
end
end
// ORC.B / REV8 helpers:
//   orcbw : every byte of operand_a[31:0] replaced by eight copies of the OR of its bits
//   rev8w : the four bytes of operand_a[31:0] in reversed order
// The *_result signals extend this to the full register width on 64-bit configurations.
if (CVA6Cfg.RVB) begin : gen_orcbw_rev8w_results
assign orcbw = {
{8{|fu_data_i.operand_a[31:24]}},
{8{|fu_data_i.operand_a[23:16]}},
{8{|fu_data_i.operand_a[15:8]}},
{8{|fu_data_i.operand_a[7:0]}}
};
assign rev8w = {
{fu_data_i.operand_a[7:0]},
{fu_data_i.operand_a[15:8]},
{fu_data_i.operand_a[23:16]},
{fu_data_i.operand_a[31:24]}
};
if (riscv::IS_XLEN64) begin : gen_64b
assign orcbw_result = {
{8{|fu_data_i.operand_a[63:56]}},
{8{|fu_data_i.operand_a[55:48]}},
{8{|fu_data_i.operand_a[47:40]}},
{8{|fu_data_i.operand_a[39:32]}},
orcbw
};
assign rev8w_result = {
rev8w,
{fu_data_i.operand_a[39:32]},
{fu_data_i.operand_a[47:40]},
{fu_data_i.operand_a[55:48]},
{fu_data_i.operand_a[63:56]}
};
end else begin : gen_32b
assign orcbw_result = orcbw;
assign rev8w_result = rev8w;
end
end
// -----------
// Result MUX
// -----------
// result_o defaults to '0 and is then overwritten by whichever case statement below matches
// the operation. The statements are evaluated in order, so a later match overrides an
// earlier one; do not reorder them without checking for overlapping operation lists.
always_comb begin
result_o = '0;
if (riscv::IS_XLEN64) begin
unique case (fu_data_i.operation)
// Add word: Ignore the upper bits and sign extend to 64 bit
ADDW, SUBW: result_o = {{riscv::XLEN - 32{adder_result[31]}}, adder_result[31:0]};
SH1ADDUW, SH2ADDUW, SH3ADDUW: result_o = adder_result;
// Shifts 32 bit
SLLW, SRLW, SRAW: result_o = {{riscv::XLEN - 32{shift_result32[31]}}, shift_result32[31:0]};
default: ;
endcase
end
unique case (fu_data_i.operation)
// Standard Operations
// (operand_b_neg[XLEN:1] is operand b, inverted for the ANDN/ORN/XNOR forms)
ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1];
ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1];
XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1];
// Adder Operations
ADD, SUB, ADDUW, SH1ADD, SH2ADD, SH3ADD: result_o = adder_result;
// Shift Operations
SLL, SRL, SRA: result_o = (riscv::IS_XLEN64) ? shift_result : shift_result32;
// Comparison Operations
SLTS, SLTU: result_o = {{riscv::XLEN - 1{1'b0}}, less};
default: ; // default case to suppress unique warning
endcase
if (CVA6Cfg.RVB) begin
// Index for Bitwise Rotation
// (one-hot mask with the bit selected by operand_b modulo XLEN set; used by the
// single-bit BCLR/BEXT/BINV/BSET operations below)
bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN - 1));
// rolw, roriw, rorw
// (32-bit rotates of operand_a[31:0] by operand_b[4:0]; the expressions are evaluated at
// XLEN bits and truncated to the 32-bit rolw/rorw on assignment)
rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
if (riscv::IS_XLEN64) begin
unique case (fu_data_i.operation)
// Word counts: an all-zero word yields 32, otherwise the 5-bit count zero-extended
CLZW, CTZW:
result_o = (lz_tz_wempty) ? 32 : {{riscv::XLEN - 5{1'b0}}, lz_tz_wcount}; // change
// Word rotates: the 32-bit result is sign-extended to XLEN
ROLW: result_o = {{riscv::XLEN - 32{rolw[31]}}, rolw};
RORW, RORIW: result_o = {{riscv::XLEN - 32{rorw[31]}}, rorw};
default: ;
endcase
end
unique case (fu_data_i.operation)
// Integer minimum/maximum
// ('less' already reflects the signedness of the operation, see the Comparisons section)
MAX: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MIN: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
// Single bit instructions operations
BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx;
BEXT, BEXTI: result_o = {{riscv::XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)};
BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx;
BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx;
// Count Leading/Trailing Zeros
// When the counter flags an all-zero input the reported count is incremented by one.
// NOTE(review): presumably lzc reports XLEN-1 for an empty input so that +1 gives XLEN -
// confirm against the lzc implementation.
CLZ, CTZ:
result_o = (lz_tz_empty) ? ({{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count} + 1) :
{{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count};
// Count population
CPOP, CPOPW: result_o = {{(riscv::XLEN - ($clog2(riscv::XLEN) + 1)) {1'b0}}, cpop};
// Sign and Zero Extend
SEXTB: result_o = {{riscv::XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]};
SEXTH: result_o = {{riscv::XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]};
ZEXTH: result_o = {{riscv::XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]};
// Bitwise Rotation
// (rotate amount is operand_b[5:0] on 64-bit and operand_b[4:0] on 32-bit configurations)
ROL:
result_o = (riscv::IS_XLEN64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0])));
ROR, RORI:
result_o = (riscv::IS_XLEN64) ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0])));
ORCB: result_o = orcbw_result;
REV8: result_o = rev8w_result;
// SLLIUW is handled in the default branch and only on 64-bit configurations
default:
if (fu_data_i.operation == SLLIUW && riscv::IS_XLEN64)
result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0]; // Left Shift 32 bit unsigned
endcase
end
// Conditional-zero operations, only when the ZiCond extension is enabled
if (CVA6Cfg.ZiCondExtEn) begin
unique case (fu_data_i.operation)
CZERO_EQZ:
result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0; // move zero to rd if rs2 is equal to zero else rs1
CZERO_NEZ:
result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a; // move zero to rd if rs2 is nonzero else rs1
default: ; // default case to suppress unique warning
endcase
end
end
endmodule

View File

@ -0,0 +1,82 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 20.09.2018
// Description: Buffers AMO requests
// This unit buffers atomic memory operations for the cache subsystem.
// Furthermore it handles interfacing with the commit stage
// Single-entry buffer for atomic memory operations (AMOs).
//
// An AMO (operation, physical address, data, size) is pushed into a depth-1 FIFO when
// valid_i is asserted. The buffered entry is presented on amo_req_o, and the request is
// only flagged valid once the AMO sits in the commit stage and no store is pending. The
// entry is popped when the cache subsystem acknowledges it (amo_resp_i.ack).
module amo_buffer #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic                   clk_i,               // Clock
    input  logic                   rst_ni,              // Asynchronous reset active low
    input  logic                   flush_i,             // pipeline flush
    input  logic                   valid_i,             // AMO is valid
    output logic                   ready_o,             // AMO unit is ready
    input  ariane_pkg::amo_t       amo_op_i,            // AMO Operation
    input  logic [riscv::PLEN-1:0] paddr_i,             // physical address of store which needs to be placed in the queue
    input  riscv::xlen_t           data_i,              // data which is placed in the queue
    input  logic [1:0]             data_size_i,         // type of request we are making (e.g.: bytes to write)
    // D$
    output ariane_pkg::amo_req_t   amo_req_o,           // request to cache subsystem
    input  ariane_pkg::amo_resp_t  amo_resp_i,          // response from cache subsystem
    // Auxiliary signals
    input  logic                   amo_valid_commit_i,  // We have a valid AMO in the commit stage
    input  logic                   no_st_pending_i      // there is currently no store pending anymore
);

  // Payload stored in the FIFO for one buffered AMO.
  typedef struct packed {
    ariane_pkg::amo_t       op;
    logic [riscv::PLEN-1:0] paddr;
    riscv::xlen_t           data;
    logic [1:0]             size;
  } amo_entry_t;

  amo_entry_t entry_in, entry_out;
  logic       entry_valid;   // the single FIFO slot is occupied
  logic       buffer_flush;  // qualified flush towards the FIFO

  // Pack the incoming AMO into one FIFO entry.
  assign entry_in = '{op: amo_op_i, paddr: paddr_i, data: data_i, size: data_size_i};

  // A pipeline flush only clears the buffer while the AMO is not being committed,
  // i.e. while it is still speculative.
  assign buffer_flush = flush_i & ~amo_valid_commit_i;

  // Request towards the cache subsystem. It becomes valid as soon as an AMO is buffered,
  // that AMO is in the commit stage and all stores have drained. Address and data are
  // zero-extended to the 64-bit operand fields.
  assign amo_req_o.req       = entry_valid & amo_valid_commit_i & no_st_pending_i;
  assign amo_req_o.amo_op    = entry_out.op;
  assign amo_req_o.size      = entry_out.size;
  assign amo_req_o.operand_a = {{64 - riscv::PLEN{1'b0}}, entry_out.paddr};
  assign amo_req_o.operand_b = {{64 - riscv::XLEN{1'b0}}, entry_out.data};

  // Depth-1 FIFO: "full" doubles as the entry-valid flag and "empty" as the ready flag.
  fifo_v3 #(
      .DEPTH(1),
      .dtype(amo_entry_t)
  ) i_amo_fifo (
      .clk_i     (clk_i),
      .rst_ni    (rst_ni),
      .flush_i   (buffer_flush),
      .testmode_i(1'b0),
      .full_o    (entry_valid),
      .empty_o   (ready_o),
      .usage_o   (),  // left open
      .data_i    (entry_in),
      .push_i    (valid_i),
      .data_o    (entry_out),
      .pop_i     (amo_resp_i.ack)
  );

endmodule

View File

@ -0,0 +1,83 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Engineer: Francesco Conti - f.conti@unibo.it
//
// Additional contributions by:
// Markus Wegmann - markus.wegmann@technokrat.ch
//
// Design Name: RISC-V register file
// Project Name: zero-riscy
// Language: SystemVerilog
//
// Description: Register file with 31 or 15x 32 bit wide registers.
// Register 0 is fixed to 0. This register file is based on
// flip flops.
//
// Flip-flop based register file with 32 entries of DATA_WIDTH bits.
//
// Parameters:
//   CVA6Cfg       - core configuration; CVA6Cfg.NrCommitPorts sets the number of write ports
//   DATA_WIDTH    - width of each register in bits
//   NR_READ_PORTS - number of combinational read ports
//   ZERO_REG_ZERO - when set, entry 0 is forced back to zero on every clock edge
//
// Reads are asynchronous; writes take effect at the rising clock edge. If several write
// ports target the same entry in one cycle, the port with the highest index wins.
// test_en_i is part of the interface but is not referenced inside this module.
module ariane_regfile #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg       = config_pkg::cva6_cfg_empty,
    parameter int unsigned           DATA_WIDTH    = 32,
    parameter int unsigned           NR_READ_PORTS = 2,
    parameter bit                    ZERO_REG_ZERO = 0
) (
    // clock and reset
    input  logic                                             clk_i,
    input  logic                                             rst_ni,
    // disable clock gates for testing
    input  logic                                             test_en_i,
    // read port
    input  logic [        NR_READ_PORTS-1:0][           4:0] raddr_i,
    output logic [        NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
    // write port
    input  logic [CVA6Cfg.NrCommitPorts-1:0][           4:0] waddr_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0]                 we_i
);

  localparam ADDR_WIDTH = 5;
  localparam NUM_WORDS = 2 ** ADDR_WIDTH;

  // Register storage and the per-port one-hot write-enable decode.
  logic [            NUM_WORDS-1:0][DATA_WIDTH-1:0] mem;
  logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec;

  // Decode each write address into a one-hot enable vector, gated by the port's
  // write enable: every bit defaults to 0 and only the addressed one follows we_i.
  always_comb begin : we_decoder
    for (int unsigned port = 0; port < CVA6Cfg.NrCommitPorts; port++) begin
      for (int unsigned word = 0; word < NUM_WORDS; word++) begin
        we_dec[port][word] = 1'b0;
        if (waddr_i[port] == word) we_dec[port][word] = we_i[port];
      end
    end
  end

  // Register update. All entries (including entry 0) are visited; entry 0 is kept at
  // zero by the final assignment below when ZERO_REG_ZERO is set.
  always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral
    if (!rst_ni) begin
      mem <= '{default: '0};
    end else begin
      // Ports are applied in ascending order, so the highest-indexed port that writes
      // a given entry provides the value that is finally stored.
      for (int unsigned port = 0; port < CVA6Cfg.NrCommitPorts; port++) begin
        for (int unsigned word = 0; word < NUM_WORDS; word++) begin
          if (we_dec[port][word]) mem[word] <= wdata_i[port];
        end
      end
      // Last assignment wins: entry 0 stays zero regardless of any write above.
      if (ZERO_REG_ZERO) mem[0] <= '0;
    end
  end

  // Asynchronous read ports.
  for (genvar rp = 0; rp < NR_READ_PORTS; rp++) begin : gen_read_port
    assign rdata_o[rp] = mem[raddr_i[rp]];
  end

endmodule

View File

@ -0,0 +1,125 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Engineer: Francesco Conti - f.conti@unibo.it
//
// Additional contributions by:
// Markus Wegmann - markus.wegmann@technokrat.ch
// Noam Gallmann - gnoam@live.com
// Felipe Lisboa Malaquias
// Henry Suzukawa
//
//
// Description: This register file is optimized for implementation on
// FPGAs. The register file features one distributed RAM block per implemented
// sync-write port, each with a parametrized number of async-read ports.
// Read-accesses are multiplexed from the relevant block depending on which block
// was last written to. For that purpose an additional array of registers is
// maintained keeping track of write accesses.
//
// FPGA-oriented register file with 32 entries of DATA_WIDTH bits.
//
// One RAM block is kept per write port (CVA6Cfg.NrCommitPorts blocks); each block is
// written synchronously by its own port and read asynchronously by every read port.
// A per-entry block selector remembers which block was written last, and the read
// multiplexer returns the data of that block.
//
// Parameters:
//   CVA6Cfg       - core configuration; CVA6Cfg.NrCommitPorts sets the number of write ports
//   DATA_WIDTH    - width of each register in bits
//   NR_READ_PORTS - number of asynchronous read ports
//   ZERO_REG_ZERO - when set, reads of address 0 return zero
//
// rst_ni only resets the block selector; the RAM contents themselves are not reset.
// test_en_i is part of the interface but is not referenced inside this module.
module ariane_regfile_fpga #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg       = config_pkg::cva6_cfg_empty,
    parameter int unsigned           DATA_WIDTH    = 32,
    parameter int unsigned           NR_READ_PORTS = 2,
    parameter bit                    ZERO_REG_ZERO = 0
) (
    // clock and reset
    input  logic                                             clk_i,
    input  logic                                             rst_ni,
    // disable clock gates for testing
    input  logic                                             test_en_i,
    // read port
    input  logic [        NR_READ_PORTS-1:0][           4:0] raddr_i,
    output logic [        NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
    // write port
    input  logic [CVA6Cfg.NrCommitPorts-1:0][           4:0] waddr_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0]                 we_i
);

  localparam ADDR_WIDTH = 5;
  localparam NUM_WORDS = 2 ** ADDR_WIDTH;
  // Width of the block selector; kept at 1 bit for a single write port so that the
  // selector vectors never become zero-width.
  localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts);

  // Distributed RAM usually supports one write port per block - duplicate for each write port.
  logic [            NUM_WORDS-1:0][        DATA_WIDTH-1:0] mem             [CVA6Cfg.NrCommitPorts];

  logic [CVA6Cfg.NrCommitPorts-1:0][         NUM_WORDS-1:0] we_dec;
  logic [            NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel;
  logic [            NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q;

  // write address decoder (for block selector):
  // one-hot decode of each port's write address, gated by that port's write enable
  always_comb begin
    for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
      for (int unsigned i = 0; i < NUM_WORDS; i++) begin
        if (waddr_i[j] == i) begin
          we_dec[j][i] = we_i[j];
        end else begin
          we_dec[j][i] = 1'b0;
        end
      end
    end
  end

  // update block selector:
  // signal mem_block_sel records where the current valid value is stored.
  // if multiple ports try to write to the same address simultaneously, the port with the highest
  // index has priority.
  always_comb begin
    mem_block_sel = mem_block_sel_q;
    for (int i = 0; i < NUM_WORDS; i++) begin
      for (int j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
        if (we_dec[j][i] == 1'b1) begin
          mem_block_sel[i] = LOG_NR_WRITE_PORTS'(j);
        end
      end
    end
  end

  // block selector flops
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      mem_block_sel_q <= '0;
    end else begin
      mem_block_sel_q <= mem_block_sel;
    end
  end

  // distributed RAM blocks
  logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts];
  for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block
    always_ff @(posedge clk_i) begin
      // Every enabled write is stored, for all 32 addresses. The write enable alone is the
      // guard: an extra "~waddr_i[j] != 0" term adds nothing, because comparing against the
      // unsized literal 0 widens the 5-bit address to 32 bits before it is inverted, so
      // that term can never be false. Address 0 needs no special casing here either; reads
      // of address 0 are forced to zero in the output MUX when ZERO_REG_ZERO is set.
      if (we_i[j]) begin
        mem[j][waddr_i[j]] <= wdata_i[j];
      end
    end
    // asynchronous read of this block for every read port
    for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read
      assign mem_read[j][k] = mem[j][raddr_i[k]];
    end
  end

  // output MUX: pick, per read port, the block that holds the most recent write
  logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr;
  for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port
    assign block_addr[k] = mem_block_sel_q[raddr_i[k]];
    assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k];
  end

  // random initialization of the memory to suppress assert warnings on Questa.
  initial begin
    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
      for (int j = 0; j < NUM_WORDS; j++) begin
        mem[i][j] = $random();
      end
    end
  end

endmodule

View File

@ -0,0 +1,310 @@
/* Copyright 2018 ETH Zurich and University of Bologna.
* Copyright and related rights are licensed under the Solderpad Hardware
* License, Version 0.51 (the License); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
* or agreed to in writing, software, hardware and materials distributed under
* this License is distributed on an AS IS BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* File: axi_shim.sv
* Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>
* Florian Zaruba <zarubaf@iis.ee.ethz.ch>
* Date: 1.8.2018
*
* Description: Manages communication with the AXI Bus. Note that this unit does not
* buffer requests and register the signals.
*
*/
// axi_shim: unbuffered bridge between simple request/grant style read and write
// ports and an AXI master port (axi_req_o / axi_resp_i).
//  - Read side: combinational pass-through of the AR request and the R response.
//  - Write side: a small FSM drives aw_valid / w_valid, steps through the entries
//    of wr_data_i / wr_user_i / wr_be_i with the beat counter wr_cnt_q, and pulses
//    wr_gnt_o in the cycle in which the last outstanding part of the write
//    (address and final data beat) is accepted.
//  - B responses are forwarded directly to wr_valid_o / wr_id_o / wr_exokay_o.
module axi_shim #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
// NOTE(review): the comment below asks for AxiNumWords >= 2, but the
// elaboration-time assertion at the end of this module only enforces >= 1.
// With AxiNumWords == 1 the rd_blen_i / wr_blen_i port ranges become [-1:0];
// confirm which bound is intended.
parameter int unsigned AxiNumWords = 4, // data width in dwords, this is also the maximum burst length, must be >=2
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// read channel
// request
input logic rd_req_i,
output logic rd_gnt_o,
input logic [CVA6Cfg.AxiAddrWidth-1:0] rd_addr_i,
input logic [$clog2(AxiNumWords)-1:0] rd_blen_i, // axi convention: LEN-1
input logic [2:0] rd_size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_i, // use same ID for reads, or make sure you only have one outstanding read tx
input logic rd_lock_i,
// read response (we have to unconditionally sink the response)
input logic rd_rdy_i,
output logic rd_last_o,
output logic rd_valid_o,
output logic [CVA6Cfg.AxiDataWidth-1:0] rd_data_o,
output logic [CVA6Cfg.AxiUserWidth-1:0] rd_user_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_o,
output logic rd_exokay_o, // indicates whether exclusive tx succeeded
// write channel
input logic wr_req_i,
output logic wr_gnt_o,
input logic [CVA6Cfg.AxiAddrWidth-1:0] wr_addr_i,
input logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] wr_data_i,
input logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] wr_user_i,
input logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] wr_be_i,
input logic [$clog2(AxiNumWords)-1:0] wr_blen_i, // axi convention: LEN-1
input logic [2:0] wr_size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_i,
input logic wr_lock_i,
input logic [5:0] wr_atop_i,
// write response
input logic wr_rdy_i,
output logic wr_valid_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_o,
output logic wr_exokay_o, // indicates whether exclusive tx succeeded
// AXI port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
);
// width of the write beat counter; forced to at least one bit
localparam AddrIndex = ($clog2(AxiNumWords) > 0) ? $clog2(AxiNumWords) : 1;
///////////////////////////////////////////////////////
// write channel
///////////////////////////////////////////////////////
// Write FSM states:
//   IDLE                       - no write in flight, waiting for wr_req_i
//   WAIT_AW_READY              - single write, W beat accepted, AW still pending
//   WAIT_LAST_W_READY          - AW accepted, W beat(s) still pending
//   WAIT_LAST_W_READY_AW_READY - burst, AW and further W beats both pending
//   WAIT_AW_READY_BURST        - burst, all W beats accepted, AW still pending
enum logic [3:0] {
IDLE,
WAIT_AW_READY,
WAIT_LAST_W_READY,
WAIT_LAST_W_READY_AW_READY,
WAIT_AW_READY_BURST
}
wr_state_q, wr_state_d;
// AXI tx counter: index of the W beat currently presented on the bus
logic [AddrIndex-1:0] wr_cnt_d, wr_cnt_q;
logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en;
// a burst length field of zero means a single-beat write
assign wr_single_req = (wr_blen_i == 0);
// address
assign axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
assign axi_req_o.aw.addr = wr_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
assign axi_req_o.aw.size = wr_size_i;
assign axi_req_o.aw.len = wr_blen_i;
assign axi_req_o.aw.id = wr_id_i;
assign axi_req_o.aw.prot = 3'b0;
assign axi_req_o.aw.region = 4'b0;
assign axi_req_o.aw.lock = wr_lock_i;
assign axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE;
assign axi_req_o.aw.qos = 4'b0;
assign axi_req_o.aw.atop = wr_atop_i;
assign axi_req_o.aw.user = '0;
// data: the beat counter selects which input entry is driven onto W
assign axi_req_o.w.data = wr_data_i[wr_cnt_q];
assign axi_req_o.w.user = wr_user_i[wr_cnt_q];
assign axi_req_o.w.strb = wr_be_i[wr_cnt_q];
assign axi_req_o.w.last = wr_cnt_done;
// write response
assign wr_exokay_o = (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY);
assign axi_req_o.b_ready = wr_rdy_i;
assign wr_valid_o = axi_resp_i.b_valid;
assign wr_id_o = axi_resp_i.b.id;
// tx counter: done when the counter equals the requested LEN-1 value;
// clear has priority over increment, and the counter only ever increments
// when burst writes are enabled in the configuration
assign wr_cnt_done = (wr_cnt_q == wr_blen_i);
assign wr_cnt_d = (wr_cnt_clr) ? '0 : (wr_cnt_en && CVA6Cfg.AxiBurstWriteEn) ? wr_cnt_q + 1 : wr_cnt_q;
always_comb begin : p_axi_write_fsm
// default
wr_state_d = wr_state_q;
axi_req_o.aw_valid = 1'b0;
axi_req_o.w_valid = 1'b0;
wr_gnt_o = 1'b0;
wr_cnt_en = 1'b0;
wr_cnt_clr = 1'b0;
case (wr_state_q)
///////////////////////////////////
IDLE: begin
// we have an incoming request
if (wr_req_i) begin
// present address and first data beat in the same cycle
axi_req_o.aw_valid = 1'b1;
axi_req_o.w_valid = 1'b1;
// multi-beat write (only when burst writes are enabled)
if (CVA6Cfg.AxiBurstWriteEn && !wr_single_req) begin
// advance to the next beat if the first one is accepted
wr_cnt_en = axi_resp_i.w_ready;
// case selector is {aw_ready, w_ready}
case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
2'b11: wr_state_d = WAIT_LAST_W_READY;
2'b01: wr_state_d = WAIT_LAST_W_READY_AW_READY;
2'b10: wr_state_d = WAIT_LAST_W_READY;
default: ;
endcase
end else if (wr_single_req) begin // its a single write
wr_cnt_clr = 1'b1;
// single req can be granted here
wr_gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
// case selector is {aw_ready, w_ready}
case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
2'b01: wr_state_d = WAIT_AW_READY;
2'b10: wr_state_d = WAIT_LAST_W_READY;
default: wr_state_d = IDLE;
endcase
// NOTE(review): a multi-beat request with AxiBurstWriteEn == 0 matches
// neither branch: the valids are asserted but the FSM stays in IDLE
// and never grants - confirm callers never issue such a request
end
end
end
///////////////////////////////////
// ~> from single write: data was accepted, address is still pending
WAIT_AW_READY: begin
axi_req_o.aw_valid = 1'b1;
if (axi_resp_i.aw_ready) begin
wr_state_d = IDLE;
wr_gnt_o = 1'b1;
end
end
///////////////////////////////////
// ~> from write, address was accepted, there is at least one outstanding data beat
WAIT_LAST_W_READY: begin
axi_req_o.w_valid = 1'b1;
if (CVA6Cfg.AxiBurstWriteEn && axi_resp_i.w_ready && !wr_cnt_done) begin
wr_cnt_en = 1'b1;
end else if (wr_cnt_done) begin // this is the last write
if (axi_resp_i.w_ready) begin
wr_state_d = IDLE;
wr_cnt_clr = 1'b1;
wr_gnt_o = 1'b1;
end
end
end
///////////////////////////////////
// the two burst-only states are handled in the default arm so that they are
// guarded by the AxiBurstWriteEn configuration check
default: begin
///////////////////////////////////
// ~> we need to wait for an aw_ready and there is at least one outstanding write
if (CVA6Cfg.AxiBurstWriteEn) begin
if (wr_state_q == WAIT_LAST_W_READY_AW_READY) begin
axi_req_o.w_valid = 1'b1;
axi_req_o.aw_valid = 1'b1;
// case selector is {aw_ready, w_ready}
case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
// a data beat was accepted, the address is still pending
2'b01: begin
// was this the last data beat?
if (wr_cnt_done) begin
wr_state_d = WAIT_AW_READY_BURST;
wr_cnt_clr = 1'b1;
end else begin
// no, so advance the beat counter and stay here
wr_cnt_en = 1'b1;
end
end
// only the address was accepted
2'b10: wr_state_d = WAIT_LAST_W_READY;
// address and a data beat were accepted together
2'b11: begin
// we are finished
if (wr_cnt_done) begin
wr_state_d = IDLE;
wr_gnt_o = 1'b1;
wr_cnt_clr = 1'b1;
// there are outstanding transactions
end else begin
wr_state_d = WAIT_LAST_W_READY;
wr_cnt_en = 1'b1;
end
end
default: ;
endcase
end ///////////////////////////////////
// ~> all data has already been sent, we are only waiting for the aw_ready
else if (wr_state_q == WAIT_AW_READY_BURST) begin
axi_req_o.aw_valid = 1'b1;
if (axi_resp_i.aw_ready) begin
wr_state_d = IDLE;
wr_gnt_o = 1'b1;
end
end
end else begin
// bursts disabled: any other state encoding falls back to IDLE
wr_state_d = IDLE;
end
end
endcase
end
///////////////////////////////////////////////////////
// read channel
///////////////////////////////////////////////////////
// address
// this module always requests INCR bursts and forwards rd_addr_i unmodified,
// so any cache-line base-address alignment has to be done by the requester
assign axi_req_o.ar.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
assign axi_req_o.ar.addr = rd_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
assign axi_req_o.ar.size = rd_size_i;
assign axi_req_o.ar.len = rd_blen_i;
assign axi_req_o.ar.id = rd_id_i;
assign axi_req_o.ar.prot = 3'b0;
assign axi_req_o.ar.region = 4'b0;
assign axi_req_o.ar.lock = rd_lock_i;
assign axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE;
assign axi_req_o.ar.qos = 4'b0;
assign axi_req_o.ar.user = '0;
// make the read request; it is granted in the cycle the AR handshake completes
assign axi_req_o.ar_valid = rd_req_i;
assign rd_gnt_o = rd_req_i & axi_resp_i.ar_ready;
// return path
assign axi_req_o.r_ready = rd_rdy_i;
assign rd_data_o = axi_resp_i.r.data;
// the R user field is only forwarded when user signals are enabled package-wide
if (ariane_pkg::AXI_USER_EN) begin
assign rd_user_o = axi_resp_i.r.user;
end else begin
assign rd_user_o = '0;
end
assign rd_last_o = axi_resp_i.r.last;
assign rd_valid_o = axi_resp_i.r_valid;
assign rd_id_o = axi_resp_i.r.id;
assign rd_exokay_o = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY);
// ----------------
// Registers
// ----------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
// reset: write FSM idle, beat counter cleared
wr_state_q <= IDLE;
wr_cnt_q <= '0;
end else begin
wr_state_q <= wr_state_d;
wr_cnt_q <= wr_cnt_d;
end
end
// ----------------
// Assertions
// ----------------
// elaboration-time parameter checks (simulation only)
//pragma translate_off
initial begin
assert (AxiNumWords >= 1)
else $fatal(1, "[axi adapter] AxiNumWords must be >= 1");
assert (CVA6Cfg.AxiIdWidth >= 2)
else $fatal(1, "[axi adapter] AXI id width must be at least 2 bit wide");
end
//pragma translate_on
endmodule // axi_shim

View File

@ -0,0 +1,106 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 09.05.2017
// Description: Branch target calculation and comparison
// Branch unit.
//
// Purely combinational. For the control-flow instruction described by
// fu_data_i / pc_i it
//   * computes the link address (pc + 2 or pc + 4) and returns it on
//     branch_result_o,
//   * computes the jump/branch destination (base + immediate, bit 0 cleared
//     for JALR),
//   * compares the outcome against the front-end prediction and reports the
//     result on resolved_branch_o / resolve_branch_o,
//   * flags an instruction-address-misaligned exception for taken transfers
//     to a misaligned destination.
module branch_unit #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input logic clk_i,  // not used by the logic in this module
    input logic rst_ni,  // not used by the logic in this module
    input logic debug_mode_i,  // not used by the logic in this module
    input ariane_pkg::fu_data_t fu_data_i,  // operation, operand_a (JALR base) and immediate
    input logic [riscv::VLEN-1:0] pc_i,  // PC of the instruction being resolved
    input logic is_compressed_instr_i,  // selects a +2 instead of a +4 link address
    input logic fu_valid_i,  // not used by the logic in this module
    input logic branch_valid_i,  // a control-flow instruction is being resolved
    input logic branch_comp_res_i,  // comparison result from the ALU (taken / not taken)
    output logic [riscv::VLEN-1:0] branch_result_o,  // link address (next sequential PC)
    input ariane_pkg::branchpredict_sbe_t branch_predict_i,  // what the front-end predicted
    output ariane_pkg::bp_resolve_t resolved_branch_o,  // actual outcome of the instruction
    output logic resolve_branch_o,  // high whenever branch_valid_i is high
    output ariane_pkg::exception_t branch_exception_o  // misaligned-target exception
);

  logic [riscv::VLEN-1:0] target_address;
  logic [riscv::VLEN-1:0] next_pc;

  // ----------------------
  // Operation class decode
  // ----------------------
  logic is_jalr;
  logic is_cond_branch;
  assign is_jalr        = (fu_data_i.operation == ariane_pkg::JALR);
  assign is_cond_branch = ariane_pkg::op_is_branch(fu_data_i.operation);

  // ----------------------
  // Address computation
  // ----------------------
  // JALR is register-relative, every other control-flow instruction is PC-relative
  logic [riscv::VLEN-1:0] jump_base;
  assign jump_base = is_jalr ? fu_data_i.operand_a[riscv::VLEN-1:0] : pc_i;

  // address of the following instruction; the step depends on the instruction size
  assign next_pc = pc_i + ((is_compressed_instr_i) ? {{riscv::VLEN-2{1'b0}}, 2'h2} : {{riscv::VLEN-3{1'b0}}, 3'h4});

  // the link address is the value this unit hands back as its result
  assign branch_result_o = next_pc;

  always_comb begin : target_calc
    // VLEN-wide signed addition of base and immediate
    target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[riscv::VLEN-1:0]));
    // JALR clears the least significant bit of the computed address
    if (is_jalr) target_address[0] = 1'b0;
  end

  // ----------------------
  // Prediction check
  // ----------------------
  always_comb begin : mispredict_handler
    // values reported while nothing is being resolved
    resolved_branch_o.valid          = branch_valid_i;
    resolved_branch_o.pc             = pc_i;
    resolved_branch_o.cf_type        = branch_predict_i.cf;
    resolved_branch_o.is_mispredict  = 1'b0;
    resolved_branch_o.is_taken       = 1'b0;
    resolved_branch_o.target_address = {riscv::VLEN{1'b0}};
    resolve_branch_o                 = 1'b0;

    if (branch_valid_i) begin
      // signal the resolution
      resolve_branch_o                 = 1'b1;
      // where execution actually continues
      resolved_branch_o.is_taken       = branch_comp_res_i;
      resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;

      // conditional branch: always reported with type Branch; it is a
      // mispredict when the ALU outcome differs from "predicted as Branch"
      if (is_cond_branch) begin
        resolved_branch_o.cf_type = ariane_pkg::Branch;
        resolved_branch_o.is_mispredict = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch);
      end

      // register jump: a mispredict when there was no prediction at all or
      // the predicted address does not match the computed one
      if (is_jalr
          && (branch_predict_i.cf == ariane_pkg::NoCF || target_address != branch_predict_i.predict_address)) begin
        resolved_branch_o.is_mispredict = 1'b1;
        // keep the predicted type for returns, report JumpR for everything else
        if (branch_predict_i.cf != ariane_pkg::Return) begin
          resolved_branch_o.cf_type = ariane_pkg::JumpR;
        end
      end
    end
  end

  // ----------------------
  // Misaligned target
  // ----------------------
  logic jump_taken;
  always_comb begin : exception_handling
    // control is transferred for every non-branch operation and for
    // conditional branches whose comparison succeeded
    jump_taken = !is_cond_branch || (is_cond_branch && branch_comp_res_i);

    branch_exception_o.cause = riscv::INSTR_ADDR_MISALIGNED;
    // tval carries the sign-extended PC of the instruction itself
    branch_exception_o.tval = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};

    // bit 0 set is always misaligned; bit 1 set is only misaligned when the
    // configuration has no compressed-instruction support
    if (branch_valid_i && (target_address[0] || (!CVA6Cfg.RVC && target_address[1])) && jump_taken) begin
      branch_exception_o.valid = 1'b1;
    end else begin
      branch_exception_o.valid = 1'b0;
    end
  end

endmodule

View File

@ -0,0 +1,520 @@
/* Copyright 2018 ETH Zurich and University of Bologna.
* Copyright and related rights are licensed under the Solderpad Hardware
* License, Version 0.51 (the License); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
* or agreed to in writing, software, hardware and materials distributed under
* this License is distributed on an AS IS BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* File: axi_adapter.sv
* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
* Date: 1.8.2018
*
* Description: Manages communication with the AXI Bus
*/
//import std_cache_pkg::*;
// axi_adapter: converts request/grant style accesses into AXI transactions.
//
// A request (req_i) is either a single access or a whole-line burst of
// DATA_WIDTH / AxiDataWidth beats (type_i), and either a read or a write (we_i).
// Writes may additionally carry an atomic operation (amo_i), which is mapped to
// the AXI ATOP field; LR / SC are mapped to exclusive accesses via the lock bit.
//
// Parameters:
//   DATA_WIDTH            - width of wdata_i / rdata_o; divided by the AXI data
//                           width this gives the burst length
//   CRITICAL_WORD_FIRST   - when set, line reads use BURST_WRAP and start at the
//                           requested address; otherwise BURST_INCR is used and
//                           the low CACHELINE_BYTE_OFFSET address bits are cleared
//   CACHELINE_BYTE_OFFSET - number of low address bits cleared for line reads
//
// Handshake:
//   gnt_o   - pulses when the request has been handed over to the AXI bus
//   valid_o - pulses when the transaction completed (B response received, read
//             data assembled, or atomic read data returned)
//
// Several non-atomic writes may be outstanding at once: instead of blocking in
// WAIT_B_VALID the FSM may return to IDLE and count the pending B response in
// outstanding_aw_cnt_q (at most CVA6Cfg.MaxOutstandingStores).
//import std_cache_pkg::*;
module axi_adapter #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned DATA_WIDTH = 256,
    parameter logic CRITICAL_WORD_FIRST = 0,  // the AXI subsystem needs to support wrapping reads for this feature
    parameter int unsigned CACHELINE_BYTE_OFFSET = 8,
    parameter type axi_req_t = logic,
    parameter type axi_rsp_t = logic
) (
    input logic clk_i,  // Clock
    input logic rst_ni,  // Asynchronous reset active low
    input logic req_i,
    input ariane_pkg::ad_req_t type_i,
    input ariane_pkg::amo_t amo_i,
    output logic gnt_o,
    input logic [riscv::XLEN-1:0] addr_i,
    input logic we_i,
    input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i,
    input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i,
    input logic [1:0] size_i,
    input logic [CVA6Cfg.AxiIdWidth-1:0] id_i,
    // read port
    output logic valid_o,
    output logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] rdata_o,
    output logic [CVA6Cfg.AxiIdWidth-1:0] id_o,
    // critical word - read port
    output logic [CVA6Cfg.AxiDataWidth-1:0] critical_word_o,
    output logic critical_word_valid_o,
    // AXI port
    output axi_req_t axi_req_o,
    input axi_rsp_t axi_resp_i
);
  // burst length in AXI convention (number of beats - 1)
  localparam BURST_SIZE = (DATA_WIDTH / CVA6Cfg.AxiDataWidth) - 1;
  // width of the beat counter / beat index, at least one bit
  localparam ADDR_INDEX = ($clog2(
      DATA_WIDTH / CVA6Cfg.AxiDataWidth
  ) > 0) ? $clog2(
      DATA_WIDTH / CVA6Cfg.AxiDataWidth
  ) : 1;
  localparam MAX_OUTSTANDING_AW = CVA6Cfg.MaxOutstandingStores;
  // counter must be able to hold the value MAX_OUTSTANDING_AW itself
  localparam MAX_OUTSTANDING_AW_CNT_WIDTH = $clog2(
      MAX_OUTSTANDING_AW + 1
  ) > 0 ? $clog2(
      MAX_OUTSTANDING_AW + 1
  ) : 1;

  typedef logic [MAX_OUTSTANDING_AW_CNT_WIDTH-1:0] outstanding_aw_cnt_t;

  enum logic [3:0] {
    IDLE,
    WAIT_B_VALID,
    WAIT_AW_READY,
    WAIT_LAST_W_READY,
    WAIT_LAST_W_READY_AW_READY,
    WAIT_AW_READY_BURST,
    WAIT_R_VALID,
    WAIT_R_VALID_MULTIPLE,
    COMPLETE_READ,
    WAIT_AMO_R_VALID
  }
      state_q, state_d;

  // counter for AXI transfers (counts remaining beats, down to zero)
  logic [ADDR_INDEX-1:0] cnt_d, cnt_q;
  logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0]
      cache_line_d, cache_line_q;
  // save the address for a read, as we allow for non-cacheline aligned accesses
  logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0] addr_offset_d, addr_offset_q;
  logic [CVA6Cfg.AxiIdWidth-1:0] id_d, id_q;
  logic [ADDR_INDEX-1:0] index;
  // save the atomic operation and size
  ariane_pkg::amo_t amo_d, amo_q;
  logic [1:0] size_d, size_q;

  // outstanding write transactions counter
  outstanding_aw_cnt_t outstanding_aw_cnt_q, outstanding_aw_cnt_d;
  logic any_outstanding_aw;

  assign any_outstanding_aw = outstanding_aw_cnt_q != '0;

  always_comb begin : axi_fsm
    // Default assignments
    axi_req_o.aw_valid  = 1'b0;
    // Cast to AXI address width
    axi_req_o.aw.addr   = addr_i;
    axi_req_o.aw.prot   = 3'b0;
    axi_req_o.aw.region = 4'b0;
    axi_req_o.aw.len    = 8'b0;
    axi_req_o.aw.size   = {1'b0, size_i};  // 1, 2, 4 or 8 bytes
    axi_req_o.aw.burst  = axi_pkg::BURST_INCR;  // Use BURST_INCR for AXI regular transaction
    axi_req_o.aw.lock   = 1'b0;
    axi_req_o.aw.cache  = axi_pkg::CACHE_MODIFIABLE;
    axi_req_o.aw.qos    = 4'b0;
    axi_req_o.aw.id     = id_i;
    axi_req_o.aw.atop   = atop_from_amo(amo_i);
    axi_req_o.aw.user   = '0;

    axi_req_o.ar_valid  = 1'b0;
    // Cast to AXI address width
    axi_req_o.ar.addr   = addr_i;
    // in case of a single request or wrapping transfer we can simply begin at the address, if we want to request a cache-line
    // with an incremental transfer we need to output the corresponding base address of the cache line
    if (!CRITICAL_WORD_FIRST && type_i != ariane_pkg::SINGLE_REQ) begin
      axi_req_o.ar.addr[CACHELINE_BYTE_OFFSET-1:0] = '0;
    end
    axi_req_o.ar.prot   = 3'b0;
    axi_req_o.ar.region = 4'b0;
    axi_req_o.ar.len    = 8'b0;
    axi_req_o.ar.size   = {1'b0, size_i};  // 1, 2, 4 or 8 bytes
    axi_req_o.ar.burst  = (CRITICAL_WORD_FIRST ? axi_pkg::BURST_WRAP : axi_pkg::BURST_INCR); // wrapping transfer in case of a critical word first strategy
    axi_req_o.ar.lock   = 1'b0;
    axi_req_o.ar.cache  = axi_pkg::CACHE_MODIFIABLE;
    axi_req_o.ar.qos    = 4'b0;
    axi_req_o.ar.id     = id_i;
    axi_req_o.ar.user   = '0;

    axi_req_o.w_valid   = 1'b0;
    axi_req_o.w.data    = wdata_i[0];
    axi_req_o.w.strb    = be_i[0];
    axi_req_o.w.last    = 1'b0;
    axi_req_o.w.user    = '0;

    axi_req_o.b_ready   = 1'b0;
    axi_req_o.r_ready   = 1'b0;

    gnt_o                 = 1'b0;
    valid_o               = 1'b0;
    id_o                  = axi_resp_i.r.id;

    critical_word_o       = axi_resp_i.r.data;
    critical_word_valid_o = 1'b0;
    rdata_o               = cache_line_q;

    state_d               = state_q;
    cnt_d                 = cnt_q;
    cache_line_d          = cache_line_q;
    addr_offset_d         = addr_offset_q;
    id_d                  = id_q;
    amo_d                 = amo_q;
    size_d                = size_q;
    index                 = '0;

    outstanding_aw_cnt_d  = outstanding_aw_cnt_q;

    case (state_q)

      IDLE: begin
        cnt_d = '0;
        // we have an incoming request
        if (req_i) begin
          // is this a read or write?
          // write
          if (we_i) begin
            // multiple outstanding write transactions are only
            // allowed if they are guaranteed not to be reordered
            // i.e. same ID
            if (!any_outstanding_aw || ((id_i == id_q) && (amo_i == ariane_pkg::AMO_NONE))) begin
              // the data is valid
              axi_req_o.aw_valid = 1'b1;
              axi_req_o.w_valid  = 1'b1;
              // store-conditional requires exclusive access
              axi_req_o.aw.lock  = amo_i == ariane_pkg::AMO_SC;
              // its a single write
              if (type_i == ariane_pkg::SINGLE_REQ) begin
                // only a single write so the data is already the last one
                axi_req_o.w.last = 1'b1;
                // single req can be granted here
                gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
                // case selector is {aw_ready, w_ready}
                case ({
                  axi_resp_i.aw_ready, axi_resp_i.w_ready
                })
                  2'b11:   state_d = WAIT_B_VALID;
                  2'b01:   state_d = WAIT_AW_READY;
                  2'b10:   state_d = WAIT_LAST_W_READY;
                  default: state_d = IDLE;
                endcase

                // remember id / atomic / size once the address went out
                if (axi_resp_i.aw_ready) begin
                  id_d   = id_i;
                  amo_d  = amo_i;
                  size_d = size_i;
                end

                // its a request for the whole cache line
              end else begin
                // bursts of AMOs unsupported
                // ($fatal takes the finish number as its first argument)
                assert (amo_i == ariane_pkg::AMO_NONE)
                else $fatal(1, "Bursts of atomic operations are not supported");

                axi_req_o.aw.len = BURST_SIZE[7:0];  // number of bursts to do
                axi_req_o.w.data = wdata_i[0];
                axi_req_o.w.strb = be_i[0];

                // the first beat may already be accepted in this cycle
                if (axi_resp_i.w_ready) cnt_d = BURST_SIZE[ADDR_INDEX-1:0] - 1;
                else cnt_d = BURST_SIZE[ADDR_INDEX-1:0];

                // case selector is {aw_ready, w_ready}
                case ({
                  axi_resp_i.aw_ready, axi_resp_i.w_ready
                })
                  2'b11:   state_d = WAIT_LAST_W_READY;
                  2'b01:   state_d = WAIT_LAST_W_READY_AW_READY;
                  2'b10:   state_d = WAIT_LAST_W_READY;
                  default: ;
                endcase
              end
            end
            // read
          end else begin
            // reads are only issued while no write is outstanding
            if (!any_outstanding_aw) begin

              axi_req_o.ar_valid = 1'b1;
              // load-reserved requires exclusive access
              axi_req_o.ar.lock = amo_i == ariane_pkg::AMO_LR;

              gnt_o = axi_resp_i.ar_ready;
              if (type_i != ariane_pkg::SINGLE_REQ) begin
                // ($fatal takes the finish number as its first argument)
                assert (amo_i == ariane_pkg::AMO_NONE)
                else $fatal(1, "Bursts of atomic operations are not supported");

                axi_req_o.ar.len = BURST_SIZE[7:0];
                cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
              end

              if (axi_resp_i.ar_ready) begin
                state_d = (type_i == ariane_pkg::SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE;
                // NOTE(review): the fixed shift by 3 presumably assumes 8-byte
                // (64-bit) AXI beats - confirm for other AxiDataWidth values
                addr_offset_d = addr_i[ADDR_INDEX-1+3:3];
              end
            end
          end
        end
      end

      // ~> from single write: data was accepted, address is still pending
      WAIT_AW_READY: begin
        axi_req_o.aw_valid = 1'b1;

        if (axi_resp_i.aw_ready) begin
          gnt_o   = 1'b1;
          state_d = WAIT_B_VALID;
          id_d    = id_i;
          amo_d   = amo_i;
          size_d  = size_i;
        end
      end

      // ~> we need to wait for an aw_ready and there is at least one outstanding write
      WAIT_LAST_W_READY_AW_READY: begin
        axi_req_o.w_valid = 1'b1;
        axi_req_o.w.last  = (cnt_q == '0);
        if (type_i == ariane_pkg::SINGLE_REQ) begin
          axi_req_o.w.data = wdata_i[0];
          axi_req_o.w.strb = be_i[0];
        end else begin
          // cnt_q counts down, so BURST_SIZE - cnt_q is the current beat index
          axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
          axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
        end
        axi_req_o.aw_valid = 1'b1;
        // we are here because we want to write a cache line
        axi_req_o.aw.len   = BURST_SIZE[7:0];
        // case selector is {aw_ready, w_ready}
        case ({
          axi_resp_i.aw_ready, axi_resp_i.w_ready
        })
          // a data beat was accepted, the address is still pending
          2'b01: begin
            // was this the last data beat?
            if (cnt_q == 0) state_d = WAIT_AW_READY_BURST;
            else  // no, so reduce the count and stay here
              cnt_d = cnt_q - 1;
          end
          // only the address was accepted
          2'b10: state_d = WAIT_LAST_W_READY;
          // address and a data beat were accepted together
          2'b11: begin
            // we are finished
            if (cnt_q == 0) begin
              state_d = WAIT_B_VALID;
              gnt_o   = 1'b1;
              // there are outstanding transactions
            end else begin
              state_d = WAIT_LAST_W_READY;
              cnt_d   = cnt_q - 1;
            end
          end
          default: ;
        endcase
      end

      // ~> all data has already been sent, we are only waiting for the aw_ready
      WAIT_AW_READY_BURST: begin
        axi_req_o.aw_valid = 1'b1;
        axi_req_o.aw.len   = BURST_SIZE[7:0];

        if (axi_resp_i.aw_ready) begin
          state_d = WAIT_B_VALID;
          gnt_o   = 1'b1;
        end
      end

      // ~> from write, address was accepted, there is an outstanding data beat
      WAIT_LAST_W_READY: begin
        axi_req_o.w_valid = 1'b1;

        if (type_i != ariane_pkg::SINGLE_REQ) begin
          axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
          axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q];
        end

        // this is the last write
        if (cnt_q == '0) begin
          axi_req_o.w.last = 1'b1;
          if (axi_resp_i.w_ready) begin
            state_d = WAIT_B_VALID;
            gnt_o   = 1'b1;
          end
        end else if (axi_resp_i.w_ready) begin
          cnt_d = cnt_q - 1;
        end
      end

      // ~> finish write transaction
      WAIT_B_VALID: begin
        id_o = axi_resp_i.b.id;

        // Write is valid
        if (axi_resp_i.b_valid && !any_outstanding_aw) begin
          axi_req_o.b_ready = 1'b1;

          // some atomics must wait for read data
          // we only accept it after accepting bvalid
          if (amo_returns_data(amo_q)) begin
            if (axi_resp_i.r_valid) begin
              // return read data if valid
              valid_o           = 1'b1;
              axi_req_o.r_ready = 1'b1;
              state_d           = IDLE;
              rdata_o           = axi_resp_i.r.data;
            end else begin
              // wait otherwise
              state_d = WAIT_AMO_R_VALID;
            end
          end else begin
            valid_o = 1'b1;
            state_d = IDLE;

            // store-conditional response
            if (amo_q == ariane_pkg::AMO_SC) begin
              if (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY) begin
                // success -> return 0
                rdata_o = 'b0;
              end else begin
                // failure -> when request is 64-bit, return 1;
                // when request is 32-bit place a 1 in both upper
                // and lower half words. The right word will be
                // realigned/masked externally
                rdata_o = size_q == 2'b10 ? (1'b1 << 32) | 64'b1 : 64'b1;
              end
            end
          end
          // if the request was not an atomic we can possibly issue
          // other requests while waiting for the response
        end else begin
          if ((amo_q == ariane_pkg::AMO_NONE) && (outstanding_aw_cnt_q != MAX_OUTSTANDING_AW)) begin
            state_d = IDLE;
            outstanding_aw_cnt_d = outstanding_aw_cnt_q + 1;
          end
        end
      end

      // ~> some atomics wait for read data
      WAIT_AMO_R_VALID: begin
        // acknowledge data and terminate atomic
        if (axi_resp_i.r_valid) begin
          axi_req_o.r_ready = 1'b1;
          state_d           = IDLE;
          valid_o           = 1'b1;
          rdata_o           = axi_resp_i.r.data;
        end
      end

      // ~> cacheline read, single read
      WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin
        // position of the incoming beat inside the line: with critical word
        // first the burst starts at the requested offset, otherwise at beat 0
        if (CRITICAL_WORD_FIRST) index = addr_offset_q + (BURST_SIZE[ADDR_INDEX-1:0] - cnt_q);
        else index = BURST_SIZE[ADDR_INDEX-1:0] - cnt_q;

        // read data is always accepted immediately
        axi_req_o.r_ready = 1'b1;
        // a read beat arrived
        if (axi_resp_i.r_valid) begin
          if (CRITICAL_WORD_FIRST) begin
            // this is the first word of a cacheline read, e.g.: the word which was causing the miss
            if (state_q == WAIT_R_VALID_MULTIPLE && cnt_q == BURST_SIZE) begin
              critical_word_valid_o = 1'b1;
              critical_word_o       = axi_resp_i.r.data;
            end
          end else begin
            // check if the address offset matches - then we are getting the critical word
            if (index == addr_offset_q) begin
              critical_word_valid_o = 1'b1;
              critical_word_o       = axi_resp_i.r.data;
            end
          end

          // this is the last read
          if (axi_resp_i.r.last) begin
            id_d    = axi_resp_i.r.id;
            state_d = COMPLETE_READ;
          end

          // save the word
          if (state_q == WAIT_R_VALID_MULTIPLE) begin
            cache_line_d[index] = axi_resp_i.r.data;

          end else cache_line_d[0] = axi_resp_i.r.data;

          // Decrease the counter
          cnt_d = cnt_q - 1;
        end
      end
      // ~> read is complete
      COMPLETE_READ: begin
        valid_o = 1'b1;
        state_d = IDLE;
        id_o    = id_q;
      end

      default: state_d = IDLE;
    endcase

    // This process handles B responses when accepting
    // multiple outstanding write transactions
    if (any_outstanding_aw && axi_resp_i.b_valid) begin
      axi_req_o.b_ready = 1'b1;
      valid_o = 1'b1;
      // Right hand side contains non-registered signal as we want
      // to preserve a possible increment from the WAIT_B_VALID state
      outstanding_aw_cnt_d = outstanding_aw_cnt_d - 1;
    end
  end

  // ----------------
  // Registers
  // ----------------
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
      // reset: FSM idle, all saved request state cleared
      state_q              <= IDLE;
      cnt_q                <= '0;
      cache_line_q         <= '0;
      addr_offset_q        <= '0;
      id_q                 <= '0;
      amo_q                <= ariane_pkg::AMO_NONE;
      size_q               <= '0;
      outstanding_aw_cnt_q <= '0;
    end else begin
      state_q              <= state_d;
      cnt_q                <= cnt_d;
      cache_line_q         <= cache_line_d;
      addr_offset_q        <= addr_offset_d;
      id_q                 <= id_d;
      amo_q                <= amo_d;
      size_q               <= size_d;
      outstanding_aw_cnt_q <= outstanding_aw_cnt_d;
    end
  end

  // Maps an atomic memory operation to the 6-bit AXI ATOP encoding.
  // AMO_NONE, AMO_CAS1, AMO_CAS2 and any value not listed map to "no atomic".
  function automatic axi_pkg::atop_t atop_from_amo(ariane_pkg::amo_t amo);
    axi_pkg::atop_t result = 6'b000000;

    unique case (amo)
      ariane_pkg::AMO_NONE: result = {axi_pkg::ATOP_NONE, 4'b0000};
      ariane_pkg::AMO_SWAP: result = {axi_pkg::ATOP_ATOMICSWAP};
      ariane_pkg::AMO_ADD:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
      ariane_pkg::AMO_AND:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
      ariane_pkg::AMO_OR:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
      ariane_pkg::AMO_XOR:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
      ariane_pkg::AMO_MAX:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
      ariane_pkg::AMO_MAXU:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
      ariane_pkg::AMO_MIN:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
      ariane_pkg::AMO_MINU:
      result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
      ariane_pkg::AMO_CAS1: result = {axi_pkg::ATOP_NONE, 4'b0000};  // Unsupported
      ariane_pkg::AMO_CAS2: result = {axi_pkg::ATOP_NONE, 4'b0000};  // Unsupported
      default: result = 6'b000000;
    endcase

    return result;
  endfunction

  // Returns 1 when the ATOP encoding of the given operation is an atomic load
  // or shares its two upper bits with the atomic swap encoding, i.e. when the
  // FSM has to wait for read data in addition to the B response.
  function automatic logic amo_returns_data(ariane_pkg::amo_t amo);
    axi_pkg::atop_t atop = atop_from_amo(amo);
    logic is_load = atop[5:4] == axi_pkg::ATOP_ATOMICLOAD;
    logic is_swap_or_cmp = atop[5:4] == axi_pkg::ATOP_ATOMICSWAP[5:4];
    return is_load || is_swap_or_cmp;
  endfunction

endmodule

View File

@ -0,0 +1,475 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// File: cache_ctrl.svh
// Author: Florian Zaruba <zarubaf@ethz.ch>
// Date: 14.10.2017
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// Description: Cache controller
// Per-port data cache controller.
//
// One instance serves a single core request port (req_port_i / req_port_o).
// The FSM below looks up the tag/data SRAMs, answers loads on a hit, performs
// the second SRAM access needed by a store hit, and hands misses as well as
// bypassed (uncached) accesses over to the miss unit via miss_req_o.
//
// Note on ordering: the FSM is a single always_comb block in which later
// assignments override earlier ones (defaults -> state case -> kill handling).
module cache_ctrl
import ariane_pkg::*;
import std_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // while set, no new request is accepted
input logic bypass_i, // when set, requests bypass the cache and go straight to the miss unit
output logic busy_o, // high whenever the FSM is not in IDLE
// Core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// SRAM interface
output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
input logic gnt_i,
output cache_line_t data_o,
output cl_be_t be_o,
output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
output logic we_o,
input logic [DCACHE_SET_ASSOC-1:0] hit_way_i,
// Miss handling
output miss_req_t miss_req_o,
// return
input logic miss_gnt_i,
input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss
input logic [63:0] critical_word_i,
input logic critical_word_valid_i,
// bypass ports
input logic bypass_gnt_i,
input logic bypass_valid_i,
input logic [63:0] bypass_data_i,
// check MSHR for aliasing
output logic [55:0] mshr_addr_o,
input logic mshr_addr_matches_i,
input logic mshr_index_matches_i
);
// FSM states (encoding value given in the trailing comment)
enum logic [3:0] {
IDLE, // 0
WAIT_TAG, // 1
WAIT_TAG_BYPASSED, // 2
WAIT_GNT, // 3
WAIT_GNT_SAVED, // 4
STORE_REQ, // 5
WAIT_REFILL_VALID, // 6
WAIT_REFILL_GNT, // 7
WAIT_TAG_SAVED, // 8
WAIT_MSHR, // 9
WAIT_CRITICAL_WORD // 10
}
state_d, state_q;
// Snapshot of the request currently being processed. It is captured from
// req_port_i when a request is accepted; the tag is filled in later, once the
// requester presents it.
typedef struct packed {
logic [DCACHE_INDEX_WIDTH-1:0] index;
logic [DCACHE_TAG_WIDTH-1:0] tag;
logic [DCACHE_TID_WIDTH-1:0] id;
logic [7:0] be;
logic [1:0] size;
logic we;
logic [63:0] wdata;
logic bypass; // request is forwarded to the miss unit as a bypass access
logic killed; // requester killed the request; suppresses data_rvalid
} mem_req_t;
// way(s) that hit during the tag check, remembered for the store write-back
logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;
mem_req_t mem_req_d, mem_req_q;
assign busy_o = (state_q != IDLE);
// driven from the next-state value, so a tag captured from req_port_i in this
// cycle is visible on tag_o in the same cycle
assign tag_o = mem_req_d.tag;
// data of the cache line that hit
logic [DCACHE_LINE_WIDTH-1:0] cl_i;
// Select the line of the hitting way. cl_i is all-zero when no way hits; if
// more than one bit of hit_way_i is set the highest-indexed way wins, because
// later loop iterations overwrite earlier ones.
always_comb begin : way_select
cl_i = '0;
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) if (hit_way_i[i]) cl_i = data_i[i].data;
// cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
end
// --------------
// Cache FSM
// --------------
always_comb begin : cache_ctrl_fsm
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
// incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
// cache-line offset -> multiple of 64
cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
// default assignments
state_d = state_q;
mem_req_d = mem_req_q;
hit_way_d = hit_way_q;
// output assignments
req_port_o.data_gnt = 1'b0;
req_port_o.data_rvalid = 1'b0;
req_port_o.data_rdata = '0;
req_port_o.data_rid = mem_req_q.id;
miss_req_o = '0;
mshr_addr_o = '0;
// Memory array communication
req_o = '0;
addr_o = req_port_i.address_index;
data_o = '0;
be_o = '0;
we_o = '0;
// accumulate kill requests: once set, the flag stays set until a newly
// accepted request overwrites it
mem_req_d.killed |= req_port_i.kill_req;
case (state_q)
IDLE: begin
// a new request arrived
if (req_port_i.data_req && !flush_i) begin
// request the cache line - we can do this speculatively
req_o = '1;
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
// Bypass mode, check for uncacheable address here as well
if (bypass_i) begin
state_d = WAIT_TAG_BYPASSED;
// grant this access only if it was a load
req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1;
mem_req_d.bypass = 1'b1;
// ------------------
// Cache is enabled
// ------------------
end else begin
// Wait that we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
// only for a read
if (!req_port_i.data_we) req_port_o.data_gnt = 1'b1;
end
end
end
end
// cache enabled and waiting for tag
WAIT_TAG, WAIT_TAG_SAVED: begin
// check that the client really wants to do the request and that we have a valid tag
if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin
// save tag if we didn't already save it
if (state_q != WAIT_TAG_SAVED) begin
mem_req_d.tag = req_port_i.address_tag;
end
// we speculatively request another transfer
if (req_port_i.data_req && !flush_i) begin
req_o = '1;
end
// ------------
// HIT CASE
// ------------
if (|hit_way_i) begin
// we can request another cache-line if this was a load
if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
state_d = WAIT_TAG; // switch back to WAIT_TAG
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = gnt_i;
// without an SRAM grant the new request is not granted and we fall back to IDLE
if (!gnt_i) begin
state_d = IDLE;
end
end else begin
state_d = IDLE;
end
// this is timing critical
req_port_o.data_rdata = cl_i[cl_offset+:64];
// report data for a read
if (!mem_req_q.we) begin
req_port_o.data_rvalid = ~mem_req_q.killed;
// else this was a store so we need an extra step to handle it
end else begin
state_d = STORE_REQ;
hit_way_d = hit_way_i;
end
// ------------
// MISS CASE
// ------------
end else begin
// make a miss request
state_d = WAIT_REFILL_GNT;
end
// ----------------------------------------------
// Check MSHR - Miss Status Handling Register
// ----------------------------------------------
mshr_addr_o = {tag_o, mem_req_q.index};
// 1. We've got a match on MSHR while we are going down the
// store path. This means that the miss controller is
// currently evicting our cache-line. As the store is
// non-atomic we need to constantly check whether we are
// matching the address the miss handler is serving.
// Furthermore we need to check for the whole index
// because a completely different memory line could alias
// with the cache-line we are evicting.
// 2. The second case is where we are currently loading and
// the address matches the exact CL the miss controller
// is currently serving. That means we need to wait for
// the miss controller to finish its request before we
// can continue to serve this CL. Otherwise we will fetch
// the cache-line again and potentially lose any
// content we've written so far. As a consequence this
// means we can't have a hit on the CL, which means
// req_port_o.data_rvalid will be de-asserted.
if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
state_d = WAIT_MSHR;
end
// -------------------------
// Check for cache-ability
// -------------------------
// The check is placed last on purpose: for a non-cacheable address it
// overrides the hit/miss and MSHR decisions made above.
if (!config_pkg::is_inside_cacheable_regions(
CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}}
)) begin
mem_req_d.bypass = 1'b1;
state_d = WAIT_REFILL_GNT;
end
// we are still waiting for a valid tag
end else begin
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;
// check that we still have a memory grant
if (!gnt_i) begin
state_d = WAIT_GNT;
end
end
end
// ~> we already granted the request but lost the memory grant while waiting for the tag
WAIT_GNT, WAIT_GNT_SAVED: begin
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;
// if we get a valid tag while waiting for the memory grant, save it
if (req_port_i.tag_valid) begin
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_GNT_SAVED;
end
// we have a memory grant again ~> go back to WAIT_TAG
// (state_d is still WAIT_GNT only if no tag has been saved so far; in every
// other case the tag is already stored and we continue in WAIT_TAG_SAVED)
if (gnt_i) begin
state_d = (state_d == WAIT_GNT) ? WAIT_TAG : WAIT_TAG_SAVED;
end
end
// ~> we are here as we need a second round of memory access for a store
STORE_REQ: begin
// check if the MSHR still doesn't match
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// We need to re-check for MSHR aliasing here as the store requires at least
// two memory look-ups on a single-ported SRAM and therefore is non-atomic
if (!mshr_index_matches_i) begin
// store data, write dirty bit
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;
be_o.vldrty = hit_way_q;
// set the correct byte enable
// (cl_offset is a bit offset, hence >> 3 to obtain the byte offset)
be_o.data[cl_offset>>3+:8] = mem_req_q.be;
data_o.data[cl_offset+:64] = mem_req_q.wdata;
// ~> change the state
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
// got a grant ~> this is finished now
if (gnt_i) begin
req_port_o.data_gnt = 1'b1;
state_d = IDLE;
end
end else begin
state_d = WAIT_MSHR;
end
end // case: STORE_REQ
// we've got a match on MSHR ~> miss unit is currently serving a request
WAIT_MSHR: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// we can start a new request
if (!mshr_index_matches_i) begin
req_o = '1;
addr_o = mem_req_q.index;
// the tag is already stored in mem_req_q, so re-do the lookup with the saved tag
if (gnt_i) state_d = WAIT_TAG_SAVED;
end
end
// it's for sure a miss
WAIT_TAG_BYPASSED: begin
// check that the client really wants to do the request and that we have a valid tag
if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin
// save tag
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_REFILL_GNT;
end
end
// ~> wait for grant from miss unit
WAIT_REFILL_GNT: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// the miss request is built entirely from the saved request
miss_req_o.valid = 1'b1;
miss_req_o.bypass = mem_req_q.bypass;
miss_req_o.addr = {mem_req_q.tag, mem_req_q.index};
miss_req_o.be = mem_req_q.be;
miss_req_o.size = mem_req_q.size;
miss_req_o.we = mem_req_q.we;
miss_req_o.wdata = mem_req_q.wdata;
// got a grant so go to valid
if (bypass_gnt_i) begin
state_d = WAIT_REFILL_VALID;
// if this was a write we still need to give a grant to the store unit.
// We can also avoid waiting for the response valid, this signal is
// currently not used by the store unit
if (mem_req_q.we) begin
req_port_o.data_gnt = 1'b1;
state_d = IDLE;
end
end
// grant for a regular (non-bypass) miss: loads wait for the critical word,
// stores are granted and finished right away
if (miss_gnt_i && !mem_req_q.we) state_d = WAIT_CRITICAL_WORD;
else if (miss_gnt_i) begin
state_d = IDLE;
req_port_o.data_gnt = 1'b1;
end
// it can be the case that the miss unit is currently serving a
// request which matches ours
// so we need to check the MSHR for matching continuously
// if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
if (mshr_addr_matches_i && !active_serving_i) begin
state_d = WAIT_MSHR;
end
end
// ~> wait for critical word to arrive
WAIT_CRITICAL_WORD: begin
// speculatively request another word
if (req_port_i.data_req) begin
// request the cache line
req_o = '1;
end
if (critical_word_valid_i) begin
req_port_o.data_rvalid = ~mem_req_q.killed;
req_port_o.data_rdata = critical_word_i;
// we can make another request
if (req_port_i.data_req && !flush_i) begin
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
state_d = IDLE;
// Wait until we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = 1'b1;
end
end else begin
state_d = IDLE;
end
end
end
// ~> wait until the bypass request is valid
WAIT_REFILL_VALID: begin
// got a valid answer
if (bypass_valid_i) begin
req_port_o.data_rdata = bypass_data_i;
req_port_o.data_rvalid = ~mem_req_q.killed;
state_d = IDLE;
end
end
endcase
// Kill handling - evaluated after the case statement, so it overrides the
// decisions taken above: data_rvalid is forced high and the FSM returns to
// IDLE, except in WAIT_REFILL_GNT and WAIT_CRITICAL_WORD where the state
// chosen by the case statement is kept.
// NOTE(review): presumably those two states are exempt because the miss unit
// hand-shake has to complete - confirm against the miss handler.
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (!(state_q inside {WAIT_REFILL_GNT, WAIT_CRITICAL_WORD})) begin
state_d = IDLE;
end
end
end
// --------------
// Registers
// --------------
// asynchronous active-low reset; state, saved request and hit way are cleared
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
mem_req_q <= '0;
hit_way_q <= '0;
end else begin
state_q <= state_d;
mem_req_q <= mem_req_d;
hit_way_q <= hit_way_d;
end
end
//pragma translate_off
`ifndef VERILATOR
// elaboration-time sanity check of the only supported cache-line width
initial begin
assert (DCACHE_LINE_WIDTH == 128)
else
$error(
"Cacheline width has to be 128 for the moment. But only small changes required in data select logic"
);
end
// if the full MSHR address matches so should also match the partial one
partial_full_mshr_match :
assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i)
else $fatal(1, "partial mshr index doesn't match");
// there should never be a valid answer when the MSHR matches and we are not being served
// (a kill request is exempt, as it forces data_rvalid high)
no_valid_on_mshr_match :
assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req)
else $fatal(1, "rvalid_o should not be set on MSHR match");
`endif
//pragma translate_on
endmodule

View File

@ -0,0 +1,200 @@
// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
// Alternatives (CEA)
//
// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Authors: Cesar Fuguet
// Date: February, 2023
// Description: Interface adapter for the CVA6 core
// Adapter between one CVA6 data-cache request port and one HPDcache requester
// port.
//
// Parameters:
//   CVA6Cfg      - core configuration, used for the cacheable-region check
//   is_load_port - 1: the port only issues loads (virtually indexed, the tag
//                     and PMA are provided separately on hpdcache_req_tag_o /
//                     hpdcache_req_pma_o, kill_req is forwarded as abort)
//                  0: the port issues stores and AMOs (physically indexed,
//                     tag and PMA travel inside hpdcache_req_o)
//
// Responses carrying an all-ones transaction ID are AMO responses and are
// routed to cva6_amo_resp_o; every other response goes to cva6_req_o.
module cva6_hpdcache_if_adapter
  import hpdcache_pkg::*;
//  Parameters
//  {{{
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter bit is_load_port = 1'b1
)
//  }}}
//  Ports
//  {{{
(
    //  Clock and active-low reset pins
    input logic clk_i,
    input logic rst_ni,
    //  Port ID
    input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i,
    //  Request/response ports from/to the CVA6 core
    input  ariane_pkg::dcache_req_i_t cva6_req_i,
    output ariane_pkg::dcache_req_o_t cva6_req_o,
    input  ariane_pkg::amo_req_t      cva6_amo_req_i,
    output ariane_pkg::amo_resp_t     cva6_amo_resp_o,
    //  Request port to the L1 Dcache
    output logic                        hpdcache_req_valid_o,
    input  logic                        hpdcache_req_ready_i,
    output hpdcache_pkg::hpdcache_req_t hpdcache_req_o,
    output logic                        hpdcache_req_abort_o,
    output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o,
    output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o,
    //  Response port from the L1 Dcache
    input logic                        hpdcache_rsp_valid_i,
    input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i
);
  //  }}}

  //  Internal nets and registers
  //  {{{
  logic forward_store, forward_amo;
  logic hpdcache_req_is_uncacheable;
  //  }}}

  //  Request forwarding
  //  {{{
  generate
    //  LOAD request
    //  {{{
    if (is_load_port == 1'b1) begin : load_port_gen
      // The tag is zero-extended on the left and padded with a zero index on
      // the right so that the checked address is exactly 64 bits wide.
      assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
          CVA6Cfg,
          {
            {(64 - ariane_pkg::DCACHE_TAG_WIDTH - ariane_pkg::DCACHE_INDEX_WIDTH) {1'b0}},
            cva6_req_i.address_tag,
            {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
          }
      );

      //    A load port never forwards stores or AMOs: tie the forwarding flags
      //    and the AMO response low so that they are never left undriven (the
      //    flags are sampled by the assertion at the end of the module)
      assign forward_store = 1'b0, forward_amo = 1'b0;
      assign cva6_amo_resp_o.ack = 1'b0, cva6_amo_resp_o.result = '0;

      //    Request forwarding
      assign hpdcache_req_valid_o = cva6_req_i.data_req,
          hpdcache_req_o.addr_offset = cva6_req_i.address_index,
          hpdcache_req_o.wdata = '0,
          hpdcache_req_o.op = hpdcache_pkg::HPDCACHE_REQ_LOAD,
          hpdcache_req_o.be = cva6_req_i.data_be,
          hpdcache_req_o.size = cva6_req_i.data_size,
          hpdcache_req_o.sid = hpdcache_req_sid_i,
          hpdcache_req_o.tid = cva6_req_i.data_id,
          hpdcache_req_o.need_rsp = 1'b1,
          hpdcache_req_o.phys_indexed = 1'b0,
          hpdcache_req_o.addr_tag = '0,  // unused on virtually indexed request
          hpdcache_req_o.pma = '0;  // unused on virtually indexed request

      assign hpdcache_req_abort_o = cva6_req_i.kill_req,
          hpdcache_req_tag_o = cva6_req_i.address_tag,
          hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable,
          hpdcache_req_pma_o.io = 1'b0;

      //    Response forwarding
      assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i,
          cva6_req_o.data_rdata = hpdcache_rsp_i.rdata,
          cva6_req_o.data_rid = hpdcache_rsp_i.tid,
          cva6_req_o.data_gnt = hpdcache_req_ready_i;
    end  //  }}}
         //  {{{
    else begin : store_amo_gen
      //  STORE/AMO request
      hpdcache_req_addr_t   amo_addr;
      hpdcache_req_offset_t amo_addr_offset;
      hpdcache_tag_t        amo_tag;
      logic amo_is_word, amo_is_word_hi;
      hpdcache_req_data_t amo_data;
      hpdcache_req_be_t   amo_data_be;
      hpdcache_req_op_t   amo_op;
      logic [31:0]        amo_resp_word;

      //  AMO logic
      //  {{{
      always_comb begin : amo_op_comb
        // split the AMO address into cache offset and tag
        amo_addr = cva6_amo_req_i.operand_a;
        amo_addr_offset = amo_addr[0+:HPDCACHE_REQ_OFFSET_WIDTH];
        amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH+:HPDCACHE_TAG_WIDTH];
        // size 2'b10 selects a 32-bit operation; address bit 2 selects the
        // upper half of the 64-bit data word
        amo_is_word = (cva6_amo_req_i.size == 2'b10);
        amo_is_word_hi = cva6_amo_req_i.operand_a[2];

        // a 32-bit operand is replicated into both halves; the byte enable
        // then selects the half that is actually accessed
        amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0+:32]}} : cva6_amo_req_i.operand_b;
        amo_data_be = amo_is_word_hi ? 8'hf0 : amo_is_word ? 8'h0f : 8'hff;

        // translate the CVA6 AMO opcode; any opcode not listed falls back to
        // a plain load
        unique case (cva6_amo_req_i.amo_op)
          ariane_pkg::AMO_LR:   amo_op = HPDCACHE_REQ_AMO_LR;
          ariane_pkg::AMO_SC:   amo_op = HPDCACHE_REQ_AMO_SC;
          ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP;
          ariane_pkg::AMO_ADD:  amo_op = HPDCACHE_REQ_AMO_ADD;
          ariane_pkg::AMO_AND:  amo_op = HPDCACHE_REQ_AMO_AND;
          ariane_pkg::AMO_OR:   amo_op = HPDCACHE_REQ_AMO_OR;
          ariane_pkg::AMO_XOR:  amo_op = HPDCACHE_REQ_AMO_XOR;
          ariane_pkg::AMO_MAX:  amo_op = HPDCACHE_REQ_AMO_MAX;
          ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU;
          ariane_pkg::AMO_MIN:  amo_op = HPDCACHE_REQ_AMO_MIN;
          ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU;
          default:              amo_op = HPDCACHE_REQ_LOAD;
        endcase
      end

      // 32-bit AMO result, taken from the half selected by address bit 2
      assign amo_resp_word = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32]
                                            : hpdcache_rsp_i.rdata[0][0  +: 32];
      //  }}}

      //  Request forwarding
      //  {{{
      // Same 64-bit address layout as on the load port; the tag is the one
      // actually forwarded (store tag or AMO tag).
      assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
          CVA6Cfg,
          {
            {(64 - ariane_pkg::DCACHE_TAG_WIDTH - ariane_pkg::DCACHE_INDEX_WIDTH) {1'b0}},
            hpdcache_req_o.addr_tag,
            {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
          }
      );

      assign forward_store = cva6_req_i.data_req, forward_amo = cva6_amo_req_i.req;

      // an AMO takes precedence over a store in every multiplexer below; AMOs
      // use the all-ones transaction ID so their responses can be told apart
      assign hpdcache_req_valid_o = forward_store | forward_amo,
          hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset : cva6_req_i.address_index,
          hpdcache_req_o.wdata = forward_amo ? amo_data : cva6_req_i.data_wdata,
          hpdcache_req_o.op = forward_amo ? amo_op : hpdcache_pkg::HPDCACHE_REQ_STORE,
          hpdcache_req_o.be = forward_amo ? amo_data_be : cva6_req_i.data_be,
          hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size : cva6_req_i.data_size,
          hpdcache_req_o.sid = hpdcache_req_sid_i,
          hpdcache_req_o.tid = forward_amo ? '1 : '0,
          hpdcache_req_o.need_rsp = forward_amo,
          hpdcache_req_o.phys_indexed = 1'b1,
          hpdcache_req_o.addr_tag = forward_amo ? amo_tag : cva6_req_i.address_tag,
          hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable,
          hpdcache_req_o.pma.io = 1'b0,
          hpdcache_req_abort_o = 1'b0,  // unused on physically indexed requests
          hpdcache_req_tag_o = '0,  // unused on physically indexed requests
          hpdcache_req_pma_o = '0;  // unused on physically indexed requests
      //  }}}

      //  Response forwarding
      //  {{{
      // responses with the all-ones ID belong to an AMO, all others to the store port
      assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1),
          cva6_req_o.data_rdata = hpdcache_rsp_i.rdata,
          cva6_req_o.data_rid = hpdcache_rsp_i.tid,
          cva6_req_o.data_gnt = hpdcache_req_ready_i;

      // 32-bit AMO results are sign-extended to 64 bits
      assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1),
          cva6_amo_resp_o.result = amo_is_word ? {{32{amo_resp_word[31]}}, amo_resp_word}
                                               : hpdcache_rsp_i.rdata[0][63:0];
      //  }}}
    end
    //  }}}
  endgenerate
  //  }}}

  //  Assertions
  //  {{{
  //  pragma translate_off
  //  a store and an AMO must never be requested in the same cycle; the check
  //  is disabled while the reset is asserted
  forward_one_request_assert :
  assert property (@(posedge clk_i) disable iff (!rst_ni) ($onehot0({forward_store, forward_amo})))
  else $error("Only one request shall be forwarded");
  //  pragma translate_on
  //  }}}
endmodule

View File

@ -0,0 +1,609 @@
// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
// Alternatives (CEA)
//
// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Authors: Cesar Fuguet
// Date: February, 2023
// Description: CVA6 cache subsystem integrating standard CVA6's
// instruction cache and the Core-V High-Performance L1
// data cache (CV-HPDcache).
module cva6_hpdcache_subsystem
// Parameters
// {{{
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int NumPorts = 4,
parameter int NrHwPrefetchers = 4,
parameter type noc_req_t = logic,
parameter type noc_resp_t = logic,
parameter type cmo_req_t = logic,
parameter type cmo_rsp_t = logic
)
// }}}
// Ports
// {{{
(
input logic clk_i,
input logic rst_ni,
// I$
// {{{
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter
// address translation requests
input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend
output ariane_pkg::icache_arsp_t icache_areq_o,
// data requests
input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend
output ariane_pkg::icache_drsp_t icache_dreq_o,
// }}}
// D$
// {{{
// Cache management
input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st
// AMO interface
input ariane_pkg::amo_req_t dcache_amo_req_i, // from LSU
output ariane_pkg::amo_resp_t dcache_amo_resp_o, // to LSU
// CMO interface
input cmo_req_t dcache_cmo_req_i, // from CMO FU
output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU
// Request ports
input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU
output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU
// Write Buffer status
output logic wbuffer_empty_o,
output logic wbuffer_not_ni_o,
// Hardware memory prefetcher configuration
input logic [NrHwPrefetchers-1:0] hwpf_base_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_base_o,
input logic [NrHwPrefetchers-1:0] hwpf_param_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o,
input logic [NrHwPrefetchers-1:0] hwpf_throttle_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o,
output logic [ 63:0] hwpf_status_o,
// }}}
// AXI port to upstream memory/peripherals
// {{{
output noc_req_t noc_req_o,
input noc_resp_t noc_resp_i
// }}}
);
// }}}
`include "axi/typedef.svh"
// I$ instantiation
// {{{
logic icache_miss_valid, icache_miss_ready;
wt_cache_pkg::icache_req_t icache_miss;
logic icache_miss_resp_valid;
wt_cache_pkg::icache_rtrn_t icache_miss_resp;
localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1);
cva6_icache #(
.CVA6Cfg(CVA6Cfg),
.RdTxId (ICACHE_RDTXID)
) i_cva6_icache (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (icache_flush_i),
.en_i (icache_en_i),
.miss_o (icache_miss_o),
.areq_i (icache_areq_i),
.areq_o (icache_areq_o),
.dreq_i (icache_dreq_i),
.dreq_o (icache_dreq_o),
.mem_rtrn_vld_i(icache_miss_resp_valid),
.mem_rtrn_i (icache_miss_resp),
.mem_data_req_o(icache_miss_valid),
.mem_data_ack_i(icache_miss_ready),
.mem_data_o (icache_miss)
);
// }}}
// D$ instantiation
// {{{
`include "hpdcache_typedef.svh"
// 0: Page-Table Walk (PTW)
// 1: Load unit
// 2: Accelerator load
// 3: Store/AMO
// .
// .
// .
// NumPorts: CMO
// NumPorts + 1: Hardware Memory Prefetcher (hwpf)
localparam int HPDCACHE_NREQUESTERS = NumPorts + 2;
typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t;
typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t;
typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t;
typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t;
`HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t);
`HPDCACHE_TYPEDEF_MEM_RESP_R_T(hpdcache_mem_resp_r_t, hpdcache_mem_id_t, hpdcache_mem_data_t);
`HPDCACHE_TYPEDEF_MEM_REQ_W_T(hpdcache_mem_req_w_t, hpdcache_mem_data_t, hpdcache_mem_be_t);
`HPDCACHE_TYPEDEF_MEM_RESP_W_T(hpdcache_mem_resp_w_t, hpdcache_mem_id_t);
typedef logic [63:0] hwpf_stride_param_t;
logic dcache_req_valid[HPDCACHE_NREQUESTERS-1:0];
logic dcache_req_ready[HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_req_t dcache_req [HPDCACHE_NREQUESTERS-1:0];
logic dcache_req_abort[HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_tag_t dcache_req_tag [HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_pma_t dcache_req_pma [HPDCACHE_NREQUESTERS-1:0];
logic dcache_rsp_valid[HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_rsp_t dcache_rsp [HPDCACHE_NREQUESTERS-1:0];
logic dcache_read_miss, dcache_write_miss;
logic [ 2:0] snoop_valid;
logic [ 2:0] snoop_abort;
hpdcache_pkg::hpdcache_req_offset_t [ 2:0] snoop_addr_offset;
hpdcache_pkg::hpdcache_tag_t [ 2:0] snoop_addr_tag;
logic [ 2:0] snoop_phys_indexed;
logic dcache_cmo_req_is_prefetch;
logic dcache_miss_ready;
logic dcache_miss_valid;
hpdcache_mem_req_t dcache_miss;
logic dcache_miss_resp_ready;
logic dcache_miss_resp_valid;
hpdcache_mem_resp_r_t dcache_miss_resp;
logic dcache_wbuf_ready;
logic dcache_wbuf_valid;
hpdcache_mem_req_t dcache_wbuf;
logic dcache_wbuf_data_ready;
logic dcache_wbuf_data_valid;
hpdcache_mem_req_w_t dcache_wbuf_data;
logic dcache_wbuf_resp_ready;
logic dcache_wbuf_resp_valid;
hpdcache_mem_resp_w_t dcache_wbuf_resp;
logic dcache_uc_read_ready;
logic dcache_uc_read_valid;
hpdcache_mem_req_t dcache_uc_read;
logic dcache_uc_read_resp_ready;
logic dcache_uc_read_resp_valid;
hpdcache_mem_resp_r_t dcache_uc_read_resp;
logic dcache_uc_write_ready;
logic dcache_uc_write_valid;
hpdcache_mem_req_t dcache_uc_write;
logic dcache_uc_write_data_ready;
logic dcache_uc_write_data_valid;
hpdcache_mem_req_w_t dcache_uc_write_data;
logic dcache_uc_write_resp_ready;
logic dcache_uc_write_resp_valid;
hpdcache_mem_resp_w_t dcache_uc_write_resp;
hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_in;
hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out;
generate
ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0];
for (genvar r = 0; r < (NumPorts - 1); r++) begin : cva6_hpdcache_load_if_adapter_gen
assign dcache_req_ports[r] = dcache_req_ports_i[r];
cva6_hpdcache_if_adapter #(
.CVA6Cfg (CVA6Cfg),
.is_load_port(1'b1)
) i_cva6_hpdcache_load_if_adapter (
.clk_i,
.rst_ni,
.hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(r)),
.cva6_req_i (dcache_req_ports[r]),
.cva6_req_o (dcache_req_ports_o[r]),
.cva6_amo_req_i ('0),
.cva6_amo_resp_o( /* unused */),
.hpdcache_req_valid_o(dcache_req_valid[r]),
.hpdcache_req_ready_i(dcache_req_ready[r]),
.hpdcache_req_o (dcache_req[r]),
.hpdcache_req_abort_o(dcache_req_abort[r]),
.hpdcache_req_tag_o (dcache_req_tag[r]),
.hpdcache_req_pma_o (dcache_req_pma[r]),
.hpdcache_rsp_valid_i(dcache_rsp_valid[r]),
.hpdcache_rsp_i (dcache_rsp[r])
);
end
cva6_hpdcache_if_adapter #(
.CVA6Cfg (CVA6Cfg),
.is_load_port(1'b0)
) i_cva6_hpdcache_store_if_adapter (
.clk_i,
.rst_ni,
.hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts - 1)),
.cva6_req_i (dcache_req_ports_i[NumPorts-1]),
.cva6_req_o (dcache_req_ports_o[NumPorts-1]),
.cva6_amo_req_i (dcache_amo_req_i),
.cva6_amo_resp_o(dcache_amo_resp_o),
.hpdcache_req_valid_o(dcache_req_valid[NumPorts-1]),
.hpdcache_req_ready_i(dcache_req_ready[NumPorts-1]),
.hpdcache_req_o (dcache_req[NumPorts-1]),
.hpdcache_req_abort_o(dcache_req_abort[NumPorts-1]),
.hpdcache_req_tag_o (dcache_req_tag[NumPorts-1]),
.hpdcache_req_pma_o (dcache_req_pma[NumPorts-1]),
.hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts-1]),
.hpdcache_rsp_i (dcache_rsp[NumPorts-1])
);
`ifdef HPDCACHE_ENABLE_CMO
cva6_hpdcache_cmo_if_adapter #(
.cmo_req_t(cmo_req_t),
.cmo_rsp_t(cmo_rsp_t)
) i_cva6_hpdcache_cmo_if_adapter (
.clk_i,
.rst_ni,
.dcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)),
.cva6_cmo_req_i (dcache_cmo_req_i),
.cva6_cmo_resp_o(dcache_cmo_resp_o),
.dcache_req_valid_o(dcache_req_valid[NumPorts]),
.dcache_req_ready_i(dcache_req_ready[NumPorts]),
.dcache_req_o (dcache_req[NumPorts]),
.dcache_req_abort_o(dcache_req_abort[NumPorts]),
.dcache_req_tag_o (dcache_req_tag[NumPorts]),
.dcache_req_pma_o (dcache_req_pma[NumPorts]),
.dcache_rsp_valid_i(dcache_rsp_valid[NumPorts]),
.dcache_rsp_i (dcache_rsp[NumPorts])
);
`else
assign dcache_req_valid[NumPorts] = 1'b0,
dcache_req[NumPorts] = '0,
dcache_req_abort[NumPorts] = 1'b0,
dcache_req_tag[NumPorts] = '0,
dcache_req_pma[NumPorts] = '0;
`endif
endgenerate
// Snoop load port
assign snoop_valid[0] = dcache_req_valid[1] & dcache_req_ready[1],
snoop_abort[0] = dcache_req_abort[1],
snoop_addr_offset[0] = dcache_req[1].addr_offset,
snoop_addr_tag[0] = dcache_req_tag[1],
snoop_phys_indexed[0] = dcache_req[1].phys_indexed;
// Snoop Store/AMO port
assign snoop_valid[1] = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1],
snoop_abort[1] = dcache_req_abort[NumPorts-1],
snoop_addr_offset[1] = dcache_req[NumPorts-1].addr_offset,
snoop_addr_tag[1] = dcache_req_tag[NumPorts-1],
snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed;
`ifdef HPDCACHE_ENABLE_CMO
// Snoop CMO port (in case of read prefetch accesses)
assign dcache_cmo_req_is_prefetch = hpdcache_pkg::is_cmo_prefetch(
dcache_req[NumPorts].op, dcache_req[NumPorts].size
);
assign snoop_valid[2] = dcache_req_valid[NumPorts]
& dcache_req_ready[NumPorts]
& dcache_cmo_req_is_prefetch,
snoop_abort[2] = dcache_req_abort[NumPorts],
snoop_addr_offset[2] = dcache_req[NumPorts].addr_offset,
snoop_addr_tag[2] = dcache_req_tag[NumPorts],
snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed;
`else
assign snoop_valid[2] = 1'b0,
snoop_abort[2] = 1'b0,
snoop_addr_offset[2] = '0,
snoop_addr_tag[2] = '0,
snoop_phys_indexed[2] = 1'b0;
`endif
generate
for (genvar h = 0; h < NrHwPrefetchers; h++) begin : hwpf_throttle_gen
assign hwpf_throttle_in[h] = hwpf_stride_pkg::hwpf_stride_throttle_t'(hwpf_throttle_i[h]),
hwpf_throttle_o[h] = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]);
end
endgenerate
hwpf_stride_wrapper #(
.NUM_HW_PREFETCH(NrHwPrefetchers),
.NUM_SNOOP_PORTS(3)
) i_hwpf_stride_wrapper (
.clk_i,
.rst_ni,
.hwpf_stride_base_set_i (hwpf_base_set_i),
.hwpf_stride_base_i (hwpf_base_i),
.hwpf_stride_base_o (hwpf_base_o),
.hwpf_stride_param_set_i (hwpf_param_set_i),
.hwpf_stride_param_i (hwpf_param_i),
.hwpf_stride_param_o (hwpf_param_o),
.hwpf_stride_throttle_set_i(hwpf_throttle_set_i),
.hwpf_stride_throttle_i (hwpf_throttle_in),
.hwpf_stride_throttle_o (hwpf_throttle_out),
.hwpf_stride_status_o (hwpf_status_o),
.snoop_valid_i (snoop_valid),
.snoop_abort_i (snoop_abort),
.snoop_addr_offset_i (snoop_addr_offset),
.snoop_addr_tag_i (snoop_addr_tag),
.snoop_phys_indexed_i(snoop_phys_indexed),
.hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts + 1)),
.hpdcache_req_valid_o(dcache_req_valid[NumPorts+1]),
.hpdcache_req_ready_i(dcache_req_ready[NumPorts+1]),
.hpdcache_req_o (dcache_req[NumPorts+1]),
.hpdcache_req_abort_o(dcache_req_abort[NumPorts+1]),
.hpdcache_req_tag_o (dcache_req_tag[NumPorts+1]),
.hpdcache_req_pma_o (dcache_req_pma[NumPorts+1]),
.hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts+1]),
.hpdcache_rsp_i (dcache_rsp[NumPorts+1])
);
hpdcache #(
.NREQUESTERS (HPDCACHE_NREQUESTERS),
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
.hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t)
) i_hpdcache (
.clk_i,
.rst_ni,
.wbuf_flush_i(dcache_flush_i),
.core_req_valid_i(dcache_req_valid),
.core_req_ready_o(dcache_req_ready),
.core_req_i (dcache_req),
.core_req_abort_i(dcache_req_abort),
.core_req_tag_i (dcache_req_tag),
.core_req_pma_i (dcache_req_pma),
.core_rsp_valid_o(dcache_rsp_valid),
.core_rsp_o (dcache_rsp),
.mem_req_miss_read_ready_i(dcache_miss_ready),
.mem_req_miss_read_valid_o(dcache_miss_valid),
.mem_req_miss_read_o (dcache_miss),
.mem_resp_miss_read_ready_o(dcache_miss_resp_ready),
.mem_resp_miss_read_valid_i(dcache_miss_resp_valid),
.mem_resp_miss_read_i (dcache_miss_resp),
.mem_req_wbuf_write_ready_i(dcache_wbuf_ready),
.mem_req_wbuf_write_valid_o(dcache_wbuf_valid),
.mem_req_wbuf_write_o (dcache_wbuf),
.mem_req_wbuf_write_data_ready_i(dcache_wbuf_data_ready),
.mem_req_wbuf_write_data_valid_o(dcache_wbuf_data_valid),
.mem_req_wbuf_write_data_o (dcache_wbuf_data),
.mem_resp_wbuf_write_ready_o(dcache_wbuf_resp_ready),
.mem_resp_wbuf_write_valid_i(dcache_wbuf_resp_valid),
.mem_resp_wbuf_write_i (dcache_wbuf_resp),
.mem_req_uc_read_ready_i(dcache_uc_read_ready),
.mem_req_uc_read_valid_o(dcache_uc_read_valid),
.mem_req_uc_read_o (dcache_uc_read),
.mem_resp_uc_read_ready_o(dcache_uc_read_resp_ready),
.mem_resp_uc_read_valid_i(dcache_uc_read_resp_valid),
.mem_resp_uc_read_i (dcache_uc_read_resp),
.mem_req_uc_write_ready_i(dcache_uc_write_ready),
.mem_req_uc_write_valid_o(dcache_uc_write_valid),
.mem_req_uc_write_o (dcache_uc_write),
.mem_req_uc_write_data_ready_i(dcache_uc_write_data_ready),
.mem_req_uc_write_data_valid_o(dcache_uc_write_data_valid),
.mem_req_uc_write_data_o (dcache_uc_write_data),
.mem_resp_uc_write_ready_o(dcache_uc_write_resp_ready),
.mem_resp_uc_write_valid_i(dcache_uc_write_resp_valid),
.mem_resp_uc_write_i (dcache_uc_write_resp),
.evt_cache_write_miss_o(dcache_write_miss),
.evt_cache_read_miss_o (dcache_read_miss),
.evt_uncached_req_o ( /* unused */),
.evt_cmo_req_o ( /* unused */),
.evt_write_req_o ( /* unused */),
.evt_read_req_o ( /* unused */),
.evt_prefetch_req_o ( /* unused */),
.evt_req_on_hold_o ( /* unused */),
.evt_rtab_rollback_o ( /* unused */),
.evt_stall_refill_o ( /* unused */),
.evt_stall_o ( /* unused */),
.wbuf_empty_o(wbuffer_empty_o),
.cfg_enable_i (dcache_enable_i),
.cfg_wbuf_threshold_i (4'd2),
.cfg_wbuf_reset_timecnt_on_write_i (1'b1),
.cfg_wbuf_sequential_waw_i (1'b0),
.cfg_wbuf_inhibit_write_coalescing_i(1'b0),
.cfg_prefetch_updt_plru_i (1'b1),
.cfg_error_on_cacheable_amo_i (1'b0),
.cfg_rtab_single_entry_i (1'b0)
);
assign dcache_miss_o = dcache_read_miss, wbuffer_not_ni_o = wbuffer_empty_o;
always_ff @(posedge clk_i or negedge rst_ni) begin : dcache_flush_ff
if (!rst_ni) dcache_flush_ack_o <= 1'b0;
else dcache_flush_ack_o <= ~dcache_flush_ack_o & dcache_flush_i;
end
// }}}
// AXI arbiter instantiation
// {{{
typedef logic [CVA6Cfg.AxiAddrWidth-1:0] axi_addr_t;
typedef logic [CVA6Cfg.AxiDataWidth-1:0] axi_data_t;
typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] axi_strb_t;
typedef logic [CVA6Cfg.AxiIdWidth-1:0] axi_id_t;
typedef logic [CVA6Cfg.AxiUserWidth-1:0] axi_user_t;
`AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, axi_addr_t, axi_id_t, axi_user_t)
`AXI_TYPEDEF_W_CHAN_T(axi_w_chan_t, axi_data_t, axi_strb_t, axi_user_t)
`AXI_TYPEDEF_B_CHAN_T(axi_b_chan_t, axi_id_t, axi_user_t)
`AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, axi_addr_t, axi_id_t, axi_user_t)
`AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t)
cva6_hpdcache_subsystem_axi_arbiter #(
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
.hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
.AxiAddrWidth (CVA6Cfg.AxiAddrWidth),
.AxiDataWidth (CVA6Cfg.AxiDataWidth),
.AxiIdWidth (CVA6Cfg.AxiIdWidth),
.AxiUserWidth (CVA6Cfg.AxiUserWidth),
.axi_ar_chan_t(axi_ar_chan_t),
.axi_aw_chan_t(axi_aw_chan_t),
.axi_w_chan_t (axi_w_chan_t),
.axi_req_t (noc_req_t),
.axi_rsp_t (noc_resp_t)
) i_axi_arbiter (
.clk_i,
.rst_ni,
.icache_miss_valid_i(icache_miss_valid),
.icache_miss_ready_o(icache_miss_ready),
.icache_miss_i (icache_miss),
.icache_miss_id_i (hpdcache_mem_id_t'(ICACHE_RDTXID)),
.icache_miss_resp_valid_o(icache_miss_resp_valid),
.icache_miss_resp_o (icache_miss_resp),
.dcache_miss_ready_o(dcache_miss_ready),
.dcache_miss_valid_i(dcache_miss_valid),
.dcache_miss_i (dcache_miss),
.dcache_miss_resp_ready_i(dcache_miss_resp_ready),
.dcache_miss_resp_valid_o(dcache_miss_resp_valid),
.dcache_miss_resp_o (dcache_miss_resp),
.dcache_wbuf_ready_o(dcache_wbuf_ready),
.dcache_wbuf_valid_i(dcache_wbuf_valid),
.dcache_wbuf_i (dcache_wbuf),
.dcache_wbuf_data_ready_o(dcache_wbuf_data_ready),
.dcache_wbuf_data_valid_i(dcache_wbuf_data_valid),
.dcache_wbuf_data_i (dcache_wbuf_data),
.dcache_wbuf_resp_ready_i(dcache_wbuf_resp_ready),
.dcache_wbuf_resp_valid_o(dcache_wbuf_resp_valid),
.dcache_wbuf_resp_o (dcache_wbuf_resp),
.dcache_uc_read_ready_o(dcache_uc_read_ready),
.dcache_uc_read_valid_i(dcache_uc_read_valid),
.dcache_uc_read_i (dcache_uc_read),
.dcache_uc_read_id_i ('1),
.dcache_uc_read_resp_ready_i(dcache_uc_read_resp_ready),
.dcache_uc_read_resp_valid_o(dcache_uc_read_resp_valid),
.dcache_uc_read_resp_o (dcache_uc_read_resp),
.dcache_uc_write_ready_o(dcache_uc_write_ready),
.dcache_uc_write_valid_i(dcache_uc_write_valid),
.dcache_uc_write_i (dcache_uc_write),
.dcache_uc_write_id_i ('1),
.dcache_uc_write_data_ready_o(dcache_uc_write_data_ready),
.dcache_uc_write_data_valid_i(dcache_uc_write_data_valid),
.dcache_uc_write_data_i (dcache_uc_write_data),
.dcache_uc_write_resp_ready_i(dcache_uc_write_resp_ready),
.dcache_uc_write_resp_valid_o(dcache_uc_write_resp_valid),
.dcache_uc_write_resp_o (dcache_uc_write_resp),
.axi_req_o (noc_req_o),
.axi_resp_i(noc_resp_i)
);
// }}}
// Assertions
// {{{
// pragma translate_off
// Elaboration-time check: the HPDcache request source ID must be able to
// encode every requester index.
initial
  assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS))
  else $fatal(1, "HPDCACHE_REQ_SRC_ID_WIDTH is not wide enough");

// NOTE: unlike $fatal, $warning takes no finish_number; every argument is part
// of the message. The reduction-OR of a vector is X when no bit is 1 and at
// least one bit is X/Z, which is what the `!== 1'hX` comparisons below detect.

// Fetched instruction data must not reduce to X while the I$ response is valid.
a_invalid_instruction_fetch :
assert property (
  @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
else
  $warning(
      "[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
      icache_dreq_o.vaddr,
      icache_dreq_o.data
  );

// Data written through request port 2 must not reduce to X when at least one
// byte enable is set.
a_invalid_write_data :
assert property (
  @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX)
else
  $warning(
      "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
      {
        dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index
      },
      dcache_req_ports_i[2].data_be,
      dcache_req_ports_i[2].data_wdata
  );

// Data returned on request ports 0 and 1 must not reduce to X, unless the
// request is being killed in the same cycle.
for (genvar j = 0; j < 2; j++) begin : gen_assertion
  a_invalid_read_data :
  assert property (
    @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
  else
    $warning(
        "[l1 dcache] reading invalid data on port %01d: data=%016X",
        j,
        dcache_req_ports_o[j].data_rdata
    );
end
// pragma translate_on
// }}}
endmodule : cva6_hpdcache_subsystem

View File

@ -0,0 +1,586 @@
// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies
// Alternatives (CEA)
//
// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”);
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Authors: Cesar Fuguet
// Date: February, 2023
// Description: AXI arbiter for the CVA6 cache subsystem integrating standard
// CVA6's instruction cache and the Core-V High-Performance
// L1 Dcache (CV-HPDcache).
// Overview
// --------
// Read path : three requesters share one AXI read channel
//               index 0 = I$ miss, index 1 = D$ miss, index 2 = D$ uncached read.
//             Requests go through hpdcache_mem_req_read_arbiter and
//             hpdcache_mem_to_axi_read; responses are routed back by
//             hpdcache_mem_resp_demux using a routing table indexed by the
//             response ID.
// Write path: two requesters share one AXI write channel
//               index 0 = D$ write buffer, index 1 = D$ uncached write.
//             Same structure (write arbiter, AXI write adapter, response demux).
// The I$ request is buffered in a one-entry FIFO and stays there until its
// response has been delivered, so that its metadata is available while the
// response is being formatted.
module cva6_hpdcache_subsystem_axi_arbiter
//  Parameters
//  {{{
#(
    // Width of the transaction ID on the HPDcache memory interface. It also
    // sets the depth of the response routing tables (2**HPDcacheMemIdWidth).
    parameter int HPDcacheMemIdWidth = 8,
    // Data width of the HPDcache memory interface (write width of the I$
    // response upsizer when narrower than an I$ line)
    parameter int HPDcacheMemDataWidth = 512,
    // HPDcache memory interface request/response types
    parameter type hpdcache_mem_req_t = logic,
    parameter type hpdcache_mem_req_w_t = logic,
    parameter type hpdcache_mem_resp_r_t = logic,
    parameter type hpdcache_mem_resp_w_t = logic,
    // AXI geometry, used to build the local R and B channel types
    parameter int unsigned AxiAddrWidth = 1,
    parameter int unsigned AxiDataWidth = 1,
    parameter int unsigned AxiIdWidth = 1,
    parameter int unsigned AxiUserWidth = 1,
    // AXI channel and request/response types of the upstream port
    parameter type axi_ar_chan_t = logic,
    parameter type axi_aw_chan_t = logic,
    parameter type axi_w_chan_t = logic,
    parameter type axi_req_t = logic,
    parameter type axi_rsp_t = logic,
    localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
)
//  }}}
//  Ports
//  {{{
(
    input logic clk_i,
    input logic rst_ni,
    //  Interfaces from/to I$
    //  {{{
    input logic icache_miss_valid_i,
    output logic icache_miss_ready_o,
    input wt_cache_pkg::icache_req_t icache_miss_i,
    // ID whose read responses are routed to the I$
    input hpdcache_mem_id_t icache_miss_id_i,
    output logic icache_miss_resp_valid_o,
    output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o,
    //  }}}
    //  Interfaces from/to D$
    //  {{{
    output logic dcache_miss_ready_o,
    input logic dcache_miss_valid_i,
    input hpdcache_mem_req_t dcache_miss_i,
    input logic dcache_miss_resp_ready_i,
    output logic dcache_miss_resp_valid_o,
    output hpdcache_mem_resp_r_t dcache_miss_resp_o,
    //  Write-buffer write interface
    output logic dcache_wbuf_ready_o,
    input logic dcache_wbuf_valid_i,
    input hpdcache_mem_req_t dcache_wbuf_i,
    output logic dcache_wbuf_data_ready_o,
    input logic dcache_wbuf_data_valid_i,
    input hpdcache_mem_req_w_t dcache_wbuf_data_i,
    input logic dcache_wbuf_resp_ready_i,
    output logic dcache_wbuf_resp_valid_o,
    output hpdcache_mem_resp_w_t dcache_wbuf_resp_o,
    //  Uncached read interface
    output logic dcache_uc_read_ready_o,
    input logic dcache_uc_read_valid_i,
    input hpdcache_mem_req_t dcache_uc_read_i,
    // ID whose read responses are routed to the uncached read port
    input hpdcache_mem_id_t dcache_uc_read_id_i,
    input logic dcache_uc_read_resp_ready_i,
    output logic dcache_uc_read_resp_valid_o,
    output hpdcache_mem_resp_r_t dcache_uc_read_resp_o,
    //  Uncached write interface
    output logic dcache_uc_write_ready_o,
    input logic dcache_uc_write_valid_i,
    input hpdcache_mem_req_t dcache_uc_write_i,
    // ID whose write responses are routed to the uncached write port
    input hpdcache_mem_id_t dcache_uc_write_id_i,
    output logic dcache_uc_write_data_ready_o,
    input logic dcache_uc_write_data_valid_i,
    input hpdcache_mem_req_w_t dcache_uc_write_data_i,
    input logic dcache_uc_write_resp_ready_i,
    output logic dcache_uc_write_resp_valid_o,
    output hpdcache_mem_resp_w_t dcache_uc_write_resp_o,
    //  }}}
    //  AXI port to upstream memory/peripherals
    //  {{{
    output axi_req_t axi_req_o,
    input axi_rsp_t axi_resp_i
    //  }}}
);
  //  }}}
  //  Internal type definitions
  //  {{{
  // AXI R and B channel payloads are built locally from the width parameters
  // (the AR, AW and W channel types are supplied as type parameters instead).
  typedef struct packed {
    logic [AxiIdWidth-1:0] id;
    logic [AxiDataWidth-1:0] data;
    axi_pkg::resp_t resp;
    logic last;
    logic [AxiUserWidth-1:0] user;
  } axi_r_chan_t;
  typedef struct packed {
    logic [AxiIdWidth-1:0] id;
    axi_pkg::resp_t resp;
    logic [AxiUserWidth-1:0] user;
  } axi_b_chan_t;
  // Response routing table: one entry per possible transaction ID, each entry
  // holding the index of the requester the response must be delivered to.
  localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth);
  typedef hpdcache_mem_id_t [MEM_RESP_RT_DEPTH-1:0] mem_resp_rt_t;
  typedef logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] icache_resp_data_t;
  //  }}}
  //  Adapt the I$ interface to the HPDcache memory interface
  //  {{{
  // Number of 64-bit words in an I$ line and number of bits needed to index them
  localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH / 64;
  localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS);
  // log2 of the I$ line size in bytes
  localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);
  // log2 of the size in bytes of a non-cacheable fetch (3 -> one 64-bit word)
  localparam int ICACHE_WORD_SIZE = 3;
  // Number of memory-interface beats needed to transfer one I$ line (rounded up)
  localparam int ICACHE_MEM_REQ_CL_LEN =
      (ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1)/HPDcacheMemDataWidth;
  // log2 of the bytes per beat of a line refill: the full memory data width
  // when it does not exceed the line width, otherwise the whole line.
  localparam int ICACHE_MEM_REQ_CL_SIZE =
      (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ?
      $clog2(HPDcacheMemDataWidth / 8) : ICACHE_CL_SIZE;
  //    I$ request
  hpdcache_mem_req_t icache_miss_req_wdata;
  logic icache_miss_req_w, icache_miss_req_wok;
  hpdcache_mem_req_t icache_miss_req_rdata;
  logic icache_miss_req_r, icache_miss_req_rok;
  // Set while the request at the head of the FIFO has been issued to memory
  // and its response has not yet been delivered to the I$.
  logic icache_miss_pending_q;
  //  This FIFO has two functionalities:
  //  -  Stabilize the ready-valid protocol. The ICACHE can abort a valid
  //     transaction without receiving the corresponding ready signal. This
  //     behavior is not supported by AXI.
  //  -  Cut a possible long timing path.
  hpdcache_fifo_reg #(
      .FIFO_DEPTH (1),
      .fifo_data_t(hpdcache_mem_req_t)
  ) i_icache_miss_req_fifo (
      .clk_i,
      .rst_ni,
      .w_i    (icache_miss_req_w),
      .wok_o  (icache_miss_req_wok),
      .wdata_i(icache_miss_req_wdata),
      .r_i    (icache_miss_req_r),
      .rok_o  (icache_miss_req_rok),
      .rdata_o(icache_miss_req_rdata)
  );
  assign icache_miss_req_w = icache_miss_valid_i, icache_miss_ready_o = icache_miss_req_wok;
  // Translate the I$ request into a memory read request. Non-cacheable fetches
  // are single-beat, 64-bit reads; cacheable ones fetch a complete line.
  assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr,
      icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1,
      icache_miss_req_wdata.mem_req_size = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE,
      icache_miss_req_wdata.mem_req_id = icache_miss_i.tid,
      icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ,
      icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0),
      icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc;
  //    I$ response
  logic icache_miss_resp_w, icache_miss_resp_wok;
  hpdcache_mem_resp_r_t icache_miss_resp_wdata;
  logic icache_miss_resp_data_w, icache_miss_resp_data_wok;
  logic icache_miss_resp_data_r, icache_miss_resp_data_rok;
  icache_resp_data_t icache_miss_resp_data_rdata;
  logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok;
  logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok;
  hpdcache_mem_id_t icache_miss_resp_meta_id;
  icache_resp_data_t icache_miss_rdata;
  generate
    if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin
      // The memory interface is narrower than an I$ line: the beats are
      // gathered by an upsizer, and the response ID is stored on the last beat.
      hpdcache_fifo_reg #(
          .FIFO_DEPTH (1),
          .fifo_data_t(hpdcache_mem_id_t)
      ) i_icache_refill_meta_fifo (
          .clk_i,
          .rst_ni,
          .w_i    (icache_miss_resp_meta_w),
          .wok_o  (icache_miss_resp_meta_wok),
          .wdata_i(icache_miss_resp_wdata.mem_resp_r_id),
          .r_i    (icache_miss_resp_meta_r),
          .rok_o  (icache_miss_resp_meta_rok),
          .rdata_o(icache_miss_resp_meta_id)
      );
      hpdcache_data_upsize #(
          .WR_WIDTH(HPDcacheMemDataWidth),
          .RD_WIDTH(ariane_pkg::ICACHE_LINE_WIDTH),
          .DEPTH   (1)
      ) i_icache_hpdcache_data_upsize (
          .clk_i,
          .rst_ni,
          .w_i    (icache_miss_resp_data_w),
          .wlast_i(icache_miss_resp_wdata.mem_resp_r_last),
          .wok_o  (icache_miss_resp_data_wok),
          .wdata_i(icache_miss_resp_wdata.mem_resp_r_data),
          .r_i    (icache_miss_resp_data_r),
          .rok_o  (icache_miss_resp_data_rok),
          .rdata_o(icache_miss_resp_data_rdata)
      );
      // The I$ response port has no ready signal: always pop both buffers
      assign icache_miss_resp_meta_r = 1'b1, icache_miss_resp_data_r = 1'b1;
      // Every beat is written into the upsizer; the ID only on the last beat
      assign icache_miss_resp_meta_w = icache_miss_resp_w & icache_miss_resp_wdata.mem_resp_r_last;
      assign icache_miss_resp_data_w = icache_miss_resp_w;
      // A beat is accepted when the upsizer has room and, for the last beat,
      // the metadata FIFO has room as well
      assign icache_miss_resp_wok = icache_miss_resp_data_wok & (
          icache_miss_resp_meta_wok | ~icache_miss_resp_wdata.mem_resp_r_last);
      assign icache_miss_rdata = icache_miss_resp_data_rdata;
    end else begin
      // A single beat carries the whole response: pass it through unbuffered
      assign icache_miss_resp_data_rok = icache_miss_resp_w;
      assign icache_miss_resp_meta_rok = icache_miss_resp_w;
      assign icache_miss_resp_wok = 1'b1;
      assign icache_miss_resp_meta_id = icache_miss_resp_wdata.mem_resp_r_id;
      assign icache_miss_resp_data_rdata = icache_miss_resp_wdata.mem_resp_r_data;
      //  In the case of uncacheable accesses, the Icache expects the data to be right-aligned
      always_comb begin : icache_miss_resp_data_comb
        if (!icache_miss_req_rdata.mem_req_cacheable) begin
          automatic logic [ICACHE_CL_WORD_INDEX - 1:0] icache_miss_word_index;
          automatic logic [63:0] icache_miss_word;
          // Select the addressed 64-bit word (address bits above the 3 byte
          // offset bits) and zero-extend it to the line width
          icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3+:ICACHE_CL_WORD_INDEX];
          icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64+:64];
          icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH - 64{1'b0}}, icache_miss_word};
        end else begin
          icache_miss_rdata = icache_miss_resp_data_rdata;
        end
      end
    end
  endgenerate
  assign icache_miss_resp_valid_o = icache_miss_resp_meta_rok,
      icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK,
      icache_miss_resp_o.user = '0,
      icache_miss_resp_o.inv = '0,
      icache_miss_resp_o.tid = icache_miss_resp_meta_id,
      icache_miss_resp_o.data = icache_miss_rdata;
  //  consume the Icache miss on the arrival of the response. The request
  //  metadata is decoded to forward the correct word in case of uncacheable
  //  Icache access
  assign icache_miss_req_r = icache_miss_resp_meta_rok;
  //  }}}
  //  Read request arbiter
  //  {{{
  // Requester indices: 0 = I$ miss, 1 = D$ miss, 2 = D$ uncached read
  logic mem_req_read_ready [2:0];
  logic mem_req_read_valid [2:0];
  hpdcache_mem_req_t mem_req_read [2:0];
  logic mem_req_read_ready_arb;
  logic mem_req_read_valid_arb;
  hpdcache_mem_req_t mem_req_read_arb;
  // The I$ request is masked once it has been issued (pending), because it
  // remains at the head of the FIFO until its response arrives
  assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q,
      mem_req_read[0] = icache_miss_req_rdata;
  assign dcache_miss_ready_o = mem_req_read_ready[1],
      mem_req_read_valid[1] = dcache_miss_valid_i,
      mem_req_read[1] = dcache_miss_i;
  assign dcache_uc_read_ready_o = mem_req_read_ready[2],
      mem_req_read_valid[2] = dcache_uc_read_valid_i,
      mem_req_read[2] = dcache_uc_read_i;
  hpdcache_mem_req_read_arbiter #(
      .N                 (3),
      .hpdcache_mem_req_t(hpdcache_mem_req_t)
  ) i_mem_req_read_arbiter (
      .clk_i,
      .rst_ni,
      .mem_req_read_ready_o(mem_req_read_ready),
      .mem_req_read_valid_i(mem_req_read_valid),
      .mem_req_read_i      (mem_req_read),
      .mem_req_read_ready_i(mem_req_read_ready_arb),
      .mem_req_read_valid_o(mem_req_read_valid_arb),
      .mem_req_read_o      (mem_req_read_arb)
  );
  //  }}}
  //  Read response demultiplexor
  //  {{{
  logic mem_resp_read_ready;
  logic mem_resp_read_valid;
  hpdcache_mem_resp_r_t mem_resp_read;
  logic mem_resp_read_ready_arb[2:0];
  logic mem_resp_read_valid_arb[2:0];
  hpdcache_mem_resp_r_t mem_resp_read_arb [2:0];
  mem_resp_rt_t mem_resp_read_rt;
  // Routing table for read responses: the I$ ID maps to requester 0, the
  // uncached-read ID to requester 2, and every other ID to requester 1
  // (D$ miss). The I$ ID takes precedence if both IDs are equal.
  always_comb begin
    for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
      mem_resp_read_rt[i] = (i == int'( icache_miss_id_i)) ? 0 :
                            (i == int'(dcache_uc_read_id_i)) ? 2 : 1;
    end
  end
  hpdcache_mem_resp_demux #(
      .N        (3),
      .resp_t   (hpdcache_mem_resp_r_t),
      .resp_id_t(hpdcache_mem_id_t)
  ) i_mem_resp_read_demux (
      .clk_i,
      .rst_ni,
      .mem_resp_ready_o(mem_resp_read_ready),
      .mem_resp_valid_i(mem_resp_read_valid),
      .mem_resp_id_i   (mem_resp_read.mem_resp_r_id),
      .mem_resp_i      (mem_resp_read),
      .mem_resp_ready_i(mem_resp_read_ready_arb),
      .mem_resp_valid_o(mem_resp_read_valid_arb),
      .mem_resp_o      (mem_resp_read_arb),
      .mem_resp_rt_i(mem_resp_read_rt)
  );
  assign icache_miss_resp_w = mem_resp_read_valid_arb[0],
      icache_miss_resp_wdata = mem_resp_read_arb[0],
      mem_resp_read_ready_arb[0] = icache_miss_resp_wok;
  assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1],
      dcache_miss_resp_o = mem_resp_read_arb[1],
      mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i;
  assign dcache_uc_read_resp_valid_o = mem_resp_read_valid_arb[2],
      dcache_uc_read_resp_o = mem_resp_read_arb[2],
      mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i;
  //  }}}
  //  Write request arbiter
  //  {{{
  // Requester indices: 0 = D$ write buffer, 1 = D$ uncached write
  logic mem_req_write_ready [1:0];
  logic mem_req_write_valid [1:0];
  hpdcache_mem_req_t mem_req_write [1:0];
  logic mem_req_write_data_ready [1:0];
  logic mem_req_write_data_valid [1:0];
  hpdcache_mem_req_w_t mem_req_write_data [1:0];
  logic mem_req_write_ready_arb;
  logic mem_req_write_valid_arb;
  hpdcache_mem_req_t mem_req_write_arb;
  logic mem_req_write_data_ready_arb;
  logic mem_req_write_data_valid_arb;
  hpdcache_mem_req_w_t mem_req_write_data_arb;
  assign dcache_wbuf_ready_o = mem_req_write_ready[0],
      mem_req_write_valid[0] = dcache_wbuf_valid_i,
      mem_req_write[0] = dcache_wbuf_i;
  assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0],
      mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i,
      mem_req_write_data[0] = dcache_wbuf_data_i;
  assign dcache_uc_write_ready_o = mem_req_write_ready[1],
      mem_req_write_valid[1] = dcache_uc_write_valid_i,
      mem_req_write[1] = dcache_uc_write_i;
  assign dcache_uc_write_data_ready_o = mem_req_write_data_ready[1],
      mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i,
      mem_req_write_data[1] = dcache_uc_write_data_i;
  hpdcache_mem_req_write_arbiter #(
      .N                   (2),
      .hpdcache_mem_req_t  (hpdcache_mem_req_t),
      .hpdcache_mem_req_w_t(hpdcache_mem_req_w_t)
  ) i_mem_req_write_arbiter (
      .clk_i,
      .rst_ni,
      .mem_req_write_ready_o(mem_req_write_ready),
      .mem_req_write_valid_i(mem_req_write_valid),
      .mem_req_write_i      (mem_req_write),
      .mem_req_write_data_ready_o(mem_req_write_data_ready),
      .mem_req_write_data_valid_i(mem_req_write_data_valid),
      .mem_req_write_data_i      (mem_req_write_data),
      .mem_req_write_ready_i(mem_req_write_ready_arb),
      .mem_req_write_valid_o(mem_req_write_valid_arb),
      .mem_req_write_o      (mem_req_write_arb),
      .mem_req_write_data_ready_i(mem_req_write_data_ready_arb),
      .mem_req_write_data_valid_o(mem_req_write_data_valid_arb),
      .mem_req_write_data_o      (mem_req_write_data_arb)
  );
  //  }}}
  //  Write response demultiplexor
  //  {{{
  logic mem_resp_write_ready;
  logic mem_resp_write_valid;
  hpdcache_mem_resp_w_t mem_resp_write;
  logic mem_resp_write_ready_arb[1:0];
  logic mem_resp_write_valid_arb[1:0];
  hpdcache_mem_resp_w_t mem_resp_write_arb [1:0];
  mem_resp_rt_t mem_resp_write_rt;
  // Routing table for write responses: the uncached-write ID maps to
  // requester 1, every other ID to requester 0 (write buffer)
  always_comb begin
    for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
      mem_resp_write_rt[i] = (i == int'(dcache_uc_write_id_i)) ? 1 : 0;
    end
  end
  hpdcache_mem_resp_demux #(
      .N        (2),
      .resp_t   (hpdcache_mem_resp_w_t),
      .resp_id_t(hpdcache_mem_id_t)
  ) i_hpdcache_mem_resp_write_demux (
      .clk_i,
      .rst_ni,
      .mem_resp_ready_o(mem_resp_write_ready),
      .mem_resp_valid_i(mem_resp_write_valid),
      .mem_resp_id_i   (mem_resp_write.mem_resp_w_id),
      .mem_resp_i      (mem_resp_write),
      .mem_resp_ready_i(mem_resp_write_ready_arb),
      .mem_resp_valid_o(mem_resp_write_valid_arb),
      .mem_resp_o      (mem_resp_write_arb),
      .mem_resp_rt_i(mem_resp_write_rt)
  );
  assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0],
      dcache_wbuf_resp_o = mem_resp_write_arb[0],
      mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i;
  assign dcache_uc_write_resp_valid_o = mem_resp_write_valid_arb[1],
      dcache_uc_write_resp_o = mem_resp_write_arb[1],
      mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i;
  //  }}}
  //  I$ miss pending
  //  {{{
  // Set   : the request at the FIFO head is accepted by the read arbiter while
  //         no request is pending.
  // Hold  : while pending, until the FIFO entry is popped (which happens when
  //         the response is delivered to the I$).
  always_ff @(posedge clk_i or negedge rst_ni) begin : icache_miss_pending_ff
    if (!rst_ni) begin
      icache_miss_pending_q <= 1'b0;
    end else begin
      icache_miss_pending_q <= ( (icache_miss_req_rok & mem_req_read_ready[0]) & ~icache_miss_pending_q) |
                               (~(icache_miss_req_r & icache_miss_req_rok) & icache_miss_pending_q);
    end
  end
  //  }}}
  //  AXI adapters
  //  {{{
  axi_req_t axi_req;
  axi_rsp_t axi_resp;
  hpdcache_mem_to_axi_write #(
      .hpdcache_mem_req_t   (hpdcache_mem_req_t),
      .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
      .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
      .aw_chan_t            (axi_aw_chan_t),
      .w_chan_t             (axi_w_chan_t),
      .b_chan_t             (axi_b_chan_t)
  ) i_hpdcache_mem_to_axi_write (
      .req_ready_o(mem_req_write_ready_arb),
      .req_valid_i(mem_req_write_valid_arb),
      .req_i      (mem_req_write_arb),
      .req_data_ready_o(mem_req_write_data_ready_arb),
      .req_data_valid_i(mem_req_write_data_valid_arb),
      .req_data_i      (mem_req_write_data_arb),
      .resp_ready_i(mem_resp_write_ready),
      .resp_valid_o(mem_resp_write_valid),
      .resp_o      (mem_resp_write),
      .axi_aw_valid_o(axi_req.aw_valid),
      .axi_aw_o      (axi_req.aw),
      .axi_aw_ready_i(axi_resp.aw_ready),
      .axi_w_valid_o(axi_req.w_valid),
      .axi_w_o      (axi_req.w),
      .axi_w_ready_i(axi_resp.w_ready),
      .axi_b_valid_i(axi_resp.b_valid),
      .axi_b_i      (axi_resp.b),
      .axi_b_ready_o(axi_req.b_ready)
  );
  hpdcache_mem_to_axi_read #(
      .hpdcache_mem_req_t   (hpdcache_mem_req_t),
      .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
      .ar_chan_t            (axi_ar_chan_t),
      .r_chan_t             (axi_r_chan_t)
  ) i_hpdcache_mem_to_axi_read (
      .req_ready_o(mem_req_read_ready_arb),
      .req_valid_i(mem_req_read_valid_arb),
      .req_i      (mem_req_read_arb),
      .resp_ready_i(mem_resp_read_ready),
      .resp_valid_o(mem_resp_read_valid),
      .resp_o      (mem_resp_read),
      .axi_ar_valid_o(axi_req.ar_valid),
      .axi_ar_o      (axi_req.ar),
      .axi_ar_ready_i(axi_resp.ar_ready),
      .axi_r_valid_i(axi_resp.r_valid),
      .axi_r_i      (axi_resp.r),
      .axi_r_ready_o(axi_req.r_ready)
  );
  assign axi_req_o = axi_req;
  assign axi_resp  = axi_resp_i;
  //  }}}
  //  Assertions
  //  {{{
  //  pragma translate_off
  // Elaboration-time parameter checks. The first argument of $fatal is the
  // mandatory finish_number; the message follows it.
  initial
    assert (HPDcacheMemIdWidth <= AxiIdWidth)
    else $fatal(1, "HPDcacheMemIdWidth shall be less or equal to AxiIdWidth");
  initial
    assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1))
    else
      $fatal(
          1,
          "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses"
      );
  initial
    assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1))
    else
      $fatal(
          1,
          "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes"
      );
  initial
    assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH)
    else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Icache line");
  initial
    assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH)
    else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line");
  //  pragma translate_on
  //  }}}
endmodule : cva6_hpdcache_subsystem_axi_arbiter

View File

@ -0,0 +1,584 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Instruction cache that is compatible with openpiton.
//
// Some notes:
//
// 1) refills always have the size of one cache line, except for accesses to the I/O region, which is mapped
// to the top half of the physical address space (bit 39 = 1). the data width of the interface has the width
// of one cache line, and hence the ifills can be transferred in a single cycle. note that the ifills must be
// consumed unconditionally.
//
// 2) instruction fetches are always assumed to be aligned to 32bit (lower 2 bits are ignored)
//
// 3) NC accesses to I/O space are expected to return 32bit from memory.
//
module cva6_icache
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
/// ID to be used for read transactions
parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0
) (
input logic clk_i,
input logic rst_ni,
/// flush the icache, flush and kill have to be asserted together
input logic flush_i,
/// enable icache
input logic en_i,
/// to performance counter
output logic miss_o,
// address translation requests
input icache_areq_t areq_i,
output icache_arsp_t areq_o,
// data requests
input icache_dreq_t dreq_i,
output icache_drsp_t dreq_o,
// refill port
input logic mem_rtrn_vld_i,
input icache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output icache_req_t mem_data_o
);
// functions
// Convert a binary-encoded way index into a one-hot way mask: all bits are
// cleared, then the bit selected by `in` is set.
function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh(
    input logic [L1I_WAY_WIDTH-1:0] in);
  // Build the result directly in the implicit function return variable
  icache_way_bin2oh = '0;
  icache_way_bin2oh[in] = 1'b1;
endfunction
// signals
logic cache_en_d, cache_en_q; // cache is enabled
logic [riscv::VLEN-1:0] vaddr_d, vaddr_q;
logic paddr_is_nc; // asserted if physical address is non-cacheable
logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare
logic cache_rden; // triggers cache lookup
logic cache_wren; // triggers write to cacheline
logic
cmp_en_d,
cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal.
logic flush_d, flush_q; // used to register and signal pending flushes
// replacement strategy
logic update_lfsr; // shift the LFSR
logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered
logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace
logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot)
logic all_ways_valid; // we need to switch repl strategy since all are valid
// invalidations / flushing
logic inv_en; // incoming invalidations
logic inv_d, inv_q; // invalidation in progress
logic flush_en, flush_done; // used to flush cache entries
logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries
// mem arrays
logic cl_we; // write enable to memory array
logic [ ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array
logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array
logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem
logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache
logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable
logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs
logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
// controller FSM
typedef enum logic [2:0] {
FLUSH,
IDLE,
READ,
MISS,
KILL_ATRANS,
KILL_MISS
} state_e;
state_e state_d, state_q;
///////////////////////////////////////////////////////
// address -> cl_index mapping, interface plumbing
///////////////////////////////////////////////////////
// extract tag from physical address, check if NC
assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q;
// noncacheable if request goes to I/O space, or if cache is disabled
assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions(
CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}
));
// pass exception through
assign dreq_o.ex = areq_i.fetch_exception;
// latch this in case we have to stall later on
// make sure this is 32bit aligned
assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q;
assign areq_o.fetch_vaddr = {vaddr_q[riscv::VLEN-1:2], 2'b0};
// split virtual address into index and offset to address cache arrays
assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH];
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
// if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr[ICACHE_OFFSET_WIDTH-1:2], 2'b0} :
( paddr_is_nc & mem_data_req_o ) ? {{ICACHE_OFFSET_WIDTH-1{1'b0}}, cl_offset_q[2]}<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case
cl_offset_q;
// request word address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end else begin : gen_piton_offset
// icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
// since the piton cache system replicates the data, we can always index the full CL
assign cl_offset_d = (dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr >> 2, 2'b0} : cl_offset_q;
// request word address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end
assign mem_data_o.tid = RdTxId;
assign mem_data_o.nc = paddr_is_nc;
// way that is being replaced
assign mem_data_o.way = repl_way;
assign dreq_o.vaddr = vaddr_q;
// invalidations take two cycles
assign inv_d = inv_en;
///////////////////////////////////////////////////////
// main control logic
///////////////////////////////////////////////////////
logic addr_ni;
assign addr_ni = config_pkg::is_inside_nonidempotent_regions(
CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, areq_i.fetch_paddr}
);
// Main controller FSM (combinational part): computes the next state and the
// request/response handshake signals from the current state `state_q`.
//
// State summary, as implemented below:
//   FLUSH       : asserts flush_en until flush_done, then goes to IDLE, clears
//                 the pending flush and takes over the enable from en_i.
//   IDLE        : goes to FLUSH on a pending flush or when the cache is being
//                 enabled; otherwise accepts a new fetch request (-> READ)
//                 unless a memory return packet is present.
//   READ        : requests the address translation. Once the translation is
//                 valid (and the access is allowed to proceed), either returns
//                 the hit/exception, or issues a refill request and moves to
//                 MISS when it is acknowledged.
//   MISS        : waits for the ifill response (-> IDLE), or moves to
//                 KILL_MISS if the request is killed/flushed meanwhile.
//   KILL_ATRANS : keeps requesting the translation until it is valid, then IDLE.
//   KILL_MISS   : waits for the ifill response of a killed miss, then IDLE.
always_comb begin : p_fsm
// default assignment
state_d = state_q;
cache_en_d = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush
flush_en = 1'b0;
cmp_en_d = 1'b0;
cache_rden = 1'b0;
cache_wren = 1'b0;
inv_en = 1'b0;
flush_d = flush_q | flush_i; // register incoming flush
// interfaces
dreq_o.ready = 1'b0;
areq_o.fetch_req = 1'b0;
dreq_o.valid = 1'b0;
mem_data_req_o = 1'b0;
// performance counter
miss_o = 1'b0;
// handle invalidations unconditionally
// note: invalidations are mutually exclusive with
// ifills, since both arrive over the same IF
// however, we need to make sure below that we
// do not trigger a cache readout at the same time...
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_INV_REQ) begin
inv_en = 1'b1;
end
unique case (state_q)
//////////////////////////////////
// this clears all valid bits
FLUSH: begin
flush_en = 1'b1;
if (flush_done) begin
state_d = IDLE;
flush_d = 1'b0;
// if the cache was not enabled set this
cache_en_d = en_i;
end
end
//////////////////////////////////
// wait for an incoming request
IDLE: begin
// only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q;
// handle pending flushes, or perform cache clear upon enable
if (flush_d || (en_i && !cache_en_q)) begin
state_d = FLUSH;
// wait for incoming requests
end else begin
// mem requests are for sure invals here
if (!mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1;
// we have a new request
if (dreq_i.req) begin
cache_rden = 1'b1;
state_d = READ;
end
end
// a kill in stage 1 overrides the transition to READ taken above
if (dreq_i.kill_s1) begin
state_d = IDLE;
end
end
end
//////////////////////////////////
// check whether we have a hit
// in case the cache is disabled,
// or in case the address is NC, we
// reuse the miss mechanism to handle
// the request
READ: begin
areq_o.fetch_req = '1;
// only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q;
// readout speculatively
cache_rden = cache_en_q;
// proceed only once the translation is valid; a speculative fetch is
// additionally held back when non-idempotency checking is enabled and the
// address lies in a non-idempotent region
if (areq_i.fetch_valid && (!dreq_i.spec || ((CVA6Cfg.NonIdemPotenceEn && !addr_ni) || (!CVA6Cfg.NonIdemPotenceEn)))) begin
// check if we have to flush
if (flush_d) begin
state_d = IDLE;
// we have a hit or an exception output valid result
end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin
dreq_o.valid = ~dreq_i.kill_s2; // just don't output in this case
state_d = IDLE;
// we can accept another request
// and stay here, but only if no inval is coming in
// note: we are not expecting ifill return packets here...
if (!mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1;
if (dreq_i.req) begin
state_d = READ;
end
end
// if a request is being killed at this stage,
// we have to bail out and wait for the address translation to complete
if (dreq_i.kill_s1) begin
state_d = IDLE;
end
// the request missed (or is NC) but is being killed: drop it
end else if (dreq_i.kill_s2) begin
state_d = IDLE;
// we have a miss / NC transaction (held off while an invalidation is in progress)
end else if (!inv_q) begin
cmp_en_d = 1'b0;
// only count this as a miss if the cache is enabled, and
// the address is cacheable
// send out ifill request
mem_data_req_o = 1'b1;
if (mem_data_ack_i) begin
miss_o = ~paddr_is_nc;
state_d = MISS;
end
end
// bail out if this request is being killed (and we missed on the TLB)
end else if (dreq_i.kill_s2 || flush_d) begin
state_d = KILL_ATRANS;
end
end
//////////////////////////////////
// wait until the memory transaction
// returns. do not write to memory
// if the nc bit is set.
MISS: begin
// note: this is mutually exclusive with ICACHE_INV_REQ,
// so we do not have to check for invals here
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
state_d = IDLE;
// only return data if request is not being killed
if (!(dreq_i.kill_s2 || flush_d)) begin
dreq_o.valid = 1'b1;
// only write to cache if this address is cacheable
cache_wren = ~paddr_is_nc;
end
// bail out if this request is being killed
end else if (dreq_i.kill_s2 || flush_d) begin
state_d = KILL_MISS;
end
end
//////////////////////////////////
// killed address translation,
// wait until paddr is valid, and go
// back to idle
KILL_ATRANS: begin
areq_o.fetch_req = '1;
if (areq_i.fetch_valid) begin
state_d = IDLE;
end
end
//////////////////////////////////
// killed miss,
// wait until memory responds and
// go back to idle
KILL_MISS: begin
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
state_d = IDLE;
end
end
default: begin
// we should never get here
state_d = FLUSH;
end
endcase // state_q
end
///////////////////////////////////////////////////////
// valid bit invalidation and replacement strategy
///////////////////////////////////////////////////////
// note: it cannot happen that we get an invalidation + a cl replacement
// in the same cycle as these requests arrive via the same interface
// flushes take precedence over invalidations (it is ok if we ignore
// the inval since the cache is cleared anyway)
assign flush_cnt_d = (flush_done) ? '0 : (flush_en) ? flush_cnt_q + 1 : flush_cnt_q;
assign flush_done = (flush_cnt_q == (ICACHE_NUM_WORDS - 1));
// invalidation/clearing address
// flushing takes precedence over invals
assign vld_addr = (flush_en) ? flush_cnt_q :
(inv_en) ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] :
cl_index;
assign vld_req = (flush_en || cache_rden) ? '1 :
(mem_rtrn_i.inv.all && inv_en) ? '1 :
(mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(
mem_rtrn_i.inv.way
) : repl_way_oh_q;
assign vld_wdata = (cache_wren) ? '1 : '0;
assign vld_we = (cache_wren | inv_en | flush_en);
// assign vld_req = (vld_we | cache_rden);
// chose random replacement if all are valid
assign update_lfsr = cache_wren & all_ways_valid;
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;
// enable signals for memory arrays
assign cl_req = (cache_rden) ? '1 : (cache_wren) ? repl_way_oh_q : '0;
assign cl_we = cache_wren;
// find invalid cache line
lzc #(
.WIDTH(ICACHE_SET_ASSOC)
) i_lzc (
.in_i (~vld_rdata),
.cnt_o (inv_way),
.empty_o(all_ways_valid)
);
// generate random cacheline index
lfsr #(
.LfsrWidth(8),
.OutWidth ($clog2(ariane_pkg::ICACHE_SET_ASSOC))
) i_lfsr (
.clk_i (clk_i),
.rst_ni(rst_ni),
.en_i (update_lfsr),
.out_o (rnd_way)
);
///////////////////////////////////////////////////////
// tag comparison, hit generation
///////////////////////////////////////////////////////
logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx;
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:FETCH_WIDTH];
assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
end
lzc #(
.WIDTH(ICACHE_SET_ASSOC)
) i_lzc_hit (
.in_i (cl_hit),
.cnt_o (hit_idx),
.empty_o()
);
// Fetch data / user output selection.
// When cmp_en_q is set, the fetch word and its user bits come from the data
// SRAM read-out of the way indexed by hit_idx (the count output of i_lzc_hit,
// which is fed with cl_hit). Otherwise they are sliced directly out of the
// memory return packet. Both paths slice at bit offset {cl_offset_q, 3'b0},
// i.e. cl_offset_q multiplied by 8 (presumably a byte offset within the
// cache line - confirm against where cl_offset_d is derived).
always_comb begin
if (cmp_en_q) begin
// cache path: pre-sliced word of the selected way (see gen_tag_cmpsel)
dreq_o.data = cl_sel[hit_idx];
dreq_o.user = cl_user[hit_idx];
end else begin
// bypass path: forward the word straight from the memory return data
dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:FETCH_WIDTH];
dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
end
end
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata[ICACHE_SET_ASSOC-1:0];
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram
// Tag RAM
sram #(
// tag + valid bit
.DATA_WIDTH(ICACHE_TAG_WIDTH + 1),
.NUM_WORDS (ICACHE_NUM_WORDS)
) tag_sram (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (vld_req[i]),
.we_i (vld_we),
.addr_i (vld_addr),
// we can always use the saved tag here since it takes a
// couple of cycle until we write to the cache upon a miss
.wuser_i('0),
.wdata_i({vld_wdata[i], cl_tag_q}),
.be_i ('1),
.ruser_o(),
.rdata_o(cl_tag_valid_rdata[i])
);
assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0];
assign vld_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH];
// Data RAM
sram #(
.USER_WIDTH(ICACHE_USER_LINE_WIDTH),
.DATA_WIDTH(ICACHE_LINE_WIDTH),
.USER_EN (ariane_pkg::FETCH_USER_EN),
.NUM_WORDS (ICACHE_NUM_WORDS)
) data_sram (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (cl_req[i]),
.we_i (cl_we),
.addr_i (cl_index),
.wuser_i(mem_rtrn_i.user),
.wdata_i(mem_rtrn_i.data),
.be_i ('1),
.ruser_o(cl_ruser[i]),
.rdata_o(cl_rdata[i])
);
end
// State registers of the icache controller.
// Asynchronous, active-low reset (rst_ni); on every rising clock edge each
// *_q register takes the value of its *_d counterpart.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (!rst_ni) begin
cl_tag_q <= '0;
flush_cnt_q <= '0;
vaddr_q <= '0;
cmp_en_q <= '0;
cache_en_q <= '0;
flush_q <= '0;
// the FSM leaves reset in FLUSH, the state that asserts flush_en and walks
// flush_cnt_q over the valid/tag array before moving on to IDLE
state_q <= FLUSH;
cl_offset_q <= '0;
repl_way_oh_q <= '0;
inv_q <= '0;
end else begin
cl_tag_q <= cl_tag_d;
flush_cnt_q <= flush_cnt_d;
vaddr_q <= vaddr_d;
cmp_en_q <= cmp_en_d;
cache_en_q <= cache_en_d;
flush_q <= flush_d;
state_q <= state_d;
cl_offset_q <= cl_offset_d;
repl_way_oh_q <= repl_way_oh_d;
inv_q <= inv_d;
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
repl_inval0 :
assert property (
@(posedge clk_i) disable iff (!rst_ni) cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
repl_inval1 :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren)
else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
invalid_state :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS}))
else $fatal(1, "[l1 icache] fsm reached an invalid state");
hot1 :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(
cl_hit
))
else $fatal(1, "[l1 icache] cl_hit signal must be hot1");
// this is only used for verification!
logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
// Verification-only shadow copy of the valid bits and tags.
// Whenever way i is selected (vld_req[i]) together with a write (vld_we), the
// mirror records the same valid bit (vld_wdata[i]) and tag (cl_tag_q) that are
// presented to the tag SRAM write port at address vld_addr. The mirror is
// consumed by the tag_write_duplicate check below.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
if (!rst_ni) begin
// start with every entry invalid and every tag cleared
vld_mirror <= '{default: '0};
tag_mirror <= '{default: '0};
end else begin
for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin
if (vld_req[i] & vld_we) begin
vld_mirror[vld_addr][i] <= vld_wdata[i];
tag_mirror[vld_addr][i] <= cl_tag_q;
end
end
end
end
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_dupl
assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata);
end
tag_write_duplicate :
assert property (
@(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
else $fatal(1, "[l1 icache] cannot allocate a CL that is already present in the cache");
initial begin
// assert wrong parameterizations
assert (ICACHE_INDEX_WIDTH <= 12)
else $fatal(1, "[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
endmodule // cva6_icache

View File

@ -0,0 +1,202 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Nils Wistoff <nwistoff@iis.ee.ethz.ch>, ETH Zurich
// Date: 07.09.2020
// Description: wrapper module to connect the L1I$ to a 64bit AXI bus.
//
// Wrapper that connects the L1 instruction cache (cva6_icache) to an AXI
// master port through axi_shim.
//
// - Refill requests issued by the icache are acknowledged immediately and kept
//   pending locally (req_valid_q / req_data_q) until the AXI shim grants them.
// - Read data beats are collected in a shift register; the cache line is
//   handed back to the icache in the cycle in which the last beat arrives.
//
// Ports:
//   clk_i, rst_ni  clock and asynchronous active-low reset
//   priv_lvl_i     not used inside this module
//   flush_i, en_i  forwarded to the icache
//   miss_o         forwarded from the icache (performance counter)
//   areq_i/areq_o  address translation request/response, forwarded to the icache
//   dreq_i/dreq_o  fetch data request/response, forwarded to the icache
//   axi_req_o, axi_resp_i  AXI refill port, driven by axi_shim
module cva6_icache_axi_wrapper
  import ariane_pkg::*;
  import wt_cache_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter type axi_req_t = logic,
    parameter type axi_rsp_t = logic
) (
    input logic clk_i,
    input logic rst_ni,
    input riscv::priv_lvl_t priv_lvl_i,

    input logic flush_i,  // flush the icache, flush and kill have to be asserted together
    input logic en_i,  // enable icache
    output logic miss_o,  // to performance counter
    // address translation requests
    input icache_areq_t areq_i,
    output icache_arsp_t areq_o,
    // data requests
    input icache_dreq_t dreq_i,
    output icache_drsp_t dreq_o,
    // AXI refill port
    output axi_req_t axi_req_o,
    input axi_rsp_t axi_resp_i
);

  // Number of AXI data words of the larger of the two (I/D) cache lines; this
  // sizes the burst-length port of the AXI shim.
  localparam AxiNumWords = (ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH > DCACHE_LINE_WIDTH) +
                           (DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ;

  logic icache_mem_rtrn_vld;
  icache_rtrn_t icache_mem_rtrn;
  logic icache_mem_data_req;
  logic icache_mem_data_ack;
  icache_req_t icache_mem_data;

  logic axi_rd_req;
  logic axi_rd_gnt;
  logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr;
  logic [$clog2(AxiNumWords)-1:0] axi_rd_blen;
  logic [2:0] axi_rd_size;
  logic [CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in;
  logic axi_rd_rdy;
  logic axi_rd_lock;
  logic axi_rd_last;
  logic axi_rd_valid;
  logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
  logic [CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_out;
  logic axi_rd_exokay;

  logic req_valid_d, req_valid_q;
  icache_req_t req_data_d, req_data_q;
  logic first_d, first_q;
  logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
      rd_shift_d, rd_shift_q;

  // Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but
  // required by AXI).
  assign req_valid_d = ~axi_rd_gnt & (icache_mem_data_req | req_valid_q);

  // Update read request information on a new request
  assign req_data_d = (icache_mem_data_req) ? icache_mem_data : req_data_q;

  // We have a new or pending read request
  assign axi_rd_req = icache_mem_data_req | req_valid_q;
  assign axi_rd_addr = CVA6Cfg.AxiAddrWidth'(req_data_d.paddr);

  // Fetch a full cache line on a cache miss, or a single word on a bypassed access.
  // The number of beats is derived from the configured AXI data width so that it
  // agrees with axi_rd_size and with the depth of the refill shift register
  // (both of which are already based on CVA6Cfg.AxiDataWidth).
  assign axi_rd_blen = (req_data_d.nc) ? '0 :
                       ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
  assign axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth / 8);  // Maximum
  assign axi_rd_id_in = req_data_d.tid;
  assign axi_rd_rdy = 1'b1;
  assign axi_rd_lock = 1'b0;

  // Immediately acknowledge read request. This is an implicit requirement for the icache.
  assign icache_mem_data_ack = icache_mem_data_req;

  // Return data as soon as last word arrives
  assign icache_mem_rtrn_vld = axi_rd_valid & axi_rd_last;
  assign icache_mem_rtrn.data = rd_shift_d;
  // The user output of the AXI shim is left unconnected below, so drive the user
  // bits of the return packet with a defined value instead of leaving them
  // floating (the icache forwards and stores mem_rtrn_i.user).
  assign icache_mem_rtrn.user = '0;
  assign icache_mem_rtrn.tid = req_data_q.tid;
  assign icache_mem_rtrn.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
  assign icache_mem_rtrn.inv = '0;

  // -------
  // I-Cache
  // -------
  cva6_icache #(
      // use ID 0 for icache reads
      .CVA6Cfg(CVA6Cfg),
      .RdTxId (0)
  ) i_cva6_icache (
      .clk_i         (clk_i),
      .rst_ni        (rst_ni),
      .flush_i       (flush_i),
      .en_i          (en_i),
      .miss_o        (miss_o),
      .areq_i        (areq_i),
      .areq_o        (areq_o),
      .dreq_i        (dreq_i),
      .dreq_o        (dreq_o),
      .mem_rtrn_vld_i(icache_mem_rtrn_vld),
      .mem_rtrn_i    (icache_mem_rtrn),
      .mem_data_req_o(icache_mem_data_req),
      .mem_data_ack_i(icache_mem_data_ack),
      .mem_data_o    (icache_mem_data)
  );

  // --------
  // AXI shim
  // --------
  // Only the read channel is used; all write-side inputs are tied off.
  axi_shim #(
      .CVA6Cfg    (CVA6Cfg),
      .AxiNumWords(AxiNumWords),
      .axi_req_t  (axi_req_t),
      .axi_rsp_t  (axi_rsp_t)
  ) i_axi_shim (
      .clk_i      (clk_i),
      .rst_ni     (rst_ni),
      .rd_req_i   (axi_rd_req),
      .rd_gnt_o   (axi_rd_gnt),
      .rd_addr_i  (axi_rd_addr),
      .rd_blen_i  (axi_rd_blen),
      .rd_size_i  (axi_rd_size),
      .rd_id_i    (axi_rd_id_in),
      .rd_rdy_i   (axi_rd_rdy),
      .rd_lock_i  (axi_rd_lock),
      .rd_last_o  (axi_rd_last),
      .rd_valid_o (axi_rd_valid),
      .rd_data_o  (axi_rd_data),
      .rd_user_o  (),
      .rd_id_o    (axi_rd_id_out),
      .rd_exokay_o(axi_rd_exokay),
      .wr_req_i   ('0),
      .wr_gnt_o   (),
      .wr_addr_i  ('0),
      .wr_data_i  ('0),
      .wr_user_i  ('0),
      .wr_be_i    ('0),
      .wr_blen_i  ('0),
      .wr_size_i  ('0),
      .wr_id_i    ('0),
      .wr_lock_i  ('0),
      .wr_atop_i  ('0),
      .wr_rdy_i   ('0),
      .wr_valid_o (),
      .wr_id_o    (),
      .wr_exokay_o(),
      .axi_req_o  (axi_req_o),
      .axi_resp_i (axi_resp_i)
  );

  // Buffer burst data in shift register
  // Each valid beat is shifted in from the top, so that after a full-line burst
  // the first beat ends up in word 0. first_q flags the first beat of a
  // transaction (it is reloaded with axi_rd_last on every beat).
  always_comb begin : p_axi_rtrn_shift
    first_d    = first_q;
    rd_shift_d = rd_shift_q;

    if (axi_rd_valid) begin
      first_d = axi_rd_last;
      if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
        rd_shift_d = axi_rd_data;
      end else begin
        rd_shift_d = {axi_rd_data, rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]};
      end

      // If this is a single word transaction, we need to make sure that word is placed at offset 0
      if (first_q) begin
        rd_shift_d[0] = axi_rd_data;
      end
    end
  end

  // Registers
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
    if (!rst_ni) begin
      req_valid_q <= 1'b0;
      req_data_q  <= '0;
      first_q     <= 1'b1;
      rd_shift_q  <= '0;
    end else begin
      req_valid_q <= req_valid_d;
      req_data_q  <= req_data_d;
      first_q     <= first_d;
      rd_shift_q  <= rd_shift_d;
    end
  end

endmodule  // cva6_icache_axi_wrapper

View File

@ -0,0 +1,62 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : February, 2023
* Description : HPDcache Types' Definition
* History :
*/
// Include guard: the macros below are defined only once per compilation unit.
`ifndef __HPDCACHE_TYPEDEF_SVH__
`define __HPDCACHE_TYPEDEF_SVH__
// Each macro expands to a packed-struct typedef called __name__. The caller
// supplies the address / id / data / byte-enable types, which lets one header
// serve memory interfaces of different widths. The expansion does not end with
// a semicolon, so the caller has to add it after the macro invocation.

// Memory request: address, length, size, id, command, atomic operation and a
// cacheable flag. Argument types: addr_t (address) and id_t (transaction id).
`define HPDCACHE_TYPEDEF_MEM_REQ_T(__name__, addr_t, id_t) \
typedef struct packed { \
addr_t mem_req_addr; \
hpdcache_pkg::hpdcache_mem_len_t mem_req_len; \
hpdcache_pkg::hpdcache_mem_size_t mem_req_size; \
id_t mem_req_id; \
hpdcache_pkg::hpdcache_mem_command_e mem_req_command; \
hpdcache_pkg::hpdcache_mem_atomic_e mem_req_atomic; \
logic mem_req_cacheable; \
} __name__
// Memory read response: error code, id, data and a "last" flag.
// Argument types: id_t (transaction id) and data_t (data word).
`define HPDCACHE_TYPEDEF_MEM_RESP_R_T(__name__, id_t, data_t) \
typedef struct packed { \
hpdcache_pkg::hpdcache_mem_error_e mem_resp_r_error; \
id_t mem_resp_r_id; \
data_t mem_resp_r_data; \
logic mem_resp_r_last; \
} __name__
// Memory write data: data, byte enables and a "last" flag.
// Argument types: data_t (data word) and be_t (byte-enable vector).
`define HPDCACHE_TYPEDEF_MEM_REQ_W_T(__name__, data_t, be_t) \
typedef struct packed { \
data_t mem_req_w_data; \
be_t mem_req_w_be; \
logic mem_req_w_last; \
} __name__
// Memory write response: is-atomic flag, error code and id.
// Argument type: id_t (transaction id).
`define HPDCACHE_TYPEDEF_MEM_RESP_W_T(__name__, id_t) \
typedef struct packed { \
logic mem_resp_w_is_atomic; \
hpdcache_pkg::hpdcache_mem_error_e mem_resp_w_error; \
id_t mem_resp_w_id; \
} __name__
`endif

View File

@ -0,0 +1,181 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : November 22, 2022
* Description : Refill data downsize
* History :
*/
//  Width-converting FIFO (wide write side, narrow read side).
//
//  Every accepted write stores one WR_WIDTH-bit entry. Each entry is then read
//  out as RD_WORDS = WR_WIDTH/RD_WIDTH consecutive RD_WIDTH-bit words, starting
//  with the least significant word. An entry is released when its last word
//  has been read.
//
//  Parameters:
//    WR_WIDTH  width in bits of the write port
//    RD_WIDTH  width in bits of the read port (must divide WR_WIDTH)
//    DEPTH     number of WR_WIDTH-bit entries that can be buffered
//
//  Handshake:
//    w_i / wok_o  a write is accepted in a cycle where both are high
//    r_i / rok_o  a word is consumed in a cycle where both are high;
//                 rdata_o shows the current word combinationally
module hpdcache_data_downsize
//  {{{
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    parameter int WR_WIDTH = 0,
    parameter int RD_WIDTH = 0,
    parameter int DEPTH    = 0,

    localparam type wdata_t = logic [WR_WIDTH-1:0],
    localparam type rdata_t = logic [RD_WIDTH-1:0]
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic   clk_i,
    input  logic   rst_ni,

    input  logic   w_i,
    output logic   wok_o,
    input  wdata_t wdata_i,

    input  logic   r_i,
    output logic   rok_o,
    output rdata_t rdata_o
);
    //  }}}

    //  Architecture
    //  {{{
    //  Local definitions
    //  {{{
    localparam int RD_WORDS      = WR_WIDTH/RD_WIDTH;
    //  $clog2(1) is 0, which would turn the pointer type into logic [-1:0] for
    //  a single-entry buffer; keep the pointers at least one bit wide.
    localparam int PTR_WIDTH     = (DEPTH > 1) ? $clog2(DEPTH) : 1;
    localparam int WORDCNT_WIDTH = $clog2(RD_WORDS);

    typedef logic [PTR_WIDTH-1:0]     bufptr_t;
    typedef logic [WORDCNT_WIDTH-1:0] wordptr_t;
    typedef logic [PTR_WIDTH:0]       occupancy_t;
    //  }}}

    //  Internal registers and signals
    //  {{{
    rdata_t [DEPTH-1:0][RD_WORDS-1:0] buf_q;            // entry storage, split in read words
    bufptr_t                          wrptr_q, wrptr_d; // next entry to write
    bufptr_t                          rdptr_q, rdptr_d; // entry currently being read
    occupancy_t                       used_q, used_d;   // number of occupied entries
    wordptr_t [DEPTH-1:0]             words_q, words_d; // per entry: words left after the current one
    logic                             words_set;
    logic                             full, empty;
    //  }}}

    //  Control-Path
    //  {{{
    assign full  = (hpdcache_uint'(used_q) == DEPTH),
           empty = (used_q == 0),
           wok_o = ~full,
           rok_o = ~empty;

    always_comb
    begin : ctrl_comb
        automatic logic used_inc, used_dec;
        automatic logic words_dec;

        rdptr_d   = rdptr_q;
        wrptr_d   = wrptr_q;
        used_dec  = 1'b0;
        used_inc  = 1'b0;
        words_dec = 1'b0;
        words_set = 1'b0;

        //  accepted write: occupy the entry at wrptr_q and advance (with wrap)
        if (w_i && wok_o) begin
            used_inc  = 1'b1;
            words_set = 1'b1;
            if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin
                wrptr_d = 0;
            end else begin
                wrptr_d = wrptr_q + 1;
            end
        end

        //  accepted read: step to the next word of the entry, or release the
        //  entry and advance rdptr_q (with wrap) when this was its last word
        if (r_i && rok_o) begin
            words_dec = (words_q[rdptr_q] > 0);
            if (words_q[rdptr_q] == 0) begin
                used_dec = 1'b1;
                if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin
                    rdptr_d = 0;
                end else begin
                    rdptr_d = rdptr_q + 1;
                end
            end
        end

        //  occupancy is unchanged when an entry is written and another one is
        //  released in the same cycle
        case ({used_inc, used_dec})
            2'b10  : used_d = used_q + 1;
            2'b01  : used_d = used_q - 1;
            default: used_d = used_q;
        endcase

        words_d = words_q;
        if (words_set) begin
            words_d[wrptr_q] = wordptr_t'(RD_WORDS - 1);
        end
        if (words_dec) begin
            words_d[rdptr_q] = words_q[rdptr_q] - 1;
        end
    end

    always_ff @(posedge clk_i or negedge rst_ni)
    begin : ctrl_ff
        if (!rst_ni) begin
            rdptr_q <= 0;
            wrptr_q <= 0;
            used_q  <= 0;
            words_q <= '0;
        end else begin
            rdptr_q <= rdptr_d;
            wrptr_q <= wrptr_d;
            used_q  <= used_d;
            words_q <= words_d;
        end
    end
    //  }}}

    //  Data-Path
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : buf_ff
        if (!rst_ni) begin
            buf_q <= '0;
        end else begin
            if (words_set) begin
                buf_q[wrptr_q] <= wdata_i;
            end
        end
    end

    //  words_q counts down from RD_WORDS-1, so the word index counts up from 0
    assign rdata_o = buf_q[rdptr_q][RD_WORDS - hpdcache_uint'(words_q[rdptr_q]) - 1];
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    initial
    begin : initial_assertions
        assert (DEPTH > 0) else $error("DEPTH must be greater than 0");
        assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0");
        assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0");
        assert (RD_WIDTH < WR_WIDTH) else $error("RD_WIDTH must be less to WR_WIDTH");
        assert ((WR_WIDTH % RD_WIDTH) == 0) else $error("WR_WIDTH must be a multiple RD_WIDTH");
    end
    //  pragma translate_on
    //  }}}
    //  }}}
endmodule
//  }}}

View File

@ -0,0 +1,181 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : November 22, 2022
* Description : Refill data upsize
* History :
*/
//  Width-converting FIFO (narrow write side, wide read side).
//
//  Accepted WR_WIDTH-bit writes are packed, least significant word first, into
//  RD_WIDTH-bit entries. An entry becomes readable once WR_WORDS =
//  RD_WIDTH/WR_WIDTH words have been written into it, or earlier when a write
//  is flagged with wlast_i. A read pops one whole entry.
//  Word slots of an entry that were not written since it was last used keep
//  their previous contents (the storage is only cleared by reset).
//
//  Parameters:
//    WR_WIDTH  width in bits of the write port (must divide RD_WIDTH)
//    RD_WIDTH  width in bits of the read port
//    DEPTH     number of RD_WIDTH-bit entries that can be buffered
//
//  Handshake:
//    w_i / wok_o  a write is accepted in a cycle where both are high
//    r_i / rok_o  an entry is consumed in a cycle where both are high;
//                 rdata_o shows the current entry combinationally
module hpdcache_data_upsize
//  {{{
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    parameter int WR_WIDTH = 0,
    parameter int RD_WIDTH = 0,
    parameter int DEPTH    = 0,

    localparam type wdata_t = logic [WR_WIDTH-1:0],
    localparam type rdata_t = logic [RD_WIDTH-1:0]
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic   clk_i,
    input  logic   rst_ni,

    input  logic   w_i,
    input  logic   wlast_i,
    output logic   wok_o,
    input  wdata_t wdata_i,

    input  logic   r_i,
    output logic   rok_o,
    output rdata_t rdata_o
);
    //  }}}

    //  Architecture
    //  {{{
    //  Local definitions
    //  {{{
    localparam int WR_WORDS      = RD_WIDTH/WR_WIDTH;
    //  $clog2(1) is 0, which would turn the pointer type into logic [-1:0] for
    //  a single-entry buffer; keep the pointers at least one bit wide.
    localparam int PTR_WIDTH     = (DEPTH > 1) ? $clog2(DEPTH) : 1;
    localparam int WORDCNT_WIDTH = $clog2(WR_WORDS);

    typedef logic [PTR_WIDTH-1:0]     bufptr_t;
    typedef logic [WORDCNT_WIDTH-1:0] wordptr_t;
    typedef logic [PTR_WIDTH:0]       occupancy_t;
    //  }}}

    //  Internal registers and signals
    //  {{{
    wdata_t [DEPTH-1:0][WR_WORDS-1:0] buf_q;            // entry storage, split in write words
    bufptr_t                          wrptr_q, wrptr_d; // entry currently being filled
    bufptr_t                          rdptr_q, rdptr_d; // next entry to read
    occupancy_t                       used_q, used_d;   // number of completed entries
    wordptr_t [DEPTH-1:0]             words_q, words_d; // per entry: next word slot to write
    logic                             full, empty;
    logic                             shift;
    //  }}}

    //  Control-Path
    //  {{{
    assign full  = (hpdcache_uint'(used_q) == DEPTH),
           empty = (used_q == 0),
           wok_o = ~full,
           rok_o = ~empty;

    always_comb
    begin : ctrl_comb
        automatic logic used_inc, used_dec;
        automatic logic words_inc, words_reset;

        wrptr_d     = wrptr_q;
        rdptr_d     = rdptr_q;
        words_d     = words_q;
        used_dec    = 1'b0;
        used_inc    = 1'b0;
        words_reset = 1'b0;
        words_inc   = 1'b0;
        shift       = 1'b0;

        //  accepted write: store the word; the entry is completed when its
        //  last slot is written or when the writer signals wlast_i
        if (w_i && wok_o) begin
            shift     = 1'b1;
            words_inc = (hpdcache_uint'(words_q[wrptr_q]) < (WR_WORDS-1));
            if (hpdcache_uint'(words_q[wrptr_q]) == (WR_WORDS-1) || wlast_i) begin
                used_inc = 1'b1;
                if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin
                    wrptr_d = 0;
                end else begin
                    wrptr_d = wrptr_q + 1;
                end
            end
        end

        //  accepted read: release the entry and rewind its word counter so the
        //  next fill of this entry starts again at slot 0
        if (r_i && rok_o) begin
            used_dec    = 1'b1;
            words_reset = 1'b1;
            if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin
                rdptr_d = 0;
            end else begin
                rdptr_d = rdptr_q + 1;
            end
        end

        //  occupancy is unchanged when an entry is completed and another one
        //  is released in the same cycle
        case ({used_inc, used_dec})
            2'b10  : used_d = used_q + 1;
            2'b01  : used_d = used_q - 1;
            default: used_d = used_q;
        endcase

        if (words_inc)   words_d[wrptr_q] = words_q[wrptr_q] + 1;
        if (words_reset) words_d[rdptr_q] = 0;
    end

    always_ff @(posedge clk_i or negedge rst_ni)
    begin : ctrl_ff
        if (!rst_ni) begin
            rdptr_q <= 0;
            wrptr_q <= 0;
            used_q  <= 0;
            words_q <= '0;
        end else begin
            rdptr_q <= rdptr_d;
            wrptr_q <= wrptr_d;
            used_q  <= used_d;
            words_q <= words_d;
        end
    end
    //  }}}

    //  Data-Path
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : buf_ff
        if (!rst_ni) begin
            buf_q <= '0;
        end else begin
            if (shift) begin
                buf_q[wrptr_q][words_q[wrptr_q]] <= wdata_i;
            end
        end
    end

    assign rdata_o = buf_q[rdptr_q];
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    initial
    begin : initial_assertions
        assert (DEPTH > 0) else $error("DEPTH must be greater than 0");
        assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0");
        assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0");
        assert (WR_WIDTH < RD_WIDTH) else $error("WR_WIDTH must be less to RD_WIDTH");
        assert ((RD_WIDTH % WR_WIDTH) == 0) else $error("RD_WIDTH must be a multiple WR_WIDTH");
    end
    //  pragma translate_on
    //  }}}
    //  }}}
endmodule
//  }}}

View File

@ -0,0 +1,69 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : Simple demultiplexer
* History :
*/
//  Combinational demultiplexer: data_i is copied to the output(s) designated
//  by sel_i, every other output is driven to zero.
//
//  With ONE_HOT_SEL = 0 the selector is a binary index and at most one output
//  carries data_i; with ONE_HOT_SEL = 1 the selector has one bit per output
//  and every output whose bit is set carries data_i.
module hpdcache_demux
    //  Parameters
    //  {{{
#(
    //  Number of outputs
    parameter int unsigned NOUTPUT     = 0,
    //  Width in bits of the input and of each output
    parameter int unsigned DATA_WIDTH  = 0,
    //  Selector signal is one-hot encoded
    parameter bit          ONE_HOT_SEL = 0,

    //  Compute the width of the selection signal
    localparam int unsigned NOUTPUT_LOG2 = $clog2(NOUTPUT),
    localparam int unsigned SEL_WIDTH    = ONE_HOT_SEL ? NOUTPUT : NOUTPUT_LOG2,

    localparam type data_t = logic [DATA_WIDTH-1:0],
    localparam type sel_t  = logic [SEL_WIDTH-1:0]
)
    //  }}}
    //  Ports
    //  {{{
(
    input  data_t               data_i,
    input  sel_t                sel_i,
    output data_t [NOUTPUT-1:0] data_o
);
    //  }}}

    generate
        always_comb
        begin : demux_comb
            //  the encoding is a compile-time choice, so decide once and then
            //  walk over the outputs
            if (ONE_HOT_SEL) begin
                //  one selector bit per output
                for (int unsigned k = 0; k < NOUTPUT; k++) begin
                    data_o[k] = sel_i[k] ? data_i : '0;
                end
            end else begin
                //  binary selector: compare against the output index
                for (int unsigned k = 0; k < NOUTPUT; k++) begin
                    data_o[k] = (sel_t'(k) == sel_i) ? data_i : '0;
                end
            end
        end
    endgenerate
endmodule

View File

@ -0,0 +1,167 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : FIFO buffer (using registers)
* History :
*/
//  Register-based FIFO.
//
//  Parameters:
//    FIFO_DEPTH   number of entries; 1 instantiates hpdcache_sync_buffer,
//                 larger values build the pointer-based FIFO below, and 0
//                 generates no logic at all
//    FEEDTHROUGH  when set, wdata_i is presented on rdata_o while the FIFO is
//                 empty, rok_o is also raised by w_i and wok_o is also raised
//                 by r_i
//    fifo_data_t  type of one entry
//
//  Handshake:
//    w_i / wok_o  write request / FIFO can accept a write
//    r_i / rok_o  read request / rdata_o is valid
module hpdcache_fifo_reg
    //  Parameters
    //  {{{
#(
    parameter int unsigned FIFO_DEPTH  = 0,
    parameter bit          FEEDTHROUGH = 1'b0,
    parameter type         fifo_data_t = logic
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic       clk_i,
    input  logic       rst_ni,

    input  logic       w_i,
    output logic       wok_o,
    input  fifo_data_t wdata_i,

    input  logic       r_i,
    output logic       rok_o,
    output fifo_data_t rdata_o
);
    //  }}}

    //  Depth 1: delegate to the synchronization buffer
    if (FIFO_DEPTH == 1) begin : gen_sync_buffer
        hpdcache_sync_buffer #(
            .FEEDTHROUGH (FEEDTHROUGH),
            .data_t      (fifo_data_t)
        ) i_sync_buffer (
            .clk_i   (clk_i),
            .rst_ni  (rst_ni),
            .w_i     (w_i),
            .wok_o   (wok_o),
            .wdata_i (wdata_i),
            .r_i     (r_i),
            .rok_o   (rok_o),
            .rdata_o (rdata_o)
        );

    //  Depth > 1: circular buffer with read/write pointers
    end else if (FIFO_DEPTH > 0) begin : gen_fifo
        //  Types
        //  {{{
        typedef logic unsigned [$clog2(FIFO_DEPTH)-1:0] fifo_addr_t;
        //  }}}

        //  Storage and state
        //  {{{
        fifo_data_t [FIFO_DEPTH-1:0] fifo_mem_q;
        fifo_addr_t rptr_q, rptr_d;          //  read pointer
        fifo_addr_t wptr_q, wptr_d;          //  write pointer
        logic       crossover_q, crossover_d; //  write pointer is one lap ahead
        logic       pop, push;               //  a read / write really takes place
        logic       rptr_at_top, wptr_at_top; //  pointer sits on the last entry
        logic       ptrs_equal;
        logic       empty, full;
        //  }}}

        //  Status and handshake
        //  {{{
        //  equal pointers mean empty or full; the lap flag tells which one
        assign ptrs_equal = (wptr_q == rptr_q);
        assign empty      = ptrs_equal & ~crossover_q;
        assign full       = ptrs_equal &  crossover_q;

        assign rok_o = FEEDTHROUGH ? (~empty | w_i) : ~empty;
        assign wok_o = FEEDTHROUGH ? (~full  | r_i) : ~full;

        assign pop  = r_i & ~empty;
        //  In feedthrough mode a write is stored unless it is bypassed
        //  straight to the reader (empty and being read); a full FIFO can
        //  still take a write when an entry is read in the same cycle.
        assign push = w_i & (FEEDTHROUGH ? ((empty & ~r_i) | (full & r_i) | (~full & ~empty))
                                         : ~full);
        //  }}}

        //  Pointer update
        //  {{{
        assign wptr_at_top = (wptr_q == fifo_addr_t'(FIFO_DEPTH-1));
        assign rptr_at_top = (rptr_q == fifo_addr_t'(FIFO_DEPTH-1));

        always_comb
        begin : fifo_ctrl_comb
            wptr_d      = wptr_q;
            rptr_d      = rptr_q;
            crossover_d = crossover_q;

            if (push) begin
                wptr_d = wptr_at_top ? 0 : wptr_q + 1;
            end

            if (pop) begin
                rptr_d = rptr_at_top ? 0 : rptr_q + 1;
            end

            //  the lap flag is set when the write pointer wraps and cleared
            //  when the read pointer wraps; a wrapping write wins
            if (push && wptr_at_top) begin
                crossover_d = 1'b1;
            end else if (pop && rptr_at_top) begin
                crossover_d = 1'b0;
            end
        end
        //  }}}

        //  Data storage (no reset)
        //  {{{
        always_ff @(posedge clk_i)
        begin
            if (push) fifo_mem_q[wptr_q] <= wdata_i;
        end

        assign rdata_o = (FEEDTHROUGH && empty) ? wdata_i : fifo_mem_q[rptr_q];
        //  }}}

        //  State registers
        //  {{{
        always_ff @(posedge clk_i or negedge rst_ni)
        begin
            if (!rst_ni) begin
                wptr_q      <= 0;
                rptr_q      <= 0;
                crossover_q <= 1'b0;
            end else begin
                wptr_q      <= wptr_d;
                rptr_q      <= rptr_d;
                crossover_q <= crossover_d;
            end
        end
        //  }}}

        //  Assertions
        //  {{{
        //  pragma translate_off
        rptr_ahead_wptr_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
                ((rptr_q <= wptr_q) && !crossover_q) ||
                ((rptr_q >= wptr_q) &&  crossover_q)) else
                $error("fifo: read pointer is ahead of the write pointer");
        //  pragma translate_on
        //  }}}
    end
endmodule

View File

@ -0,0 +1,85 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : Fixed-Priority Arbiter
* History :
*/
//  Fixed-priority arbiter with grant hold.
//
//  The grant vector is computed from req_i by hpdcache_prio_1hot_encoder (the
//  priority order is defined by that module, which is not part of this file).
//  If a grant is issued while ready_i is low, it is stored and kept on gnt_o
//  until ready_i is asserted; requests arriving in the meantime do not change
//  the grant.
//
//  Ports:
//    req_i    one request bit per requester
//    gnt_o    grant vector
//    ready_i  the granted request is consumed in this cycle
module hpdcache_fxarb
    //  Parameters
    //  {{{
#(
    //  Number of requesters
    parameter int unsigned N = 0
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic         clk_i,
    input  logic         rst_ni,
    input  logic [N-1:0] req_i,
    output logic [N-1:0] gnt_o,
    input  logic         ready_i
);
    //  }}}

    //  Internal wires and registers
    //  {{{
    logic [N-1:0] gnt_q, gnt;
    logic         wait_q;
    logic         any_req;   //  at least one requester is active
    logic         wait_d;    //  next value of the hold flag
    logic         capture;   //  store the current grant for holding
    //  }}}

    //  Combinational grant from the priority encoder
    //  {{{
    hpdcache_prio_1hot_encoder #(.N(N)) prio_msk_i (.val_i(req_i), .val_o(gnt));
    //  }}}

    //  While holding, replay the stored grant; otherwise pass the fresh one
    //  {{{
    assign gnt_o = wait_q ? gnt_q : gnt;
    //  }}}

    //  Next-state terms
    //  {{{
    assign any_req = |req_i;
    //  keep (or start) holding as long as the consumer is not ready
    assign wait_d  = ~ready_i & (wait_q | any_req);
    //  the grant is stored only in the cycle in which holding starts
    assign capture = ~ready_i & ~wait_q & any_req;
    //  }}}

    //  State registers
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin
        if (!rst_ni) begin
            wait_q <= 1'b0;
            gnt_q  <= '0;
        end else begin
            wait_q <= wait_d;
            if (capture) begin
                gnt_q <= gnt;
            end
        end
    end
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
            $onehot0(gnt_o)) else $error("arbiter: granting more than one requester");
    //  pragma translate_on
    //  }}}
endmodule

View File

@ -0,0 +1,79 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Author(s) : Cesar Fuguet
* Creation Date : April, 2021
* Description : Simple multiplexor
* History :
*/
//  Generic combinational multiplexer.
//
//  data_o is the OR of all inputs whose select condition is true, so:
//    - one-hot mode  (ONE_HOT_SEL = 1): bit i of sel_i selects data_i[i];
//      if several bits are set the selected inputs are OR-ed together,
//      if none is set data_o is all zeros.
//    - binary mode   (ONE_HOT_SEL = 0): sel_i is the index of the selected
//      input; an index >= NINPUT yields all zeros.
//
//  Parameters:
//    NINPUT      - number of inputs
//    DATA_WIDTH  - width in bits of each input
//    ONE_HOT_SEL - 1: one-hot selector, 0: binary selector
//  Ports:
//    data_i      - packed array of NINPUT inputs
//    sel_i       - selector (NINPUT bits in one-hot mode, otherwise
//                  max($clog2(NINPUT), 1) bits)
//    data_o      - selected input
module hpdcache_mux
    //  Parameters
    //  {{{
#(
    //  Number of inputs
    parameter  int unsigned NINPUT      = 0,

    //  Width in bits of each input
    parameter  int unsigned DATA_WIDTH  = 0,

    //  Selector signal is one-hot encoded
    parameter  bit          ONE_HOT_SEL = 0,

    //  Compute the width of the selection signal.
    //  $clog2(1) is 0, which would declare the selector as logic [-1:0]
    //  (a reversed 2-bit range); keep at least one selector bit instead.
    localparam int unsigned NINPUT_LOG2 = (NINPUT > 1) ? $clog2(NINPUT) : 1,
    localparam int unsigned SEL_WIDTH   = ONE_HOT_SEL ? NINPUT : NINPUT_LOG2,

    localparam type data_t = logic [DATA_WIDTH-1:0],
    localparam type sel_t  = logic [SEL_WIDTH-1:0]
)
    //  }}}

    //  Ports
    //  {{{
(
    input  data_t [NINPUT-1:0] data_i,
    input  sel_t               sel_i,
    output data_t              data_o
);
    //  }}}

    generate
        //  Selector is one-hot encoded
        if (ONE_HOT_SEL == 1) begin : gen_onehot_sel
            always_comb
            begin : data_out_mux_comb
                data_o = '0;
                for (int unsigned i = 0; i < NINPUT; i++) begin
                    data_o |= sel_i[i] ? data_i[i] : '0;
                end
            end

        //  Selector is binary encoded
        end else begin : gen_binary_sel
            always_comb
            begin : data_out_mux_comb
                data_o = '0;
                for (int unsigned i = 0; i < NINPUT; i++) begin
                    data_o |= (i == int'(sel_i)) ? data_i[i] : '0;
                end
            end
        end
    endgenerate
endmodule

View File

@ -0,0 +1,43 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : Priority One-hot Encoder
* History :
*/
//  Priority one-hot encoder.
//
//  val_o keeps only the lowest-indexed set bit of val_i (bit 0 has the
//  highest priority); val_o is all zeros when val_i is all zeros.
//
//  Parameters:
//    N     - width in bits of val_i / val_o
//  Ports:
//    val_i - input vector
//    val_o - one-hot (or zero) vector flagging the first set bit of val_i
module hpdcache_prio_1hot_encoder
    //  Parameters
#(
    parameter int unsigned N = 0
)
    //  Ports
(
    input  logic [N-1:0] val_i,
    output logic [N-1:0] val_o
);
    always_comb
    begin : prio_comb
        //  True once any lower-indexed bit of val_i has been found set
        logic lower_set;

        //  Bit 0 has no lower neighbour: it passes through unchanged
        val_o[0]  = val_i[0];
        lower_set = val_i[0];

        //  Every other bit is kept only if nothing below it is set
        for (int unsigned k = 1; k < N; k++) begin
            val_o[k]  = val_i[k] & ~lower_set;
            lower_set = lower_set | val_i[k];
        end
    end
endmodule

View File

@ -0,0 +1,63 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : March, 2020
* Description : 1RW register bank with write byte enable
* History :
*/
//  Single-port (1RW) register bank with per-byte write enable.
//
//  On a clock edge with cs high, rdata is loaded with the word stored at
//  addr; when we is also high, each byte of that word whose wbyteenable bit
//  is set is replaced by the matching byte of wdata. Because both updates
//  are non-blocking, rdata gets the content stored before the write.
//  Nothing changes when cs is low. rst_n is not used by this module and the
//  array has no reset value.
//
//  Parameters:
//    ADDR_SIZE   - width in bits of addr
//    DATA_SIZE   - width in bits of one word
//    DEPTH       - number of words (defaults to 2**ADDR_SIZE)
module hpdcache_regbank_wbyteenable_1rw
#(
    parameter int unsigned ADDR_SIZE = 0,
    parameter int unsigned DATA_SIZE = 0,
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                   clk,
    input  logic                   rst_n,
    input  logic                   cs,
    input  logic                   we,
    input  logic [ADDR_SIZE-1:0]   addr,
    input  logic [DATA_SIZE-1:0]   wdata,
    input  logic [DATA_SIZE/8-1:0] wbyteenable,
    output logic [DATA_SIZE-1:0]   rdata
);
    //  Number of byte lanes covered by wbyteenable
    localparam int unsigned NBYTES = DATA_SIZE/8;

    //  Storage array: DEPTH words of DATA_SIZE bits
    logic [DATA_SIZE-1:0] mem [DEPTH];

    //  Read and (byte-masked) write port
    always_ff @(posedge clk)
    begin : mem_update_ff
        if (cs) begin
            //  Registered read of the addressed word (pre-write content)
            rdata <= mem[addr];

            //  Byte-wise write of the enabled lanes
            if (we) begin
                for (int unsigned lane = 0; lane < NBYTES; lane++) begin
                    if (wbyteenable[lane]) begin
                        mem[addr][lane*8 +: 8] <= wdata[lane*8 +: 8];
                    end
                end
            end
        end
    end : mem_update_ff
endmodule : hpdcache_regbank_wbyteenable_1rw

View File

@ -0,0 +1,61 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : March, 2020
* Description : 1RW register bank with write bit mask
* History :
*/
//  Single-port (1RW) register bank with per-bit write mask.
//
//  On a clock edge with cs high, rdata is loaded with the word stored at
//  addr; when we is also high, the bits of that word selected by wmask are
//  replaced by the matching bits of wdata. Because both updates are
//  non-blocking, rdata gets the content stored before the write.
//  Nothing changes when cs is low. rst_n is not used by this module and the
//  array has no reset value.
//
//  Parameters:
//    ADDR_SIZE - width in bits of addr
//    DATA_SIZE - width in bits of one word (and of wmask)
//    DEPTH     - number of words (defaults to 2**ADDR_SIZE)
module hpdcache_regbank_wmask_1rw
#(
    parameter int unsigned ADDR_SIZE = 0,
    parameter int unsigned DATA_SIZE = 0,
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                 clk,
    input  logic                 rst_n,
    input  logic                 cs,
    input  logic                 we,
    input  logic [ADDR_SIZE-1:0] addr,
    input  logic [DATA_SIZE-1:0] wdata,
    input  logic [DATA_SIZE-1:0] wmask,
    output logic [DATA_SIZE-1:0] rdata
);
    //  Storage array: DEPTH words of DATA_SIZE bits
    logic [DATA_SIZE-1:0] mem [DEPTH];

    //  Read and (bit-masked) write port
    always_ff @(posedge clk)
    begin : mem_update_ff
        if (cs) begin
            //  Registered read of the addressed word (pre-write content)
            rdata <= mem[addr];

            //  Masked bits come from wdata, the others keep their value
            if (we) begin
                mem[addr] <= (wdata & wmask) | (~wmask & mem[addr]);
            end
        end
    end : mem_update_ff
endmodule : hpdcache_regbank_wmask_1rw

View File

@ -0,0 +1,121 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/**
* Author(s) : Cesar Fuguet
* Creation Date : April, 2021
* Description : Round-Robin Arbiter
* Based on design from
* http://www.rtlery.com/articles/how-design-round-robin-arbiter
* History :
*/
//  Round-robin arbiter with grant hold.
//
//  The last issued grant (gnt_q, one-hot) is rotated by one position to
//  find the requester with the highest priority for the next arbitration.
//  A thermometer mask built from that position hides the requesters of
//  lower index; if no masked request remains, arbitration wraps around and
//  falls back to plain lowest-index priority over all requests.
//  When a grant is issued while ready_i is low, the stored grant is
//  replayed on gnt_o until ready_i is seen high.
//
//  Parameters:
//    N        - number of requesters (must be greater than 0)
//  Ports:
//    clk_i    - clock
//    rst_ni   - asynchronous reset, active low
//    req_i    - request vector, one bit per requester
//    gnt_o    - grant vector
//    ready_i  - consumer accepts the current grant
module hpdcache_rrarb
    //  Parameters
    //  {{{
#(
    //  Number of requesters
    parameter int unsigned N = 0
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic         clk_i,
    input  logic         rst_ni,
    input  logic [N-1:0] req_i,
    output logic [N-1:0] gnt_o,
    input  logic         ready_i
);
    //  }}}

    //  Internal wires and registers
    //  {{{
    //  gnt_q     : last issued grant (one-hot); reset to the MSB so that
    //              requester 0 has the highest priority after reset
    //  nxt       : gnt_q rotated left by one position
    //  mask      : thermometer mask, set from the position of nxt upwards
    //  gnt_msk   : priority grant among the masked requests
    //  gnt_nomsk : priority grant among all requests (wrap-around case)
    //  gnt       : grant selected for the current cycle
    //  wait_q    : set while a stored grant is waiting for ready_i
    //  pending   : at least one request is asserted
    logic [N-1:0] gnt_q;
    logic [N-1:0] gnt;
    logic [N-1:0] nxt;
    logic [N-1:0] mask;
    logic [N-1:0] gnt_msk;
    logic [N-1:0] gnt_nomsk;
    logic         wait_q;
    logic         pending;
    //  }}}

    //  Elaboration-time assertions
    //  {{{
    //  pragma translate_off
    generate
        if (N == 0) $error("N must be greater than 0");
    endgenerate
    //  pragma translate_on
    //  }}}

    //  Rotate the last grant to get the next highest-priority position
    //  {{{
    generate
        if (N <= 1) begin : gen_nxt_1
            assign nxt = gnt_q[0];
        end else begin : gen_nxt_gt_1
            assign nxt = {gnt_q[N-2:0], gnt_q[N-1]};
        end
    endgenerate
    //  }}}

    //  Compute the thermometer mask vector: mask[k] = OR of nxt[k:0]
    //  {{{
    always_comb
    begin : mask_comb
        logic seen;

        seen = 1'b0;
        for (int unsigned k = 0; k < N; k++) begin
            seen    = seen | nxt[k];
            mask[k] = seen;
        end
    end
    //  }}}

    //  Compute the grant vector
    //  {{{
    hpdcache_prio_1hot_encoder #(
        .N     (N)
    ) prio_msk_i (
        .val_i (req_i & mask),
        .val_o (gnt_msk)
    );

    hpdcache_prio_1hot_encoder #(
        .N     (N)
    ) prio_nomsk_i (
        .val_i (req_i),
        .val_o (gnt_nomsk)
    );

    //  Masked grant wins; otherwise wrap around to the unmasked grant
    assign gnt = |gnt_msk ? gnt_msk : gnt_nomsk;
    //  }}}

    //  Compute the output grant vector
    //  {{{
    assign gnt_o = wait_q ? gnt_q : gnt;
    //  }}}

    //  Setting of internal state
    //  {{{
    assign pending = |req_i;

    //  Wait flag: a request is outstanding and the consumer is not ready
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : wait_ff
        if (!rst_ni) begin
            wait_q <= 1'b0;
        end else begin
            wait_q <= (wait_q | pending) & ~ready_i;
        end
    end

    //  Last grant: updated on every new arbitration (not while waiting)
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : gnt_ff
        if (!rst_ni) begin
            gnt_q <= {1'b1, {N-1{1'b0}}};
        end else if (pending && !wait_q) begin
            gnt_q <= gnt;
        end
    end
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
            $onehot0(gnt)) else $error("arbiter: granting more than one requester");
    gnt_q_exactly_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
            $onehot(gnt_q)) else $error("arbiter: grant state is not one-hot");
    //  pragma translate_on
    //  }}}
endmodule

View File

@ -0,0 +1,56 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : March, 2020
* Description : Wrapper for Behavioral SRAM macros
* History :
*/
//  Wrapper around the 1RW SRAM model (hpdcache_sram_1rw).
//
//  All parameters and ports are forwarded unchanged to the wrapped
//  instance (ram_i).
//
//  Parameters:
//    ADDR_SIZE - width in bits of addr
//    DATA_SIZE - width in bits of wdata / rdata
//    DEPTH     - number of words (defaults to 2**ADDR_SIZE)
module hpdcache_sram
#(
    parameter int unsigned ADDR_SIZE = 0,
    parameter int unsigned DATA_SIZE = 0,
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                 clk,
    input  logic                 rst_n,
    input  logic                 cs,
    input  logic                 we,
    input  logic [ADDR_SIZE-1:0] addr,
    input  logic [DATA_SIZE-1:0] wdata,
    output logic [DATA_SIZE-1:0] rdata
);

    hpdcache_sram_1rw #(
        .ADDR_SIZE (ADDR_SIZE),
        .DATA_SIZE (DATA_SIZE),
        .DEPTH     (DEPTH)
    ) ram_i (
        .clk   (clk),
        .rst_n (rst_n),
        .cs    (cs),
        .we    (we),
        .addr  (addr),
        .wdata (wdata),
        .rdata (rdata)
    );

endmodule : hpdcache_sram

View File

@ -0,0 +1,58 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : March, 2020
* Description : Wrapper for 1RW SRAM macros implementing a write byte enable
* History :
*/
//  Wrapper around the 1RW SRAM model with write byte enable
//  (hpdcache_sram_wbyteenable_1rw).
//
//  All parameters and ports are forwarded unchanged to the wrapped
//  instance (ram_i).
//
//  Parameters:
//    ADDR_SIZE - width in bits of addr
//    DATA_SIZE - width in bits of wdata / rdata
//    DEPTH     - number of words (defaults to 2**ADDR_SIZE)
module hpdcache_sram_wbyteenable
#(
    parameter int unsigned ADDR_SIZE = 0,
    parameter int unsigned DATA_SIZE = 0,
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                   clk,
    input  logic                   rst_n,
    input  logic                   cs,
    input  logic                   we,
    input  logic [ADDR_SIZE-1:0]   addr,
    input  logic [DATA_SIZE-1:0]   wdata,
    input  logic [DATA_SIZE/8-1:0] wbyteenable,
    output logic [DATA_SIZE-1:0]   rdata
);

    hpdcache_sram_wbyteenable_1rw #(
        .ADDR_SIZE (ADDR_SIZE),
        .DATA_SIZE (DATA_SIZE),
        .DEPTH     (DEPTH)
    ) ram_i (
        .clk         (clk),
        .rst_n       (rst_n),
        .cs          (cs),
        .we          (we),
        .addr        (addr),
        .wdata       (wdata),
        .wbyteenable (wbyteenable),
        .rdata       (rdata)
    );

endmodule : hpdcache_sram_wbyteenable

View File

@ -0,0 +1,58 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : March, 2020
* Description : Wrapper for 1RW SRAM macros implementing write bit mask
* History :
*/
//  Wrapper around the 1RW SRAM model with write bit mask
//  (hpdcache_sram_wmask_1rw).
//
//  All parameters and ports are forwarded unchanged to the wrapped
//  instance (ram_i).
//
//  Parameters:
//    ADDR_SIZE - width in bits of addr
//    DATA_SIZE - width in bits of wdata / wmask / rdata
//    DEPTH     - number of words (defaults to 2**ADDR_SIZE)
module hpdcache_sram_wmask
#(
    parameter int unsigned ADDR_SIZE = 0,
    parameter int unsigned DATA_SIZE = 0,
    parameter int unsigned DEPTH     = 2**ADDR_SIZE
)
(
    input  logic                 clk,
    input  logic                 rst_n,
    input  logic                 cs,
    input  logic                 we,
    input  logic [ADDR_SIZE-1:0] addr,
    input  logic [DATA_SIZE-1:0] wdata,
    input  logic [DATA_SIZE-1:0] wmask,
    output logic [DATA_SIZE-1:0] rdata
);

    hpdcache_sram_wmask_1rw #(
        .ADDR_SIZE (ADDR_SIZE),
        .DATA_SIZE (DATA_SIZE),
        .DEPTH     (DEPTH)
    ) ram_i (
        .clk   (clk),
        .rst_n (rst_n),
        .cs    (cs),
        .we    (we),
        .addr  (addr),
        .wdata (wdata),
        .wmask (wmask),
        .rdata (rdata)
    );

endmodule : hpdcache_sram_wmask

View File

@ -0,0 +1,89 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : October, 2023
* Description : Synchronization buffer
* History :
*/
//  Single-entry synchronization buffer with write/read handshakes.
//
//  Writer side : w_i requests a write of wdata_i, wok_o tells it can be
//                accepted.
//  Reader side : r_i consumes the current entry, rok_o tells an entry is
//                available on rdata_o.
//
//  FEEDTHROUGH = 0: the entry is always stored first; a write is accepted
//                   only when the buffer is empty and data is readable
//                   only from the buffer register.
//  FEEDTHROUGH = 1: when the buffer is empty, wdata_i is forwarded
//                   combinationally to rdata_o, and a write is also
//                   accepted while the buffer is full if the reader
//                   consumes the stored entry in the same cycle.
//
//  Parameters:
//    FEEDTHROUGH - enable the combinational write-to-read bypass
//    data_t      - type of the buffered data
module hpdcache_sync_buffer
    //  Parameters
    //  {{{
#(
    parameter bit  FEEDTHROUGH = 1'b0,
    parameter type data_t      = logic
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic  clk_i,
    input  logic  rst_ni,
    input  logic  w_i,
    output logic  wok_o,
    input  data_t wdata_i,
    input  logic  r_i,
    output logic  rok_o,
    output data_t rdata_o
);
    //  }}}

    //  Internal wires and registers
    //  {{{
    //  buf_q   : buffered entry
    //  buf_we  : load buf_q with wdata_i on the next clock edge
    //  valid_q : buf_q currently holds an entry
    data_t buf_q;
    logic  buf_we;
    logic  valid_q;
    logic  valid_d;
    //  }}}

    //  Handshake, buffer write enable and read data, per operating mode
    //  {{{
    generate
        if (FEEDTHROUGH) begin : gen_feedthrough
            //  Data is readable from the buffer or straight from the writer
            assign rok_o   = valid_q | w_i;
            //  Writable when empty, or when the entry is being consumed
            assign wok_o   = ~valid_q | r_i;
            //  Store only when the write is not consumed in the same cycle
            //  (empty and no read), or when it replaces the entry being read
            assign buf_we  = w_i & ~(valid_q ^ r_i);
            //  Bypass the buffer register while it is empty
            assign rdata_o = !valid_q ? wdata_i : buf_q;
        end else begin : gen_no_feedthrough
            assign rok_o   = valid_q;
            assign wok_o   = ~valid_q;
            assign buf_we  = w_i & ~valid_q;
            assign rdata_o = buf_q;
        end
    endgenerate
    //  }}}

    //  Next valid state: a new entry is stored, or the current one is kept
    //  {{{
    assign valid_d = buf_we | (valid_q & ~r_i);
    //  }}}

    //  Buffer register (no reset)
    //  {{{
    always_ff @(posedge clk_i)
    begin : buf_ff
        if (buf_we) begin
            buf_q <= wdata_i;
        end
    end
    //  }}}

    //  Valid flag
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : valid_ff
        if (!rst_ni) begin
            valid_q <= 1'b0;
        end else begin
            valid_q <= valid_d;
        end
    end
    //  }}}
endmodule

View File

@ -0,0 +1,60 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : March, 2020
* Description : SRAM behavioral model
* History :
*/
// Behavioral model of a single-port (1RW) synchronous memory.
//
// On a rising clock edge with cs high, rdata is loaded with the word stored
// at addr and, when we is also high, that word is overwritten with wdata.
// Both updates are non-blocking, so on a write cycle rdata receives the
// content stored before the write. Nothing changes when cs is low.
//
// Parameters:
//   ADDR_SIZE - width in bits of addr
//   DATA_SIZE - width in bits of one memory word
//   DEPTH     - number of words in the array (defaults to 2**ADDR_SIZE)
//
// Ports:
//   clk   - clock, all accesses happen on its rising edge
//   rst_n - not referenced in this model (the array has no reset value)
//   cs    - chip select, enables the access
//   we    - write enable (qualified by cs)
//   addr  - word address
//   wdata - write data
//   rdata - registered read data
module hpdcache_sram_1rw
#(
parameter int unsigned ADDR_SIZE = 0,
parameter int unsigned DATA_SIZE = 0,
parameter int unsigned DEPTH = 2**ADDR_SIZE
)
(
input logic clk,
input logic rst_n,
input logic cs,
input logic we,
input logic [ADDR_SIZE-1:0] addr,
input logic [DATA_SIZE-1:0] wdata,
output logic [DATA_SIZE-1:0] rdata
);
/*
* Internal memory array declaration
* (unpacked array of DEPTH words, each DATA_SIZE bits wide)
*/
typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
mem_t mem;
/*
* Process to update or read the memory array
*/
always_ff @(posedge clk)
begin : mem_update_ff
if (cs == 1'b1) begin
if (we == 1'b1) begin
// Full-word write
mem[addr] <= wdata;
end
// Registered read; also executed on write cycles, where it samples the
// word as it was before the write above takes effect
rdata <= mem[addr];
end
end : mem_update_ff
endmodule : hpdcache_sram_1rw

View File

@ -0,0 +1,63 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : March, 2020
* Description : Behavioral model of a 1RW SRAM with write byte enable
* History :
*/
// Behavioral model of a single-port (1RW) synchronous memory with a
// per-byte write enable.
//
// On a rising clock edge with cs high, rdata is loaded with the word stored
// at addr and, when we is also high, every byte of that word whose
// wbyteenable bit is set is overwritten with the matching byte of wdata.
// Both updates are non-blocking, so on a write cycle rdata receives the
// content stored before the write. Nothing changes when cs is low.
//
// Parameters:
//   ADDR_SIZE - width in bits of addr
//   DATA_SIZE - width in bits of one memory word
//   DEPTH     - number of words in the array (defaults to 2**ADDR_SIZE)
//
// Ports:
//   clk         - clock, all accesses happen on its rising edge
//   rst_n       - not referenced in this model (the array has no reset value)
//   cs          - chip select, enables the access
//   we          - write enable (qualified by cs)
//   addr        - word address
//   wdata       - write data
//   wbyteenable - one enable bit per byte of wdata (DATA_SIZE/8 bits)
//   rdata       - registered read data
module hpdcache_sram_wbyteenable_1rw
#(
parameter int unsigned ADDR_SIZE = 0,
parameter int unsigned DATA_SIZE = 0,
parameter int unsigned DEPTH = 2**ADDR_SIZE
)
(
input logic clk,
input logic rst_n,
input logic cs,
input logic we,
input logic [ADDR_SIZE-1:0] addr,
input logic [DATA_SIZE-1:0] wdata,
input logic [DATA_SIZE/8-1:0] wbyteenable,
output logic [DATA_SIZE-1:0] rdata
);
/*
* Internal memory array declaration
* (unpacked array of DEPTH words, each DATA_SIZE bits wide)
*/
typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
mem_t mem;
/*
* Process to update or read the memory array
*/
always_ff @(posedge clk)
begin : mem_update_ff
if (cs == 1'b1) begin
if (we == 1'b1) begin
// Byte-wise write: the loop covers DATA_SIZE/8 byte lanes, so any bits
// above the last full byte (DATA_SIZE not a multiple of 8) are never
// written
for (int i = 0; i < DATA_SIZE/8; i++) begin
if (wbyteenable[i]) mem[addr][i*8 +: 8] <= wdata[i*8 +: 8];
end
end
// Registered read; also executed on write cycles, where it samples the
// word as it was before the write above takes effect
rdata <= mem[addr];
end
end : mem_update_ff
endmodule : hpdcache_sram_wbyteenable_1rw

View File

@ -0,0 +1,61 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : March, 2020
* Description : Behavioral model of a 1RW SRAM with write bit mask
* History :
*/
// Behavioral model of a single-port (1RW) synchronous memory with a
// per-bit write mask.
//
// On a rising clock edge with cs high, rdata is loaded with the word stored
// at addr and, when we is also high, the bits of that word selected by
// wmask are overwritten with the matching bits of wdata.
// Both updates are non-blocking, so on a write cycle rdata receives the
// content stored before the write. Nothing changes when cs is low.
//
// Parameters:
//   ADDR_SIZE - width in bits of addr
//   DATA_SIZE - width in bits of one memory word
//   DEPTH     - number of words in the array (defaults to 2**ADDR_SIZE)
//
// Ports:
//   clk   - clock, all accesses happen on its rising edge
//   rst_n - not referenced in this model (the array has no reset value)
//   cs    - chip select, enables the access
//   we    - write enable (qualified by cs)
//   addr  - word address
//   wdata - write data
//   wmask - write mask, one bit per data bit (1: take wdata, 0: keep stored bit)
//   rdata - registered read data
module hpdcache_sram_wmask_1rw
#(
parameter int unsigned ADDR_SIZE = 0,
parameter int unsigned DATA_SIZE = 0,
parameter int unsigned DEPTH = 2**ADDR_SIZE
)
(
input logic clk,
input logic rst_n,
input logic cs,
input logic we,
input logic [ADDR_SIZE-1:0] addr,
input logic [DATA_SIZE-1:0] wdata,
input logic [DATA_SIZE-1:0] wmask,
output logic [DATA_SIZE-1:0] rdata
);
/*
* Internal memory array declaration
* (unpacked array of DEPTH words, each DATA_SIZE bits wide)
*/
typedef logic [DATA_SIZE-1:0] mem_t [DEPTH];
mem_t mem;
/*
* Process to update or read the memory array
*/
always_ff @(posedge clk)
begin : mem_update_ff
if (cs == 1'b1) begin
if (we == 1'b1) begin
// Read-modify-write: keep the stored bits where wmask is 0 and take the
// bits of wdata where wmask is 1
mem[addr] <= (mem[addr] & ~wmask) | (wdata & wmask);
end
// Registered read; also executed on write cycles, where it samples the
// word as it was before the write above takes effect
rdata <= mem[addr];
end
end : mem_update_ff
endmodule : hpdcache_sram_wmask_1rw

View File

@ -0,0 +1,658 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache top
* History :
*/
module hpdcache
import hpdcache_pkg::*;
// Parameters
// {{{
#(
parameter int NREQUESTERS = 1,
parameter int HPDcacheMemIdWidth = 8,
parameter int HPDcacheMemDataWidth = 512,
parameter type hpdcache_mem_req_t = logic,
parameter type hpdcache_mem_req_w_t = logic,
parameter type hpdcache_mem_resp_r_t = logic,
parameter type hpdcache_mem_resp_w_t = logic
)
// }}}
// Ports
// {{{
(
// Clock and reset signals
input logic clk_i,
input logic rst_ni,
// Force the write buffer to send all pending writes
input logic wbuf_flush_i,
// Core request interface
// 1st cycle
input logic core_req_valid_i [NREQUESTERS-1:0],
output logic core_req_ready_o [NREQUESTERS-1:0],
input hpdcache_req_t core_req_i [NREQUESTERS-1:0],
// 2nd cycle
input logic core_req_abort_i [NREQUESTERS-1:0],
input hpdcache_tag_t core_req_tag_i [NREQUESTERS-1:0],
input hpdcache_pma_t core_req_pma_i [NREQUESTERS-1:0],
// Core response interface
output logic core_rsp_valid_o [NREQUESTERS-1:0],
output hpdcache_rsp_t core_rsp_o [NREQUESTERS-1:0],
// Miss read interface
input logic mem_req_miss_read_ready_i,
output logic mem_req_miss_read_valid_o,
output hpdcache_mem_req_t mem_req_miss_read_o,
output logic mem_resp_miss_read_ready_o,
input logic mem_resp_miss_read_valid_i,
input hpdcache_mem_resp_r_t mem_resp_miss_read_i,
// Write-buffer write interface
input logic mem_req_wbuf_write_ready_i,
output logic mem_req_wbuf_write_valid_o,
output hpdcache_mem_req_t mem_req_wbuf_write_o,
input logic mem_req_wbuf_write_data_ready_i,
output logic mem_req_wbuf_write_data_valid_o,
output hpdcache_mem_req_w_t mem_req_wbuf_write_data_o,
output logic mem_resp_wbuf_write_ready_o,
input logic mem_resp_wbuf_write_valid_i,
input hpdcache_mem_resp_w_t mem_resp_wbuf_write_i,
// Uncached read interface
input logic mem_req_uc_read_ready_i,
output logic mem_req_uc_read_valid_o,
output hpdcache_mem_req_t mem_req_uc_read_o,
output logic mem_resp_uc_read_ready_o,
input logic mem_resp_uc_read_valid_i,
input hpdcache_mem_resp_r_t mem_resp_uc_read_i,
// Uncached write interface
input logic mem_req_uc_write_ready_i,
output logic mem_req_uc_write_valid_o,
output hpdcache_mem_req_t mem_req_uc_write_o,
input logic mem_req_uc_write_data_ready_i,
output logic mem_req_uc_write_data_valid_o,
output hpdcache_mem_req_w_t mem_req_uc_write_data_o,
output logic mem_resp_uc_write_ready_o,
input logic mem_resp_uc_write_valid_i,
input hpdcache_mem_resp_w_t mem_resp_uc_write_i,
// Performance events
output logic evt_cache_write_miss_o,
output logic evt_cache_read_miss_o,
output logic evt_uncached_req_o,
output logic evt_cmo_req_o,
output logic evt_write_req_o,
output logic evt_read_req_o,
output logic evt_prefetch_req_o,
output logic evt_req_on_hold_o,
output logic evt_rtab_rollback_o,
output logic evt_stall_refill_o,
output logic evt_stall_o,
// Status interface
output logic wbuf_empty_o,
// Configuration interface
input logic cfg_enable_i,
input wbuf_timecnt_t cfg_wbuf_threshold_i,
input logic cfg_wbuf_reset_timecnt_on_write_i,
input logic cfg_wbuf_sequential_waw_i,
input logic cfg_wbuf_inhibit_write_coalescing_i,
input logic cfg_prefetch_updt_plru_i,
input logic cfg_error_on_cacheable_amo_i,
input logic cfg_rtab_single_entry_i
);
// }}}
// Declaration of internal signals
// {{{
logic refill_req_valid;
logic refill_req_ready;
logic refill_busy;
logic refill_updt_plru;
hpdcache_set_t refill_set;
hpdcache_dir_entry_t refill_dir_entry;
hpdcache_way_vector_t refill_read_victim_way;
hpdcache_way_vector_t refill_write_victim_way;
logic refill_write_dir;
logic refill_write_data;
hpdcache_word_t refill_word;
hpdcache_refill_data_t refill_data;
logic refill_core_rsp_valid;
hpdcache_rsp_t refill_core_rsp;
hpdcache_nline_t refill_nline;
logic refill_updt_rtab;
logic miss_mshr_empty;
logic miss_mshr_check;
mshr_set_t miss_mshr_check_set;
mshr_tag_t miss_mshr_check_tag;
logic miss_mshr_hit;
logic miss_mshr_alloc_cs;
logic miss_mshr_alloc;
logic miss_mshr_alloc_ready;
logic miss_mshr_alloc_full;
hpdcache_nline_t miss_mshr_alloc_nline;
hpdcache_req_tid_t miss_mshr_alloc_tid;
hpdcache_req_sid_t miss_mshr_alloc_sid;
hpdcache_word_t miss_mshr_alloc_word;
logic miss_mshr_alloc_need_rsp;
logic miss_mshr_alloc_is_prefetch;
logic wbuf_flush_all;
logic wbuf_write;
logic wbuf_write_ready;
wbuf_addr_t wbuf_write_addr;
wbuf_data_t wbuf_write_data;
wbuf_be_t wbuf_write_be;
logic wbuf_write_uncacheable;
logic wbuf_read_hit;
logic wbuf_read_flush_hit;
hpdcache_req_addr_t wbuf_rtab_addr;
logic wbuf_rtab_is_read;
logic wbuf_rtab_hit_open;
logic wbuf_rtab_hit_pend;
logic wbuf_rtab_hit_sent;
logic wbuf_rtab_not_ready;
logic uc_ready;
logic uc_req_valid;
hpdcache_uc_op_t uc_req_op;
hpdcache_req_addr_t uc_req_addr;
hpdcache_req_size_t uc_req_size;
hpdcache_req_data_t uc_req_data;
hpdcache_req_be_t uc_req_be;
logic uc_req_uncacheable;
hpdcache_req_sid_t uc_req_sid;
hpdcache_req_tid_t uc_req_tid;
logic uc_req_need_rsp;
logic uc_wbuf_flush_all;
logic uc_dir_amo_match;
hpdcache_set_t uc_dir_amo_match_set;
hpdcache_tag_t uc_dir_amo_match_tag;
logic uc_dir_amo_update_plru;
hpdcache_way_vector_t uc_dir_amo_hit_way;
logic uc_data_amo_write;
logic uc_data_amo_write_enable;
hpdcache_set_t uc_data_amo_write_set;
hpdcache_req_size_t uc_data_amo_write_size;
hpdcache_word_t uc_data_amo_write_word;
logic [63:0] uc_data_amo_write_data;
logic [7:0] uc_data_amo_write_be;
logic uc_lrsc_snoop;
hpdcache_req_addr_t uc_lrsc_snoop_addr;
hpdcache_req_size_t uc_lrsc_snoop_size;
logic uc_core_rsp_ready;
logic uc_core_rsp_valid;
hpdcache_rsp_t uc_core_rsp;
logic cmo_req_valid;
logic cmo_ready;
hpdcache_cmoh_op_t cmo_req_op;
hpdcache_req_addr_t cmo_req_addr;
hpdcache_req_data_t cmo_req_wdata;
logic cmo_wbuf_flush_all;
logic cmo_dir_check;
hpdcache_set_t cmo_dir_check_set;
hpdcache_tag_t cmo_dir_check_tag;
hpdcache_way_vector_t cmo_dir_check_hit_way;
logic cmo_dir_inval;
hpdcache_set_t cmo_dir_inval_set;
hpdcache_way_vector_t cmo_dir_inval_way;
logic rtab_empty;
logic ctrl_empty;
logic core_rsp_valid;
hpdcache_rsp_t core_rsp;
logic arb_req_valid;
logic arb_req_ready;
hpdcache_req_t arb_req;
logic arb_abort;
hpdcache_tag_t arb_tag;
hpdcache_pma_t arb_pma;
localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_READ_ID = {HPDcacheMemIdWidth{1'b1}};
localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_WRITE_ID = {HPDcacheMemIdWidth{1'b1}};
// }}}
// Requesters arbiter
// {{{
hpdcache_core_arbiter #(
.NREQUESTERS (NREQUESTERS)
) core_req_arbiter_i (
.clk_i,
.rst_ni,
.core_req_valid_i,
.core_req_ready_o,
.core_req_i,
.core_req_abort_i,
.core_req_tag_i,
.core_req_pma_i,
.core_rsp_valid_i (core_rsp_valid),
.core_rsp_i (core_rsp),
.core_rsp_valid_o,
.core_rsp_o,
.arb_req_valid_o (arb_req_valid),
.arb_req_ready_i (arb_req_ready),
.arb_req_o (arb_req),
.arb_abort_o (arb_abort),
.arb_tag_o (arb_tag),
.arb_pma_o (arb_pma)
);
// }}}
// HPDcache controller
// {{{
hpdcache_ctrl hpdcache_ctrl_i(
.clk_i,
.rst_ni,
.core_req_valid_i (arb_req_valid),
.core_req_ready_o (arb_req_ready),
.core_req_i (arb_req),
.core_req_abort_i (arb_abort),
.core_req_tag_i (arb_tag),
.core_req_pma_i (arb_pma),
.core_rsp_valid_o (core_rsp_valid),
.core_rsp_o (core_rsp),
.wbuf_flush_i,
.cachedir_hit_o (/* unused */),
.miss_mshr_check_o (miss_mshr_check),
.miss_mshr_check_set_o (miss_mshr_check_set),
.miss_mshr_check_tag_o (miss_mshr_check_tag),
.miss_mshr_alloc_o (miss_mshr_alloc),
.miss_mshr_alloc_cs_o (miss_mshr_alloc_cs),
.miss_mshr_alloc_ready_i (miss_mshr_alloc_ready),
.miss_mshr_alloc_full_i (miss_mshr_alloc_full),
.miss_mshr_alloc_nline_o (miss_mshr_alloc_nline),
.miss_mshr_alloc_tid_o (miss_mshr_alloc_tid),
.miss_mshr_alloc_sid_o (miss_mshr_alloc_sid),
.miss_mshr_alloc_word_o (miss_mshr_alloc_word),
.miss_mshr_alloc_need_rsp_o (miss_mshr_alloc_need_rsp),
.miss_mshr_alloc_is_prefetch_o (miss_mshr_alloc_is_prefetch),
.miss_mshr_hit_i (miss_mshr_hit),
.refill_req_valid_i (refill_req_valid),
.refill_req_ready_o (refill_req_ready),
.refill_busy_i (refill_busy),
.refill_updt_plru_i (refill_updt_plru),
.refill_set_i (refill_set),
.refill_dir_entry_i (refill_dir_entry),
.refill_victim_way_o (refill_read_victim_way),
.refill_victim_way_i (refill_write_victim_way),
.refill_write_dir_i (refill_write_dir),
.refill_write_data_i (refill_write_data),
.refill_word_i (refill_word),
.refill_data_i (refill_data),
.refill_core_rsp_valid_i (refill_core_rsp_valid),
.refill_core_rsp_i (refill_core_rsp),
.refill_nline_i (refill_nline),
.refill_updt_rtab_i (refill_updt_rtab),
.wbuf_empty_i (wbuf_empty_o),
.wbuf_flush_all_o (wbuf_flush_all),
.wbuf_write_o (wbuf_write),
.wbuf_write_ready_i (wbuf_write_ready),
.wbuf_write_addr_o (wbuf_write_addr),
.wbuf_write_data_o (wbuf_write_data),
.wbuf_write_be_o (wbuf_write_be),
.wbuf_write_uncacheable_o (wbuf_write_uncacheable),
.wbuf_read_hit_i (wbuf_read_hit),
.wbuf_read_flush_hit_o (wbuf_read_flush_hit),
.wbuf_rtab_addr_o (wbuf_rtab_addr),
.wbuf_rtab_is_read_o (wbuf_rtab_is_read),
.wbuf_rtab_hit_open_i (wbuf_rtab_hit_open),
.wbuf_rtab_hit_pend_i (wbuf_rtab_hit_pend),
.wbuf_rtab_hit_sent_i (wbuf_rtab_hit_sent),
.wbuf_rtab_not_ready_i (wbuf_rtab_not_ready),
.uc_busy_i (~uc_ready),
.uc_lrsc_snoop_o (uc_lrsc_snoop),
.uc_lrsc_snoop_addr_o (uc_lrsc_snoop_addr),
.uc_lrsc_snoop_size_o (uc_lrsc_snoop_size),
.uc_req_valid_o (uc_req_valid),
.uc_req_op_o (uc_req_op),
.uc_req_addr_o (uc_req_addr),
.uc_req_size_o (uc_req_size),
.uc_req_data_o (uc_req_data),
.uc_req_be_o (uc_req_be),
.uc_req_uc_o (uc_req_uncacheable),
.uc_req_sid_o (uc_req_sid),
.uc_req_tid_o (uc_req_tid),
.uc_req_need_rsp_o (uc_req_need_rsp),
.uc_wbuf_flush_all_i (uc_wbuf_flush_all),
.uc_dir_amo_match_i (uc_dir_amo_match),
.uc_dir_amo_match_set_i (uc_dir_amo_match_set),
.uc_dir_amo_match_tag_i (uc_dir_amo_match_tag),
.uc_dir_amo_update_plru_i (uc_dir_amo_update_plru),
.uc_dir_amo_hit_way_o (uc_dir_amo_hit_way),
.uc_data_amo_write_i (uc_data_amo_write),
.uc_data_amo_write_enable_i (uc_data_amo_write_enable),
.uc_data_amo_write_set_i (uc_data_amo_write_set),
.uc_data_amo_write_size_i (uc_data_amo_write_size),
.uc_data_amo_write_word_i (uc_data_amo_write_word),
.uc_data_amo_write_data_i (uc_data_amo_write_data),
.uc_data_amo_write_be_i (uc_data_amo_write_be),
.uc_core_rsp_ready_o (uc_core_rsp_ready),
.uc_core_rsp_valid_i (uc_core_rsp_valid),
.uc_core_rsp_i (uc_core_rsp),
.cmo_busy_i (~cmo_ready),
.cmo_req_valid_o (cmo_req_valid),
.cmo_req_op_o (cmo_req_op),
.cmo_req_addr_o (cmo_req_addr),
.cmo_req_wdata_o (cmo_req_wdata),
.cmo_wbuf_flush_all_i (cmo_wbuf_flush_all),
.cmo_dir_check_i (cmo_dir_check),
.cmo_dir_check_set_i (cmo_dir_check_set),
.cmo_dir_check_tag_i (cmo_dir_check_tag),
.cmo_dir_check_hit_way_o (cmo_dir_check_hit_way),
.cmo_dir_inval_i (cmo_dir_inval),
.cmo_dir_inval_set_i (cmo_dir_inval_set),
.cmo_dir_inval_way_i (cmo_dir_inval_way),
.rtab_empty_o (rtab_empty),
.ctrl_empty_o (ctrl_empty),
.cfg_enable_i,
.cfg_rtab_single_entry_i,
.evt_cache_write_miss_o,
.evt_cache_read_miss_o,
.evt_uncached_req_o,
.evt_cmo_req_o,
.evt_write_req_o,
.evt_read_req_o,
.evt_prefetch_req_o,
.evt_req_on_hold_o,
.evt_rtab_rollback_o,
.evt_stall_refill_o,
.evt_stall_o
);
// }}}
// HPDcache write-buffer
// {{{
hpdcache_wbuf_wrapper #(
.HPDcacheMemIdWidth (HPDcacheMemIdWidth),
.HPDcacheMemDataWidth (HPDcacheMemDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t)
) hpdcache_wbuf_i(
.clk_i,
.rst_ni,
.empty_o (wbuf_empty_o),
.full_o (/* unused */),
.flush_all_i (wbuf_flush_all),
.cfg_threshold_i (cfg_wbuf_threshold_i),
.cfg_reset_timecnt_on_write_i (cfg_wbuf_reset_timecnt_on_write_i),
.cfg_sequential_waw_i (cfg_wbuf_sequential_waw_i),
.cfg_inhibit_write_coalescing_i (cfg_wbuf_inhibit_write_coalescing_i),
.write_i (wbuf_write),
.write_ready_o (wbuf_write_ready),
.write_addr_i (wbuf_write_addr),
.write_data_i (wbuf_write_data),
.write_be_i (wbuf_write_be),
.write_uc_i (wbuf_write_uncacheable),
.read_addr_i (wbuf_write_addr),
.read_hit_o (wbuf_read_hit),
.read_flush_hit_i (wbuf_read_flush_hit),
.replay_addr_i (wbuf_rtab_addr),
.replay_is_read_i (wbuf_rtab_is_read),
.replay_open_hit_o (wbuf_rtab_hit_open),
.replay_pend_hit_o (wbuf_rtab_hit_pend),
.replay_sent_hit_o (wbuf_rtab_hit_sent),
.replay_not_ready_o (wbuf_rtab_not_ready),
.mem_req_write_ready_i (mem_req_wbuf_write_ready_i),
.mem_req_write_valid_o (mem_req_wbuf_write_valid_o),
.mem_req_write_o (mem_req_wbuf_write_o),
.mem_req_write_data_ready_i (mem_req_wbuf_write_data_ready_i),
.mem_req_write_data_valid_o (mem_req_wbuf_write_data_valid_o),
.mem_req_write_data_o (mem_req_wbuf_write_data_o),
.mem_resp_write_ready_o (mem_resp_wbuf_write_ready_o),
.mem_resp_write_valid_i (mem_resp_wbuf_write_valid_i),
.mem_resp_write_i (mem_resp_wbuf_write_i)
);
// }}}
// Miss handler
// {{{
hpdcache_miss_handler #(
.HPDcacheMemIdWidth (HPDcacheMemIdWidth),
.HPDcacheMemDataWidth (HPDcacheMemDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t)
) hpdcache_miss_handler_i(
.clk_i,
.rst_ni,
.mshr_empty_o (miss_mshr_empty),
.mshr_full_o (/* unused */),
.cfg_prefetch_updt_plru_i,
.mshr_check_i (miss_mshr_check),
.mshr_check_set_i (miss_mshr_check_set),
.mshr_check_tag_i (miss_mshr_check_tag),
.mshr_check_hit_o (miss_mshr_hit),
.mshr_alloc_ready_o (miss_mshr_alloc_ready),
.mshr_alloc_i (miss_mshr_alloc),
.mshr_alloc_cs_i (miss_mshr_alloc_cs),
.mshr_alloc_full_o (miss_mshr_alloc_full),
.mshr_alloc_nline_i (miss_mshr_alloc_nline),
.mshr_alloc_tid_i (miss_mshr_alloc_tid),
.mshr_alloc_sid_i (miss_mshr_alloc_sid),
.mshr_alloc_word_i (miss_mshr_alloc_word),
.mshr_alloc_need_rsp_i (miss_mshr_alloc_need_rsp),
.mshr_alloc_is_prefetch_i (miss_mshr_alloc_is_prefetch),
.refill_req_ready_i (refill_req_ready),
.refill_req_valid_o (refill_req_valid),
.refill_busy_o (refill_busy),
.refill_updt_plru_o (refill_updt_plru),
.refill_set_o (refill_set),
.refill_dir_entry_o (refill_dir_entry),
.refill_victim_way_i (refill_read_victim_way),
.refill_write_dir_o (refill_write_dir),
.refill_write_data_o (refill_write_data),
.refill_victim_way_o (refill_write_victim_way),
.refill_data_o (refill_data),
.refill_word_o (refill_word),
.refill_nline_o (refill_nline),
.refill_updt_rtab_o (refill_updt_rtab),
.refill_core_rsp_valid_o (refill_core_rsp_valid),
.refill_core_rsp_o (refill_core_rsp),
.mem_req_ready_i (mem_req_miss_read_ready_i),
.mem_req_valid_o (mem_req_miss_read_valid_o),
.mem_req_o (mem_req_miss_read_o),
.mem_resp_ready_o (mem_resp_miss_read_ready_o),
.mem_resp_valid_i (mem_resp_miss_read_valid_i),
.mem_resp_i (mem_resp_miss_read_i)
);
// }}}
// Uncacheable request handler
// {{{
hpdcache_uncached #(
.HPDcacheMemIdWidth (HPDcacheMemIdWidth),
.HPDcacheMemDataWidth (HPDcacheMemDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t),
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t)
) hpdcache_uc_i(
.clk_i,
.rst_ni,
.wbuf_empty_i (wbuf_empty_o),
.mshr_empty_i (miss_mshr_empty),
.rtab_empty_i (rtab_empty),
.ctrl_empty_i (ctrl_empty),
.req_valid_i (uc_req_valid),
.req_ready_o (uc_ready),
.req_op_i (uc_req_op),
.req_addr_i (uc_req_addr),
.req_size_i (uc_req_size),
.req_data_i (uc_req_data),
.req_be_i (uc_req_be),
.req_uc_i (uc_req_uncacheable),
.req_sid_i (uc_req_sid),
.req_tid_i (uc_req_tid),
.req_need_rsp_i (uc_req_need_rsp),
.wbuf_flush_all_o (uc_wbuf_flush_all),
.dir_amo_match_o (uc_dir_amo_match),
.dir_amo_match_set_o (uc_dir_amo_match_set),
.dir_amo_match_tag_o (uc_dir_amo_match_tag),
.dir_amo_update_plru_o (uc_dir_amo_update_plru),
.dir_amo_hit_way_i (uc_dir_amo_hit_way),
.data_amo_write_o (uc_data_amo_write),
.data_amo_write_enable_o (uc_data_amo_write_enable),
.data_amo_write_set_o (uc_data_amo_write_set),
.data_amo_write_size_o (uc_data_amo_write_size),
.data_amo_write_word_o (uc_data_amo_write_word),
.data_amo_write_data_o (uc_data_amo_write_data),
.data_amo_write_be_o (uc_data_amo_write_be),
.lrsc_snoop_i (uc_lrsc_snoop),
.lrsc_snoop_addr_i (uc_lrsc_snoop_addr),
.lrsc_snoop_size_i (uc_lrsc_snoop_size),
.core_rsp_ready_i (uc_core_rsp_ready),
.core_rsp_valid_o (uc_core_rsp_valid),
.core_rsp_o (uc_core_rsp),
.mem_read_id_i (HPDCACHE_UC_READ_ID),
.mem_write_id_i (HPDCACHE_UC_WRITE_ID),
.mem_req_read_ready_i (mem_req_uc_read_ready_i),
.mem_req_read_valid_o (mem_req_uc_read_valid_o),
.mem_req_read_o (mem_req_uc_read_o),
.mem_resp_read_ready_o (mem_resp_uc_read_ready_o),
.mem_resp_read_valid_i (mem_resp_uc_read_valid_i),
.mem_resp_read_i (mem_resp_uc_read_i),
.mem_req_write_ready_i (mem_req_uc_write_ready_i),
.mem_req_write_valid_o (mem_req_uc_write_valid_o),
.mem_req_write_o (mem_req_uc_write_o),
.mem_req_write_data_ready_i (mem_req_uc_write_data_ready_i),
.mem_req_write_data_valid_o (mem_req_uc_write_data_valid_o),
.mem_req_write_data_o (mem_req_uc_write_data_o),
.mem_resp_write_ready_o (mem_resp_uc_write_ready_o),
.mem_resp_write_valid_i (mem_resp_uc_write_valid_i),
.mem_resp_write_i (mem_resp_uc_write_i),
.cfg_error_on_cacheable_amo_i
);
// CMO Request Handler
// {{{
hpdcache_cmo hpdcache_cmo_i(
.clk_i,
.rst_ni,
.wbuf_empty_i (wbuf_empty_o),
.mshr_empty_i (miss_mshr_empty),
.rtab_empty_i (rtab_empty),
.ctrl_empty_i (ctrl_empty),
.req_valid_i (cmo_req_valid),
.req_ready_o (cmo_ready),
.req_op_i (cmo_req_op),
.req_addr_i (cmo_req_addr),
.req_wdata_i (cmo_req_wdata),
.wbuf_flush_all_o (cmo_wbuf_flush_all),
.dir_check_o (cmo_dir_check),
.dir_check_set_o (cmo_dir_check_set),
.dir_check_tag_o (cmo_dir_check_tag),
.dir_check_hit_way_i (cmo_dir_check_hit_way),
.dir_inval_o (cmo_dir_inval),
.dir_inval_set_o (cmo_dir_inval_set),
.dir_inval_way_o (cmo_dir_inval_way)
);
// }}}
// Assertions
// {{{
// pragma translate_off
initial begin
req_access_width_assert:
assert (HPDCACHE_REQ_WORDS <= HPDCACHE_ACCESS_WORDS) else
$error("req data width shall be l.e. to cache access width");
refill_access_width_assert:
assert (HPDCACHE_CL_WORDS >= HPDCACHE_ACCESS_WORDS) else
$error("cache access width shall be l.e. to cache-line width");
miss_mem_id_width_assert:
assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_WAY_WIDTH + HPDCACHE_MSHR_SET_WIDTH)) else
$error("insufficient ID bits on the mem interface to transport misses");
wbuf_mem_id_width_assert:
assert (HPDcacheMemIdWidth >= HPDCACHE_WBUF_DIR_PTR_WIDTH) else
$error("insufficient ID bits on the mem interface to transport writes");
end
// pragma translate_on
// }}}
endmodule

View File

@ -0,0 +1,67 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : May, 2021
* Description : HPDcache AMO computing unit
* History :
*/
//  Combinational AMO compute unit.
//
//  Given the 64-bit value read from memory (ld_data_i) and the 64-bit operand
//  supplied by the requester (st_data_i), this module produces on result_o the
//  value selected by the one-hot operation flags in op_i:
//    - LR               : the loaded value, unchanged
//    - SC / SWAP        : the store operand, unchanged
//    - ADD              : 64-bit wrap-around sum of both operands
//    - AND / OR / XOR   : bitwise combination of both operands
//    - MAX / MIN        : signed (two's complement) maximum / minimum
//    - MAXU / MINU      : unsigned maximum / minimum
//  When no operation flag is set, result_o is driven to zero.
module hpdcache_amo
import hpdcache_pkg::*;
    //  Ports
    //  {{{
(
    input  logic [63:0]     ld_data_i,
    input  logic [63:0]     st_data_i,
    input  hpdcache_uc_op_t op_i,
    output logic [63:0]     result_o
);
    //  }}}

    //  Comparison flags: "loaded value is strictly greater than store operand"
    //  {{{
    logic ld_gt_st_unsigned;
    logic ld_gt_st_signed;

    assign ld_gt_st_unsigned = (ld_data_i > st_data_i);
    assign ld_gt_st_signed   = ($signed(ld_data_i) > $signed(st_data_i));
    //  }}}

    //  Candidate results
    //  {{{
    logic [63:0] add_result;
    logic [63:0] max_signed, min_signed;
    logic [63:0] max_unsigned, min_unsigned;

    //  The addition is truncated to 64 bits, hence the operand signedness does
    //  not change the resulting bit pattern
    assign add_result   = ld_data_i + st_data_i;

    //  On equality both operands hold the same value, so either one can be
    //  forwarded
    assign max_signed   = ld_gt_st_signed   ? ld_data_i : st_data_i;
    assign min_signed   = ld_gt_st_signed   ? st_data_i : ld_data_i;
    assign max_unsigned = ld_gt_st_unsigned ? ld_data_i : st_data_i;
    assign min_unsigned = ld_gt_st_unsigned ? st_data_i : ld_data_i;
    //  }}}

    //  Result selection (the operation flags are expected to be one-hot)
    //  {{{
    always_comb
    begin : amo_compute_comb
        unique case (1'b1)
            op_i.is_amo_lr:   result_o = ld_data_i;
            op_i.is_amo_sc:   result_o = st_data_i;
            op_i.is_amo_swap: result_o = st_data_i;
            op_i.is_amo_add:  result_o = add_result;
            op_i.is_amo_and:  result_o = ld_data_i & st_data_i;
            op_i.is_amo_or:   result_o = ld_data_i | st_data_i;
            op_i.is_amo_xor:  result_o = ld_data_i ^ st_data_i;
            op_i.is_amo_max:  result_o = max_signed;
            op_i.is_amo_maxu: result_o = max_unsigned;
            op_i.is_amo_min:  result_o = min_signed;
            op_i.is_amo_minu: result_o = min_unsigned;
            default:          result_o = '0;
        endcase
    end
    //  }}}
endmodule

View File

@ -0,0 +1,250 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : July, 2021
* Description : HPDcache Cache-Management-Operation Handler
* History :
*/
// Cache-Management-Operation (CMO) handler.
//
// A small FSM that accepts one CMO request at a time (req_ready_o is high only
// while the FSM is in CMOH_IDLE) and executes it:
// - fence: asserts wbuf_flush_all_o whenever rtab_empty_i is high, and
// completes once both wbuf_empty_i and rtab_empty_i are high.
// - invalidate (by nline, by set, or all): waits until mshr_empty_i,
// rtab_empty_i and ctrl_empty_i are all high, then drives the cache
// directory check/invalidate interface.
module hpdcache_cmo
import hpdcache_pkg::*;
// Ports
// {{{
(
input logic clk_i,
// Active-low asynchronous reset (only the FSM state register is reset)
input logic rst_ni,
// Global control signals
// {{{
// "empty" status flags used to decide when an operation may start/complete
input logic wbuf_empty_i,
input logic mshr_empty_i,
input logic rtab_empty_i,
input logic ctrl_empty_i,
// }}}
// Request interface
// {{{
input logic req_valid_i,
// High only when the FSM is in CMOH_IDLE
output logic req_ready_o,
// Operation flags (checked to be one-hot by the assertion at the end of the module)
input hpdcache_cmoh_op_t req_op_i,
// Address of the request; captured when an invalidation is accepted
input hpdcache_req_addr_t req_addr_i,
// Only word 0 is used: its low HPDCACHE_WAYS bits give the way mask
input hpdcache_req_data_t req_wdata_i,
// }}}
// Write Buffer Interface
// {{{
// Asserted during a fence while rtab_empty_i is high
output logic wbuf_flush_all_o,
// }}}
// Cache Directory Interface
// {{{
// Directory lookup (asserted in CMOH_INVAL_CHECK_NLINE); set/tag come from the captured address
output logic dir_check_o,
output hpdcache_set_t dir_check_set_o,
output hpdcache_tag_t dir_check_tag_o,
// Hit vector of the lookup, read in CMOH_INVAL_SET (the state following the check)
input hpdcache_way_vector_t dir_check_hit_way_i,
// Invalidation command: target set and way mask
output logic dir_inval_o,
output hpdcache_set_t dir_inval_set_o,
output hpdcache_way_vector_t dir_inval_way_o
// }}}
);
// }}}
// Definition of constants and types
// {{{
// FSM states:
// CMOH_IDLE: ready to accept a new request
// CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY: fence pending until wbuf_empty_i and rtab_empty_i are both high
// CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY: invalidation pending until mshr_empty_i, rtab_empty_i and
// ctrl_empty_i are all high
// CMOH_INVAL_CHECK_NLINE: one-cycle directory lookup for an invalidation by nline
// CMOH_INVAL_SET: issue the directory invalidation command(s)
// NOTE(review): the enum has no explicit base type, so it takes the language default (int).
typedef enum {
CMOH_IDLE,
CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY,
CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY,
CMOH_INVAL_CHECK_NLINE,
CMOH_INVAL_SET
} hpdcache_cmoh_fsm_t;
// }}}
// Internal signals and registers
// {{{
// *_q is the registered value, *_d the next value computed by the FSM
hpdcache_cmoh_fsm_t cmoh_fsm_q, cmoh_fsm_d;
// Operation, address and way mask captured when an invalidation is accepted
hpdcache_cmoh_op_t cmoh_op_q, cmoh_op_d;
hpdcache_req_addr_t cmoh_addr_q, cmoh_addr_d;
hpdcache_way_vector_t cmoh_way_q, cmoh_way_d;
// Set counter used to walk all the sets for "invalidate all"
hpdcache_set_t cmoh_set_cnt_q, cmoh_set_cnt_d;
// Fields extracted from the captured address
hpdcache_nline_t cmoh_nline_q;
hpdcache_tag_t cmoh_tag_q;
hpdcache_set_t cmoh_set_q;
// Least significant word of the request write data
hpdcache_data_word_t cmoh_wdata;
// }}}
// CMO request handler FSM
// {{{
// Split the captured address: drop the offset bits to get the line number,
// whose low bits are the set index and whose high bits are the tag
assign cmoh_nline_q = cmoh_addr_q[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH],
cmoh_set_q = cmoh_nline_q[0 +: HPDCACHE_SET_WIDTH],
cmoh_tag_q = cmoh_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];
assign dir_check_set_o = cmoh_set_q,
dir_check_tag_o = cmoh_tag_q;
assign req_ready_o = (cmoh_fsm_q == CMOH_IDLE);
// Only the least significant word of the write data contains parameters
// for the CMO handler
assign cmoh_wdata = req_wdata_i[0];
always_comb
begin : cmoh_fsm_comb
// Default assignments: registers hold their value, all commands are
// de-asserted, and the invalidation set defaults to the set of the
// captured address. The states below override these as needed.
cmoh_op_d = cmoh_op_q;
cmoh_addr_d = cmoh_addr_q;
cmoh_way_d = cmoh_way_q;
cmoh_set_cnt_d = cmoh_set_cnt_q;
dir_check_o = 1'b0;
dir_inval_o = 1'b0;
dir_inval_set_o = cmoh_set_q;
dir_inval_way_o = '0;
wbuf_flush_all_o = 1'b0;
cmoh_fsm_d = cmoh_fsm_q;
case (cmoh_fsm_q)
CMOH_IDLE: begin
cmoh_fsm_d = CMOH_IDLE;
if (req_valid_i) begin
unique case (1'b1)
req_op_i.is_fence: begin
// request to the write buffer to send all open entries
wbuf_flush_all_o = rtab_empty_i;
// then wait for the write buffer to be empty
// (if both are already empty, the fence completes without leaving CMOH_IDLE)
if (!rtab_empty_i || !wbuf_empty_i) begin
cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY;
end
end
req_op_i.is_inval_by_nline,
req_op_i.is_inval_by_set,
req_op_i.is_inval_all: begin
// Capture the request parameters and restart the set counter
cmoh_op_d = req_op_i;
cmoh_addr_d = req_addr_i;
cmoh_way_d = cmoh_wdata[0 +: HPDCACHE_WAYS];
cmoh_set_cnt_d = 0;
// Start right away when the MSHR, the replay table and the
// controller report empty; otherwise wait for them
if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
if (req_op_i.is_inval_by_nline) begin
cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE;
end else begin
cmoh_fsm_d = CMOH_INVAL_SET;
end
end else begin
cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY;
end
end
default: begin
// pragma translate_off
$error("cmo handler: unexpected operation");
// pragma translate_on
end
endcase
end
end
CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY: begin
// Keep requesting the write buffer flush while the replay table is empty,
// and return to idle once both the write buffer and the replay table are empty
wbuf_flush_all_o = rtab_empty_i;
if (wbuf_empty_i && rtab_empty_i) begin
cmoh_fsm_d = CMOH_IDLE;
end else begin
cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY;
end
end
CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY: begin
// Same start condition as in CMOH_IDLE, but using the captured operation
cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY;
if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
if (cmoh_op_q.is_inval_by_nline) begin
cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE;
end else begin
cmoh_fsm_d = CMOH_INVAL_SET;
end
end
end
CMOH_INVAL_CHECK_NLINE: begin
// Look up the captured set/tag in the directory; the hit vector is
// consumed in the next state
dir_check_o = 1'b1;
cmoh_fsm_d = CMOH_INVAL_SET;
end
CMOH_INVAL_SET: begin
cmoh_fsm_d = CMOH_INVAL_SET;
case (1'b1)
cmoh_op_q.is_inval_by_nline: begin
// Invalidate only if the lookup hit, and only the hitting way(s)
dir_inval_o = |dir_check_hit_way_i;
dir_inval_way_o = dir_check_hit_way_i;
cmoh_fsm_d = CMOH_IDLE;
end
cmoh_op_q.is_inval_all: begin
// Invalidate all the ways of one set per cycle, from set 0 up to
// set HPDCACHE_SETS - 1
dir_inval_o = 1'b1;
dir_inval_way_o = {HPDCACHE_WAYS{1'b1}};
dir_inval_set_o = cmoh_set_cnt_q;
cmoh_set_cnt_d = cmoh_set_cnt_q + 1;
if (cmoh_set_cnt_q == hpdcache_set_t'(HPDCACHE_SETS - 1)) begin
cmoh_fsm_d = CMOH_IDLE;
end
end
cmoh_op_q.is_inval_by_set: begin
// Invalidate the captured way mask in the set of the captured address
dir_inval_o = 1'b1;
dir_inval_way_o = cmoh_way_q;
cmoh_fsm_d = CMOH_IDLE;
end
endcase
end
endcase
end
// }}}
// CMO request handler set state
// {{{
// FSM state register: reset to CMOH_IDLE
always_ff @(posedge clk_i or negedge rst_ni)
begin
if (!rst_ni) begin
cmoh_fsm_q <= CMOH_IDLE;
end else begin
cmoh_fsm_q <= cmoh_fsm_d;
end
end
// Data registers: not reset. They are loaded in CMOH_IDLE when an
// invalidation request is accepted, before the states that read them
always_ff @(posedge clk_i)
begin
cmoh_op_q <= cmoh_op_d;
cmoh_addr_q <= cmoh_addr_d;
cmoh_way_q <= cmoh_way_d;
cmoh_set_cnt_q <= cmoh_set_cnt_d;
end
// }}}
// Assertions
// {{{
// pragma translate_off
// A valid request must select exactly one operation
assert property (@(posedge clk_i) disable iff (!rst_ni)
req_valid_i -> $onehot(req_op_i)) else
$error("cmo_handler: more than one operation type requested");
// A valid request must only be presented while the handler is idle
assert property (@(posedge clk_i) disable iff (!rst_ni)
req_valid_i -> (cmoh_fsm_q == CMOH_IDLE)) else
$error("cmo_handler: new request received while busy");
// pragma translate_on
// }}}
endmodule

View File

@ -0,0 +1,171 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : September, 2023
* Description : HPDcache request arbiter
* History :
*/
//  Arbiter between the requesters of the cache.
//
//  Request path: the valid signals of the NREQUESTERS requesters go through
//  the hpdcache_fxarb instance, and the resulting grant vector selects the
//  request forwarded on arb_req_o. The grant vector is registered so that the
//  "2nd cycle" inputs (abort, tag, PMA) of the requester granted in the
//  previous cycle are the ones forwarded on arb_abort_o/arb_tag_o/arb_pma_o.
//
//  Response path: the single response is broadcast to all the requesters, and
//  its valid is raised only for the requester whose index equals
//  core_rsp_i.sid.
module hpdcache_core_arbiter
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    parameter int NREQUESTERS = 1
)
    //  }}}
    //  Ports
    //  {{{
(
    //      Clock and reset signals
    input  logic                  clk_i,
    input  logic                  rst_ni,

    //      Core request interface
    //         1st cycle
    input  logic                  core_req_valid_i [NREQUESTERS-1:0],
    output logic                  core_req_ready_o [NREQUESTERS-1:0],
    input  hpdcache_req_t         core_req_i       [NREQUESTERS-1:0],
    //         2nd cycle
    input  logic                  core_req_abort_i [NREQUESTERS-1:0],
    input  hpdcache_tag_t         core_req_tag_i   [NREQUESTERS-1:0],
    input  hpdcache_pma_t         core_req_pma_i   [NREQUESTERS-1:0],

    //      Core response interface
    input  logic                  core_rsp_valid_i,
    input  hpdcache_rsp_t         core_rsp_i,
    output logic                  core_rsp_valid_o [NREQUESTERS-1:0],
    output hpdcache_rsp_t         core_rsp_o       [NREQUESTERS-1:0],

    //      Granted request
    output logic                  arb_req_valid_o,
    input  logic                  arb_req_ready_i,
    output hpdcache_req_t         arb_req_o,
    output logic                  arb_abort_o,
    output hpdcache_tag_t         arb_tag_o,
    output hpdcache_pma_t         arb_pma_o
);
    //  }}}

    //  Declaration of internal signals
    //  {{{
    //      Packed copies of the unpacked request ports
    logic          [NREQUESTERS-1:0] core_req_valid;
    hpdcache_req_t [NREQUESTERS-1:0] core_req;
    logic          [NREQUESTERS-1:0] core_req_abort;
    hpdcache_tag_t [NREQUESTERS-1:0] core_req_tag;
    hpdcache_pma_t [NREQUESTERS-1:0] core_req_pma;

    //      Grant vector of the current cycle (_d) and of the previous cycle (_q)
    logic          [NREQUESTERS-1:0] arb_req_gnt_d;
    logic          [NREQUESTERS-1:0] arb_req_gnt_q;
    //  }}}

    //  Requesters arbiter
    //  {{{
    generate
        //  Pack the request ports and build the per-requester ready
        for (genvar gen_i = 0; gen_i < NREQUESTERS; gen_i++) begin : gen_core_req
            //  1st cycle signals
            assign core_req_valid[gen_i]   = core_req_valid_i[gen_i];
            assign core_req[gen_i]         = core_req_i[gen_i];

            //  2nd cycle signals
            assign core_req_abort[gen_i]   = core_req_abort_i[gen_i];
            assign core_req_tag[gen_i]     = core_req_tag_i[gen_i];
            assign core_req_pma[gen_i]     = core_req_pma_i[gen_i];

            //  A requester is ready when it is granted and the target is ready
            assign core_req_ready_o[gen_i] = arb_req_gnt_d[gen_i] & arb_req_ready_i;
        end
    endgenerate

    //      Arbiter
    hpdcache_fxarb #(
        .N              (NREQUESTERS)
    ) req_arbiter_i (
        .clk_i,
        .rst_ni,
        .req_i          (core_req_valid),
        .gnt_o          (arb_req_gnt_d),
        .ready_i        (arb_req_ready_i)
    );

    //      A request is forwarded as soon as one requester is granted
    assign arb_req_valid_o = |arb_req_gnt_d;

    //      Keep the grant vector for the 2nd cycle signals (abort, tag, PMA)
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : arb_req_gnt_ff
        if (!rst_ni) begin
            arb_req_gnt_q <= '0;
        end else begin
            arb_req_gnt_q <= arb_req_gnt_d;
        end
    end

    //      Request multiplexor (selected by the grant of the current cycle)
    hpdcache_mux #(
        .NINPUT         (NREQUESTERS),
        .DATA_WIDTH     ($bits(hpdcache_req_t)),
        .ONE_HOT_SEL    (1'b1)
    ) core_req_mux_i (
        .data_i         (core_req),
        .sel_i          (arb_req_gnt_d),
        .data_o         (arb_req_o)
    );

    //      Request abort multiplexor (selected by the grant of the previous cycle)
    hpdcache_mux #(
        .NINPUT         (NREQUESTERS),
        .DATA_WIDTH     (1),
        .ONE_HOT_SEL    (1'b1)
    ) core_req_abort_mux_i (
        .data_i         (core_req_abort),
        .sel_i          (arb_req_gnt_q),
        .data_o         (arb_abort_o)
    );

    //      Tag multiplexor (selected by the grant of the previous cycle)
    hpdcache_mux #(
        .NINPUT         (NREQUESTERS),
        .DATA_WIDTH     ($bits(hpdcache_tag_t)),
        .ONE_HOT_SEL    (1'b1)
    ) core_req_tag_mux_i (
        .data_i         (core_req_tag),
        .sel_i          (arb_req_gnt_q),
        .data_o         (arb_tag_o)
    );

    //      PMA multiplexor (selected by the grant of the previous cycle)
    hpdcache_mux #(
        .NINPUT         (NREQUESTERS),
        .DATA_WIDTH     ($bits(hpdcache_pma_t)),
        .ONE_HOT_SEL    (1'b1)
    ) core_req_pma_mux_i (
        .data_i         (core_req_pma),
        .sel_i          (arb_req_gnt_q),
        .data_o         (arb_pma_o)
    );
    //  }}}

    //  Response demultiplexor
    //  {{{
    generate
        for (genvar gen_i = 0; gen_i < NREQUESTERS; gen_i++) begin : gen_core_rsp
            //  The response payload is broadcast; only the valid is steered,
            //  using the source identifier carried by the response
            assign core_rsp_valid_o[gen_i] = core_rsp_valid_i &&
                                             (gen_i == int'(core_rsp_i.sid));
            assign core_rsp_o[gen_i]       = core_rsp_i;
        end
    endgenerate
    //  }}}
endmodule

View File

@ -0,0 +1,760 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache controller
* History :
*/
module hpdcache_ctrl
// Package imports
// {{{
import hpdcache_pkg::*;
// }}}
// Ports
// {{{
(
input logic clk_i,
input logic rst_ni,
// Core request interface
input logic core_req_valid_i,
output logic core_req_ready_o,
input hpdcache_req_t core_req_i,
input logic core_req_abort_i,
input hpdcache_tag_t core_req_tag_i,
input hpdcache_pma_t core_req_pma_i,
// Core response interface
output logic core_rsp_valid_o,
output hpdcache_rsp_t core_rsp_o,
// Force the write buffer to send all pending writes
input logic wbuf_flush_i,
// Global control signals
output logic cachedir_hit_o,
// Miss handler interface
output logic miss_mshr_check_o,
output mshr_set_t miss_mshr_check_set_o,
output mshr_tag_t miss_mshr_check_tag_o,
output logic miss_mshr_alloc_o,
output logic miss_mshr_alloc_cs_o,
input logic miss_mshr_alloc_ready_i,
input logic miss_mshr_alloc_full_i,
output hpdcache_nline_t miss_mshr_alloc_nline_o,
output hpdcache_req_tid_t miss_mshr_alloc_tid_o,
output hpdcache_req_sid_t miss_mshr_alloc_sid_o,
output hpdcache_word_t miss_mshr_alloc_word_o,
output logic miss_mshr_alloc_need_rsp_o,
output logic miss_mshr_alloc_is_prefetch_o,
input logic miss_mshr_hit_i,
// Refill interface
input logic refill_req_valid_i,
output logic refill_req_ready_o,
input logic refill_busy_i,
input logic refill_updt_plru_i,
input hpdcache_set_t refill_set_i,
input hpdcache_dir_entry_t refill_dir_entry_i,
output hpdcache_way_vector_t refill_victim_way_o,
input hpdcache_way_vector_t refill_victim_way_i,
input logic refill_write_dir_i,
input logic refill_write_data_i,
input hpdcache_word_t refill_word_i,
input hpdcache_refill_data_t refill_data_i,
input logic refill_core_rsp_valid_i,
input hpdcache_rsp_t refill_core_rsp_i,
input hpdcache_nline_t refill_nline_i,
input logic refill_updt_rtab_i,
// Write buffer interface
input logic wbuf_empty_i,
output logic wbuf_flush_all_o,
output logic wbuf_write_o,
input logic wbuf_write_ready_i,
output wbuf_addr_t wbuf_write_addr_o,
output wbuf_data_t wbuf_write_data_o,
output wbuf_be_t wbuf_write_be_o,
output logic wbuf_write_uncacheable_o,
input logic wbuf_read_hit_i,
output logic wbuf_read_flush_hit_o,
output hpdcache_req_addr_t wbuf_rtab_addr_o,
output logic wbuf_rtab_is_read_o,
input logic wbuf_rtab_hit_open_i,
input logic wbuf_rtab_hit_pend_i,
input logic wbuf_rtab_hit_sent_i,
input logic wbuf_rtab_not_ready_i,
// Uncacheable request handler
input logic uc_busy_i,
output logic uc_lrsc_snoop_o,
output hpdcache_req_addr_t uc_lrsc_snoop_addr_o,
output hpdcache_req_size_t uc_lrsc_snoop_size_o,
output logic uc_req_valid_o,
output hpdcache_uc_op_t uc_req_op_o,
output hpdcache_req_addr_t uc_req_addr_o,
output hpdcache_req_size_t uc_req_size_o,
output hpdcache_req_data_t uc_req_data_o,
output hpdcache_req_be_t uc_req_be_o,
output logic uc_req_uc_o,
output hpdcache_req_sid_t uc_req_sid_o,
output hpdcache_req_tid_t uc_req_tid_o,
output logic uc_req_need_rsp_o,
input logic uc_wbuf_flush_all_i,
input logic uc_dir_amo_match_i,
input hpdcache_set_t uc_dir_amo_match_set_i,
input hpdcache_tag_t uc_dir_amo_match_tag_i,
input logic uc_dir_amo_update_plru_i,
output hpdcache_way_vector_t uc_dir_amo_hit_way_o,
input logic uc_data_amo_write_i,
input logic uc_data_amo_write_enable_i,
input hpdcache_set_t uc_data_amo_write_set_i,
input hpdcache_req_size_t uc_data_amo_write_size_i,
input hpdcache_word_t uc_data_amo_write_word_i,
input logic [63:0] uc_data_amo_write_data_i,
input logic [7:0] uc_data_amo_write_be_i,
output logic uc_core_rsp_ready_o,
input logic uc_core_rsp_valid_i,
input hpdcache_rsp_t uc_core_rsp_i,
// Cache Management Operation (CMO)
input logic cmo_busy_i,
output logic cmo_req_valid_o,
output hpdcache_cmoh_op_t cmo_req_op_o,
output hpdcache_req_addr_t cmo_req_addr_o,
output hpdcache_req_data_t cmo_req_wdata_o,
input logic cmo_wbuf_flush_all_i,
input logic cmo_dir_check_i,
input hpdcache_set_t cmo_dir_check_set_i,
input hpdcache_tag_t cmo_dir_check_tag_i,
output hpdcache_way_vector_t cmo_dir_check_hit_way_o,
input logic cmo_dir_inval_i,
input hpdcache_set_t cmo_dir_inval_set_i,
input hpdcache_way_vector_t cmo_dir_inval_way_i,
output logic rtab_empty_o,
output logic ctrl_empty_o,
// Configuration signals
input logic cfg_enable_i,
input logic cfg_rtab_single_entry_i,
// Performance events
output logic evt_cache_write_miss_o,
output logic evt_cache_read_miss_o,
output logic evt_uncached_req_o,
output logic evt_cmo_req_o,
output logic evt_write_req_o,
output logic evt_read_req_o,
output logic evt_prefetch_req_o,
output logic evt_req_on_hold_o,
output logic evt_rtab_rollback_o,
output logic evt_stall_refill_o,
output logic evt_stall_o
);
// }}}
// Definition of internal registers
// {{{
logic st1_req_valid_q, st1_req_valid_d;
hpdcache_req_t st1_req_q;
logic st1_req_rtab_q;
rtab_ptr_t st1_rtab_pop_try_ptr_q;
logic st2_req_valid_q, st2_req_valid_d;
logic st2_req_is_prefetch_q, st2_req_is_prefetch_d;
logic st2_req_need_rsp_q;
hpdcache_req_addr_t st2_req_addr_q;
hpdcache_req_sid_t st2_req_sid_q;
hpdcache_req_tid_t st2_req_tid_q;
// }}}
// Definition of internal signals
// {{{
logic [1:0] st0_arb_req;
logic [1:0] st0_arb_req_grant;
logic st0_arb_ready;
logic st0_req_ready;
logic st0_req_valid;
hpdcache_req_t st0_req;
logic st0_req_is_uncacheable;
logic st0_req_is_load;
logic st0_req_is_store;
logic st0_req_is_amo;
logic st0_req_is_cmo_fence;
logic st0_req_is_cmo_inval;
logic st0_req_is_cmo_prefetch;
logic st0_req_cachedir_read;
logic st0_req_cachedata_read;
hpdcache_set_t st0_req_set;
hpdcache_word_t st0_req_word;
logic st0_rtab_pop_try_valid;
logic st0_rtab_pop_try_ready;
hpdcache_req_t st0_rtab_pop_try_req;
logic st0_rtab_pop_try_sel;
rtab_ptr_t st0_rtab_pop_try_ptr;
logic st1_rsp_valid;
logic st1_rsp_aborted;
hpdcache_req_t st1_req;
logic st1_req_abort;
logic st1_req_cachedata_write;
logic st1_req_cachedata_write_enable;
hpdcache_pma_t st1_req_pma;
hpdcache_tag_t st1_req_tag;
hpdcache_set_t st1_req_set;
hpdcache_word_t st1_req_word;
hpdcache_nline_t st1_req_nline;
hpdcache_req_addr_t st1_req_addr;
logic st1_req_updt_lru;
logic st1_req_is_uncacheable;
logic st1_req_is_load;
logic st1_req_is_store;
logic st1_req_is_amo;
logic st1_req_is_amo_lr;
logic st1_req_is_amo_sc;
logic st1_req_is_amo_swap;
logic st1_req_is_amo_add;
logic st1_req_is_amo_and;
logic st1_req_is_amo_or;
logic st1_req_is_amo_xor;
logic st1_req_is_amo_max;
logic st1_req_is_amo_maxu;
logic st1_req_is_amo_min;
logic st1_req_is_amo_minu;
logic st1_req_is_cmo_inval;
logic st1_req_is_cmo_fence;
logic st1_req_is_cmo_prefetch;
hpdcache_way_vector_t st1_dir_hit;
hpdcache_req_data_t st1_read_data;
logic st1_rtab_alloc;
logic st1_rtab_alloc_and_link;
logic st1_rtab_pop_try_commit;
logic st1_rtab_pop_try_rback;
logic st1_rtab_mshr_hit;
logic st1_rtab_mshr_full;
logic st1_rtab_mshr_ready;
logic st1_rtab_wbuf_hit;
logic st1_rtab_wbuf_not_ready;
logic st1_rtab_check;
logic st1_rtab_check_hit;
logic st2_req_we;
hpdcache_word_t st2_req_word;
logic rtab_full;
logic hpdcache_init_ready;
// }}}
// Decoding of the request
// {{{
// Select between request in the replay table or a new core requests
assign st0_req_valid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_valid
: core_req_valid_i,
st0_req.addr_offset = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_offset
: core_req_i.addr_offset,
st0_req.addr_tag = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_tag
: core_req_i.addr_tag,
st0_req.wdata = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.wdata
: core_req_i.wdata,
st0_req.op = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.op
: core_req_i.op,
st0_req.be = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.be
: core_req_i.be,
st0_req.size = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.size
: core_req_i.size,
st0_req.sid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.sid
: core_req_i.sid,
st0_req.tid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.tid
: core_req_i.tid,
st0_req.need_rsp = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.need_rsp
: core_req_i.need_rsp,
st0_req.phys_indexed = st0_rtab_pop_try_sel ? 1'b1
: core_req_i.phys_indexed,
st0_req.pma = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.pma
: core_req_i.pma;
// Decode operation in stage 0
assign st0_req_is_uncacheable = ~cfg_enable_i | ( st0_req.phys_indexed
& st0_req.pma.uncacheable),
st0_req_is_load = is_load(st0_req.op),
st0_req_is_store = is_store(st0_req.op),
st0_req_is_amo = is_amo(st0_req.op),
st0_req_is_cmo_fence = is_cmo_fence(st0_req.op, st0_req.size),
st0_req_is_cmo_inval = is_cmo_inval(st0_req.op, st0_req.size),
st0_req_is_cmo_prefetch = is_cmo_prefetch(st0_req.op, st0_req.size);
// Decode operation in stage 1
// In case of replay or physically-indexed cache, the tag and PMA come
// from stage 0. Otherwise, this information comes directly from the
// requester in stage 1
assign st1_req_tag = st1_req_q.phys_indexed ? st1_req_q.addr_tag : core_req_tag_i,
st1_req_pma = st1_req_q.phys_indexed ? st1_req_q.pma : core_req_pma_i;
assign st1_req.addr_offset = st1_req_q.addr_offset,
st1_req.addr_tag = st1_req_rtab_q ? st1_req_q.addr_tag : st1_req_tag,
st1_req.wdata = st1_req_q.wdata,
st1_req.op = st1_req_q.op,
st1_req.be = st1_req_q.be,
st1_req.size = st1_req_q.size,
st1_req.sid = st1_req_q.sid,
st1_req.tid = st1_req_q.tid,
st1_req.need_rsp = st1_req_q.need_rsp,
st1_req.phys_indexed = st1_req_q.phys_indexed,
st1_req.pma = st1_req_rtab_q ? st1_req_q.pma : st1_req_pma;
// A requester can ask to abort a request it initiated on the
// previous cycle (stage 0). Useful in case of TLB miss for example
assign st1_req_abort = core_req_abort_i & ~st1_req.phys_indexed;
assign st1_req_is_uncacheable = ~cfg_enable_i | st1_req.pma.uncacheable,
st1_req_is_load = is_load(st1_req.op),
st1_req_is_store = is_store(st1_req.op),
st1_req_is_amo = is_amo(st1_req.op),
st1_req_is_amo_lr = is_amo_lr(st1_req.op),
st1_req_is_amo_sc = is_amo_sc(st1_req.op),
st1_req_is_amo_swap = is_amo_swap(st1_req.op),
st1_req_is_amo_add = is_amo_add(st1_req.op),
st1_req_is_amo_and = is_amo_and(st1_req.op),
st1_req_is_amo_or = is_amo_or(st1_req.op),
st1_req_is_amo_xor = is_amo_xor(st1_req.op),
st1_req_is_amo_max = is_amo_max(st1_req.op),
st1_req_is_amo_maxu = is_amo_maxu(st1_req.op),
st1_req_is_amo_min = is_amo_min(st1_req.op),
st1_req_is_amo_minu = is_amo_minu(st1_req.op),
st1_req_is_cmo_inval = is_cmo_inval(st1_req.op, st1_req.size),
st1_req_is_cmo_fence = is_cmo_fence(st1_req.op, st1_req.size),
st1_req_is_cmo_prefetch = is_cmo_prefetch(st1_req.op, st1_req.size);
// }}}
// Refill arbiter: it arbitrates between normal requests (from the core,
// coprocessor, prefetch) and refill requests (from the miss handler).
//
// TODO This arbiter could be replaced by a weighted-round-robin arbiter.
// This way we could distribute the bandwidth asymmetrically to the core
// and the refill interfaces.
// {{{
hpdcache_rrarb #(.N(2)) st0_arb_i
(
.clk_i,
.rst_ni,
.req_i (st0_arb_req),
.gnt_o (st0_arb_req_grant),
.ready_i (st0_arb_ready)
);
// The arbiter can cycle the priority token when:
// - The granted request is consumed (req_grant & req_valid & req_ready)
// - The granted request is aborted (req_grant & ~req_valid)
assign st0_arb_ready = ((st0_arb_req_grant[0] & st0_req_valid & st0_req_ready ) |
(st0_arb_req_grant[1] & refill_req_valid_i & refill_req_ready_o) |
(st0_arb_req_grant[0] & ~st0_req_valid ) |
(st0_arb_req_grant[1] & ~refill_req_valid_i));
assign st0_arb_req[0] = st0_req_valid,
st0_arb_req[1] = refill_req_valid_i;
assign core_req_ready_o = st0_req_ready & ~st0_rtab_pop_try_sel,
st0_rtab_pop_try_ready = st0_req_ready & st0_rtab_pop_try_sel;
// Trigger an event signal when the pipeline is stalled (new request is not consumed)
assign evt_stall_o = core_req_valid_i & ~core_req_ready_o;
// }}}
// Cache controller protocol engine
// {{{
hpdcache_ctrl_pe hpdcache_ctrl_pe_i(
.arb_st0_req_valid_i (st0_req_valid & st0_arb_req_grant[0]),
.arb_st0_req_ready_o (st0_req_ready),
.arb_refill_valid_i (refill_req_valid_i & st0_arb_req_grant[1]),
.arb_refill_ready_o (refill_req_ready_o),
.st0_req_is_uncacheable_i (st0_req_is_uncacheable),
.st0_req_need_rsp_i (st0_req.need_rsp),
.st0_req_is_load_i (st0_req_is_load),
.st0_req_is_store_i (st0_req_is_store),
.st0_req_is_amo_i (st0_req_is_amo),
.st0_req_is_cmo_fence_i (st0_req_is_cmo_fence),
.st0_req_is_cmo_inval_i (st0_req_is_cmo_inval),
.st0_req_is_cmo_prefetch_i (st0_req_is_cmo_prefetch),
.st0_req_mshr_check_o (miss_mshr_check_o),
.st0_req_cachedir_read_o (st0_req_cachedir_read),
.st0_req_cachedata_read_o (st0_req_cachedata_read),
.st1_req_valid_i (st1_req_valid_q),
.st1_req_abort_i (st1_req_abort),
.st1_req_rtab_i (st1_req_rtab_q),
.st1_req_is_uncacheable_i (st1_req_is_uncacheable),
.st1_req_need_rsp_i (st1_req.need_rsp),
.st1_req_is_load_i (st1_req_is_load),
.st1_req_is_store_i (st1_req_is_store),
.st1_req_is_amo_i (st1_req_is_amo),
.st1_req_is_cmo_inval_i (st1_req_is_cmo_inval),
.st1_req_is_cmo_fence_i (st1_req_is_cmo_fence),
.st1_req_is_cmo_prefetch_i (st1_req_is_cmo_prefetch),
.st1_req_valid_o (st1_req_valid_d),
.st1_rsp_valid_o (st1_rsp_valid),
.st1_rsp_aborted_o (st1_rsp_aborted),
.st1_req_cachedir_updt_lru_o (st1_req_updt_lru),
.st1_req_cachedata_write_o (st1_req_cachedata_write),
.st1_req_cachedata_write_enable_o (st1_req_cachedata_write_enable),
.st2_req_valid_i (st2_req_valid_q),
.st2_req_is_prefetch_i (st2_req_is_prefetch_q),
.st2_req_valid_o (st2_req_valid_d),
.st2_req_we_o (st2_req_we),
.st2_req_is_prefetch_o (st2_req_is_prefetch_d),
.st2_req_mshr_alloc_o (miss_mshr_alloc_o),
.st2_req_mshr_alloc_cs_o (miss_mshr_alloc_cs_o),
.rtab_full_i (rtab_full),
.rtab_req_valid_i (st0_rtab_pop_try_valid),
.rtab_sel_o (st0_rtab_pop_try_sel),
.rtab_check_o (st1_rtab_check),
.rtab_check_hit_i (st1_rtab_check_hit),
.st1_rtab_alloc_o (st1_rtab_alloc),
.st1_rtab_alloc_and_link_o (st1_rtab_alloc_and_link),
.st1_rtab_commit_o (st1_rtab_pop_try_commit),
.st1_rtab_rback_o (st1_rtab_pop_try_rback),
.st1_rtab_mshr_hit_o (st1_rtab_mshr_hit),
.st1_rtab_mshr_full_o (st1_rtab_mshr_full),
.st1_rtab_mshr_ready_o (st1_rtab_mshr_ready),
.st1_rtab_wbuf_hit_o (st1_rtab_wbuf_hit),
.st1_rtab_wbuf_not_ready_o (st1_rtab_wbuf_not_ready),
.cachedir_hit_i (cachedir_hit_o),
.cachedir_init_ready_i (hpdcache_init_ready),
.mshr_alloc_ready_i (miss_mshr_alloc_ready_i),
.mshr_hit_i (miss_mshr_hit_i),
.mshr_full_i (miss_mshr_alloc_full_i),
.refill_busy_i,
.refill_core_rsp_valid_i,
.wbuf_write_valid_o (wbuf_write_o),
.wbuf_write_ready_i,
.wbuf_read_hit_i,
.wbuf_write_uncacheable_o,
.wbuf_read_flush_hit_o,
.uc_busy_i,
.uc_req_valid_o,
.uc_core_rsp_ready_o,
.cmo_busy_i,
.cmo_req_valid_o,
.evt_cache_write_miss_o,
.evt_cache_read_miss_o,
.evt_uncached_req_o,
.evt_cmo_req_o,
.evt_write_req_o,
.evt_read_req_o,
.evt_prefetch_req_o,
.evt_req_on_hold_o,
.evt_rtab_rollback_o,
.evt_stall_refill_o
);
assign ctrl_empty_o = ~(st1_req_valid_q | st2_req_valid_q);
// }}}
// Replay table
// {{{
hpdcache_rtab #(
.rtab_entry_t (hpdcache_req_t)
) hpdcache_rtab_i(
.clk_i,
.rst_ni,
.empty_o (rtab_empty_o),
.full_o (rtab_full),
.check_i (st1_rtab_check),
.check_nline_i (st1_req_nline),
.check_hit_o (st1_rtab_check_hit),
.alloc_i (st1_rtab_alloc),
.alloc_and_link_i (st1_rtab_alloc_and_link),
.alloc_req_i (st1_req),
.alloc_mshr_hit_i (st1_rtab_mshr_hit),
.alloc_mshr_full_i (st1_rtab_mshr_full),
.alloc_mshr_ready_i (st1_rtab_mshr_ready),
.alloc_wbuf_hit_i (st1_rtab_wbuf_hit),
.alloc_wbuf_not_ready_i (st1_rtab_wbuf_not_ready),
.pop_try_valid_o (st0_rtab_pop_try_valid),
.pop_try_i (st0_rtab_pop_try_ready),
.pop_try_req_o (st0_rtab_pop_try_req),
.pop_try_ptr_o (st0_rtab_pop_try_ptr),
.pop_commit_i (st1_rtab_pop_try_commit),
.pop_commit_ptr_i (st1_rtab_pop_try_ptr_q),
.pop_rback_i (st1_rtab_pop_try_rback),
.pop_rback_ptr_i (st1_rtab_pop_try_ptr_q),
.pop_rback_mshr_hit_i (st1_rtab_mshr_hit),
.pop_rback_mshr_full_i (st1_rtab_mshr_full),
.pop_rback_mshr_ready_i (st1_rtab_mshr_ready),
.pop_rback_wbuf_hit_i (st1_rtab_wbuf_hit),
.pop_rback_wbuf_not_ready_i (st1_rtab_wbuf_not_ready),
.wbuf_addr_o (wbuf_rtab_addr_o),
.wbuf_is_read_o (wbuf_rtab_is_read_o),
.wbuf_hit_open_i (wbuf_rtab_hit_open_i),
.wbuf_hit_pend_i (wbuf_rtab_hit_pend_i),
.wbuf_hit_sent_i (wbuf_rtab_hit_sent_i),
.wbuf_not_ready_i (wbuf_rtab_not_ready_i),
.miss_ready_i (miss_mshr_alloc_ready_i),
.refill_i (refill_updt_rtab_i),
.refill_nline_i,
.cfg_single_entry_i (cfg_rtab_single_entry_i)
);
// }}}
// Pipeline stage 1 registers
// {{{
always_ff @(posedge clk_i)
begin : st1_req_payload_ff
if (st0_req_ready) begin
st1_req_q <= st0_req;
end
end
always_ff @(posedge clk_i or negedge rst_ni)
begin : st1_req_valid_ff
if (!rst_ni) begin
st1_req_valid_q <= 1'b0;
st1_req_rtab_q <= 1'b0;
st1_rtab_pop_try_ptr_q <= '0;
end else begin
st1_req_valid_q <= st1_req_valid_d;
if (st0_req_ready) begin
st1_req_rtab_q <= st0_rtab_pop_try_sel;
if (st0_rtab_pop_try_sel) begin
st1_rtab_pop_try_ptr_q <= st0_rtab_pop_try_ptr;
end
end
end
end
// }}}
// Pipeline stage 2 registers
// {{{
always_ff @(posedge clk_i)
begin : st2_req_payload_ff
if (st2_req_we) begin
st2_req_need_rsp_q <= st1_req.need_rsp;
st2_req_addr_q <= st1_req_addr;
st2_req_sid_q <= st1_req.sid;
st2_req_tid_q <= st1_req.tid;
end
end
always_ff @(posedge clk_i or negedge rst_ni)
begin : st2_req_valid_ff
if (!rst_ni) begin
st2_req_valid_q <= 1'b0;
st2_req_is_prefetch_q <= 1'b0;
end else begin
st2_req_valid_q <= st2_req_valid_d;
st2_req_is_prefetch_q <= st2_req_is_prefetch_d;
end
end
// }}}
// Controller for the HPDcache directory and data memory arrays
// {{{
assign st0_req_set = hpdcache_get_req_offset_set(st0_req.addr_offset),
st0_req_word = hpdcache_get_req_offset_word(st0_req.addr_offset),
st1_req_set = hpdcache_get_req_offset_set(st1_req.addr_offset),
st1_req_word = hpdcache_get_req_offset_word(st1_req.addr_offset),
st1_req_addr = {st1_req.addr_tag, st1_req.addr_offset},
st1_req_nline = hpdcache_get_req_addr_nline(st1_req_addr),
st2_req_word = hpdcache_get_req_addr_word(st2_req_addr_q);
hpdcache_memctrl hpdcache_memctrl_i (
.clk_i,
.rst_ni,
.ready_o (hpdcache_init_ready),
.dir_match_i (st0_req_cachedir_read),
.dir_match_set_i (st0_req_set),
.dir_match_tag_i (st1_req.addr_tag),
.dir_update_lru_i (st1_req_updt_lru),
.dir_hit_way_o (st1_dir_hit),
.dir_amo_match_i (uc_dir_amo_match_i),
.dir_amo_match_set_i (uc_dir_amo_match_set_i),
.dir_amo_match_tag_i (uc_dir_amo_match_tag_i),
.dir_amo_update_plru_i (uc_dir_amo_update_plru_i),
.dir_amo_hit_way_o (uc_dir_amo_hit_way_o),
.dir_refill_i (refill_write_dir_i),
.dir_refill_set_i (refill_set_i),
.dir_refill_entry_i (refill_dir_entry_i),
.dir_refill_updt_plru_i (refill_updt_plru_i),
.dir_victim_way_o (refill_victim_way_o),
.dir_cmo_check_i (cmo_dir_check_i),
.dir_cmo_check_set_i (cmo_dir_check_set_i),
.dir_cmo_check_tag_i (cmo_dir_check_tag_i),
.dir_cmo_check_hit_way_o (cmo_dir_check_hit_way_o),
.dir_cmo_inval_i (cmo_dir_inval_i),
.dir_cmo_inval_set_i (cmo_dir_inval_set_i),
.dir_cmo_inval_way_i (cmo_dir_inval_way_i),
.data_req_read_i (st0_req_cachedata_read),
.data_req_read_set_i (st0_req_set),
.data_req_read_size_i (st0_req.size),
.data_req_read_word_i (st0_req_word),
.data_req_read_data_o (st1_read_data),
.data_req_write_i (st1_req_cachedata_write),
.data_req_write_enable_i (st1_req_cachedata_write_enable),
.data_req_write_set_i (st1_req_set),
.data_req_write_size_i (st1_req.size),
.data_req_write_word_i (st1_req_word),
.data_req_write_data_i (st1_req.wdata),
.data_req_write_be_i (st1_req.be),
.data_amo_write_i (uc_data_amo_write_i),
.data_amo_write_enable_i (uc_data_amo_write_enable_i),
.data_amo_write_set_i (uc_data_amo_write_set_i),
.data_amo_write_size_i (uc_data_amo_write_size_i),
.data_amo_write_word_i (uc_data_amo_write_word_i),
.data_amo_write_data_i (uc_data_amo_write_data_i),
.data_amo_write_be_i (uc_data_amo_write_be_i),
.data_refill_i (refill_write_data_i),
.data_refill_way_i (refill_victim_way_i),
.data_refill_set_i (refill_set_i),
.data_refill_word_i (refill_word_i),
.data_refill_data_i (refill_data_i)
);
assign cachedir_hit_o = |st1_dir_hit;
// }}}
// Write buffer outputs
// {{{
assign wbuf_write_addr_o = st1_req_addr,
wbuf_write_data_o = st1_req.wdata,
wbuf_write_be_o = st1_req.be,
wbuf_flush_all_o = cmo_wbuf_flush_all_i | uc_wbuf_flush_all_i | wbuf_flush_i;
// }}}
// Miss handler outputs
// {{{
assign miss_mshr_check_set_o =
st0_req.addr_offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_MSHR_SET_WIDTH];
assign miss_mshr_check_tag_o =
st1_req_nline[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH];
assign miss_mshr_alloc_nline_o = hpdcache_get_req_addr_nline(st2_req_addr_q),
miss_mshr_alloc_tid_o = st2_req_tid_q,
miss_mshr_alloc_sid_o = st2_req_sid_q,
miss_mshr_alloc_word_o = st2_req_word,
miss_mshr_alloc_need_rsp_o = st2_req_need_rsp_q,
miss_mshr_alloc_is_prefetch_o = st2_req_is_prefetch_q;
// }}}
// Uncacheable request handler outputs
// {{{
assign uc_lrsc_snoop_o = st1_req_valid_q & st1_req_is_store,
uc_lrsc_snoop_addr_o = st1_req_addr,
uc_lrsc_snoop_size_o = st1_req.size,
uc_req_addr_o = st1_req_addr,
uc_req_size_o = st1_req.size,
uc_req_data_o = st1_req.wdata,
uc_req_be_o = st1_req.be,
uc_req_uc_o = st1_req_is_uncacheable,
uc_req_sid_o = st1_req.sid,
uc_req_tid_o = st1_req.tid,
uc_req_need_rsp_o = st1_req.need_rsp,
uc_req_op_o.is_ld = st1_req_is_load,
uc_req_op_o.is_st = st1_req_is_store,
uc_req_op_o.is_amo_lr = st1_req_is_amo_lr,
uc_req_op_o.is_amo_sc = st1_req_is_amo_sc,
uc_req_op_o.is_amo_swap = st1_req_is_amo_swap,
uc_req_op_o.is_amo_add = st1_req_is_amo_add,
uc_req_op_o.is_amo_and = st1_req_is_amo_and,
uc_req_op_o.is_amo_or = st1_req_is_amo_or,
uc_req_op_o.is_amo_xor = st1_req_is_amo_xor,
uc_req_op_o.is_amo_max = st1_req_is_amo_max,
uc_req_op_o.is_amo_maxu = st1_req_is_amo_maxu,
uc_req_op_o.is_amo_min = st1_req_is_amo_min,
uc_req_op_o.is_amo_minu = st1_req_is_amo_minu;
// }}}
// CMO request handler outputs
// {{{
assign cmo_req_addr_o = st1_req_addr,
cmo_req_wdata_o = st1_req.wdata,
cmo_req_op_o.is_fence = st1_req_is_cmo_fence,
cmo_req_op_o.is_inval_by_nline = st1_req_is_cmo_inval &
is_cmo_inval_by_nline(st1_req.size),
cmo_req_op_o.is_inval_by_set = st1_req_is_cmo_inval &
is_cmo_inval_by_set(st1_req.size),
cmo_req_op_o.is_inval_all = st1_req_is_cmo_inval &
is_cmo_inval_all(st1_req.size);
// }}}
// Control of the response to the core
// {{{
assign core_rsp_valid_o = refill_core_rsp_valid_i |
(uc_core_rsp_valid_i & uc_core_rsp_ready_o) |
st1_rsp_valid,
core_rsp_o.rdata = (refill_core_rsp_valid_i ? refill_core_rsp_i.rdata :
(uc_core_rsp_valid_i ? uc_core_rsp_i.rdata :
st1_read_data)),
core_rsp_o.sid = (refill_core_rsp_valid_i ? refill_core_rsp_i.sid :
(uc_core_rsp_valid_i ? uc_core_rsp_i.sid :
st1_req.sid)),
core_rsp_o.tid = (refill_core_rsp_valid_i ? refill_core_rsp_i.tid :
(uc_core_rsp_valid_i ? uc_core_rsp_i.tid :
st1_req.tid)),
core_rsp_o.error = (refill_core_rsp_valid_i ? refill_core_rsp_i.error :
(uc_core_rsp_valid_i ? uc_core_rsp_i.error :
/* FIXME */1'b0)),
core_rsp_o.aborted = st1_rsp_aborted;
// }}}
// Assertions
// pragma translate_off
// {{{
assert property (@(posedge clk_i) disable iff (!rst_ni)
$onehot0({core_req_ready_o, st0_rtab_pop_try_ready, refill_req_ready_o})) else
$error("ctrl: only one request can be served per cycle");
// }}}
// pragma translate_on
endmodule

View File

@ -0,0 +1,620 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache Control Protocol Engine
* History :
*/
module hpdcache_ctrl_pe
// Control protocol engine of the HPDcache controller pipeline.
//
// This module is purely combinational: it has no clock or reset port and is
// made only of continuous assignments and a single always_comb process.
//
// From the status of the arbitrated stage 0 request, of stages 1 and 2, and
// of the surrounding units (cache directory, MSHR, write buffer, replay
// table (RTAB), refill, uncacheable and CMO handlers), it computes:
// - the ready signals returned to the request/refill arbiter,
// - the next value of the valid bits of stages 1 and 2,
// - the read/write/update commands of the cache directory and data arrays,
// - the RTAB check, allocation, commit and rollback commands,
// - the request valid signals towards the write buffer, the MSHR, the
// uncacheable request handler and the CMO handler,
// - the performance event signals.
// Ports
// {{{
(
// Refill arbiter
// {{{
// The *_valid_i inputs are the requests as seen by this module; the
// *_ready_o outputs tell whether the corresponding request is consumed in
// the current cycle.
input logic arb_st0_req_valid_i,
output logic arb_st0_req_ready_o,
input logic arb_refill_valid_i,
output logic arb_refill_ready_o,
// }}}
// Pipeline stage 0
// {{{
// Attributes of the request currently presented in stage 0, and the lookup
// commands (MSHR check, directory read, data read) issued for it.
input logic st0_req_is_uncacheable_i,
input logic st0_req_need_rsp_i,
input logic st0_req_is_load_i,
input logic st0_req_is_store_i,
input logic st0_req_is_amo_i,
input logic st0_req_is_cmo_fence_i,
input logic st0_req_is_cmo_inval_i,
input logic st0_req_is_cmo_prefetch_i,
output logic st0_req_mshr_check_o,
output logic st0_req_cachedir_read_o,
output logic st0_req_cachedata_read_o,
// }}}
// Pipeline stage 1
// {{{
// st1_req_rtab_i is set when the request in stage 1 was selected from the
// replay table (it is used below to choose between RTAB allocation and
// RTAB rollback/commit).
input logic st1_req_valid_i,
input logic st1_req_abort_i,
input logic st1_req_rtab_i,
input logic st1_req_is_uncacheable_i,
input logic st1_req_need_rsp_i,
input logic st1_req_is_load_i,
input logic st1_req_is_store_i,
input logic st1_req_is_amo_i,
input logic st1_req_is_cmo_inval_i,
input logic st1_req_is_cmo_fence_i,
input logic st1_req_is_cmo_prefetch_i,
output logic st1_req_valid_o,
output logic st1_rsp_valid_o,
output logic st1_rsp_aborted_o,
output logic st1_req_cachedir_updt_lru_o,
output logic st1_req_cachedata_write_o,
output logic st1_req_cachedata_write_enable_o,
// }}}
// Pipeline stage 2
// {{{
// Stage 2 only holds load/prefetch misses waiting for their MSHR
// allocation (see the always_comb process below).
input logic st2_req_valid_i,
input logic st2_req_is_prefetch_i,
output logic st2_req_valid_o,
output logic st2_req_we_o,
output logic st2_req_is_prefetch_o,
output logic st2_req_mshr_alloc_o,
output logic st2_req_mshr_alloc_cs_o,
// }}}
// Replay
// {{{
input logic rtab_full_i,
input logic rtab_req_valid_i,
output logic rtab_sel_o,
output logic rtab_check_o,
input logic rtab_check_hit_i,
output logic st1_rtab_alloc_o,
output logic st1_rtab_alloc_and_link_o,
output logic st1_rtab_commit_o,
output logic st1_rtab_rback_o,
output logic st1_rtab_mshr_hit_o,
output logic st1_rtab_mshr_full_o,
output logic st1_rtab_mshr_ready_o,
output logic st1_rtab_wbuf_hit_o,
output logic st1_rtab_wbuf_not_ready_o,
// }}}
// Cache directory
// {{{
input logic cachedir_hit_i,
input logic cachedir_init_ready_i,
// }}}
// Miss Status Holding Register (MSHR)
// {{{
input logic mshr_alloc_ready_i,
input logic mshr_hit_i,
input logic mshr_full_i,
// }}}
// Refill interface
// {{{
input logic refill_busy_i,
input logic refill_core_rsp_valid_i,
// }}}
// Write buffer
// {{{
input logic wbuf_write_ready_i,
input logic wbuf_read_hit_i,
output logic wbuf_write_valid_o,
output logic wbuf_write_uncacheable_o,
output logic wbuf_read_flush_hit_o,
// }}}
// Uncacheable request handler
// {{{
input logic uc_busy_i,
output logic uc_req_valid_o,
output logic uc_core_rsp_ready_o,
// }}}
// Cache Management Operation (CMO)
// {{{
input logic cmo_busy_i,
output logic cmo_req_valid_o,
// }}}
// Performance events
// {{{
output logic evt_cache_write_miss_o,
output logic evt_cache_read_miss_o,
output logic evt_uncached_req_o,
output logic evt_cmo_req_o,
output logic evt_write_req_o,
output logic evt_read_req_o,
output logic evt_prefetch_req_o,
output logic evt_req_on_hold_o,
output logic evt_rtab_rollback_o,
output logic evt_stall_refill_o
// }}}
);
// }}}
// Definition of internal signals
// {{{
// st1_fence: the stage 1 request must be executed in program order.
// st1_rtab_alloc / st1_rtab_alloc_and_link: raw "put on hold" decisions of
// the always_comb process, before being split into allocation and rollback.
logic st1_fence;
logic st1_rtab_alloc, st1_rtab_alloc_and_link;
// }}}
// Global control signals
// {{{
// Determine if the request in stage 1 is a "fence". Here, fence
// instructions are those that need to be executed in program order
// (irrespective of addresses). This means that all memory operations that
// arrived before the "fence" instruction need to be finished, and only
// then the "fence" instruction is executed. In the same manner, all
// instructions following the "fence" need to wait for the completion of
// the fence before being executed.
//
// Uncacheable requests, CMO fences, CMO invalidations and AMOs are all
// handled as fences.
assign st1_fence = st1_req_is_uncacheable_i |
st1_req_is_cmo_fence_i |
st1_req_is_cmo_inval_i |
st1_req_is_amo_i;
// }}}
// Arbitration of responses to the core
// {{{
// A refill response has priority: the uncacheable handler response is only
// accepted when there is no refill response in the same cycle.
assign uc_core_rsp_ready_o = ~refill_core_rsp_valid_i;
// }}}
// Arbiter between core or replay request.
// {{{
// Take the replay request when:
// - The replay table is full.
// - The replay table has a ready request (request with all dependencies solved)
// - There is a valid fence-like request in stage 1 (see st1_fence)
// - There is an outstanding CMO or uncached/AMO request
//
// IMPORTANT: When the replay table is full, the cache cannot accept new core
// requests because this can introduce a deadlock: if the core request needs to
// be put on hold, as there is no place in the replay table, the pipeline needs
// to stall. If the pipeline is stalled, dependencies of on-hold requests cannot
// be solved, and the system is locked.
assign rtab_sel_o = rtab_full_i |
rtab_req_valid_i |
(st1_req_valid_i & st1_fence) |
cmo_busy_i |
uc_busy_i;
// }}}
// Replay logic
// {{{
// Replay table allocation
//
// A request that must be put on hold (st1_rtab_alloc) either allocates a new
// RTAB entry, when it does not come from the replay table, or rolls back its
// existing RTAB entry, when it is a replayed request.
assign st1_rtab_alloc_o = st1_rtab_alloc & ~st1_req_rtab_i,
st1_rtab_alloc_and_link_o = st1_rtab_alloc_and_link,
st1_rtab_rback_o = st1_rtab_alloc & st1_req_rtab_i;
// Performance event
// (evt_req_on_hold_o uses the raw st1_rtab_alloc, so it is also set when a
// replayed request is rolled back)
assign evt_req_on_hold_o = st1_rtab_alloc | st1_rtab_alloc_and_link,
evt_rtab_rollback_o = st1_rtab_rback_o;
// }}}
// Data-cache control lines
// {{{
always_comb
begin : hpdcache_ctrl_comb
// st1_nop / st2_nop: stage 1 / stage 2 prevents the pipeline from
// consuming a new stage 0 request in this cycle. nop is the OR of both.
automatic logic nop, st1_nop, st2_nop;
// Default values
//
// Every signal driven by this process gets a default value here, and the
// branches below only override what they need.
//
// IMPORTANT: several signals (e.g. st1_nop, st1_rtab_commit_o,
// st1_rsp_valid_o, st1_req_valid_o) are assigned in more than one place.
// The last assignment in program order wins, so the order of the
// statements in this process matters.
uc_req_valid_o = 1'b0;
cmo_req_valid_o = 1'b0;
wbuf_write_valid_o = 1'b0;
wbuf_read_flush_hit_o = 1'b0;
wbuf_write_uncacheable_o = 1'b0; // unused
arb_st0_req_ready_o = 1'b0;
arb_refill_ready_o = 1'b0;
st0_req_mshr_check_o = 1'b0;
st0_req_cachedir_read_o = 1'b0;
st0_req_cachedata_read_o = 1'b0;
// Hold the stage 1 valid bit by default (this is the value kept during
// initialization and refill; it is overridden in normal operation)
st1_req_valid_o = st1_req_valid_i;
st1_nop = 1'b0;
st1_req_cachedata_write_o = 1'b0;
st1_req_cachedata_write_enable_o = 1'b0;
st1_req_cachedir_updt_lru_o = 1'b0;
st1_rsp_valid_o = 1'b0;
st1_rsp_aborted_o = 1'b0;
// Hold the stage 2 valid bit by default
st2_req_valid_o = st2_req_valid_i;
st2_req_we_o = 1'b0;
st2_req_is_prefetch_o = 1'b0;
st2_req_mshr_alloc_cs_o = 1'b0;
st2_req_mshr_alloc_o = 1'b0;
st2_nop = 1'b0;
nop = 1'b0;
rtab_check_o = 1'b0;
st1_rtab_alloc = 1'b0;
st1_rtab_alloc_and_link = 1'b0;
st1_rtab_commit_o = 1'b0;
st1_rtab_mshr_hit_o = 1'b0;
st1_rtab_mshr_full_o = 1'b0;
st1_rtab_mshr_ready_o = 1'b0;
st1_rtab_wbuf_hit_o = 1'b0;
st1_rtab_wbuf_not_ready_o = 1'b0;
evt_cache_write_miss_o = 1'b0;
evt_cache_read_miss_o = 1'b0;
evt_uncached_req_o = 1'b0;
evt_cmo_req_o = 1'b0;
evt_write_req_o = 1'b0;
evt_read_req_o = 1'b0;
evt_prefetch_req_o = 1'b0;
evt_stall_refill_o = 1'b0;
// Wait for the cache to be initialized
// {{{
if (!cachedir_init_ready_i) begin
// initialization of the cache RAMs: all outputs keep their default
// value, thus no request is accepted and stages 1 and 2 are frozen
end
// }}}
// Refilling the cache
// {{{
else if (refill_busy_i) begin
// miss handler has the control of the cache: no request is accepted
// and stages 1 and 2 are frozen.
// Performance event: a valid stage 0 request is being stalled by the
// refill
evt_stall_refill_o = arb_st0_req_valid_i;
end
// }}}
// Normal pipeline operation
// {{{
else begin
// Stage 2 request pending
// {{{
if (st2_req_valid_i) begin
// The request leaves stage 2 in this cycle (this default may be
// overridden below if stage 1 forwards a new miss to stage 2)
st2_req_valid_o = 1'b0;
// Allocate an entry in the MSHR
st2_req_mshr_alloc_cs_o = 1'b1;
st2_req_mshr_alloc_o = 1'b1;
// Introduce a NOP in the next cycle to prevent a hazard on the MSHR
st2_nop = 1'b1;
// Performance event
evt_cache_read_miss_o = ~st2_req_is_prefetch_i;
evt_read_req_o = ~st2_req_is_prefetch_i;
evt_prefetch_req_o = st2_req_is_prefetch_i;
end
// }}}
// Stage 1 request pending
// {{{
if (st1_req_valid_i) begin
// Check if the request in stage 1 has a conflict with one of the
// requests in the replay table. Replayed requests and fence-like
// requests are not checked.
rtab_check_o = ~st1_req_rtab_i & ~st1_fence;
// Check if the current request is aborted. If so, respond to the
// core (when need_rsp is set) and set the aborted flag.
// The abort is ignored for requests coming from the replay table.
if (st1_req_abort_i && !st1_req_rtab_i) begin
st1_rsp_valid_o = st1_req_need_rsp_i;
st1_rsp_aborted_o = 1'b1;
end
// Allocate a new entry in the replay table in case of conflict with
// an on-hold request
else if (rtab_check_o && rtab_check_hit_i) begin
st1_rtab_alloc_and_link = 1'b1;
// Do not consume a request in this cycle in stage 0
st1_nop = 1'b1;
end
// CMO fence or invalidate
// {{{
else if (st1_req_is_cmo_fence_i || st1_req_is_cmo_inval_i) begin
// Forward the request to the CMO handler
cmo_req_valid_o = 1'b1;
// Do not consume a request in this cycle in stage 0
st1_nop = 1'b1;
// Performance event
evt_cmo_req_o = 1'b1;
end
// }}}
// Uncacheable load, store or AMO request
// {{{
else if (st1_req_is_uncacheable_i) begin
// Forward the request to the uncacheable request handler
uc_req_valid_o = 1'b1;
// Do not consume a request in this cycle in stage 0
st1_nop = 1'b1;
// Performance event
evt_uncached_req_o = 1'b1;
end
// }}}
// Cacheable request
// {{{
// NOTE(review): the AMO, load/prefetch and store cases below are
// three independent "if" statements (not an if/else chain). This
// presumably relies on the operation flags being mutually exclusive
// -- confirm against the operation decoding in the parent module.
else begin
// AMO cacheable request
// {{{
// Cacheable AMOs are also forwarded to the uncacheable request
// handler, and are counted as uncached requests by the
// performance event.
if (st1_req_is_amo_i) begin
uc_req_valid_o = 1'b1;
st1_nop = 1'b1;
// Performance event
evt_uncached_req_o = 1'b1;
end
// }}}
// Load cacheable request
// {{{
// (CMO prefetch requests follow the same path as loads)
if (|{st1_req_is_load_i,
st1_req_is_cmo_prefetch_i})
begin
// Cache miss
// {{{
if (!cachedir_hit_i) begin
// If there is a match in the write buffer, let's send the
// entry right away
wbuf_read_flush_hit_o = 1'b1;
// Do not consume a request in this cycle in stage 0
st1_nop = 1'b1;
// Pending miss on the same line
if (mshr_hit_i) begin
// Put the request in the replay table
st1_rtab_alloc = 1'b1;
st1_rtab_mshr_hit_o = 1'b1;
end
// No available slot in the MSHR
else if (mshr_full_i) begin
// Put the request in the replay table
st1_rtab_alloc = 1'b1;
st1_rtab_mshr_full_o = 1'b1;
end
// Hit on an open entry of the write buffer:
// wait for the entry to be acknowledged
else if (wbuf_read_hit_i) begin
// Put the request in the replay table
st1_rtab_alloc = 1'b1;
st1_rtab_wbuf_hit_o = 1'b1;
end
// Miss Handler is not ready to send
else if (!mshr_alloc_ready_i) begin
// Put the request on hold if the MISS HANDLER is not
// ready to send a new miss request. This is to prevent
// a deadlock between the read request channel and the
// read response channel.
//
// The request channel may be stalled by targets if they
// are not able to send a response (the response has
// priority). Therefore, we need to put the request on
// hold to allow a possible refill read response to be
// accomplished.
st1_rtab_alloc = 1'b1;
st1_rtab_mshr_ready_o = 1'b1;
end
// Forward the request to the next stage to allocate the
// entry in the MSHR and send the refill request
else begin
// If the request comes from the replay table, free the
// corresponding RTAB entry
st1_rtab_commit_o = st1_req_rtab_i;
// Write the request into the stage 2 registers. This
// overrides the clearing of the stage 2 valid bit done
// above when a stage 2 request is leaving.
st2_req_valid_o = 1'b1;
st2_req_we_o = 1'b1;
st2_req_is_prefetch_o = st1_req_is_cmo_prefetch_i;
end
end
// }}}
// Cache hit
// {{{
else begin
// If the request comes from the replay table, free the
// corresponding RTAB entry
st1_rtab_commit_o = st1_req_rtab_i;
// Add a NOP when replaying a request, and there is no available
// request from the replay table.
st1_nop = st1_req_rtab_i & ~rtab_sel_o;
// Update the PLRU bit for the accessed set (only for loads, not
// for prefetches)
st1_req_cachedir_updt_lru_o = st1_req_is_load_i;
// Respond to the core (if needed)
st1_rsp_valid_o = st1_req_need_rsp_i;
// Performance event
evt_read_req_o = ~st1_req_is_cmo_prefetch_i;
evt_prefetch_req_o = st1_req_is_cmo_prefetch_i;
end
// }}}
end
// }}}
// Store cacheable request
// {{{
if (st1_req_is_store_i) begin
// Write in the write buffer if there is no pending miss in the same line.
//
// We assume here that the NoC that transports read and write transactions does
// not guarantee the order between transactions on those channels.
// Therefore, the cache must hold a write if there is a pending read on the
// same address.
wbuf_write_valid_o = ~mshr_hit_i;
// Add a NOP in the pipeline when:
// - Structural hazard on the cache data if the st0 request is a load
// operation.
// - Replaying a request, the cache cannot accept a request from the
// core the next cycle. It can however accept a new request from the
// replay table
//
// IMPORTANT: we could remove the NOP in the first scenario if the
// controller checks for the hit of this write. However, this adds
// a DIR_RAM -> DATA_RAM timing path.
//
// NOTE: this value is overridden with 1'b1 below when the store is
// put on hold.
st1_nop = (arb_st0_req_valid_i & st0_req_is_load_i) |
(st1_req_rtab_i & ~rtab_sel_o);
// Enable the data RAM in case of write. However, the actual write
// depends on the hit signal from the cache directory.
//
// IMPORTANT: this produces unnecessary power consumption in case of
// write misses, but removes timing paths between the cache directory
// RAM and the data RAM chip-select.
st1_req_cachedata_write_o = 1'b1;
// Cache miss
if (!cachedir_hit_i) begin
// Pending miss on the same line
if (mshr_hit_i) begin
// Put the request in the replay table
st1_rtab_alloc = 1'b1;
st1_rtab_mshr_hit_o = 1'b1;
// Do not consume a request in this cycle in stage 0
st1_nop = 1'b1;
end
// No available entry in the write buffer (or conflict on pending entry)
else if (!wbuf_write_ready_i) begin
// Put the request in the replay table
st1_rtab_alloc = 1'b1;
st1_rtab_wbuf_not_ready_o = 1'b1;
// Do not consume a request in this cycle in stage 0
st1_nop = 1'b1;
end
// The store is only performed in the write buffer (the data RAM
// write enable keeps its default value)
else begin
// If the request comes from the replay table, free the
// corresponding RTAB entry
st1_rtab_commit_o = st1_req_rtab_i;
// Respond to the core (if needed)
st1_rsp_valid_o = st1_req_need_rsp_i;
// Performance event
evt_cache_write_miss_o = 1'b1;
evt_write_req_o = 1'b1;
end
end
// Cache hit
else begin
// No available entry in the write buffer (or conflict on pending entry)
if (!wbuf_write_ready_i) begin
// Put the request in the replay table
st1_rtab_alloc = 1'b1;
st1_rtab_wbuf_not_ready_o = 1'b1;
// Do not consume a request in this cycle in stage 0
st1_nop = 1'b1;
end
// The store can be performed in the write buffer and in the cache
else begin
// If the request comes from the replay table, free the
// corresponding RTAB entry
st1_rtab_commit_o = st1_req_rtab_i;
// Respond to the core (if needed)
st1_rsp_valid_o = st1_req_need_rsp_i;
// Update the PLRU bit for the accessed set
st1_req_cachedir_updt_lru_o = 1'b1;
// Write in the data RAM
st1_req_cachedata_write_enable_o = 1'b1;
// Performance event
evt_write_req_o = 1'b1;
end
end
end
// }}}
end
// }}}
end
// }}}
// New request
// {{{
nop = st1_nop | st2_nop;
// The cache controller accepts a stage 0 request when:
// - The req-refill arbiter grants the request
// - Neither stage 1 nor stage 2 asks for a NOP in this cycle
arb_st0_req_ready_o = arb_st0_req_valid_i & ~nop;
// The cache controller accepts a refill when:
// - The req-refill arbiter grants the refill
// - The pipeline is empty
arb_refill_ready_o = arb_refill_valid_i & ~(st1_req_valid_i | st2_req_valid_i);
// Forward the request to stage 1
// - Stage 1 is valid in the next cycle only if a stage 0 request is
// consumed in this cycle
st1_req_valid_o = arb_st0_req_ready_o;
// New cacheable stage 0 request granted
// {{{
// IMPORTANT: here the RAM is enabled regardless of whether the
// request needs to be put on hold.
// This increases the power consumption in that case, but
// removes the RAM-to-RAM timing paths between the cache
// directory and the data array.
if (arb_st0_req_valid_i && !st0_req_is_uncacheable_i) begin
// Read the data array for loads, unless stage 1 holds a valid
// cacheable store (which enables the data array for its write)
st0_req_cachedata_read_o =
st0_req_is_load_i &
~(st1_req_valid_i & st1_req_is_store_i & ~st1_req_is_uncacheable_i);
if (st0_req_is_load_i |
st0_req_is_cmo_prefetch_i |
st0_req_is_store_i |
st0_req_is_amo_i )
begin
// Loads, prefetches, stores and AMOs check the MSHR. All of them
// except AMOs also read the cache directory.
st0_req_mshr_check_o = 1'b1;
st0_req_cachedir_read_o = ~st0_req_is_amo_i;
end
end
// }}}
// }}}
end
// }}} end of normal pipeline operation
end
// }}}
endmodule

View File

@ -0,0 +1,120 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache Directory and Data Memory Arrays
* History :
*/
module hpdcache_memarray
import hpdcache_pkg::*;
    //  Structural wrapper that instantiates the RAM macros of the cache:
    //  - one directory RAM per way,
    //  - a grid of HPDCACHE_DATA_RAM_Y_CUTS x HPDCACHE_DATA_RAM_X_CUTS data RAMs.
    //  Apart from the expansion of byte enables into a bit mask (when the data
    //  RAMs use a write mask), the ports are wired straight to the RAM pins.
    //
    //  Ports
    //  {{{
(
    input  logic                                     clk_i,
    input  logic                                     rst_ni,

    //  Directory RAMs: a single address shared by all ways; chip-select,
    //  write-enable and entries are per way
    input  hpdcache_dir_addr_t                       dir_addr_i,
    input  hpdcache_way_vector_t                     dir_cs_i,
    input  hpdcache_way_vector_t                     dir_we_i,
    input  hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0]  dir_wentry_i,
    output hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0]  dir_rentry_o,

    //  Data RAMs: every signal is indexed as [row (Y cut)][column (X cut)]
    input  hpdcache_data_addr_t                      data_addr_i,
    input  hpdcache_data_enable_t                    data_cs_i,
    input  hpdcache_data_enable_t                    data_we_i,
    input  hpdcache_data_be_entry_t                  data_wbyteenable_i,
    input  hpdcache_data_entry_t                     data_wentry_i,
    output hpdcache_data_entry_t                     data_rentry_o
);
    //  }}}

    //  Directory RAMs (one instance per way)
    //  {{{
    for (genvar way = 0; way < int'(HPDCACHE_WAYS); way++) begin : dir_sram_gen
        hpdcache_sram #(
            .ADDR_SIZE (HPDCACHE_DIR_RAM_ADDR_WIDTH),
            .DATA_SIZE (HPDCACHE_DIR_RAM_WIDTH)
        ) dir_sram (
            .clk       (clk_i),
            .rst_n     (rst_ni),
            .addr      (dir_addr_i),
            .cs        (dir_cs_i[way]),
            .we        (dir_we_i[way]),
            .wdata     (dir_wentry_i[way]),
            .rdata     (dir_rentry_o[way])
        );
    end
    //  }}}

    //  Data RAMs (one instance per [row][column] cut)
    //  {{{
    for (genvar row = 0; row < int'(HPDCACHE_DATA_RAM_Y_CUTS); row++) begin : data_sram_row_gen
        for (genvar col = 0; col < int'(HPDCACHE_DATA_RAM_X_CUTS); col++) begin : data_sram_col_gen

            //  RAM macros with a bit-granularity write mask
            if (!HPDCACHE_DATA_RAM_WBYTEENABLE) begin : data_sram_wmask_gen
                hpdcache_data_ram_data_t data_wmask;

                //  Expand each byte-enable bit into the 8 mask bits of the
                //  corresponding byte
                always_comb
                begin : data_wmask_comb
                    for (int way_idx = 0; way_idx < HPDCACHE_DATA_WAYS_PER_RAM_WORD; way_idx++) begin
                        for (int byte_idx = 0; byte_idx < HPDCACHE_WORD_WIDTH/8; byte_idx++) begin
                            data_wmask[way_idx][8*byte_idx +: 8] =
                                    {8{data_wbyteenable_i[row][col][way_idx][byte_idx]}};
                        end
                    end
                end

                hpdcache_sram_wmask #(
                    .ADDR_SIZE (HPDCACHE_DATA_RAM_ADDR_WIDTH),
                    .DATA_SIZE (HPDCACHE_DATA_RAM_WIDTH)
                ) data_sram (
                    .clk       (clk_i),
                    .rst_n     (rst_ni),
                    .addr      (data_addr_i[row][col]),
                    .cs        (data_cs_i[row][col]),
                    .we        (data_we_i[row][col]),
                    .wmask     (data_wmask),
                    .wdata     (data_wentry_i[row][col]),
                    .rdata     (data_rentry_o[row][col])
                );
            end

            //  RAM macros with native byte enables: the byte-enable input is
            //  forwarded unchanged
            else begin : data_sram_wbyteenable_gen
                hpdcache_sram_wbyteenable #(
                    .ADDR_SIZE   (HPDCACHE_DATA_RAM_ADDR_WIDTH),
                    .DATA_SIZE   (HPDCACHE_DATA_RAM_WIDTH)
                ) data_sram (
                    .clk         (clk_i),
                    .rst_n       (rst_ni),
                    .addr        (data_addr_i[row][col]),
                    .cs          (data_cs_i[row][col]),
                    .we          (data_we_i[row][col]),
                    .wbyteenable (data_wbyteenable_i[row][col]),
                    .wdata       (data_wentry_i[row][col]),
                    .rdata       (data_rentry_o[row][col])
                );
            end
        end
    end
    //  }}}
endmodule

View File

@ -0,0 +1,656 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache Directory and Data Memory RAMs Controller
* History :
*/
// hpdcache_memctrl arbitrates every access to the cache directory (tag) and data
// RAMs. It drives a single hpdcache_memarray instance, keeps the per-set/way valid
// bits in flip-flops, computes the hit vectors and instantiates the PLRU victim
// selection logic.
module hpdcache_memctrl
import hpdcache_pkg::*;
// Ports
// {{{
(
// Global clock and reset signals
// {{{
input logic clk_i,
input logic rst_ni,
// }}}
// Global control signals
// {{{
// ready_o mirrors the init FSM state bit: it rises once the sweep over all the
// directory sets that starts after reset has finished
output logic ready_o,
// }}}
// DIR array access interface
// {{{
// dir_match_* / dir_amo_match_* / dir_cmo_check_*: read one directory set. The
// corresponding *_hit_way_o vector is built from the directory read data, the
// valid bits and the set index registered when the lookup was issued.
input logic dir_match_i,
input hpdcache_set_t dir_match_set_i,
input hpdcache_tag_t dir_match_tag_i,
input logic dir_update_lru_i,
output hpdcache_way_vector_t dir_hit_way_o,
input logic dir_amo_match_i,
input hpdcache_set_t dir_amo_match_set_i,
input hpdcache_tag_t dir_amo_match_tag_i,
input logic dir_amo_update_plru_i,
output hpdcache_way_vector_t dir_amo_hit_way_o,
// dir_refill_*: write dir_refill_entry_i into the way reported on
// dir_victim_way_o (output of the PLRU block) and set its valid bit
input logic dir_refill_i,
input hpdcache_set_t dir_refill_set_i,
input hpdcache_dir_entry_t dir_refill_entry_i,
input logic dir_refill_updt_plru_i,
output hpdcache_way_vector_t dir_victim_way_o,
input logic dir_cmo_check_i,
input hpdcache_set_t dir_cmo_check_set_i,
input hpdcache_tag_t dir_cmo_check_tag_i,
output hpdcache_way_vector_t dir_cmo_check_hit_way_o,
// dir_cmo_inval_*: clear the valid bits of the given ways in the given set (only
// the valid-bit registers are touched, the directory RAM is not accessed)
input logic dir_cmo_inval_i,
input hpdcache_set_t dir_cmo_inval_set_i,
input hpdcache_way_vector_t dir_cmo_inval_way_i,
// }}}
// DATA array access interface
// {{{
// data_req_read_*: read access; data_req_read_data_o is the data of the way
// selected by dir_hit_way_o
input logic data_req_read_i,
input hpdcache_set_t data_req_read_set_i,
input hpdcache_req_size_t data_req_read_size_i,
input hpdcache_word_t data_req_read_word_i,
output hpdcache_req_data_t data_req_read_data_o,
// Write sources. When several are asserted the internal mux picks, in order:
// refill, regular request write, AMO write.
input logic data_req_write_i,
input logic data_req_write_enable_i,
input hpdcache_set_t data_req_write_set_i,
input hpdcache_req_size_t data_req_write_size_i,
input hpdcache_word_t data_req_write_word_i,
input hpdcache_req_data_t data_req_write_data_i,
input hpdcache_req_be_t data_req_write_be_i,
input logic data_amo_write_i,
input logic data_amo_write_enable_i,
input hpdcache_set_t data_amo_write_set_i,
input hpdcache_req_size_t data_amo_write_size_i,
input hpdcache_word_t data_amo_write_word_i,
// AMO write data is always 64 bits wide (8 byte-enable bits)
input logic [63:0] data_amo_write_data_i,
input logic [7:0] data_amo_write_be_i,
input logic data_refill_i,
input hpdcache_way_vector_t data_refill_way_i,
input hpdcache_set_t data_refill_set_i,
input hpdcache_word_t data_refill_word_i,
input hpdcache_refill_data_t data_refill_data_i
// }}}
);
// }}}
// Definition of constants
// {{{
// Total number of data RAM cuts (used to replicate the RAM address to every cut)
localparam int unsigned HPDCACHE_ALL_CUTS = HPDCACHE_DATA_RAM_X_CUTS*HPDCACHE_DATA_RAM_Y_CUTS;
// Number of request-wide data chunks contained in one data RAM access
localparam int unsigned HPDCACHE_DATA_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS;
// }}}
// Definition of functions
// {{{
//  hpdcache_compute_data_ram_cs
//
//  Builds the per-word chip-select vector of one data RAM row: a run of ones whose
//  length follows the access size, moved to the position of the addressed word
//  inside the RAM access window.
//
//  size_i : access size (log2 of the number of bytes)
//  word_i : word offset of the access within the cacheline
function automatic hpdcache_data_row_enable_t hpdcache_compute_data_ram_cs(
        input hpdcache_req_size_t size_i,
        input hpdcache_word_t     word_i);

    // number of word-offset bits selecting a position inside one RAM access
    localparam hpdcache_uint32 SHIFT_BITS =
            HPDCACHE_ACCESS_WORDS > 1 ? $clog2(HPDCACHE_ACCESS_WORDS) : 1;

    hpdcache_data_row_enable_t mask;
    hpdcache_uint32            shamt;

    // position of the first enabled word (0 when a RAM access holds a single word)
    shamt = HPDCACHE_ACCESS_WORDS > 1 ? hpdcache_uint'(word_i[0 +: SHIFT_BITS]) : 0;

    // one enable bit per word covered by the access; anything up to 8 bytes is
    // handled as a 64-bit access, anything above 32 bytes as a 512-bit access
    case (size_i)
        3'h0, 3'h1, 3'h2,
        3'h3:    mask = hpdcache_data_row_enable_t'({ 64/HPDCACHE_WORD_WIDTH{1'b1}});
        3'h4:    mask = hpdcache_data_row_enable_t'({128/HPDCACHE_WORD_WIDTH{1'b1}});
        3'h5:    mask = hpdcache_data_row_enable_t'({256/HPDCACHE_WORD_WIDTH{1'b1}});
        default: mask = hpdcache_data_row_enable_t'({512/HPDCACHE_WORD_WIDTH{1'b1}});
    endcase

    return hpdcache_data_row_enable_t'(mask << shamt);
endfunction
//  hpdcache_way_to_data_ram_row
//
//  Returns the index of the data RAM row that stores the way flagged in the
//  one-hot vector `way`. If several bits are set the lowest one is used; if none
//  is set the result is 0.
function automatic hpdcache_data_ram_row_idx_t hpdcache_way_to_data_ram_row(
        input hpdcache_way_vector_t way);
    hpdcache_data_ram_row_idx_t row;

    row = 0;
    // scan from the highest way downwards so that the lowest set bit is the one
    // that remains in `row`
    for (int w = int'(HPDCACHE_WAYS) - 1; w >= 0; w--) begin
        if (way[w]) begin
            row = hpdcache_data_ram_row_idx_t'(
                    hpdcache_uint'(w) / HPDCACHE_DATA_WAYS_PER_RAM_WORD);
        end
    end
    return row;
endfunction
//  hpdcache_way_to_data_ram_word
//
//  Returns the position, inside a data RAM word, of the way flagged in the one-hot
//  vector `way`. If several bits are set the lowest one is used; if none is set
//  the result is 0.
function automatic hpdcache_data_ram_way_idx_t hpdcache_way_to_data_ram_word(
        input hpdcache_way_vector_t way);
    hpdcache_data_ram_way_idx_t slot;

    slot = 0;
    // scan from the highest way downwards so that the lowest set bit is the one
    // that remains in `slot`
    for (int w = int'(HPDCACHE_WAYS) - 1; w >= 0; w--) begin
        if (way[w]) begin
            slot = hpdcache_data_ram_way_idx_t'(
                    hpdcache_uint'(w) % HPDCACHE_DATA_WAYS_PER_RAM_WORD);
        end
    end
    return slot;
endfunction
//  hpdcache_set_to_data_ram_addr
//
//  Translates a (set, word offset) pair into a data RAM address: each set occupies
//  CL_WORDS/ACCESS_WORDS consecutive addresses, and the word offset selects one of
//  them.
function automatic hpdcache_data_ram_addr_t hpdcache_set_to_data_ram_addr(
        input hpdcache_set_t  set,
        input hpdcache_word_t word);
    hpdcache_uint base;
    hpdcache_uint chunk;

    // first RAM address of the set
    base  = hpdcache_uint'(set) * (HPDCACHE_CL_WORDS / HPDCACHE_ACCESS_WORDS);
    // access-wide chunk of the cacheline that contains the word
    chunk = hpdcache_uint'(word) / HPDCACHE_ACCESS_WORDS;

    return hpdcache_data_ram_addr_t'(base + chunk);
endfunction
// }}}
// Definition of internal signals and registers
// {{{
// loop variables shared by the generate regions of this module
genvar gen_i, gen_j, gen_k;
// Directory initialization signals and registers
// init_q is the init FSM state (0: sweeping the directory, 1: done); init_set_q is
// the set currently being written during the sweep
logic init_q, init_d;
hpdcache_dir_addr_t init_set_q, init_set_d;
hpdcache_way_vector_t init_dir_cs;
hpdcache_way_vector_t init_dir_we;
hpdcache_dir_entry_t init_dir_wentry;
// Directory valid bit vector (one bit per set and way)
hpdcache_way_vector_t [HPDCACHE_SETS-1:0] dir_valid_q, dir_valid_d;
// set index of the last directory lookup (used by the hit logic and the PLRU update)
hpdcache_set_t dir_req_set_q, dir_req_set_d;
// signals connected to the directory ports of the memory array
hpdcache_dir_addr_t dir_addr;
hpdcache_way_vector_t dir_cs;
hpdcache_way_vector_t dir_we;
hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_wentry;
hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_rentry;
// signals connected to the data ports of the memory array
hpdcache_data_addr_t data_addr;
hpdcache_data_enable_t data_cs;
hpdcache_data_enable_t data_we;
hpdcache_data_be_entry_t data_wbyteenable;
hpdcache_data_entry_t data_wentry;
hpdcache_data_entry_t data_rentry;
// selected write request (output of the write request multiplexor)
logic data_write;
logic data_write_enable;
hpdcache_set_t data_write_set;
hpdcache_req_size_t data_write_size;
hpdcache_word_t data_write_word;
hpdcache_refill_data_t data_write_data;
hpdcache_refill_be_t data_write_be;
// request/AMO write data and byte-enables widened to the data RAM access width
hpdcache_refill_data_t data_req_write_data;
hpdcache_refill_be_t data_req_write_be;
hpdcache_refill_data_t data_amo_write_data;
hpdcache_refill_be_t data_amo_write_be;
// target way of the data access and its decoded RAM row / position in the RAM word
hpdcache_way_vector_t data_way;
hpdcache_data_ram_row_idx_t data_ram_row;
hpdcache_data_ram_way_idx_t data_ram_word;
// }}}
// Init FSM
// {{{
//  After reset the FSM walks through every directory set, writing a zeroed entry
//  into all the ways, and then parks in the "done" state.
always_comb
begin : init_comb
    // entry written into every way during the sweep
    init_dir_wentry.tag      = '0;
    init_dir_wentry.reserved = '0;

    // defaults: keep the current state and leave the directory alone
    init_d      = init_q;
    init_set_d  = init_set_q;
    init_dir_cs = '0;
    init_dir_we = '0;

    case (init_q)
        //  sweep in progress: write all the ways of the current set and move on;
        //  the sweep ends when the last set has been written
        1'b0: begin
            init_dir_cs = '1;
            init_dir_we = '1;
            init_set_d  = init_set_q + 1;
            init_d      = (hpdcache_uint'(init_set_q) == (HPDCACHE_SETS - 1));
        end

        //  sweep done: stay here until the next reset
        1'b1: begin
            init_set_d = init_set_q;
            init_d     = 1'b1;
        end
    endcase
end

//  the controller is ready as soon as the sweep is over
assign ready_o = init_q;

//  state registers of the init FSM, together with the directory valid bits
always_ff @(posedge clk_i or negedge rst_ni)
begin : init_ff
    if (!rst_ni) begin
        dir_valid_q <= '0;
        init_set_q  <= 0;
        init_q      <= 1'b0;
    end else begin
        dir_valid_q <= dir_valid_d;
        init_set_q  <= init_set_d;
        init_q      <= init_d;
    end
end
// }}}
// Memory arrays
// {{{
//  Directory and data RAM arrays. All the ports are driven by the request
//  multiplexors of this module.
hpdcache_memarray hpdcache_memarray_i (
    .clk_i              (clk_i),
    .rst_ni             (rst_ni),

    //  directory RAMs
    .dir_addr_i         (dir_addr),
    .dir_cs_i           (dir_cs),
    .dir_we_i           (dir_we),
    .dir_wentry_i       (dir_wentry),
    .dir_rentry_o       (dir_rentry),

    //  data RAMs
    .data_addr_i        (data_addr),
    .data_cs_i          (data_cs),
    .data_we_i          (data_we),
    .data_wbyteenable_i (data_wbyteenable),
    .data_wentry_i      (data_wentry),
    .data_rentry_o      (data_rentry)
);
// }}}
// Directory RAM request mux
// {{{
//  Selects which requester drives the directory RAM ports. Priority, highest
//  first: initialization sweep, request lookup, AMO lookup, refill write, CMO
//  lookup.
always_comb
begin : dir_ctrl_comb
    //  idle values: no RAM selected, nothing written
    dir_addr   = '0;
    dir_cs     = '0;
    dir_we     = '0;
    dir_wentry = '0;

    if (~init_q) begin
        //  cache directory initialization: write the init entry in all the ways
        dir_addr   = init_set_q;
        dir_cs     = init_dir_cs;
        dir_we     = init_dir_we;
        dir_wentry = {HPDCACHE_WAYS{init_dir_wentry}};
    end else if (dir_match_i) begin
        //  request tag lookup: read all the ways of the set
        dir_addr = dir_match_set_i;
        dir_cs   = '1;
    end else if (dir_amo_match_i) begin
        //  AMO tag lookup: read all the ways of the set
        dir_addr = dir_amo_match_set_i;
        dir_cs   = '1;
    end else if (dir_refill_i) begin
        //  refill: write the new entry into the victim way only
        dir_addr   = dir_refill_set_i;
        dir_cs     = dir_victim_way_o;
        dir_we     = dir_victim_way_o;
        dir_wentry = {HPDCACHE_WAYS{dir_refill_entry_i}};
    end else if (dir_cmo_check_i) begin
        //  CMO tag lookup: read all the ways of the set
        dir_addr = dir_cmo_check_set_i;
        dir_cs   = '1;
    end
end
// }}}
// Directory valid logic
// {{{
//  Next value of the directory valid bits: a refill sets the bit of the victim way,
//  a CMO invalidation clears the bits of the requested ways.
always_comb
begin : dir_valid_comb
    //  by default keep every valid bit
    dir_valid_d = dir_valid_q;

    unique case (1'b1)
        //  refill after a miss: mark the victim way as valid
        dir_refill_i:    dir_valid_d[dir_refill_set_i]    |= dir_victim_way_o;

        //  CMO: invalidate the selected ways of the set
        dir_cmo_inval_i: dir_valid_d[dir_cmo_inval_set_i] &= ~dir_cmo_inval_way_i;

        default: begin
            //  nothing to update
        end
    endcase
end
// }}}
// Directory hit logic
// {{{
//  Set index of the lookup in flight: taken from the requester that accesses the
//  directory (request first, then AMO, then CMO), held otherwise
assign dir_req_set_d = dir_match_i     ? dir_match_set_i     :
                       dir_amo_match_i ? dir_amo_match_set_i :
                       dir_cmo_check_i ? dir_cmo_check_set_i :
                                         dir_req_set_q;

//  per-way tag comparison results, one vector per requester
hpdcache_way_vector_t req_hit;
hpdcache_way_vector_t amo_hit;
hpdcache_way_vector_t cmo_hit;

//  valid bits of the set that was looked up
hpdcache_way_vector_t dir_req_valid;
assign dir_req_valid = dir_valid_q[dir_req_set_q];

generate
    for (gen_i = 0; gen_i < int'(HPDCACHE_WAYS); gen_i++)
    begin : dir_match_tag_gen
        //  compare the tag read from each way against the three candidate tags
        assign req_hit[gen_i] = (dir_rentry[gen_i].tag == dir_match_tag_i);
        assign amo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_amo_match_tag_i);
        assign cmo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_cmo_check_tag_i);
    end
endgenerate

//  a way hits when its tag matches and its valid bit is set
assign dir_hit_way_o           = dir_req_valid & req_hit;
assign dir_amo_hit_way_o       = dir_req_valid & amo_hit;
assign dir_cmo_check_hit_way_o = dir_req_valid & cmo_hit;
// }}}
// Directory victim select logic
// {{{
//  PLRU update request: raised by a regular request or by an AMO. The way to
//  promote is the hit way of whichever of the two asked for the update (the
//  regular request wins when both do).
logic                 plru_updt;
hpdcache_way_vector_t plru_updt_way;

assign plru_updt     = dir_update_lru_i | dir_amo_update_plru_i;
assign plru_updt_way = dir_update_lru_i ? dir_hit_way_o : dir_amo_hit_way_o;

//  Replacement policy: selects the victim way for refills
hpdcache_plru #(
    .SETS             (HPDCACHE_SETS),
    .WAYS             (HPDCACHE_WAYS)
) plru_i (
    .clk_i            (clk_i),
    .rst_ni           (rst_ni),

    //  update on hit (uses the registered set of the lookup)
    .updt_i           (plru_updt),
    .updt_set_i       (dir_req_set_q),
    .updt_way_i       (plru_updt_way),

    //  victim selection on refill
    .repl_i           (dir_refill_i),
    .repl_set_i       (dir_refill_set_i),
    .repl_dir_valid_i (dir_valid_q[dir_refill_set_i]),
    .repl_updt_plru_i (dir_refill_updt_plru_i),

    .victim_way_o     (dir_victim_way_o)
);
// }}}
// Data RAM request multiplexor
// {{{
// Upsize the request interface to match the maximum access width of the data RAM
generate
if (HPDCACHE_DATA_REQ_RATIO > 1) begin : upsize_data_req_write_gen
// demux request DATA
// (the request data is simply replicated in every request-wide slot of the RAM
// access; the byte-enables below decide which slot is actually written)
assign data_req_write_data = {HPDCACHE_DATA_REQ_RATIO{data_req_write_data_i}};
// demux request BE
// the select input is the part of the word offset located just above the
// request-word index bits
// NOTE(review): presumably the demux leaves the non-selected outputs at zero -
// confirm in hpdcache_demux
hpdcache_demux #(
.NOUTPUT (HPDCACHE_DATA_REQ_RATIO),
.DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH/8),
.ONE_HOT_SEL (1'b0)
) data_req_write_be_demux_i (
.data_i (data_req_write_be_i),
.sel_i (data_req_write_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +:
$clog2(HPDCACHE_DATA_REQ_RATIO)]),
.data_o (data_req_write_be)
);
end else begin
// request interface as wide as the RAM access: data and BE pass through unchanged
assign data_req_write_data = data_req_write_data_i,
data_req_write_be = data_req_write_be_i;
end
endgenerate
// Upsize the AMO data interface to match the maximum access width of the data RAM
generate
// number of 64-bit AMO words contained in one data RAM access
localparam hpdcache_uint AMO_DATA_RATIO = HPDCACHE_DATA_RAM_ACCESS_WIDTH/64;
localparam hpdcache_uint AMO_DATA_INDEX_WIDTH = $clog2(AMO_DATA_RATIO);
if (AMO_DATA_RATIO > 1) begin
// same scheme as for requests: replicate the 64-bit data and steer the 8
// byte-enable bits with the low bits of the AMO word offset
assign data_amo_write_data = {AMO_DATA_RATIO{data_amo_write_data_i}};
hpdcache_demux #(
.NOUTPUT (AMO_DATA_RATIO),
.DATA_WIDTH (8),
.ONE_HOT_SEL (1'b0)
) amo_be_demux_i (
.data_i (data_amo_write_be_i),
.sel_i (data_amo_write_word_i[0 +: AMO_DATA_INDEX_WIDTH]),
.data_o (data_amo_write_be)
);
end else begin
// RAM access is 64 bits wide: AMO data and BE pass through unchanged
assign data_amo_write_data = data_amo_write_data_i,
data_amo_write_be = data_amo_write_be_i;
end
endgenerate
//  Multiplex between data write requests. Priority, highest first: refill,
//  regular request write, AMO write.
always_comb
begin : data_write_comb
    //  idle values: no write pending
    data_write        = 1'b0;
    data_write_enable = 1'b0;
    data_write_set    = '0;
    data_write_size   = '0;
    data_write_word   = '0;
    data_write_data   = '0;
    data_write_be     = '0;

    if (data_refill_i) begin
        //  refill: always writes a full RAM access with all the bytes enabled
        data_write        = 1'b1;
        data_write_enable = 1'b1;
        data_write_set    = data_refill_set_i;
        data_write_size   = hpdcache_req_size_t'($clog2(HPDCACHE_DATA_RAM_ACCESS_WIDTH/8));
        data_write_word   = data_refill_word_i;
        data_write_data   = data_refill_data_i;
        data_write_be     = '1;
    end else if (data_req_write_i) begin
        //  regular write: data and byte-enables already widened to the RAM width
        data_write        = 1'b1;
        data_write_enable = data_req_write_enable_i;
        data_write_set    = data_req_write_set_i;
        data_write_size   = data_req_write_size_i;
        data_write_word   = data_req_write_word_i;
        data_write_data   = data_req_write_data;
        data_write_be     = data_req_write_be;
    end else if (data_amo_write_i) begin
        //  AMO write: data and byte-enables already widened to the RAM width
        data_write        = 1'b1;
        data_write_enable = data_amo_write_enable_i;
        data_write_set    = data_amo_write_set_i;
        data_write_size   = data_amo_write_size_i;
        data_write_word   = data_amo_write_word_i;
        data_write_data   = data_amo_write_data;
        data_write_be     = data_amo_write_be;
    end
end
// Multiplex between read and write access on the data RAM
// Target way of the access: the refill way for refills, the AMO hit way for AMO
// writes, the request hit way otherwise
assign data_way = data_refill_i ? data_refill_way_i :
data_amo_write_i ? dir_amo_hit_way_o :
dir_hit_way_o;
// Decode way index
// (data_ram_row: RAM row holding the way; data_ram_word: position of the way inside
// the RAM word)
assign data_ram_word = hpdcache_way_to_data_ram_word(data_way),
data_ram_row = hpdcache_way_to_data_ram_row(data_way);
// Drives the data RAM ports. A read request has priority over the selected write.
always_comb
begin : data_ctrl_comb
case (1'b1)
// Select data read inputs
data_req_read_i: begin
// every cut gets the same address
data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_req_read_set_i,
data_req_read_word_i)}};
data_we = '0;
data_wbyteenable = '0;
data_wentry = '0;
// all the rows are selected with the same word enables (the way is not known
// here; it is selected later on the read data)
for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin
data_cs[i] = hpdcache_compute_data_ram_cs(data_req_read_size_i,
data_req_read_word_i);
end
end
// Select data write inputs
data_write: begin
data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_write_set,
data_write_word)}};
// replicate each data word in every way position of the RAM word; the
// byte-enables built below keep only the target way
for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin
for (int unsigned j = 0; j < HPDCACHE_DATA_RAM_X_CUTS; j++) begin
data_wentry[i][j] = {HPDCACHE_DATA_WAYS_PER_RAM_WORD{data_write_data[j]}};
end
end
for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin
data_cs[i] = hpdcache_compute_data_ram_cs(data_write_size, data_write_word);
// only the row that holds the target way is write-enabled
if (i == hpdcache_uint'(data_ram_row)) begin
data_we[i] = data_write_enable ? data_cs[i] : '0;
end else begin
data_we[i] = '0;
end
// Build the write mask
// (byte-enables are applied only at the position of the target way)
for (int unsigned j = 0; j < HPDCACHE_ACCESS_WORDS; j++) begin
for (int unsigned k = 0; k < HPDCACHE_DATA_WAYS_PER_RAM_WORD; k++) begin
data_wbyteenable[i][j][k] = (k == hpdcache_uint'(data_ram_word)) ?
data_write_be[j] : '0;
end
end
end
end
// Do nothing
default: begin
data_addr = '0;
data_cs = '0;
data_we = '0;
data_wbyteenable = '0;
data_wentry = '0;
end
endcase
end
// }}}
// Data RAM read data multiplexor
// {{{
// Read path: reorder the RAM read data, pick the request-wide chunk addressed by
// the request, then pick the way that hit.
generate
hpdcache_req_data_t [HPDCACHE_DATA_REQ_RATIO-1:0][HPDCACHE_WAYS-1:0] data_read_words;
hpdcache_req_data_t [HPDCACHE_WAYS-1:0] data_read_req_word;
// Organize the read data by words (all ways for the same word are contiguous)
// Indices: gen_i = request-wide chunk, gen_j = way, gen_k = word inside the chunk.
// data_rentry is indexed by [RAM row][word of the access][way position in RAM word].
for (gen_i = 0; gen_i < int'(HPDCACHE_DATA_REQ_RATIO); gen_i++) begin
for (gen_j = 0; gen_j < int'(HPDCACHE_WAYS); gen_j++) begin
for (gen_k = 0; gen_k < int'(HPDCACHE_REQ_WORDS); gen_k++) begin
assign data_read_words[gen_i][gen_j][gen_k] =
data_rentry[(gen_j / HPDCACHE_DATA_WAYS_PER_RAM_WORD)]
[(gen_i * HPDCACHE_REQ_WORDS ) + gen_k]
[(gen_j % HPDCACHE_DATA_WAYS_PER_RAM_WORD)];
end
end
end
// Mux the data according to the access word
if (HPDCACHE_DATA_REQ_RATIO > 1) begin : req_width_lt_ram_width
typedef logic [$clog2(HPDCACHE_DATA_REQ_RATIO)-1:0] data_req_word_t;
data_req_word_t data_read_req_word_index_q;
hpdcache_mux #(
.NINPUT (HPDCACHE_DATA_REQ_RATIO),
.DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH*HPDCACHE_WAYS)
) data_read_req_word_mux_i(
.data_i (data_read_words),
.sel_i (data_read_req_word_index_q),
.data_o (data_read_req_word)
);
// The chunk index is registered (on every clock edge, no reset) so that it is
// available together with the RAM read data
always_ff @(posedge clk_i)
begin : data_req_read_word_ff
data_read_req_word_index_q <=
data_req_read_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +:
$clog2(HPDCACHE_DATA_REQ_RATIO)];
end
end
// Request data interface width is equal to the data RAM width
else begin : req_width_eq_ram_width
assign data_read_req_word = data_read_words;
end
// Mux the data according to the hit way
// (one-hot select driven by the request hit vector)
hpdcache_mux #(
.NINPUT (HPDCACHE_WAYS),
.DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH),
.ONE_HOT_SEL (1'b1)
) data_read_req_word_way_mux_i(
.data_i (data_read_req_word),
.sel_i (dir_hit_way_o),
.data_o (data_req_read_data_o)
);
endgenerate
//  Keep the set index of the last directory lookup (request, AMO or CMO check):
//  the hit logic needs it in the following cycle to pick the valid bits that go
//  with the tags read from the directory. The register has no reset.
always_ff @(posedge clk_i)
begin : req_read_ff
    if (|{dir_match_i, dir_amo_match_i, dir_cmo_check_i}) begin
        dir_req_set_q <= dir_req_set_d;
    end
end
// }}}
// Assertions
// {{{
// pragma translate_off
// At most one requester may access the directory RAM in a given cycle.
// dir_cmo_inval_i is not part of this check: it only modifies the valid-bit
// registers and does not use the directory RAM ports.
concurrent_dir_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
$onehot0({dir_match_i, dir_amo_match_i, dir_cmo_check_i, dir_refill_i})) else
$error("hpdcache_memctrl: more than one process is accessing the cache directory");
// At most one requester may access the data RAM in a given cycle.
concurrent_data_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
$onehot0({data_req_read_i, data_req_write_i, data_amo_write_i, data_refill_i})) else
$error("hpdcache_memctrl: more than one process is accessing the cache data");
// pragma translate_on
// }}}
endmodule

View File

@ -0,0 +1,659 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache Miss Handler
* History :
*/
// hpdcache_miss_handler: allocates an MSHR entry for each miss, sends the
// corresponding read request to the memory interface, buffers the response and
// writes the returned cacheline into the cache (data first, then directory),
// forwarding the requested data to the core when a response is needed.
module hpdcache_miss_handler
// {{{
import hpdcache_pkg::*;
// Parameters
// {{{
#(
// Width of the memory interface ID. It must be able to hold the MSHR set and way
// indices (checked by the assertion at the end of the module).
parameter int HPDcacheMemIdWidth = 8,
// Width (in bits) of the memory interface data bus
parameter int HPDcacheMemDataWidth = 512,
// Types of the memory request and read-response channels
parameter type hpdcache_mem_req_t = logic,
parameter type hpdcache_mem_resp_r_t = logic,
localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
)
// }}}
// Ports
// {{{
(
input logic clk_i,
input logic rst_ni,
// Global control signals
// {{{
// mshr_empty_o: the MSHR is empty and no refill is in progress
output logic mshr_empty_o,
output logic mshr_full_o,
// }}}
// Configuration signals
// {{{
// when set, refills triggered by prefetches also update the PLRU bits
input logic cfg_prefetch_updt_plru_i,
// }}}
// CHECK interface
// {{{
input logic mshr_check_i,
input mshr_set_t mshr_check_set_i,
input mshr_tag_t mshr_check_tag_i,
output logic mshr_check_hit_o,
// }}}
// MISS interface
// {{{
// MISS request interface
// mshr_alloc_ready_o is high only while the miss request FSM is idle; an accepted
// allocation makes the FSM send one read request to the memory
output logic mshr_alloc_ready_o,
input logic mshr_alloc_i,
input logic mshr_alloc_cs_i,
input hpdcache_nline_t mshr_alloc_nline_i,
output logic mshr_alloc_full_o,
input hpdcache_req_tid_t mshr_alloc_tid_i,
input hpdcache_req_sid_t mshr_alloc_sid_i,
input hpdcache_word_t mshr_alloc_word_i,
input logic mshr_alloc_need_rsp_i,
input logic mshr_alloc_is_prefetch_i,
// REFILL MISS interface
// refill_req_valid_o is raised while the refill FSM is idle and a memory response
// is waiting; the refill starts when refill_req_ready_i is also high
input logic refill_req_ready_i,
output logic refill_req_valid_o,
output logic refill_busy_o,
output logic refill_updt_plru_o,
output hpdcache_set_t refill_set_o,
output hpdcache_dir_entry_t refill_dir_entry_o,
// victim way chosen by the replacement logic; forwarded on refill_victim_way_o
// during the first data write and taken from an internal register afterwards
input hpdcache_way_vector_t refill_victim_way_i,
output logic refill_write_dir_o,
output logic refill_write_data_o,
output hpdcache_way_vector_t refill_victim_way_o,
output hpdcache_refill_data_t refill_data_o,
output hpdcache_word_t refill_word_o,
output hpdcache_nline_t refill_nline_o,
output logic refill_updt_rtab_o,
// REFILL core response interface
output logic refill_core_rsp_valid_o,
output hpdcache_rsp_t refill_core_rsp_o,
// }}}
// MEMORY interface
// {{{
input logic mem_req_ready_i,
output logic mem_req_valid_o,
output hpdcache_mem_req_t mem_req_o,
output logic mem_resp_ready_o,
input logic mem_resp_valid_i,
input hpdcache_mem_resp_r_t mem_resp_i
// }}}
);
// }}}
// Declaration of constants and types
// {{{
//  Number of request-wide data chunks contained in one refill (RAM access) word
localparam int unsigned REFILL_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS;

//  States of the miss request FSM
typedef enum logic {
    MISS_REQ_IDLE = 1'b0,   //  ready to accept a new MSHR allocation
    MISS_REQ_SEND = 1'b1    //  sending the read request to the memory
} miss_req_fsm_e;

//  States of the refill FSM.
//  The base type is given explicitly: without it the enumeration defaults to
//  `int`, which declares a 32-bit state register for three states. A 2-state
//  base type is used on purpose so that the simulation behaviour is the same as
//  with the implicit `int` (the state reads as REFILL_IDLE before reset).
typedef enum bit [1:0] {
    REFILL_IDLE      = 2'd0,    //  waiting for a memory response
    REFILL_WRITE     = 2'd1,    //  writing the cacheline data
    REFILL_WRITE_DIR = 2'd2     //  writing the directory (single-cycle refills only)
} refill_fsm_e;

//  Response metadata kept for each memory response
typedef struct packed {
    hpdcache_mem_error_e r_error;
    hpdcache_mem_id_t    r_id;
} mem_resp_metadata_t;

//  The memory request ID is built as {MSHR way, MSHR set}. These two functions
//  extract each field from the ID returned with the response.
function automatic mshr_set_t get_ack_mshr_set(hpdcache_mem_id_t id);
    return id[0 +: HPDCACHE_MSHR_SET_WIDTH];
endfunction

function automatic mshr_way_t get_ack_mshr_way(hpdcache_mem_id_t id);
    return id[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_WAY_WIDTH];
endfunction
// }}}
// Declaration of internal signals and registers
// {{{
// Miss request FSM state and the MSHR slot (way/set/tag) of the request being sent
miss_req_fsm_e miss_req_fsm_q, miss_req_fsm_d;
mshr_way_t mshr_alloc_way_q, mshr_alloc_way_d;
mshr_set_t mshr_alloc_set_q, mshr_alloc_set_d;
mshr_tag_t mshr_alloc_tag_q, mshr_alloc_tag_d;
// Refill FSM state and the context of the refill in progress (captured from the
// MSHR during the first data write)
refill_fsm_e refill_fsm_q, refill_fsm_d;
hpdcache_set_t refill_set_q;
hpdcache_tag_t refill_tag_q;
hpdcache_way_vector_t refill_way_q;
hpdcache_req_sid_t refill_sid_q;
hpdcache_req_tid_t refill_tid_q;
// word offset of the chunk currently being written
hpdcache_word_t refill_cnt_q, refill_cnt_d;
logic refill_need_rsp_q;
logic refill_is_prefetch_q;
hpdcache_word_t refill_core_rsp_word_q;
// when set, the victim way comes directly from refill_victim_way_i
logic refill_way_bypass;
// Memory response buffers (metadata FIFO and data FIFO) handshake signals
mem_resp_metadata_t refill_fifo_resp_meta_wdata, refill_fifo_resp_meta_rdata;
logic refill_fifo_resp_meta_w, refill_fifo_resp_meta_wok;
logic refill_fifo_resp_meta_r, refill_fifo_resp_meta_rok;
logic refill_fifo_resp_data_w, refill_fifo_resp_data_wok;
hpdcache_refill_data_t refill_fifo_resp_data_rdata;
logic refill_fifo_resp_data_r;
// Response to the core, before the output buffer
logic refill_core_rsp_valid;
hpdcache_req_data_t refill_core_rsp_rdata;
hpdcache_req_sid_t refill_core_rsp_sid;
hpdcache_req_tid_t refill_core_rsp_tid;
logic refill_core_rsp_error;
hpdcache_word_t refill_core_rsp_word;
hpdcache_rsp_t refill_core_rsp;
// the memory response currently at the head of the metadata FIFO is an error
logic refill_is_error;
// MSHR allocation (write) and acknowledge (read + free) interfaces
logic mshr_alloc;
logic mshr_alloc_cs;
logic mshr_ack;
logic mshr_ack_cs;
mshr_set_t mshr_ack_set;
mshr_way_t mshr_ack_way;
hpdcache_nline_t mshr_ack_nline;
hpdcache_req_sid_t mshr_ack_src_id;
hpdcache_req_tid_t mshr_ack_req_id;
hpdcache_word_t mshr_ack_word;
logic mshr_ack_need_rsp;
logic mshr_ack_is_prefetch;
logic mshr_empty;
// }}}
// Miss Request FSM
// {{{
//  Miss request FSM (combinational part).
//  IDLE: MSHR allocations are accepted; an allocation moves the FSM to SEND.
//  SEND: the read request is presented to the memory until it is accepted.
always_comb
begin : miss_req_fsm_comb
    //  defaults: hold the state, accept nothing, send nothing
    miss_req_fsm_d     = miss_req_fsm_q;
    mem_req_valid_o    = 1'b0;
    mshr_alloc_ready_o = 1'b0;
    mshr_alloc_cs      = 1'b0;
    mshr_alloc         = 1'b0;

    case (miss_req_fsm_q)
        MISS_REQ_IDLE: begin
            //  forward the allocation request to the MSHR
            mshr_alloc_ready_o = 1'b1;
            mshr_alloc_cs      = mshr_alloc_cs_i;
            mshr_alloc         = mshr_alloc_i;

            if (mshr_alloc_i) begin
                miss_req_fsm_d = MISS_REQ_SEND;
            end
        end

        MISS_REQ_SEND: begin
            //  keep the request valid until the memory interface takes it
            mem_req_valid_o = 1'b1;

            if (mem_req_ready_i) begin
                miss_req_fsm_d = MISS_REQ_IDLE;
            end
        end
    endcase
end
//  Shape of the refill read request: one cacheline, transferred in beats that are
//  as wide as the memory data bus
localparam hpdcache_uint REFILL_REQ_SIZE = $clog2(HPDcacheMemDataWidth/8);
localparam hpdcache_uint REFILL_REQ_LEN  = HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth;

//  Cacheable read of the missing line. The address is the cacheline address built
//  from the registered tag and set; the ID carries the MSHR slot ({way, set}) so
//  that the response can be matched with its MSHR entry.
assign mem_req_o.mem_req_command   = HPDCACHE_MEM_READ;
assign mem_req_o.mem_req_atomic    = HPDCACHE_MEM_ATOMIC_ADD;
assign mem_req_o.mem_req_cacheable = 1'b1;
assign mem_req_o.mem_req_addr      = {mshr_alloc_tag_q,
                                      mshr_alloc_set_q,
                                      {HPDCACHE_OFFSET_WIDTH{1'b0}} };
assign mem_req_o.mem_req_id        = hpdcache_mem_id_t'({mshr_alloc_way_q, mshr_alloc_set_q});
assign mem_req_o.mem_req_len       = hpdcache_mem_len_t'(REFILL_REQ_LEN-1);
assign mem_req_o.mem_req_size      = hpdcache_mem_size_t'(REFILL_REQ_SIZE);

//  MSHR slot of the request being sent: captured when the allocation is accepted
//  (no reset needed, the values are only used while the FSM is in SEND)
always_ff @(posedge clk_i)
begin : miss_req_fsm_internal_ff
    if (mshr_alloc) begin
        mshr_alloc_tag_q <= mshr_alloc_tag_d;
        mshr_alloc_set_q <= mshr_alloc_set_d;
        mshr_alloc_way_q <= mshr_alloc_way_d;
    end
end

//  Miss request FSM state register
always_ff @(posedge clk_i or negedge rst_ni)
begin : miss_req_fsm_ff
    if (!rst_ni) miss_req_fsm_q <= MISS_REQ_IDLE;
    else         miss_req_fsm_q <= miss_req_fsm_d;
end
// }}}
// Refill FSM
// {{{
// ask the refill arbiter for permission if there is a pending refill
assign refill_req_valid_o = refill_fsm_q == REFILL_IDLE ? refill_fifo_resp_meta_rok : 1'b0;
// forward the victim way directly from the victim selection logic or
// from the internal register
assign refill_victim_way_o = refill_way_bypass ? refill_victim_way_i : refill_way_q;
// Refill FSM (combinational part).
//   REFILL_IDLE      : wait for a memory response and for the grant of the arbiter
//   REFILL_WRITE     : write the cacheline into the data array, one RAM access per
//                      cycle; on the last access also write the directory (unless
//                      the line is written in a single access)
//   REFILL_WRITE_DIR : write the directory (single-access refills only)
always_comb
begin : miss_resp_fsm_comb
// word offset of the last chunk of the cacheline
automatic hpdcache_uint REFILL_LAST_CHUNK_WORD;
REFILL_LAST_CHUNK_WORD = HPDCACHE_CL_WORDS - HPDCACHE_ACCESS_WORDS;
// default values
refill_updt_plru_o = 1'b0;
refill_set_o = '0;
refill_write_dir_o = 1'b0;
refill_write_data_o = 1'b0;
refill_updt_rtab_o = 1'b0;
refill_cnt_d = refill_cnt_q;
refill_way_bypass = 1'b0;
refill_core_rsp_valid = 1'b0;
refill_core_rsp_sid = '0;
refill_core_rsp_tid = '0;
refill_core_rsp_error = 1'b0;
refill_core_rsp_word = 0;
refill_fifo_resp_meta_r = 1'b0;
refill_fifo_resp_data_r = 1'b0;
mshr_ack_cs = 1'b0;
mshr_ack = 1'b0;
refill_fsm_d = refill_fsm_q;
case (refill_fsm_q)
// Wait for refill responses
// {{{
REFILL_IDLE: begin
if (refill_fifo_resp_meta_rok) begin
// anticipate the activation of the MSHR independently of the grant signal from
// the refill arbiter. This is to avoid the introduction of unnecessary timing
// paths (however there could be a minor increase in power consumption).
mshr_ack_cs = 1'b1;
// if the permission is granted, start refilling
if (refill_req_ready_i) begin
refill_fsm_d = REFILL_WRITE;
// read the MSHR and reset the valid bit for the
// corresponding entry
mshr_ack = 1'b1;
// initialize the counter for refill words
refill_cnt_d = 0;
end
end
end
// }}}
// Write refill data into the cache
// {{{
REFILL_WRITE: begin
automatic logic is_prefetch;
// Respond to the core (when needed)
// During the first access the request context comes straight from the MSHR
// outputs; afterwards it comes from the registered copy. The response is
// sent in the cycle where the chunk holding the requested word is written.
if (refill_cnt_q == 0) begin
automatic hpdcache_uint _core_rsp_word;
_core_rsp_word = hpdcache_uint'(mshr_ack_word)/HPDCACHE_ACCESS_WORDS;
if (mshr_ack_need_rsp) begin
refill_core_rsp_valid = (hpdcache_uint'(_core_rsp_word) == 0);
end
refill_core_rsp_sid = mshr_ack_src_id;
refill_core_rsp_tid = mshr_ack_req_id;
refill_core_rsp_error = refill_is_error;
refill_core_rsp_word = hpdcache_word_t'(
hpdcache_uint'(mshr_ack_word)/HPDCACHE_REQ_WORDS);
end else begin
automatic hpdcache_uint _core_rsp_word;
_core_rsp_word = hpdcache_uint'(refill_core_rsp_word_q)/
HPDCACHE_ACCESS_WORDS;
if (refill_need_rsp_q) begin
automatic hpdcache_uint _refill_cnt;
_refill_cnt = hpdcache_uint'(refill_cnt_q)/HPDCACHE_ACCESS_WORDS;
refill_core_rsp_valid = (_core_rsp_word == _refill_cnt);
end
refill_core_rsp_sid = refill_sid_q;
refill_core_rsp_tid = refill_tid_q;
refill_core_rsp_error = refill_is_error;
refill_core_rsp_word = hpdcache_word_t'(
hpdcache_uint'(refill_core_rsp_word_q)/HPDCACHE_REQ_WORDS);
end
// Write the data into the cache data array
// (same first-access / later-access distinction as above for the target set,
// the victim way and the prefetch flag)
if (refill_cnt_q == 0) begin
refill_set_o = mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH];
refill_way_bypass = 1'b1;
is_prefetch = mshr_ack_is_prefetch;
end else begin
refill_set_o = refill_set_q;
refill_way_bypass = 1'b0;
is_prefetch = refill_is_prefetch_q;
end
// nothing is written into the cache when the memory returned an error
refill_write_data_o = ~refill_is_error;
// Consume chunk of data from the FIFO buffer in the memory interface
refill_fifo_resp_data_r = 1'b1;
// Update directory on the last chunk of data
refill_cnt_d = refill_cnt_q + hpdcache_word_t'(HPDCACHE_ACCESS_WORDS);
if (hpdcache_uint'(refill_cnt_q) == REFILL_LAST_CHUNK_WORD) begin
if (REFILL_LAST_CHUNK_WORD == 0) begin
// Special case: if the cache-line data can be written in a single cycle,
// wait an additional cycle to write the directory. This prevents
// a RAM-to-RAM timing path between the MSHR and the DIR.
refill_fsm_d = REFILL_WRITE_DIR;
end else begin
// Write the new entry in the cache directory
refill_write_dir_o = ~refill_is_error;
// Update the PLRU bits. Only in the following cases:
// - There is no error in response AND
// - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR
// - It is a read miss.
refill_updt_plru_o = ~refill_is_error &
(~is_prefetch | cfg_prefetch_updt_plru_i);
// Update dependency flags in the retry table
refill_updt_rtab_o = 1'b1;
// consume the response from the network
refill_fifo_resp_meta_r = 1'b1;
refill_fsm_d = REFILL_IDLE;
end
end
end
// }}}
// Write cache directory (this state is only visited when ACCESS_WORDS == CL_WORDS,
// this is when the entire cache-line can be written in a single cycle)
// {{{
REFILL_WRITE_DIR: begin
automatic logic is_prefetch;
is_prefetch = refill_is_prefetch_q;
// Select the target set and way
refill_set_o = refill_set_q;
refill_way_bypass = 1'b0;
// Write the new entry in the cache directory
refill_write_dir_o = ~refill_is_error;
// Update the PLRU bits. Only in the following cases:
// - There is no error in response AND
// - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR
// - It is a read miss.
refill_updt_plru_o = ~refill_is_error &
(~is_prefetch | cfg_prefetch_updt_plru_i);
// Update dependency flags in the retry table
refill_updt_rtab_o = 1'b1;
// consume the response from the network
refill_fifo_resp_meta_r = 1'b1;
refill_fsm_d = REFILL_IDLE;
end
// }}}
default: begin
// pragma translate_off
$error("Illegal state");
// pragma translate_on
end
endcase
end
// error flag of the memory response at the head of the metadata FIFO
assign refill_is_error = (refill_fifo_resp_meta_rdata.r_error == HPDCACHE_MEM_RESP_NOK);
// refill status exported to the cache controller: busy flag, cacheline being
// refilled and word offset of the chunk being written
assign refill_busy_o = (refill_fsm_q != REFILL_IDLE),
refill_nline_o = {refill_tag_q, refill_set_q},
refill_word_o = refill_cnt_q;
// MSHR slot to acknowledge, decoded from the ID of the memory response
assign mshr_ack_set = get_ack_mshr_set(refill_fifo_resp_meta_rdata.r_id),
mshr_ack_way = get_ack_mshr_way(refill_fifo_resp_meta_rdata.r_id);
// directory entry written at the end of the refill
assign refill_dir_entry_o.tag = refill_tag_q,
refill_dir_entry_o.reserved = '0;
// response sent to the core (never flagged as aborted)
assign refill_core_rsp.rdata = refill_core_rsp_rdata,
refill_core_rsp.sid = refill_core_rsp_sid,
refill_core_rsp.tid = refill_core_rsp_tid,
refill_core_rsp.error = refill_core_rsp_error,
refill_core_rsp.aborted = 1'b0;
// One-entry buffer on the core response. The write-ok output is left unconnected
// and the read side is always enabled.
hpdcache_fifo_reg #(
.FIFO_DEPTH (1),
.FEEDTHROUGH (HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH),
.fifo_data_t (hpdcache_rsp_t)
) i_refill_core_rsp_buf(
.clk_i,
.rst_ni,
.w_i (refill_core_rsp_valid),
.wok_o (/*unused*/),
.wdata_i (refill_core_rsp),
.r_i (1'b1), // core shall always be ready to consume a response
.rok_o (refill_core_rsp_valid_o),
.rdata_o (refill_core_rsp_o)
);
generate
// refill's width is bigger than the width of the core's interface
// (select the request-wide chunk that holds the requested word)
if (REFILL_REQ_RATIO > 1) begin : core_rsp_data_mux_gen
hpdcache_mux #(
.NINPUT (REFILL_REQ_RATIO),
.DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH)
) data_read_rsp_mux_i(
.data_i (refill_data_o),
.sel_i (refill_core_rsp_word[0 +: $clog2(REFILL_REQ_RATIO)]),
.data_o (refill_core_rsp_rdata)
);
end
// refill's width is equal to the width of the core's interface
else begin
assign refill_core_rsp_rdata = refill_data_o;
end
endgenerate
/* FIXME: when multiple chunks, in case of error, the error bit is not
* necessarily set on all chunks */
// Metadata (error flag and ID) of the memory response. It is pushed into the FIFO
// below only together with the last beat of the response (see the assignment of
// refill_fifo_resp_meta_w).
assign refill_fifo_resp_meta_wdata = '{
r_error: mem_resp_i.mem_resp_r_error,
r_id : mem_resp_i.mem_resp_r_id
};
// Two-entry FIFO holding the metadata of the responses waiting to be refilled
hpdcache_fifo_reg #(
.FIFO_DEPTH (2),
.fifo_data_t (mem_resp_metadata_t)
) i_r_metadata_fifo (
.clk_i,
.rst_ni,
.w_i (refill_fifo_resp_meta_w),
.wok_o (refill_fifo_resp_meta_wok),
.wdata_i(refill_fifo_resp_meta_wdata),
.r_i (refill_fifo_resp_meta_r),
.rok_o (refill_fifo_resp_meta_rok),
.rdata_o(refill_fifo_resp_meta_rdata)
);
// Response data buffer. Depending on the ratio between the memory data width and
// the refill data width, the buffer also converts the width of the data.
generate
// memory interface narrower than the refill width: gather several beats
if (HPDcacheMemDataWidth < HPDCACHE_REFILL_DATA_WIDTH) begin
hpdcache_data_upsize #(
.WR_WIDTH(HPDcacheMemDataWidth),
.RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH),
.DEPTH(2*(HPDCACHE_CL_WIDTH/HPDCACHE_REFILL_DATA_WIDTH))
) i_rdata_upsize (
.clk_i,
.rst_ni,
.w_i (refill_fifo_resp_data_w),
.wlast_i (mem_resp_i.mem_resp_r_last),
.wok_o (refill_fifo_resp_data_wok),
.wdata_i (mem_resp_i.mem_resp_r_data),
.r_i (refill_fifo_resp_data_r),
.rok_o (/* unused */),
.rdata_o (refill_fifo_resp_data_rdata)
);
// memory interface wider than the refill width: split each beat
end else if (HPDcacheMemDataWidth > HPDCACHE_REFILL_DATA_WIDTH) begin
hpdcache_data_downsize #(
.WR_WIDTH(HPDcacheMemDataWidth),
.RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH),
.DEPTH(2*(HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth))
) i_rdata_downsize (
.clk_i,
.rst_ni,
.w_i (refill_fifo_resp_data_w),
.wok_o (refill_fifo_resp_data_wok),
.wdata_i (mem_resp_i.mem_resp_r_data),
.r_i (refill_fifo_resp_data_r),
.rok_o (/* unused */),
.rdata_o (refill_fifo_resp_data_rdata)
);
// same width on both sides: plain two-entry FIFO
end else begin
hpdcache_fifo_reg #(
.FIFO_DEPTH (2),
.fifo_data_t (hpdcache_refill_data_t)
) i_rdata_fifo (
.clk_i,
.rst_ni,
.w_i (refill_fifo_resp_data_w),
.wok_o (refill_fifo_resp_data_wok),
.wdata_i (mem_resp_i.mem_resp_r_data),
.r_i (refill_fifo_resp_data_r),
.rok_o (/* unused */),
.rdata_o (refill_fifo_resp_data_rdata)
);
end
endgenerate
assign refill_data_o = refill_fifo_resp_data_rdata;
// Memory response handshake. Every beat goes into the data buffer; the metadata is
// written only with the last beat. The last beat is accepted only when both
// buffers have room, the other beats only need room in the data buffer.
assign refill_fifo_resp_data_w = mem_resp_valid_i &
(refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last),
refill_fifo_resp_meta_w = mem_resp_valid_i &
(refill_fifo_resp_data_wok & mem_resp_i.mem_resp_r_last),
mem_resp_ready_o = refill_fifo_resp_data_wok &
(refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last);
//  Refill FSM state register
always_ff @(posedge clk_i or negedge rst_ni)
begin : miss_resp_fsm_ff
    if (!rst_ni) begin
        refill_fsm_q <= REFILL_IDLE;
    end else begin
        refill_fsm_q <= refill_fsm_d;
    end
end

//  Context of the refill in progress (no reset: the registers are loaded before
//  being used).
always_ff @(posedge clk_i)
begin : miss_resp_fsm_internal_ff
    //  During the first data write the MSHR outputs and the victim way are
    //  captured, so that the following cycles of the same refill can use them
    if ((refill_fsm_q == REFILL_WRITE) && (refill_cnt_q == 0)) begin
        refill_set_q           <= mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH];
        refill_tag_q           <= mshr_ack_nline[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];
        refill_way_q           <= refill_victim_way_i;
        refill_sid_q           <= mshr_ack_src_id;
        refill_tid_q           <= mshr_ack_req_id;
        refill_need_rsp_q      <= mshr_ack_need_rsp;
        refill_is_prefetch_q   <= mshr_ack_is_prefetch;
        refill_core_rsp_word_q <= mshr_ack_word;
    end

    //  word offset of the next chunk to write (updated every cycle)
    refill_cnt_q <= refill_cnt_d;
end
// }}}
// Miss Status Holding Register component
// {{{
hpdcache_mshr hpdcache_mshr_i (
.clk_i,
.rst_ni,
.empty_o (mshr_empty),
.full_o (mshr_full_o),
.check_i (mshr_check_i),
.check_set_i (mshr_check_set_i),
.check_tag_i (mshr_check_tag_i),
.hit_o (mshr_check_hit_o),
.alloc_i (mshr_alloc),
.alloc_cs_i (mshr_alloc_cs),
.alloc_nline_i (mshr_alloc_nline_i),
.alloc_req_id_i (mshr_alloc_tid_i),
.alloc_src_id_i (mshr_alloc_sid_i),
.alloc_word_i (mshr_alloc_word_i),
.alloc_need_rsp_i (mshr_alloc_need_rsp_i),
.alloc_is_prefetch_i (mshr_alloc_is_prefetch_i),
.alloc_full_o (mshr_alloc_full_o),
.alloc_set_o (mshr_alloc_set_d),
.alloc_tag_o (mshr_alloc_tag_d),
.alloc_way_o (mshr_alloc_way_d),
.ack_i (mshr_ack),
.ack_cs_i (mshr_ack_cs),
.ack_set_i (mshr_ack_set),
.ack_way_i (mshr_ack_way),
.ack_req_id_o (mshr_ack_req_id),
.ack_src_id_o (mshr_ack_src_id),
.ack_nline_o (mshr_ack_nline),
.ack_word_o (mshr_ack_word),
.ack_need_rsp_o (mshr_ack_need_rsp),
.ack_is_prefetch_o (mshr_ack_is_prefetch)
);
// Indicate to the cache controller that there is no pending miss. That
// is, the MSHR is empty and the miss handler has finished processing
// the last miss response.
assign mshr_empty_o = mshr_empty & ~refill_busy_o;
// }}}
// Assertions
// {{{
// pragma translate_off
initial assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_SET_WIDTH + HPDCACHE_MSHR_WAY_WIDTH)) else
$error("miss_handler: not enough ID bits in the memory interface");
// pragma translate_on
// }}}
endmodule
// }}}

View File

@ -0,0 +1,385 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache Miss Status Holding Register (MSHR)
* History :
*/
// HPDcache Miss Status Holding Register (MSHR).
//
// Set-associative table of HPDCACHE_MSHR_SETS x HPDCACHE_MSHR_WAYS entries.
// - The valid bit of every entry is kept in flip-flops (mshr_valid_q).
// - The entry payload (mshr_entry_t) is kept in a single-port memory
//   (SRAM or register bank) addressed by the MSHR set; one memory word
//   holds all the ways of a set.
// - A small translation table (hpdcache_mshr_to_cache_set) is used to
//   rebuild the HPDcache set of an entry on acknowledge.
//
// Three operations share the memory port: check, alloc and ack. The address
// mux below selects ack first, then alloc, then check.
module hpdcache_mshr
import hpdcache_pkg::*;
// Ports
// {{{
(
// Clock and reset signals
input logic clk_i,
input logic rst_ni,
// Global control signals
// empty_o: no entry is valid; full_o: every entry is valid
output logic empty_o,
output logic full_o,
// Check and allocation interface
// check_i selects check_set_i as memory address (when no alloc/ack) and
// latches it into check_set_q; hit_o compares the read tags with check_tag_i.
input logic check_i,
input mshr_set_t check_set_i,
input mshr_tag_t check_tag_i,
output logic hit_o,
// alloc_i is the write enable of the memory and sets the valid bit of the
// selected way; alloc_cs_i only contributes to the memory chip select.
input logic alloc_i,
input logic alloc_cs_i,
input hpdcache_nline_t alloc_nline_i,
input hpdcache_req_tid_t alloc_req_id_i,
input hpdcache_req_sid_t alloc_src_id_i,
input hpdcache_word_t alloc_word_i,
input logic alloc_need_rsp_i,
input logic alloc_is_prefetch_i,
// alloc_full_o: no free way in the set latched by the last check (check_set_q)
output logic alloc_full_o,
// set / tag / way of the entry being allocated
output mshr_set_t alloc_set_o,
output mshr_tag_t alloc_tag_o,
output mshr_way_t alloc_way_o,
// Acknowledge interface
// ack_i clears the valid bit of entry {ack_way_i, ack_set_i}, selects
// ack_set_i as memory address and latches set/way for the read outputs;
// ack_cs_i only contributes to the memory chip select.
input logic ack_i,
input logic ack_cs_i,
input mshr_set_t ack_set_i,
input mshr_way_t ack_way_i,
// Fields of the acknowledged entry, taken from the memory read data of
// the way latched in ack_way_q
output hpdcache_req_tid_t ack_req_id_o,
output hpdcache_req_sid_t ack_src_id_o,
output hpdcache_nline_t ack_nline_o,
output hpdcache_word_t ack_word_o,
output logic ack_need_rsp_o,
output logic ack_is_prefetch_o
);
// }}}
// Definition of constants and types
// {{{
// Payload stored in the memory for each MSHR entry
typedef struct packed {
mshr_tag_t tag;
hpdcache_req_tid_t req_id;
hpdcache_req_sid_t src_id;
hpdcache_word_t word_idx;
logic need_rsp;
logic is_prefetch;
} mshr_entry_t;
// Compute the width of the MSHR entries in the memory. When the memory uses
// write byte enables, the entry is padded up to a multiple of 8 bits;
// otherwise (write bit mask) the exact number of bits is used.
localparam int unsigned HPDCACHE_MSHR_ENTRY_BITS = $bits(mshr_entry_t);
localparam int unsigned HPDCACHE_MSHR_RAM_ENTRY_BITS =
HPDCACHE_MSHR_RAM_WBYTEENABLE ?
((HPDCACHE_MSHR_ENTRY_BITS + 7)/8) * 8 : // align to 8 bits
HPDCACHE_MSHR_ENTRY_BITS; // or use the exact number of bits
typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_data_t;
// }}}
// Definition of internal wires and registers
// {{{
// One valid bit per entry, flattened; read below at index (way*SETS + set)
logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_q, mshr_valid_d;
// Set of the last accepted check operation
mshr_set_t check_set_q;
mshr_set_t alloc_set;
mshr_tag_t alloc_tag;
hpdcache_set_t alloc_dcache_set;
// Way and set of the last acknowledge operation
mshr_way_t ack_way_q;
mshr_set_t ack_set_q;
hpdcache_set_t ack_dcache_set;
hpdcache_tag_t ack_dcache_tag;
// One-hot (or zero) set / reset masks applied on the valid bits
logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_set, mshr_valid_rst;
// Per-way write / read views of the memory word (structured and raw)
mshr_entry_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wentry;
mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wdata;
mshr_entry_t [HPDCACHE_MSHR_WAYS-1:0] mshr_rentry;
mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_rdata;
logic mshr_we;
logic mshr_cs;
mshr_set_t mshr_addr;
logic check;
// }}}
// Control part for the allocation and check operations
// {{{
// The allocation operation takes priority over the check operation
assign check = check_i & ~alloc_i;
// Split the allocated cache line number: the low bits index the MSHR set,
// the remaining high bits are the MSHR tag. The HPDcache set (low bits,
// HPDcache set width) is forwarded to the translation table.
assign alloc_set = alloc_nline_i[0 +: HPDCACHE_MSHR_SET_WIDTH],
alloc_tag = alloc_nline_i[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH],
alloc_dcache_set = alloc_nline_i[0 +: HPDCACHE_SET_WIDTH];
// Look for an available way in case of allocation: the lowest-numbered
// invalid way of alloc_set is selected. If no way is free the result is 0.
always_comb
begin
automatic mshr_way_t found_available_way;
found_available_way = 0;
for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS + int'(alloc_set)]) begin
found_available_way = mshr_way_t'(i);
break;
end
end
alloc_way_o = found_available_way;
end
// Look if the mshr can accept the checked nline (in case of allocation).
// Note that this uses the registered check set (check_set_q), not alloc_set.
always_comb
begin
automatic bit found_available;
found_available = 1'b0;
for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS + int'(check_set_q)]) begin
found_available = 1'b1;
break;
end
end
alloc_full_o = ~found_available;
end
assign alloc_set_o = alloc_set,
alloc_tag_o = alloc_tag;
// Write when there is an allocation operation
assign mshr_we = alloc_i;
// HPDcache SET to MSHR SET translation table: written on allocation, read
// with the set/way of the last acknowledge
hpdcache_mshr_to_cache_set trlt_i (
.clk_i,
.write_i (mshr_we),
.write_dcache_set_i (alloc_dcache_set),
.write_mshr_way_i (alloc_way_o),
.read_mshr_set_i (ack_set_q),
.read_mshr_way_i (ack_way_q),
.read_dcache_set_o (ack_dcache_set)
);
// Generate write data: the same entry is presented on every way; the write
// mask / byte enable generated further below selects the way actually written
always_comb
begin
for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
mshr_wentry[i].tag = alloc_tag;
mshr_wentry[i].req_id = alloc_req_id_i;
mshr_wentry[i].src_id = alloc_src_id_i;
mshr_wentry[i].word_idx = alloc_word_i;
mshr_wentry[i].need_rsp = alloc_need_rsp_i;
mshr_wentry[i].is_prefetch = alloc_is_prefetch_i;
end
end
// }}}
// Shared control signals
// {{{
// The memory is selected by any of the three operations
assign mshr_cs = check_i | alloc_cs_i | ack_cs_i;
// Address priority: ack, then alloc, then check
assign mshr_addr = ack_i ? ack_set_i :
(alloc_i ? alloc_set : check_set_i);
// Build the set / reset masks of the valid bits
// NOTE(review): the slot index is the concatenation {way, set}, i.e.
// way * 2**HPDCACHE_MSHR_SET_WIDTH + set, while the valid bits are read
// elsewhere at (way * HPDCACHE_MSHR_SETS + set). Both agree only when
// HPDCACHE_MSHR_SETS is a power of two -- confirm this is a constraint.
always_comb
begin : mshr_valid_comb
automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_alloc_slot;
automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_ack_slot;
mshr_alloc_slot = {alloc_way_o, alloc_set};
mshr_ack_slot = { ack_way_i, ack_set_i};
for (int unsigned i = 0; i < HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS; i++) begin
mshr_valid_rst[i] = (i == hpdcache_uint'(mshr_ack_slot)) ? ack_i : 1'b0;
mshr_valid_set[i] = (i == hpdcache_uint'(mshr_alloc_slot)) ? alloc_i : 1'b0;
end
end
// Next valid state: an invalid entry becomes valid when set, a valid entry
// stays valid unless it is reset
assign mshr_valid_d = (~mshr_valid_q & mshr_valid_set) | (mshr_valid_q & ~mshr_valid_rst);
// }}}
// Read interface (ack)
// {{{
// Rebuild the HPDcache tag of the acknowledged entry
generate
// extract HPDcache tag from the MSb of the MSHR TAG
if (HPDCACHE_SETS >= HPDCACHE_MSHR_SETS) begin : ack_dcache_set_ge_mshr_set_gen
assign ack_dcache_tag = mshr_rentry[ack_way_q].tag[
HPDCACHE_MSHR_TAG_WIDTH - 1 :
HPDCACHE_MSHR_TAG_WIDTH - HPDCACHE_TAG_WIDTH];
end
// HPDcache tag is the MSHR tag concatenated with the MSb of the MSHR set
else begin : ack_dcache_set_lt_mshr_set_gen
assign ack_dcache_tag = {
mshr_rentry[ack_way_q].tag ,
ack_set_q[HPDCACHE_MSHR_SET_WIDTH - 1:HPDCACHE_SET_WIDTH]};
end
endgenerate
// Outputs of the acknowledged entry, selected with the registered way.
// NOTE(review): this presumably relies on the memory returning its read
// data the cycle after the access -- confirm against the memory models.
assign ack_req_id_o = mshr_rentry[ack_way_q].req_id,
ack_src_id_o = mshr_rentry[ack_way_q].src_id,
ack_nline_o = {ack_dcache_tag, ack_dcache_set},
ack_word_o = mshr_rentry[ack_way_q].word_idx,
ack_need_rsp_o = mshr_rentry[ack_way_q].need_rsp,
ack_is_prefetch_o = mshr_rentry[ack_way_q].is_prefetch;
// }}}
// Global control signals
// {{{
assign empty_o = ~|mshr_valid_q;
assign full_o = &mshr_valid_q;
// Hit detection: a way hits when it is valid in the registered check set
// and the tag read from the memory equals check_tag_i
always_comb
begin : hit_comb
automatic bit [HPDCACHE_MSHR_WAYS-1:0] __hit_way;
for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin
automatic bit __valid;
automatic bit __match;
__valid = mshr_valid_q[w*HPDCACHE_MSHR_SETS + int'(check_set_q)];
__match = (mshr_rentry[w].tag == check_tag_i);
__hit_way[w] = (__valid && __match);
end
hit_o = |__hit_way;
end
// }}}
// Internal state assignment
// {{{
// Valid bits are updated every cycle; the ack set/way and the check set are
// only captured when the corresponding operation is accepted
always_ff @(posedge clk_i or negedge rst_ni)
begin : mshr_ff_set
if (!rst_ni) begin
mshr_valid_q <= '0;
ack_way_q <= '0;
ack_set_q <= '0;
check_set_q <= '0;
end else begin
mshr_valid_q <= mshr_valid_d;
if (ack_i) begin
ack_way_q <= ack_way_i;
ack_set_q <= ack_set_i;
end
if (check) begin
check_set_q <= check_set_i;
end
end
end
// }}}
// Internal components
// {{{
// Entry storage. Four variants are selected by package parameters:
// write byte enable vs. write bit mask, and register bank vs. SRAM.
// In all variants, only the way given by alloc_way_o is enabled for writing.
generate
if (HPDCACHE_MSHR_RAM_WBYTEENABLE) begin : mshr_wbyteenable_gen
typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS/8-1:0] mshr_sram_wbyteenable_t;
mshr_sram_wbyteenable_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wbyteenable;
// Enable all the bytes of the allocated way, none of the other ways
always_comb
begin : mshr_wbyteenable_comb
for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
mshr_wbyteenable[i] = (int'(alloc_way_o) == i) ? '1 : '0;
end
end
if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen
hpdcache_regbank_wbyteenable_1rw #(
.DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
.ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH)
) mshr_mem(
.clk (clk_i),
.rst_n (rst_ni),
.cs (mshr_cs),
.we (mshr_we),
.addr (mshr_addr),
.wbyteenable (mshr_wbyteenable),
.wdata (mshr_wdata),
.rdata (mshr_rdata)
);
end else begin : mshr_sram_gen
hpdcache_sram_wbyteenable #(
.DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
.ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH)
) mshr_mem(
.clk (clk_i),
.rst_n (rst_ni),
.cs (mshr_cs),
.we (mshr_we),
.addr (mshr_addr),
.wbyteenable (mshr_wbyteenable),
.wdata (mshr_wdata),
.rdata (mshr_rdata)
);
end
end else begin : mshr_wmask_gen
typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_wmask_t;
mshr_sram_wmask_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wmask;
// Enable all the bits of the allocated way, none of the other ways
always_comb
begin : mshr_wmask_comb
for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
mshr_wmask[i] = (int'(alloc_way_o) == i) ? '1 : '0;
end
end
if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen
hpdcache_regbank_wmask_1rw #(
.DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
.ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH)
) mshr_mem(
.clk (clk_i),
.rst_n (rst_ni),
.cs (mshr_cs),
.we (mshr_we),
.addr (mshr_addr),
.wmask (mshr_wmask),
.wdata (mshr_wdata),
.rdata (mshr_rdata)
);
end else begin : mshr_sram_gen
hpdcache_sram_wmask #(
.DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS),
.ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH)
) mshr_mem(
.clk (clk_i),
.rst_n (rst_ni),
.cs (mshr_cs),
.we (mshr_we),
.addr (mshr_addr),
.wmask (mshr_wmask),
.wdata (mshr_wdata),
.rdata (mshr_rdata)
);
end
end
endgenerate
// Adapt the structured entries to the (possibly wider) memory word: the
// entry is cast to the memory word width on write, and the low
// HPDCACHE_MSHR_ENTRY_BITS bits of the word are taken back on read
always_comb
begin : ram_word_fitting_comb
for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin
mshr_wdata[i] = mshr_sram_data_t'(mshr_wentry[i]);
mshr_rentry[i] = mshr_entry_t'(mshr_rdata[i][0 +: HPDCACHE_MSHR_ENTRY_BITS]);
end
end
// }}}
// Assertions
// {{{
// An acknowledge must never be issued in the same cycle as an allocation
// or a check (they share the single memory port).
// NOTE(review): the property has no "disable iff" on reset, so it is also
// evaluated while rst_ni is asserted -- confirm this is intended.
// pragma translate_off
one_command_assert: assert property (@(posedge clk_i)
(ack_i -> !(alloc_i || check_i))) else
$error("MSHR: ack with concurrent alloc or check");
// pragma translate_on
// }}}
endmodule

View File

@ -0,0 +1,105 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache MSHR set translation table
* History :
*/
module hpdcache_mshr_to_cache_set
import hpdcache_pkg::*;
    //  Ports
    //  {{{
(
    //  Clock signals
    input  logic          clk_i,

    //  Write interface
    input  logic          write_i,
    input  hpdcache_set_t write_dcache_set_i,
    input  mshr_way_t     write_mshr_way_i,

    //  Read interface
    input  mshr_way_t     read_mshr_way_i,
    input  mshr_set_t     read_mshr_set_i,
    output hpdcache_set_t read_dcache_set_o
);
    //  }}}

    //  Translation from an MSHR {set, way} back to the HPDcache set.
    //
    //  The MSHR set is made of the low bits of the cache line number, and so
    //  is the HPDcache set. Two configurations are possible, selected at
    //  elaboration time from the package parameters.
    generate
        //  The HPDcache has at most as many sets as the MSHR
        //  {{{
        //  The HPDcache set is entirely contained in the MSHR set: a cast is
        //  enough and no storage is needed.
        if (HPDCACHE_SETS <= HPDCACHE_MSHR_SETS) begin : hpdcache_sets_le_mshr_sets_gen
            assign read_dcache_set_o = hpdcache_set_t'(read_mshr_set_i);
        end
        //  }}}

        //  The HPDcache has more sets than the MSHR
        //  {{{
        //  The upper bits of the HPDcache set are not part of the MSHR set,
        //  so they are saved in a flip-flop table (one entry per MSHR
        //  set/way) when the entry is written, and read back later.
        else begin : hpdcache_sets_gt_mshr_sets_gen
            //  Number of HPDcache set bits that do not fit in the MSHR set
            localparam hpdcache_uint TRLT_TAB_ENTRY_WIDTH =
                    HPDCACHE_SET_WIDTH - HPDCACHE_MSHR_SET_WIDTH;

            typedef logic [TRLT_TAB_ENTRY_WIDTH-1:0] trlt_entry_t;

            //  Translation table (upper bits of the HPDcache set)
            trlt_entry_t [HPDCACHE_MSHR_SETS-1:0][HPDCACHE_MSHR_WAYS-1:0] tab;
            trlt_entry_t tab_wdata;
            mshr_set_t   write_mshr_set;

            //  Write side
            //  {{{
            //  Low bits select the table row, the remaining high bits are
            //  the data stored in the table.
            assign write_mshr_set = write_dcache_set_i[0 +: HPDCACHE_MSHR_SET_WIDTH];
            assign tab_wdata      = write_dcache_set_i[HPDCACHE_MSHR_SET_WIDTH +:
                                                       TRLT_TAB_ENTRY_WIDTH];

            always_ff @(posedge clk_i)
            begin
                if (write_i) tab[write_mshr_set][write_mshr_way_i] <= tab_wdata;
            end
            //  }}}

            //  Read side
            //  {{{
            //  Stored upper bits concatenated with the MSHR set
            assign read_dcache_set_o = {tab[read_mshr_set_i][read_mshr_way_i],
                                        read_mshr_set_i};
            //  }}}
        end
        //  }}}
    endgenerate

    //  Assertions
    //  {{{
    //  pragma translate_off
    //  pragma translate_on
    //  }}}
endmodule

View File

@ -0,0 +1,623 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : Write-Through (WT), High-Throughput (HTPUT) HPDcache Package
* History :
*/
package hpdcache_pkg;
// Definition of global constants for the HPDcache data and directory
// {{{
// HPDcache physical address width (bits)
localparam int unsigned HPDCACHE_PA_WIDTH = hpdcache_params_pkg::PARAM_PA_WIDTH;
// HPDcache number of sets
localparam int unsigned HPDCACHE_SETS = hpdcache_params_pkg::PARAM_SETS;
// HPDcache number of ways
localparam int unsigned HPDCACHE_WAYS = hpdcache_params_pkg::PARAM_WAYS;
// HPDcache word width (bits)
localparam int unsigned HPDCACHE_WORD_WIDTH = hpdcache_params_pkg::PARAM_WORD_WIDTH;
// HPDcache number of words per cache line
localparam int unsigned HPDCACHE_CL_WORDS = hpdcache_params_pkg::PARAM_CL_WORDS;
// HPDcache number of words in the request data channels (request and response)
localparam int unsigned HPDCACHE_REQ_WORDS = hpdcache_params_pkg::PARAM_REQ_WORDS;
// HPDcache request transaction ID width (bits)
localparam int unsigned HPDCACHE_REQ_TRANS_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_TRANS_ID_WIDTH;
// HPDcache request source ID width (bits)
localparam int unsigned HPDCACHE_REQ_SRC_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_SRC_ID_WIDTH;
// }}}
// Utility definitions
// {{{
typedef logic unsigned [31:0] hpdcache_uint;
typedef logic signed [31:0] hpdcache_int;
typedef logic unsigned [31:0] hpdcache_uint32;
typedef logic signed [31:0] hpdcache_int32;
typedef logic unsigned [63:0] hpdcache_uint64;
typedef logic signed [63:0] hpdcache_int64;
// }}}
// Definition of constants and types for HPDcache directory memory
// {{{
localparam int unsigned HPDCACHE_CL_WIDTH = HPDCACHE_CL_WORDS*HPDCACHE_WORD_WIDTH;
localparam int unsigned HPDCACHE_OFFSET_WIDTH = $clog2(HPDCACHE_CL_WIDTH/8);
localparam int unsigned HPDCACHE_NLINE_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_OFFSET_WIDTH;
localparam int unsigned HPDCACHE_SET_WIDTH = $clog2(HPDCACHE_SETS);
localparam int unsigned HPDCACHE_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_SET_WIDTH;
localparam int unsigned HPDCACHE_WORD_IDX_WIDTH = $clog2(HPDCACHE_CL_WORDS);
typedef logic unsigned [ HPDCACHE_OFFSET_WIDTH-1:0] hpdcache_offset_t;
typedef logic unsigned [ HPDCACHE_NLINE_WIDTH-1:0] hpdcache_nline_t;
typedef logic unsigned [ HPDCACHE_SET_WIDTH-1:0] hpdcache_set_t;
typedef logic unsigned [ HPDCACHE_TAG_WIDTH-1:0] hpdcache_tag_t;
typedef logic unsigned [ $clog2(HPDCACHE_WAYS)-1:0] hpdcache_way_t;
typedef logic unsigned [ HPDCACHE_WAYS-1:0] hpdcache_way_vector_t;
typedef logic unsigned [HPDCACHE_WORD_IDX_WIDTH-1:0] hpdcache_word_t;
typedef struct packed {
hpdcache_tag_t tag;
logic [1:0] reserved;
} hpdcache_dir_entry_t;
localparam int unsigned HPDCACHE_DIR_RAM_WIDTH = $bits(hpdcache_dir_entry_t);
localparam int unsigned HPDCACHE_DIR_RAM_DEPTH = HPDCACHE_SETS;
localparam int unsigned HPDCACHE_DIR_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DIR_RAM_DEPTH);
typedef logic [HPDCACHE_DIR_RAM_ADDR_WIDTH-1:0] hpdcache_dir_addr_t;
function automatic hpdcache_way_t hpdcache_way_vector_to_index(input hpdcache_way_vector_t way);
    //  Convert a way bit-vector into a way index.
    //  Returns the index of the lowest bit set in `way`, or 0 when no bit is
    //  set. The vector is scanned from the highest way down, so the lowest
    //  set bit is the last one to overwrite the result.
    hpdcache_way_t index;
    index = 0;
    for (int i = int'(HPDCACHE_WAYS) - 1; i >= 0; i--) begin
        if (way[i]) index = hpdcache_way_t'(i);
    end
    return index;
endfunction
// }}}
// Definition of constants and types for HPDcache data memory
// {{{
localparam int unsigned HPDCACHE_DATA_WAYS_PER_RAM_WORD =
hpdcache_params_pkg::PARAM_DATA_WAYS_PER_RAM_WORD;
localparam int unsigned HPDCACHE_DATA_SETS_PER_RAM = /* FIXME this parameter is currently ignored */
hpdcache_params_pkg::PARAM_DATA_SETS_PER_RAM;
// HPDcache DATA RAM implements write byte enable
localparam bit HPDCACHE_DATA_RAM_WBYTEENABLE =
hpdcache_params_pkg::PARAM_DATA_RAM_WBYTEENABLE;
// Define the number of memory contiguous words that can be accessed
// simultaneously from the cache.
// - This limits the maximum width for the data channel from requesters
// - This impacts the refill latency
localparam int unsigned HPDCACHE_ACCESS_WORDS = hpdcache_params_pkg::PARAM_ACCESS_WORDS;
localparam int unsigned HPDCACHE_DATA_RAM_WIDTH =
HPDCACHE_DATA_WAYS_PER_RAM_WORD*HPDCACHE_WORD_WIDTH;
localparam int unsigned HPDCACHE_DATA_RAM_Y_CUTS = HPDCACHE_WAYS/HPDCACHE_DATA_WAYS_PER_RAM_WORD;
localparam int unsigned HPDCACHE_DATA_RAM_X_CUTS = HPDCACHE_ACCESS_WORDS;
localparam int unsigned HPDCACHE_DATA_RAM_ACCESS_WIDTH = HPDCACHE_ACCESS_WORDS*HPDCACHE_WORD_WIDTH;
localparam int unsigned HPDCACHE_DATA_RAM_ENTR_PER_SET = HPDCACHE_CL_WORDS/HPDCACHE_ACCESS_WORDS;
localparam int unsigned HPDCACHE_DATA_RAM_DEPTH = HPDCACHE_SETS*HPDCACHE_DATA_RAM_ENTR_PER_SET;
localparam int unsigned HPDCACHE_DATA_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DATA_RAM_DEPTH);
typedef logic [ HPDCACHE_WORD_WIDTH-1:0] hpdcache_data_word_t;
typedef logic [ HPDCACHE_WORD_WIDTH/8-1:0] hpdcache_data_be_t;
typedef logic [ $clog2(HPDCACHE_DATA_RAM_Y_CUTS)-1:0] hpdcache_data_ram_row_idx_t;
typedef logic [ $clog2(HPDCACHE_DATA_WAYS_PER_RAM_WORD)-1:0] hpdcache_data_ram_way_idx_t;
typedef logic [HPDCACHE_DATA_RAM_ADDR_WIDTH-1:0] hpdcache_data_ram_addr_t;
typedef hpdcache_data_word_t[HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_data_t;
typedef hpdcache_data_be_t [HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_be_t;
typedef hpdcache_data_ram_data_t
[HPDCACHE_DATA_RAM_Y_CUTS-1:0]
[HPDCACHE_DATA_RAM_X_CUTS-1:0]
hpdcache_data_entry_t;
typedef hpdcache_data_ram_be_t
[HPDCACHE_DATA_RAM_Y_CUTS-1:0]
[HPDCACHE_DATA_RAM_X_CUTS-1:0]
hpdcache_data_be_entry_t;
typedef logic
[HPDCACHE_DATA_RAM_X_CUTS-1:0]
hpdcache_data_row_enable_t;
typedef hpdcache_data_row_enable_t
[HPDCACHE_DATA_RAM_Y_CUTS-1:0]
hpdcache_data_enable_t;
typedef hpdcache_data_ram_addr_t
[HPDCACHE_DATA_RAM_Y_CUTS-1:0]
[HPDCACHE_DATA_RAM_X_CUTS-1:0]
hpdcache_data_addr_t;
// }}}
// Definition of interface with miss handler
// {{{
localparam int unsigned HPDCACHE_REFILL_DATA_WIDTH = HPDCACHE_DATA_RAM_ACCESS_WIDTH;
// Use feedthrough FIFOs from the refill handler to the core. This
// reduces the latency (by one cycle) but adds an additional timing path
localparam bit HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH =
hpdcache_params_pkg::PARAM_REFILL_CORE_RSP_FEEDTHROUGH;
typedef hpdcache_data_word_t[HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_data_t;
typedef hpdcache_data_be_t [HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_be_t;
// }}}
// Definition of interface with requesters
// {{{
localparam int unsigned HPDCACHE_REQ_DATA_WIDTH = HPDCACHE_REQ_WORDS*HPDCACHE_WORD_WIDTH;
localparam int unsigned HPDCACHE_REQ_DATA_BYTES = HPDCACHE_REQ_DATA_WIDTH/8;
localparam int unsigned HPDCACHE_REQ_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_WORDS);
localparam int unsigned HPDCACHE_REQ_BYTE_OFFSET_WIDTH = $clog2(HPDCACHE_REQ_DATA_BYTES);
localparam int unsigned HPDCACHE_REQ_OFFSET_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_TAG_WIDTH;
typedef logic [HPDCACHE_PA_WIDTH-1:0] hpdcache_req_addr_t;
typedef logic [HPDCACHE_REQ_OFFSET_WIDTH-1:0] hpdcache_req_offset_t;
typedef hpdcache_data_word_t [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_data_t;
typedef hpdcache_data_be_t [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_be_t;
typedef logic [2:0] hpdcache_req_size_t;
typedef logic [HPDCACHE_REQ_SRC_ID_WIDTH-1:0] hpdcache_req_sid_t;
typedef logic [HPDCACHE_REQ_TRANS_ID_WIDTH-1:0] hpdcache_req_tid_t;
// Definition of operation codes
// {{{
typedef enum logic [3:0] {
HPDCACHE_REQ_LOAD = 4'h0,
HPDCACHE_REQ_STORE = 4'h1,
// RESERVED = 4'h2,
// RESERVED = 4'h3,
HPDCACHE_REQ_AMO_LR = 4'h4,
HPDCACHE_REQ_AMO_SC = 4'h5,
HPDCACHE_REQ_AMO_SWAP = 4'h6,
HPDCACHE_REQ_AMO_ADD = 4'h7,
HPDCACHE_REQ_AMO_AND = 4'h8,
HPDCACHE_REQ_AMO_OR = 4'h9,
HPDCACHE_REQ_AMO_XOR = 4'ha,
HPDCACHE_REQ_AMO_MAX = 4'hb,
HPDCACHE_REQ_AMO_MAXU = 4'hc,
HPDCACHE_REQ_AMO_MIN = 4'hd,
HPDCACHE_REQ_AMO_MINU = 4'he,
HPDCACHE_REQ_CMO = 4'hf
} hpdcache_req_op_t;
// }}}
// Definition of CMO codes
// {{{
typedef enum hpdcache_req_size_t {
HPDCACHE_REQ_CMO_FENCE = 3'h0,
// RESERVED = 3'h1,
HPDCACHE_REQ_CMO_INVAL_NLINE = 3'h2,
HPDCACHE_REQ_CMO_INVAL_SET_WAY = 3'h3,
HPDCACHE_REQ_CMO_INVAL_ALL = 3'h4,
HPDCACHE_REQ_CMO_PREFETCH = 3'h5
} hpdcache_req_cmo_t;
// }}}
// Definition of PMA flags
// {{{
typedef struct packed
{
logic uncacheable;
logic io; // FIXME: for future use
} hpdcache_pma_t;
// }}}
// Definition of interfaces
// {{{
// Request Interface
typedef struct packed
{
hpdcache_req_offset_t addr_offset;
hpdcache_req_data_t wdata;
hpdcache_req_op_t op;
hpdcache_req_be_t be;
hpdcache_req_size_t size;
hpdcache_req_sid_t sid;
hpdcache_req_tid_t tid;
logic need_rsp;
// only valid in case of physically indexed requests
logic phys_indexed;
hpdcache_tag_t addr_tag;
hpdcache_pma_t pma;
} hpdcache_req_t;
// Response Interface
typedef struct packed
{
hpdcache_req_data_t rdata;
hpdcache_req_sid_t sid;
hpdcache_req_tid_t tid;
logic error;
logic aborted;
} hpdcache_rsp_t;
// }}}
// Definition of functions
// {{{
function automatic logic is_load(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is a LOAD, 0 otherwise.
    is_load = 1'b0;
    case (op)
        HPDCACHE_REQ_LOAD: is_load = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_store(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is a STORE, 0 otherwise.
    is_store = 1'b0;
    case (op)
        HPDCACHE_REQ_STORE: is_store = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is any of the atomic memory operations
    //  (LR, SC, SWAP, ADD, AND, OR, XOR, MAX, MAXU, MIN, MINU), 0 otherwise.
    is_amo = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_LR,   HPDCACHE_REQ_AMO_SC,
        HPDCACHE_REQ_AMO_SWAP, HPDCACHE_REQ_AMO_ADD,
        HPDCACHE_REQ_AMO_AND,  HPDCACHE_REQ_AMO_OR,
        HPDCACHE_REQ_AMO_XOR,
        HPDCACHE_REQ_AMO_MAX,  HPDCACHE_REQ_AMO_MAXU,
        HPDCACHE_REQ_AMO_MIN,  HPDCACHE_REQ_AMO_MINU: is_amo = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_lr(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO load-reserved (LR), 0 otherwise.
    is_amo_lr = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_LR: is_amo_lr = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_sc(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO store-conditional (SC), 0 otherwise.
    is_amo_sc = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_SC: is_amo_sc = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_swap(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO SWAP, 0 otherwise.
    is_amo_swap = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_SWAP: is_amo_swap = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_add(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO ADD, 0 otherwise.
    is_amo_add = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_ADD: is_amo_add = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_and(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO AND, 0 otherwise.
    is_amo_and = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_AND: is_amo_and = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_or(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO OR, 0 otherwise.
    is_amo_or = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_OR: is_amo_or = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_xor(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO XOR, 0 otherwise.
    is_amo_xor = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_XOR: is_amo_xor = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_max(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO MAX, 0 otherwise.
    is_amo_max = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_MAX: is_amo_max = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_maxu(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO MAXU, 0 otherwise.
    is_amo_maxu = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_MAXU: is_amo_maxu = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_min(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO MIN, 0 otherwise.
    is_amo_min = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_MIN: is_amo_min = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_amo_minu(input hpdcache_req_op_t op);
    //  Returns 1 when the operation is an AMO MINU, 0 otherwise.
    is_amo_minu = 1'b0;
    case (op)
        HPDCACHE_REQ_AMO_MINU: is_amo_minu = 1'b1;
        default: ;
    endcase
endfunction
function automatic logic is_cmo_inval(
        input hpdcache_req_op_t op,
        input hpdcache_req_size_t sz);
    //  Returns 1 when the request is a CMO whose sub-operation (carried in
    //  the size field) is one of the invalidations: by cache line, by
    //  set/way, or of the whole cache. Returns 0 otherwise.
    is_cmo_inval = 1'b0;
    case (op)
        HPDCACHE_REQ_CMO: begin
            case (sz)
                HPDCACHE_REQ_CMO_INVAL_NLINE,
                HPDCACHE_REQ_CMO_INVAL_SET_WAY,
                HPDCACHE_REQ_CMO_INVAL_ALL: is_cmo_inval = 1'b1;
                default: ;
            endcase
        end
        default: ;
    endcase
endfunction
function automatic logic is_cmo_inval_by_nline(input hpdcache_req_size_t sz);
    //  Result of comparing the CMO sub-operation with "invalidate by cache line".
    is_cmo_inval_by_nline = (sz == HPDCACHE_REQ_CMO_INVAL_NLINE);
endfunction
function automatic logic is_cmo_inval_by_set(input hpdcache_req_size_t sz);
    //  Result of comparing the CMO sub-operation with "invalidate by set and way".
    is_cmo_inval_by_set = (sz == HPDCACHE_REQ_CMO_INVAL_SET_WAY);
endfunction
function automatic logic is_cmo_inval_all(input hpdcache_req_size_t sz);
    //  Result of comparing the CMO sub-operation with "invalidate all".
    is_cmo_inval_all = (sz == HPDCACHE_REQ_CMO_INVAL_ALL);
endfunction
function automatic logic is_cmo_fence(
        input hpdcache_req_op_t op,
        input hpdcache_req_size_t sz);
    //  For a CMO request, returns the comparison of the sub-operation
    //  (size field) with FENCE. Returns 0 for any other operation.
    is_cmo_fence = 1'b0;
    case (op)
        HPDCACHE_REQ_CMO: is_cmo_fence = (sz == HPDCACHE_REQ_CMO_FENCE);
        default: ;
    endcase
endfunction
function automatic logic is_cmo_prefetch(
        input hpdcache_req_op_t op,
        input hpdcache_req_size_t sz);
    //  For a CMO request, returns the comparison of the sub-operation
    //  (size field) with PREFETCH. Returns 0 for any other operation.
    is_cmo_prefetch = 1'b0;
    case (op)
        HPDCACHE_REQ_CMO: is_cmo_prefetch = (sz == HPDCACHE_REQ_CMO_PREFETCH);
        default: ;
    endcase
endfunction
function automatic hpdcache_tag_t hpdcache_get_req_addr_tag(input hpdcache_req_addr_t addr);
    //  Extract the tag: the HPDCACHE_TAG_WIDTH bits located just above the
    //  line offset and set fields of the address.
    localparam int unsigned TAG_LSB = HPDCACHE_OFFSET_WIDTH + HPDCACHE_SET_WIDTH;
    return addr[TAG_LSB +: HPDCACHE_TAG_WIDTH];
endfunction
function automatic hpdcache_set_t hpdcache_get_req_addr_set(input hpdcache_req_addr_t addr);
    //  Extract the set index: the HPDCACHE_SET_WIDTH bits located just above
    //  the line offset field of the address.
    hpdcache_get_req_addr_set = addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH];
endfunction
function automatic hpdcache_word_t hpdcache_get_req_addr_word(input hpdcache_req_addr_t addr);
    //  Extract the index of the word within the cache line: the
    //  HPDCACHE_WORD_IDX_WIDTH bits located just above the byte-in-word bits.
    localparam int unsigned WORD_LSB = $clog2(HPDCACHE_WORD_WIDTH/8);
    return addr[WORD_LSB +: HPDCACHE_WORD_IDX_WIDTH];
endfunction
function automatic hpdcache_offset_t hpdcache_get_req_addr_offset(input hpdcache_req_addr_t addr);
    //  Extract the byte offset within the cache line: the low
    //  HPDCACHE_OFFSET_WIDTH bits of the address.
    hpdcache_get_req_addr_offset = addr[0 +: HPDCACHE_OFFSET_WIDTH];
endfunction
function automatic hpdcache_nline_t hpdcache_get_req_addr_nline(input hpdcache_req_addr_t addr);
    //  Extract the cache line number: the HPDCACHE_NLINE_WIDTH bits of the
    //  address located above the line offset field.
    hpdcache_get_req_addr_nline = addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH];
endfunction
function automatic hpdcache_set_t hpdcache_get_req_offset_set(input hpdcache_req_offset_t offset);
    //  Extract the set index from a request offset: the HPDCACHE_SET_WIDTH
    //  bits located just above the line offset bits.
    hpdcache_get_req_offset_set = offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH];
endfunction
function automatic hpdcache_word_t hpdcache_get_req_offset_word(input hpdcache_req_offset_t offset);
    //  Extract the index of the word within the cache line from a request
    //  offset: the HPDCACHE_WORD_IDX_WIDTH bits just above the byte-in-word bits.
    localparam int unsigned WORD_LSB = $clog2(HPDCACHE_WORD_WIDTH/8);
    return offset[WORD_LSB +: HPDCACHE_WORD_IDX_WIDTH];
endfunction
// }}}
// }}}
// Definition of constants and types for the Miss Status Holding Register (MSHR)
// {{{
// HPDcache MSHR number of sets
localparam int unsigned HPDCACHE_MSHR_SETS =
hpdcache_params_pkg::PARAM_MSHR_SETS;
// HPDcache MSHR number of ways
localparam int unsigned HPDCACHE_MSHR_WAYS =
hpdcache_params_pkg::PARAM_MSHR_WAYS;
// HPDcache MSHR number of ways in the same SRAM word
localparam int unsigned HPDCACHE_MSHR_WAYS_PER_RAM_WORD =
hpdcache_params_pkg::PARAM_MSHR_WAYS_PER_RAM_WORD; /* FIXME this parameter is currently ignored */
// HPDcache MSHR number of sets in the same SRAM
localparam int unsigned HPDCACHE_MSHR_SETS_PER_RAM =
hpdcache_params_pkg::PARAM_MSHR_SETS_PER_RAM; /* FIXME this parameter is currently ignored */
// HPDcache MSHR implements write byte enable
localparam bit HPDCACHE_MSHR_RAM_WBYTEENABLE =
hpdcache_params_pkg::PARAM_MSHR_RAM_WBYTEENABLE;
localparam bit HPDCACHE_MSHR_USE_REGBANK =
hpdcache_params_pkg::PARAM_MSHR_USE_REGBANK;
localparam int unsigned HPDCACHE_MSHR_SET_WIDTH = $clog2(HPDCACHE_MSHR_SETS);
localparam int unsigned HPDCACHE_MSHR_WAY_WIDTH = $clog2(HPDCACHE_MSHR_WAYS);
localparam int unsigned HPDCACHE_MSHR_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_MSHR_SET_WIDTH;
typedef logic unsigned [HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_set_t;
typedef logic unsigned [HPDCACHE_MSHR_TAG_WIDTH-1:0] mshr_tag_t;
typedef logic unsigned [HPDCACHE_MSHR_WAY_WIDTH-1:0] mshr_way_t;
// }}}
// Definition of interface with memory
// {{{
typedef logic [7:0] hpdcache_mem_len_t;
typedef logic [2:0] hpdcache_mem_size_t;
typedef enum logic [1:0] {
HPDCACHE_MEM_RESP_OK = 2'b00,
HPDCACHE_MEM_RESP_NOK = 2'b01
} hpdcache_mem_error_e;
typedef enum logic [1:0] {
HPDCACHE_MEM_READ = 2'b00,
HPDCACHE_MEM_WRITE = 2'b01,
HPDCACHE_MEM_ATOMIC = 2'b10
// Reserved = 2'b11 - TODO: CMO ?
} hpdcache_mem_command_e;
typedef enum logic [3:0] {
HPDCACHE_MEM_ATOMIC_ADD = 4'b0000,
HPDCACHE_MEM_ATOMIC_CLR = 4'b0001,
HPDCACHE_MEM_ATOMIC_SET = 4'b0010,
HPDCACHE_MEM_ATOMIC_EOR = 4'b0011,
HPDCACHE_MEM_ATOMIC_SMAX = 4'b0100,
HPDCACHE_MEM_ATOMIC_SMIN = 4'b0101,
HPDCACHE_MEM_ATOMIC_UMAX = 4'b0110,
HPDCACHE_MEM_ATOMIC_UMIN = 4'b0111,
HPDCACHE_MEM_ATOMIC_SWAP = 4'b1000,
// Reserved = 4'b1001,
// Reserved = 4'b1010,
// Reserved = 4'b1011,
HPDCACHE_MEM_ATOMIC_LDEX = 4'b1100,
HPDCACHE_MEM_ATOMIC_STEX = 4'b1101
// Reserved = 4'b1110,
// Reserved = 4'b1111
} hpdcache_mem_atomic_e;
function automatic hpdcache_mem_size_t get_hpdcache_mem_size(int unsigned bytes);
    //  Encode a transfer width, given in bytes, into the memory interface
    //  size field. The encoding is the base-2 logarithm of the number of
    //  bytes, rounded up to the next power of two:
    //    0 or 1 byte -> 0, 2 -> 1, 3..4 -> 2, 5..8 -> 3, ... , 65..128 -> 7
    //
    //  A 1-byte transfer is encoded as 0 (2**0 bytes). It was previously
    //  caught by the "<= 2" arm and wrongly encoded as 1 (2 bytes).
    //
    //  Widths above 128 bytes are not supported: an error is reported in
    //  simulation and no value is explicitly returned.
    if      (bytes <=   1) return 0;
    else if (bytes <=   2) return 1;
    else if (bytes <=   4) return 2;
    else if (bytes <=   8) return 3;
    else if (bytes <=  16) return 4;
    else if (bytes <=  32) return 5;
    else if (bytes <=  64) return 6;
    else if (bytes <= 128) return 7;
    // pragma translate_off
    else $error("hpdcache: unsupported number of bytes");
    // pragma translate_on
endfunction
// }}}
// Definition of constants and types for the Write Buffer (WBUF)
// {{{
localparam int unsigned HPDCACHE_WBUF_DIR_ENTRIES =
hpdcache_params_pkg::PARAM_WBUF_DIR_ENTRIES;
localparam int unsigned HPDCACHE_WBUF_DATA_ENTRIES =
hpdcache_params_pkg::PARAM_WBUF_DATA_ENTRIES;
localparam int unsigned HPDCACHE_WBUF_WORDS =
hpdcache_params_pkg::PARAM_WBUF_WORDS;
localparam int unsigned HPDCACHE_WBUF_TIMECNT_WIDTH =
hpdcache_params_pkg::PARAM_WBUF_TIMECNT_WIDTH;
// Use feedthrough FIFOs from the write-buffer to the NoC. This reduces
// the latency (by one cycle) but adds an additional timing path
localparam bit HPDCACHE_WBUF_SEND_FEEDTHROUGH =
hpdcache_params_pkg::PARAM_WBUF_SEND_FEEDTHROUGH;
localparam int unsigned HPDCACHE_WBUF_DATA_WIDTH = HPDCACHE_REQ_DATA_WIDTH*
HPDCACHE_WBUF_WORDS;
localparam int unsigned HPDCACHE_WBUF_DATA_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DATA_ENTRIES);
localparam int unsigned HPDCACHE_WBUF_DIR_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DIR_ENTRIES);
typedef hpdcache_req_addr_t wbuf_addr_t;
typedef hpdcache_nline_t wbuf_match_t;
typedef hpdcache_req_data_t wbuf_data_t;
typedef hpdcache_req_be_t wbuf_be_t;
typedef wbuf_data_t[HPDCACHE_WBUF_WORDS-1:0] wbuf_data_buf_t;
typedef wbuf_be_t [HPDCACHE_WBUF_WORDS-1:0] wbuf_be_buf_t;
typedef logic unsigned [ HPDCACHE_WBUF_TIMECNT_WIDTH-1:0] wbuf_timecnt_t;
typedef logic unsigned [ HPDCACHE_WBUF_DIR_PTR_WIDTH-1:0] wbuf_dir_ptr_t;
typedef logic unsigned [HPDCACHE_WBUF_DATA_PTR_WIDTH-1:0] wbuf_data_ptr_t;
// }}}
// Definition of constants and types for the Replay Table (RTAB)
// {{{
localparam int HPDCACHE_RTAB_ENTRIES = hpdcache_params_pkg::PARAM_RTAB_ENTRIES;
typedef logic [$clog2(HPDCACHE_RTAB_ENTRIES)-1:0] rtab_ptr_t;
// }}}
// Definition of constants and types for the uncacheable request handler (UC)
// {{{
// Decoded operation of a request sent to the uncacheable/AMO handler: one
// flag per supported operation.
// NOTE(review): the flags are expected to be mutually exclusive, since the
// handler decodes them with "unique case (1'b1)" - confirm that the producer
// guarantees this. Per-field meanings are taken from the field names.
typedef struct packed {
logic is_ld; // load
logic is_st; // store
logic is_amo_lr; // load-reserved (LR)
logic is_amo_sc; // store-conditional (SC)
logic is_amo_swap; // atomic swap
logic is_amo_add; // atomic add
logic is_amo_and; // atomic and
logic is_amo_or; // atomic or
logic is_amo_xor; // atomic xor
logic is_amo_max; // atomic max (the handler sign-extends its 32-bit operands)
logic is_amo_maxu; // atomic max, presumably unsigned
logic is_amo_min; // atomic min (the handler sign-extends its 32-bit operands)
logic is_amo_minu; // atomic min, presumably unsigned
} hpdcache_uc_op_t;
// }}}
// Definition of constants and types for the CMO request handler (CMOH)
// {{{
// Decoded operation of a request sent to the CMO handler: one flag per
// operation.
// NOTE(review): field meanings below are inferred from the field names only,
// the consumer is not visible from here - confirm against the CMO handler.
typedef struct packed {
logic is_inval_by_nline; // presumably: invalidate the line matching a given nline
logic is_inval_by_set; // presumably: invalidate a given set
logic is_inval_all; // presumably: invalidate the whole cache
logic is_fence; // presumably: memory fence
} hpdcache_cmoh_op_t;
// }}}
endpackage

View File

@ -0,0 +1,138 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : May, 2021
* Description : HPDcache Pseudo-LRU replacement policy
* History :
*/
// Bit-based pseudo-LRU replacement policy.
//
// One PLRU bit is kept per way of every set. A bit is set when its way is
// accessed (update interface) or allocated (replacement interface). When an
// access would set the last cleared bit of a set, all the bits of that set are
// cleared except the one of the way being accessed.
//
// The victim is an invalid way of the directory when there is one, otherwise a
// way whose PLRU bit is cleared. In both cases a single way is picked by a
// hpdcache_prio_1hot_encoder instance.
module hpdcache_plru
    //  Parameters
    //  {{{
#(
    parameter int unsigned SETS = 0,
    parameter int unsigned WAYS = 0,

    localparam type set_t = logic [$clog2(SETS)-1:0],
    localparam type way_vector_t = logic [WAYS-1:0]
)
    //  }}}

    //  Ports
    //  {{{
(
    input  logic        clk_i,
    input  logic        rst_ni,

    //  PLRU update interface (access to an existing line)
    input  logic        updt_i,
    input  set_t        updt_set_i,
    input  way_vector_t updt_way_i,

    //  Victim replacement interface
    input  logic        repl_i,
    input  set_t        repl_set_i,
    input  way_vector_t repl_dir_valid_i,
    input  logic        repl_updt_plru_i,

    output way_vector_t victim_way_o
);
    //  }}}

    //  Internal signals and registers
    //  {{{
    way_vector_t [SETS-1:0] plru_q, plru_d;
    way_vector_t            updt_plru;
    way_vector_t            repl_plru;
    way_vector_t            used_victim_way, unused_victim_way;
    //  }}}

    //  Victim way selection
    //  {{{
    //  Candidate among the ways whose PLRU bit is cleared
    hpdcache_prio_1hot_encoder #(.N(WAYS))
        used_victim_select_i (
            .val_i     (~plru_q[repl_set_i]),
            .val_o     (used_victim_way)
        );

    //  Candidate among the ways that are invalid in the directory
    hpdcache_prio_1hot_encoder #(.N(WAYS))
        unused_victim_select_i (
            .val_i     (~repl_dir_valid_i),
            .val_o     (unused_victim_way)
        );

    //  A free (invalid) way always takes precedence over a valid one
    assign victim_way_o = (|unused_victim_way) ? unused_victim_way
                                               : used_victim_way;
    //  }}}

    //  Pseudo-LRU update process
    //  {{{
    //  PLRU bits of the target set once the accessed/allocated way is marked
    assign updt_plru = plru_q[updt_set_i] | updt_way_i;
    assign repl_plru = plru_q[repl_set_i] | victim_way_o;

    always_comb
    begin : plru_update_comb
        plru_d = plru_q;

        if (repl_i) begin
            //  Replacement has priority over a simultaneous update. The PLRU
            //  state is only modified when the requester asks for it.
            if (repl_updt_plru_i) begin
                plru_d[repl_set_i] = repl_plru;

                //  Every bit of the set would be set: keep only the bit of
                //  the way being allocated
                if (&repl_plru) begin
                    plru_d[repl_set_i] = victim_way_o;
                end
            end
        end else if (updt_i) begin
            plru_d[updt_set_i] = updt_plru;

            //  Every bit of the set would be set: keep only the bit of the
            //  way being accessed
            if (&updt_plru) begin
                plru_d[updt_set_i] = updt_way_i;
            end
        end
    end
    //  }}}

    //  Set state process
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin : lru_ff
        if (!rst_ni) begin
            plru_q <= '0;
        end else if (updt_i || repl_i) begin
            plru_q <= plru_d;
        end
    end
    //  }}}
endmodule

View File

@ -0,0 +1,666 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : September, 2021
* Description : HPDcache Replay Table
* History :
*/
module hpdcache_rtab
import hpdcache_pkg::*;
// Parameters
// {{{
#(
parameter type rtab_entry_t = logic
)
// }}}
// Ports
// {{{
(
// Clock and reset signals
input logic clk_i,
input logic rst_ni,
// Global control signals
output logic empty_o, // RTAB is empty
output logic full_o, // RTAB is full
// Check RTAB signals
// This interface allows to check if there is an address-overlapping
// request in the RTAB with respect to the given nline.
input logic check_i, // Check for hit (nline) in the RTAB
input hpdcache_nline_t check_nline_i,
output logic check_hit_o,
// Allocate signals
// This interface allows to allocate a new request in a new linked list
input logic alloc_i,
input logic alloc_and_link_i,
input rtab_entry_t alloc_req_i,
input logic alloc_mshr_hit_i,
input logic alloc_mshr_full_i,
input logic alloc_mshr_ready_i,
input logic alloc_wbuf_hit_i,
input logic alloc_wbuf_not_ready_i,
// Pop signals
// This interface allows to read (and remove) a request from the RTAB
output logic pop_try_valid_o, // Request ready to be replayed
input logic pop_try_i,
output rtab_entry_t pop_try_req_o,
output rtab_ptr_t pop_try_ptr_o,
// Pop Commit signals
// This interface allows to actually remove a popped request
input logic pop_commit_i,
input rtab_ptr_t pop_commit_ptr_i,
// Pop Rollback signals
// This interface allows to put back a popped request
input logic pop_rback_i,
input rtab_ptr_t pop_rback_ptr_i,
input logic pop_rback_mshr_hit_i,
input logic pop_rback_mshr_full_i,
input logic pop_rback_mshr_ready_i,
input logic pop_rback_wbuf_hit_i,
input logic pop_rback_wbuf_not_ready_i,
// Control signals from/to WBUF
output hpdcache_req_addr_t wbuf_addr_o, // address to check against ongoing writes
output logic wbuf_is_read_o, // monitored request is read
input logic wbuf_hit_open_i, // Hit on open entry in the write buf
input logic wbuf_hit_pend_i, // Hit on pend entry in the write buf
input logic wbuf_hit_sent_i, // Hit on sent entry in the write buf
input logic wbuf_not_ready_i, // Write buffer cannot accept the write
// Control signals from the Miss Handler
input logic miss_ready_i, // Miss Handler is ready
// Control signals from the Refill Handler
input logic refill_i, // Active refill
input hpdcache_nline_t refill_nline_i, // Cache-line index being refilled
// Configuration parameters
input logic cfg_single_entry_i // Enable only one entry of the table
);
// }}}
// Definition of constants, types and functions
// {{{
localparam int N = HPDCACHE_RTAB_ENTRIES;
// Return the index of the lowest set bit of bv, or 0 when no bit is set.
function automatic rtab_ptr_t rtab_bv_to_index(
        input logic [N-1:0] bv);
    rtab_ptr_t idx;

    idx = 0;
    // Scan from the top so that the lowest set bit is written last and wins
    for (int k = N - 1; k >= 0; k--) begin
        if (bv[k]) idx = rtab_ptr_t'(k);
    end
    return idx;
endfunction
// Build an N-bit vector in which bit k is set when k, truncated to the width
// of rtab_ptr_t, is equal to index.
function automatic logic [N-1:0] rtab_index_to_bv(
        input rtab_ptr_t index);
    logic [N-1:0] onehot;

    for (int k = 0; k < N; k++) begin
        onehot[k] = (index == rtab_ptr_t'(k));
    end
    return onehot;
endfunction
// Tell whether two cache-line numbers have the same HPDCACHE_MSHR_SET_WIDTH
// low-order bits.
function automatic bit rtab_mshr_set_equal(
        input hpdcache_nline_t x,
        input hpdcache_nline_t y);
    logic [HPDCACHE_MSHR_SET_WIDTH-1:0] x_set, y_set;

    x_set = x[0 +: HPDCACHE_MSHR_SET_WIDTH];
    y_set = y[0 +: HPDCACHE_MSHR_SET_WIDTH];
    return (x_set == y_set);
endfunction
// Return, as a bit-vector, the entry that follows entry x according to the
// given table of next pointers.
function automatic logic [N-1:0] rtab_next(rtab_ptr_t [N-1:0] next, rtab_ptr_t x);
    rtab_ptr_t successor;

    successor = next[x];
    return rtab_index_to_bv(successor);
endfunction
// States of the pop-try FSM (see pop_entry_sel_comb):
// - POP_TRY_HEAD: offer the entry granted by the arbiter among the ready
// heads of the linked lists
// - POP_TRY_NEXT: offer the entry recorded in pop_try_next_q (successor of
// the previously forwarded entry)
// - POP_TRY_NEXT_WAIT: keep offering that same entry until it is accepted
// No base type is given, so the enumeration uses the default one (int).
typedef enum {
POP_TRY_HEAD,
POP_TRY_NEXT,
POP_TRY_NEXT_WAIT
} rtab_pop_try_state_e;
// }}}
// Internal signals and registers
// {{{
rtab_entry_t [N-1:0] req_q;
rtab_ptr_t [N-1:0] next_q;
rtab_pop_try_state_e pop_try_state_q, pop_try_state_d;
logic [N-1:0] pop_try_next_q, pop_try_next_d;
logic [N-1:0] valid_q;
logic [N-1:0] valid_set, valid_rst;
logic [N-1:0] alloc_valid_set;
logic [N-1:0] pop_commit_valid_rst;
// Bits indicating if the corresponding entry is the head of a linked list
logic [N-1:0] head_q;
logic [N-1:0] head_set, head_rst;
logic [N-1:0] alloc_head_set, alloc_head_rst;
logic [N-1:0] pop_try_head_rst;
logic [N-1:0] pop_commit_head_set;
logic [N-1:0] pop_rback_head_set;
// Bits indicating if the corresponding entry is the tail of a linked list
logic [N-1:0] tail_q;
logic [N-1:0] tail_set, tail_rst;
logic [N-1:0] alloc_tail_set, alloc_tail_rst;
// There is a pending miss on the target nline
logic [N-1:0] deps_mshr_hit_q;
logic [N-1:0] deps_mshr_hit_set, deps_mshr_hit_rst;
logic [N-1:0] alloc_deps_mshr_hit_set;
logic [N-1:0] pop_rback_deps_mshr_hit_set;
// The MSHR has no available slot for the new miss
logic [N-1:0] deps_mshr_full_q;
logic [N-1:0] deps_mshr_full_set, deps_mshr_full_rst;
logic [N-1:0] alloc_deps_mshr_full_set;
logic [N-1:0] pop_rback_deps_mshr_full_set;
// The MSHR is not ready to send a new miss request
logic [N-1:0] deps_mshr_ready_q;
logic [N-1:0] deps_mshr_ready_set, deps_mshr_ready_rst;
logic [N-1:0] alloc_deps_mshr_ready_set;
logic [N-1:0] pop_rback_deps_mshr_ready_set;
// Hit on a non-empty entry of the write buffer
logic [N-1:0] deps_wbuf_hit_q;
logic [N-1:0] deps_wbuf_hit_set, deps_wbuf_hit_rst;
logic [N-1:0] alloc_deps_wbuf_hit_set;
logic [N-1:0] pop_rback_deps_wbuf_hit_set;
// Hit on a pend entry of the write buffer
logic [N-1:0] deps_wbuf_not_ready_q;
logic [N-1:0] deps_wbuf_not_ready_set, deps_wbuf_not_ready_rst;
logic [N-1:0] alloc_deps_wbuf_not_ready_set;
logic [N-1:0] pop_rback_deps_wbuf_not_ready_set;
logic [N-1:0] nodeps;
hpdcache_nline_t [N-1:0] nline;
hpdcache_req_addr_t [N-1:0] addr;
logic [N-1:0] is_read;
logic [N-1:0] check_hit;
logic [N-1:0] match_check_nline;
logic [N-1:0] match_check_tail;
logic [N-1:0] match_refill_nline;
logic [N-1:0] match_refill_mshr_set;
logic [N-1:0] free;
logic [N-1:0] free_alloc;
logic alloc;
logic [N-1:0] pop_match_next;
logic [N-1:0] pop_rback_ptr_bv;
logic [N-1:0] pop_try_bv;
logic [N-1:0] ready;
genvar gen_i;
// }}}
// Compute global control signals
// {{{
// compute if entries are ready to be replayed
assign nodeps = ~(deps_mshr_hit_q |
deps_mshr_full_q |
deps_mshr_ready_q |
deps_wbuf_hit_q |
deps_wbuf_not_ready_q);
assign ready = valid_q & head_q & nodeps;
assign free = ~valid_q;
// compute the free vector (one-hot signal)
hpdcache_prio_1hot_encoder #(
.N (N)
) free_encoder_i (
.val_i (free),
.val_o (free_alloc)
);
// full and empty signals
assign empty_o = &(~valid_q);
assign full_o = &( valid_q) | (|valid_q & cfg_single_entry_i);
// }}}
// Check interface
// {{{
generate
for (gen_i = 0; gen_i < N; gen_i++) begin : check_gen
assign addr[gen_i] = {req_q[gen_i].addr_tag, req_q[gen_i].addr_offset},
nline[gen_i] = hpdcache_get_req_addr_nline(addr[gen_i]),
match_check_nline[gen_i] = (check_nline_i == nline[gen_i]);
assign is_read[gen_i] = is_load(req_q[gen_i].op) |
is_cmo_prefetch(req_q[gen_i].op, req_q[gen_i].size);
end
endgenerate
assign check_hit = valid_q & match_check_nline,
check_hit_o = |check_hit,
match_check_tail = check_hit & tail_q;
// }}}
// Allocation process
// {{{
assign alloc = alloc_i | alloc_and_link_i;
// Set the valid bit-vector of the replay table
assign alloc_valid_set = free_alloc & {N{alloc}};
// Set of head and tail bit-vectors during an allocation
// - The head bit is only set when creating a new linked-list
// - The tail bit is always set because new requests are added on the tail.
assign alloc_head_set = free_alloc & {N{alloc_i}},
alloc_tail_set = alloc_valid_set;
// Reset of head and tail bit-vectors during an allocation
// - When doing an allocation and link, head bit shall be reset
// - when doing an allocation and link, the "prev" tail shall be reset
assign alloc_head_rst = free_alloc & {N{alloc_and_link_i}},
alloc_tail_rst = match_check_tail & {N{alloc_and_link_i}};
// Set the dependency bits for the allocated entry
assign alloc_deps_mshr_hit_set = alloc_valid_set & {N{ alloc_mshr_hit_i}},
alloc_deps_mshr_full_set = alloc_valid_set & {N{ alloc_mshr_full_i}},
alloc_deps_mshr_ready_set = alloc_valid_set & {N{ alloc_mshr_ready_i}},
alloc_deps_wbuf_hit_set = alloc_valid_set & {N{ alloc_wbuf_hit_i}},
alloc_deps_wbuf_not_ready_set = alloc_valid_set & {N{alloc_wbuf_not_ready_i}};
// }}}
// Update replay table dependencies
// {{{
// Update write buffer hit dependencies
// {{{
// Build a bit-vector with HEAD requests waiting for a conflict in the wbuf
logic [N-1:0] wbuf_rd_pending, wbuf_wr_pending;
logic [N-1:0] wbuf_rd_gnt, wbuf_wr_gnt;
logic [ 1:0] wbuf_pending;
logic [ 1:0] wbuf_gnt;
logic wbuf_ready;
logic [N-1:0] wbuf_sel;
assign wbuf_rd_pending = valid_q & head_q & deps_wbuf_hit_q,
wbuf_wr_pending = valid_q & head_q & deps_wbuf_not_ready_q;
// Choose in a round-robin manner a ready transaction waiting for a conflict in the wbuf
hpdcache_rrarb #(
.N (N)
) wbuf_rd_pending_arb_i (
.clk_i,
.rst_ni,
.req_i (wbuf_rd_pending),
.gnt_o (wbuf_rd_gnt),
.ready_i (wbuf_gnt[0] & wbuf_ready)
);
hpdcache_rrarb #(
.N (N)
) wbuf_wr_pending_arb_i (
.clk_i,
.rst_ni,
.req_i (wbuf_wr_pending),
.gnt_o (wbuf_wr_gnt),
.ready_i (wbuf_gnt[1] & wbuf_ready)
);
assign wbuf_pending = {|wbuf_wr_gnt, |wbuf_rd_gnt},
wbuf_ready = |(pop_try_bv & (wbuf_rd_gnt | wbuf_wr_gnt));
hpdcache_fxarb #(
.N (2)
) wbuf_pending_arb_i (
.clk_i,
.rst_ni,
.req_i (wbuf_pending),
.gnt_o (wbuf_gnt),
.ready_i (wbuf_ready)
);
assign wbuf_sel = wbuf_gnt[0] ? wbuf_rd_gnt :
wbuf_gnt[1] ? wbuf_wr_gnt : '0;
hpdcache_mux #(
.NINPUT (N),
.DATA_WIDTH ($bits(hpdcache_req_addr_t)),
.ONE_HOT_SEL (1'b1)
) wbuf_pending_addr_mux_i (
.data_i (addr),
.sel_i (wbuf_sel),
.data_o (wbuf_addr_o)
);
hpdcache_mux #(
.NINPUT (N),
.DATA_WIDTH (1),
.ONE_HOT_SEL (1'b1)
) wbuf_pending_is_read_mux_i (
.data_i (is_read),
.sel_i (wbuf_sel),
.data_o (wbuf_is_read_o)
);
// reset write buffer dependency bits with the output from the write buffer
assign deps_wbuf_hit_rst =
wbuf_sel & ~{N{wbuf_hit_open_i | wbuf_hit_pend_i | wbuf_hit_sent_i}};
assign deps_wbuf_not_ready_rst =
wbuf_sel & ~{N{wbuf_not_ready_i}};
// }}}
// Update miss handler dependency
// {{{
assign deps_mshr_ready_rst = {N{miss_ready_i}};
// }}}
// Update refill dependencies
// {{{
generate
for (gen_i = 0; gen_i < N; gen_i++) begin : match_refill_gen
assign match_refill_mshr_set[gen_i] =
rtab_mshr_set_equal(refill_nline_i, nline[gen_i]);
assign match_refill_nline[gen_i] =
(refill_nline_i == nline[gen_i]);
end
endgenerate
assign deps_mshr_full_rst = {N{refill_i}} & match_refill_mshr_set;
assign deps_mshr_hit_rst = {N{refill_i}} & match_refill_nline;
// }}}
// }}}
// Pop interface
// {{{
logic [N-1:0] pop_sel;
logic [N-1:0] pop_commit_bv;
assign pop_commit_bv = rtab_index_to_bv(pop_commit_ptr_i);
// Pop try process
// {{{
logic [N-1:0] pop_gnt;
logic pop_head;
hpdcache_rrarb #(
.N (N)
) pop_arb_i (
.clk_i,
.rst_ni,
.req_i (ready),
.gnt_o (pop_gnt),
.ready_i (pop_head)
);
// Valid flag of the pop-try interface.
// In the HEAD state a request is offered only when at least one entry is
// ready. In the NEXT and NEXT_WAIT states the selected successor is always
// offered. Any other state value yields no request.
always_comb
begin : req_valid_comb
    if (pop_try_state_q == POP_TRY_HEAD) begin
        pop_try_valid_o = |ready;
    end else if ((pop_try_state_q == POP_TRY_NEXT) ||
                 (pop_try_state_q == POP_TRY_NEXT_WAIT)) begin
        pop_try_valid_o = 1'b1;
    end else begin
        pop_try_valid_o = 1'b0;
    end
end
// Pop-try FSM: next-state logic and selection of the entry to forward.
//
// Outputs of this process:
// - pop_sel: bit-vector selecting the entry driven on the pop-try interface
// - pop_head: pulsed when a head entry is accepted; it is the ready input of
// the round-robin arbiter (pop_arb_i) that produces pop_gnt
// - pop_try_state_d / pop_try_next_d: next FSM state and next entry of the
// linked list being forwarded
//
// By default the state and the recorded next entry are held, and no entry is
// selected.
always_comb
begin : pop_entry_sel_comb
pop_try_state_d = pop_try_state_q;
pop_try_next_d = pop_try_next_q;
pop_head = 1'b0;
pop_sel = '0;
case (pop_try_state_q)
POP_TRY_HEAD: begin
// This FSM may be in this state after forwarding the tail of
// a list. In that case, a rollback may arrive in this cycle.
// The entry offered is the one granted by the arbiter among the ready heads
pop_sel = pop_gnt;
if (!pop_rback_i && pop_try_valid_o) begin
if (pop_try_i) begin
// If the request interface accepts the request, go to the next request
// in the list (if the current request is not the tail). Otherwise, stay in
// the same state to forward a request from a new list
pop_head = 1'b1;
// The granted entry is not a tail: record its successor for the next cycle
if ((pop_gnt & ~tail_q) != 0) begin
pop_try_state_d = POP_TRY_NEXT;
pop_try_next_d = rtab_next(next_q, pop_try_ptr_o);
end
end
end
end
POP_TRY_NEXT: begin
pop_sel = pop_try_next_q;
// A rollback stops the walk through the current list
if (pop_rback_i) begin
pop_try_state_d = POP_TRY_HEAD;
end else begin
if (pop_try_i) begin
// If the request interface accepts the new request, go to the next request
// in the list (if the current request is not the tail). Otherwise, return
// to the POP_TRY_HEAD state to forward a request from a new list
if ((pop_try_next_q & ~tail_q) != 0) begin
pop_try_state_d = POP_TRY_NEXT;
pop_try_next_d = rtab_next(next_q, pop_try_ptr_o);
end else begin
pop_try_state_d = POP_TRY_HEAD;
end
end else begin
// If the request interface is not ready to consume the new request, wait
// until it is
pop_try_state_d = POP_TRY_NEXT_WAIT;
end
end
end
POP_TRY_NEXT_WAIT: begin
// Wait for the current request to be accepted. Then go to the next request in the
// list or to a new list
// Note that, unlike POP_TRY_NEXT, pop_rback_i is not examined in this state
pop_sel = pop_try_next_q;
if (pop_try_i) begin
if ((pop_try_next_q & ~tail_q) != 0) begin
pop_try_state_d = POP_TRY_NEXT;
pop_try_next_d = rtab_next(next_q, pop_try_ptr_o);
end else begin
pop_try_state_d = POP_TRY_HEAD;
end
end
end
default: begin
end
endcase
end
assign pop_commit_head_set = '0;
hpdcache_mux #(
.NINPUT (N),
.DATA_WIDTH ($bits(rtab_entry_t)),
.ONE_HOT_SEL (1'b1)
) pop_mux_i (
.data_i (req_q),
.sel_i (pop_sel),
.data_o (pop_try_req_o)
);
// Temporarily unset the head bit of the popped request to prevent it from being rescheduled
assign pop_try_bv = pop_sel & {N{pop_try_i}},
pop_try_head_rst = pop_try_bv;
// Forward the index of the entry being popped. This is used later by the
// commit or rollback operations
assign pop_try_ptr_o = rtab_bv_to_index(pop_sel);
// }}}
// Pop commit process
// {{{
// Invalidate the entry being popped (head of the linked list)
assign pop_commit_valid_rst = {N{pop_commit_i}} & rtab_index_to_bv(pop_commit_ptr_i);
// }}}
// Pop rollback process
// {{{
// Set again the head bit of the rolled-back request
assign pop_rback_ptr_bv = rtab_index_to_bv(pop_rback_ptr_i);
assign pop_rback_head_set = {N{pop_rback_i}} & pop_rback_ptr_bv;
assign pop_rback_deps_mshr_hit_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_hit_i}},
pop_rback_deps_mshr_full_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_full_i}},
pop_rback_deps_mshr_ready_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_ready_i}},
pop_rback_deps_wbuf_hit_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_hit_i}},
pop_rback_deps_wbuf_not_ready_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_not_ready_i}};
// }}}
// }}}
// Internal state assignment
// {{{
assign head_set = alloc_head_set | pop_commit_head_set | pop_rback_head_set,
head_rst = alloc_head_rst | pop_try_head_rst;
assign tail_set = alloc_tail_set,
tail_rst = alloc_tail_rst;
assign valid_set = alloc_valid_set,
valid_rst = pop_commit_valid_rst;
assign deps_mshr_hit_set = alloc_deps_mshr_hit_set | pop_rback_deps_mshr_hit_set,
deps_mshr_full_set = alloc_deps_mshr_full_set | pop_rback_deps_mshr_full_set,
deps_mshr_ready_set = alloc_deps_mshr_ready_set | pop_rback_deps_mshr_ready_set,
deps_wbuf_hit_set = alloc_deps_wbuf_hit_set | pop_rback_deps_wbuf_hit_set,
deps_wbuf_not_ready_set = alloc_deps_wbuf_not_ready_set | pop_rback_deps_wbuf_not_ready_set;
// Per-entry state of the replay table: valid, head and tail flags, dependency
// flags and next pointers. Everything is cleared by the asynchronous
// active-low reset.
//
// Each flag vector is updated as (~q & set) | (q & ~rst): a cleared bit is
// set when its set bit is active, and a set bit is cleared when its rst bit is
// active. Consequently, for a bit that is already set, rst takes effect even
// if set is active in the same cycle; for a bit that is cleared, rst has no
// effect.
always_ff @(posedge clk_i or negedge rst_ni)
begin : rtab_valid_ff
if (!rst_ni) begin
valid_q <= '0;
head_q <= '0;
tail_q <= '0;
deps_mshr_hit_q <= '0;
deps_mshr_full_q <= '0;
deps_mshr_ready_q <= '0;
deps_wbuf_hit_q <= '0;
deps_wbuf_not_ready_q <= '0;
next_q <= '0;
end else begin
valid_q <= (~valid_q & valid_set) |
( valid_q & ~valid_rst);
// update head and tail flags
head_q <= (~head_q & head_set) |
( head_q & ~head_rst);
tail_q <= (~tail_q & tail_set) |
( tail_q & ~tail_rst);
// update dependency flags
deps_mshr_hit_q <= (~deps_mshr_hit_q & deps_mshr_hit_set) |
( deps_mshr_hit_q & ~deps_mshr_hit_rst);
deps_mshr_full_q <= (~deps_mshr_full_q & deps_mshr_full_set) |
( deps_mshr_full_q & ~deps_mshr_full_rst);
deps_mshr_ready_q <= (~deps_mshr_ready_q & deps_mshr_ready_set) |
( deps_mshr_ready_q & ~deps_mshr_ready_rst);
deps_wbuf_hit_q <= (~deps_wbuf_hit_q & deps_wbuf_hit_set) |
( deps_wbuf_hit_q & ~deps_wbuf_hit_rst);
deps_wbuf_not_ready_q <= (~deps_wbuf_not_ready_q & deps_wbuf_not_ready_set) |
( deps_wbuf_not_ready_q & ~deps_wbuf_not_ready_rst);
// update the next pointers
// On an allocate-and-link, the valid tail entry matching the checked nline
// is made to point to the entry being allocated (index of free_alloc)
for (int i = 0; i < N; i++) begin
if (alloc_and_link_i && match_check_tail[i]) begin
next_q[i] <= rtab_bv_to_index(free_alloc);
end
end
end
end
// State registers of the pop-try FSM. On reset the FSM starts in POP_TRY_HEAD
// with no recorded next entry; otherwise the values computed by
// pop_entry_sel_comb are registered on every clock edge.
always_ff @(posedge clk_i or negedge rst_ni)
begin : pop_try_ff
if (!rst_ni) begin
pop_try_state_q <= POP_TRY_HEAD;
pop_try_next_q <= '0;
end else begin
pop_try_state_q <= pop_try_state_d;
pop_try_next_q <= pop_try_next_d;
end
end
// Request storage of the replay table. This process has no reset: an entry
// only holds a defined request after it has been written by an allocation,
// which is signalled by its bit in valid_set.
always_ff @(posedge clk_i)
begin : rtab_ff
for (int i = 0; i < N; i++) begin
// update the request array
if (valid_set[i]) begin
req_q[i] <= alloc_req_i;
end
end
end
// }}}
// Assertions
// {{{
// pragma translate_off
assert property (@(posedge clk_i) disable iff (!rst_ni)
check_i |-> $onehot0(match_check_tail)) else
$error("rtab: more than one entry matching");
assert property (@(posedge clk_i) disable iff (!rst_ni)
alloc_and_link_i |-> (check_i & check_hit_o)) else
$error("rtab: alloc and link shall be performed in case of check hit");
assert property (@(posedge clk_i) disable iff (!rst_ni)
alloc_and_link_i |->
({alloc_req_i.addr_tag, hpdcache_get_req_offset_set(alloc_req_i.addr_offset)} ==
check_nline_i)) else
$error("rtab: nline for alloc and link shall match the one being checked");
assert property (@(posedge clk_i) disable iff (!rst_ni)
alloc_i |-> !alloc_and_link_i) else
$error("rtab: only one allocation per cycle is allowed");
`ifndef VERILATOR
assert property (@(posedge clk_i) disable iff (!rst_ni)
pop_try_i |-> ##1 (pop_commit_i | pop_rback_i)) else
$error("rtab: a pop try shall be followed by a commit or rollback");
`endif
assert property (@(posedge clk_i) disable iff (!rst_ni)
pop_commit_i |-> valid_q[pop_commit_ptr_i]) else
$error("rtab: commiting an invalid entry");
assert property (@(posedge clk_i) disable iff (!rst_ni)
pop_rback_i |-> valid_q[pop_rback_ptr_i]) else
$error("rtab: rolling-back an invalid entry");
assert property (@(posedge clk_i) disable iff (!rst_ni)
pop_rback_i |-> !pop_try_i) else
$error("rtab: cache shall not accept a new request while rolling back");
assert property (@(posedge clk_i) disable iff (!rst_ni)
alloc |-> ~full_o) else
$error("rtab: trying to allocate while the table is full");
assert property (@(posedge clk_i) disable iff (!rst_ni)
alloc_and_link_i |-> ~cfg_single_entry_i) else
$error("rtab: trying to link a request in single entry mode");
// pragma translate_on
// }}}
endmodule

View File

@ -0,0 +1,965 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : May, 2021
* Description : HPDcache uncached and AMO request handler
* History :
*/
module hpdcache_uncached
import hpdcache_pkg::*;
// Parameters
// {{{
#(
parameter int HPDcacheMemIdWidth = 8,
parameter int HPDcacheMemDataWidth = 512,
parameter type hpdcache_mem_req_t = logic,
parameter type hpdcache_mem_req_w_t = logic,
parameter type hpdcache_mem_resp_r_t = logic,
parameter type hpdcache_mem_resp_w_t = logic,
localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
)
// }}}
// Ports
// {{{
(
input logic clk_i,
input logic rst_ni,
// Global control signals
// {{{
input logic wbuf_empty_i,
input logic mshr_empty_i,
input logic rtab_empty_i,
input logic ctrl_empty_i,
// }}}
// Cache-side request interface
// {{{
input logic req_valid_i,
output logic req_ready_o,
input hpdcache_uc_op_t req_op_i,
input hpdcache_req_addr_t req_addr_i,
input hpdcache_req_size_t req_size_i,
input hpdcache_req_data_t req_data_i,
input hpdcache_req_be_t req_be_i,
input logic req_uc_i,
input hpdcache_req_sid_t req_sid_i,
input hpdcache_req_tid_t req_tid_i,
input logic req_need_rsp_i,
// }}}
// Write buffer interface
// {{{
output logic wbuf_flush_all_o,
// }}}
// AMO Cache Interface
// {{{
output logic dir_amo_match_o,
output hpdcache_set_t dir_amo_match_set_o,
output hpdcache_tag_t dir_amo_match_tag_o,
output logic dir_amo_update_plru_o,
input hpdcache_way_vector_t dir_amo_hit_way_i,
output logic data_amo_write_o,
output logic data_amo_write_enable_o,
output hpdcache_set_t data_amo_write_set_o,
output hpdcache_req_size_t data_amo_write_size_o,
output hpdcache_word_t data_amo_write_word_o,
output logic [63:0] data_amo_write_data_o,
output logic [7:0] data_amo_write_be_o,
// }}}
// LR/SC reservation buffer
// {{{
input logic lrsc_snoop_i,
input hpdcache_req_addr_t lrsc_snoop_addr_i,
input hpdcache_req_size_t lrsc_snoop_size_i,
// }}}
// Core response interface
// {{{
input logic core_rsp_ready_i,
output logic core_rsp_valid_o,
output hpdcache_rsp_t core_rsp_o,
// }}}
// MEMORY interfaces
// {{{
// Memory request unique identifier
input hpdcache_mem_id_t mem_read_id_i,
input hpdcache_mem_id_t mem_write_id_i,
// Read interface
input logic mem_req_read_ready_i,
output logic mem_req_read_valid_o,
output hpdcache_mem_req_t mem_req_read_o,
output logic mem_resp_read_ready_o,
input logic mem_resp_read_valid_i,
input hpdcache_mem_resp_r_t mem_resp_read_i,
// Write interface
input logic mem_req_write_ready_i,
output logic mem_req_write_valid_o,
output hpdcache_mem_req_t mem_req_write_o,
input logic mem_req_write_data_ready_i,
output logic mem_req_write_data_valid_o,
output hpdcache_mem_req_w_t mem_req_write_data_o,
output logic mem_resp_write_ready_o,
input logic mem_resp_write_valid_i,
input hpdcache_mem_resp_w_t mem_resp_write_i,
// }}}
// Configuration interface
// {{{
input logic cfg_error_on_cacheable_amo_i
// }}}
);
// }}}
// Definition of constants and types
// {{{
localparam hpdcache_uint MEM_REQ_RATIO = HPDcacheMemDataWidth/HPDCACHE_REQ_DATA_WIDTH;
localparam hpdcache_uint MEM_REQ_WORD_INDEX_WIDTH = $clog2(MEM_REQ_RATIO);
typedef enum {
UC_IDLE,
UC_WAIT_PENDING,
UC_MEM_REQ,
UC_MEM_W_REQ,
UC_MEM_WDATA_REQ,
UC_MEM_WAIT_RSP,
UC_CORE_RSP,
UC_AMO_READ_DIR,
UC_AMO_WRITE_DATA
} hpdcache_uc_fsm_t;
localparam logic AMO_SC_SUCCESS = 1'b0;
localparam logic AMO_SC_FAILURE = 1'b1;
function automatic logic [63:0] prepare_amo_data_operand(
input logic [63:0] data_i,
input hpdcache_req_size_t size_i,
input hpdcache_req_addr_t addr_i,
input logic sign_extend_i
);
// 64-bits AMOs are already aligned, thus do nothing
if (size_i == hpdcache_req_size_t'(3)) begin
return data_i;
end
// 32-bits AMOs
else begin
if (addr_i[2] == 1'b1) begin
if (sign_extend_i) begin
return {{32{data_i[63]}}, data_i[63:32]};
end else begin
return {{32{ 1'b0}}, data_i[63:32]};
end
end else begin
if (sign_extend_i) begin
return {{32{data_i[31]}}, data_i[31: 0]};
end else begin
return {{32{ 1'b0}}, data_i[31: 0]};
end
end
end
endfunction;
function automatic logic [63:0] prepare_amo_data_result(
input logic [63:0] data_i,
input hpdcache_req_size_t size_i
);
// 64-bits AMOs are already aligned, thus do nothing
if (size_i == hpdcache_req_size_t'(3)) begin
return data_i;
end
// 32-bits AMOs
else begin
return {2{data_i[31:0]}};
end
endfunction;
function automatic logic amo_need_sign_extend(hpdcache_uc_op_t op);
unique case (1'b1)
op.is_amo_add,
op.is_amo_max,
op.is_amo_min: return 1'b1;
default : return 1'b0;
endcase;
endfunction
// }}}
// Internal signals and registers
// {{{
hpdcache_uc_fsm_t uc_fsm_q, uc_fsm_d;
hpdcache_uc_op_t req_op_q;
hpdcache_req_addr_t req_addr_q;
hpdcache_req_size_t req_size_q;
hpdcache_req_data_t req_data_q;
hpdcache_req_be_t req_be_q;
logic req_uc_q;
hpdcache_req_sid_t req_sid_q;
hpdcache_req_tid_t req_tid_q;
logic req_need_rsp_q;
logic uc_sc_retcode_q, uc_sc_retcode_d;
hpdcache_req_data_t rsp_rdata_q, rsp_rdata_d;
logic rsp_error_set, rsp_error_rst;
logic rsp_error_q;
logic mem_resp_write_valid_q, mem_resp_write_valid_d;
logic mem_resp_read_valid_q, mem_resp_read_valid_d;
hpdcache_req_data_t mem_req_write_data;
logic [63:0] amo_req_ld_data;
logic [63:0] amo_ld_data;
logic [63:0] amo_req_st_data;
logic [63:0] amo_st_data;
logic [ 7:0] amo_st_be;
logic [63:0] amo_result;
// }}}
// LR/SC reservation buffer logic
// {{{
logic lrsc_rsrv_valid_q;
hpdcache_req_addr_t lrsc_rsrv_addr_q, lrsc_rsrv_addr_d;
hpdcache_nline_t lrsc_rsrv_nline;
hpdcache_offset_t lrsc_rsrv_word;
hpdcache_offset_t lrsc_snoop_words;
hpdcache_nline_t lrsc_snoop_nline;
hpdcache_offset_t lrsc_snoop_base, lrsc_snoop_end;
logic lrsc_snoop_hit;
logic lrsc_snoop_reset;
hpdcache_nline_t lrsc_uc_nline;
hpdcache_offset_t lrsc_uc_word;
logic lrsc_uc_hit;
logic lrsc_uc_set, lrsc_uc_reset;
// NOTE: Reservation set for LR instruction is always 8-bytes in this
// implementation.
assign lrsc_rsrv_nline = hpdcache_get_req_addr_nline(lrsc_rsrv_addr_q),
lrsc_rsrv_word = hpdcache_get_req_addr_offset(lrsc_rsrv_addr_q) >> 3;
// Check hit on LR/SC reservation for snoop port (normal write accesses)
assign lrsc_snoop_words = (lrsc_snoop_size_i < 3) ? 1 : hpdcache_offset_t'((8'h1 << lrsc_snoop_size_i) >> 3),
lrsc_snoop_nline = hpdcache_get_req_addr_nline(lrsc_snoop_addr_i),
lrsc_snoop_base = hpdcache_get_req_addr_offset(lrsc_snoop_addr_i) >> 3,
lrsc_snoop_end = lrsc_snoop_base + lrsc_snoop_words;
assign lrsc_snoop_hit = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_snoop_nline) &
(lrsc_rsrv_word >= lrsc_snoop_base) &
(lrsc_rsrv_word < lrsc_snoop_end );
assign lrsc_snoop_reset = lrsc_snoop_i & lrsc_snoop_hit;
// Check hit on LR/SC reservation for AMOs and SC
assign lrsc_uc_nline = hpdcache_get_req_addr_nline(req_addr_i),
lrsc_uc_word = hpdcache_get_req_addr_offset(req_addr_i) >> 3;
assign lrsc_uc_hit = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_uc_nline) &
(lrsc_rsrv_word == lrsc_uc_word);
// }}}
// Uncacheable request FSM
// {{{
// Next-state and control logic of the uncacheable/AMO request FSM.
// All outputs of this process get a default value first (registers keep
// their current value, command pulses are deasserted); each state then
// overrides only what it needs.
always_comb
begin : uc_fsm_comb
mem_resp_write_valid_d = mem_resp_write_valid_q;
mem_resp_read_valid_d = mem_resp_read_valid_q;
rsp_error_set = 1'b0;
rsp_error_rst = 1'b0;
lrsc_rsrv_addr_d = lrsc_rsrv_addr_q;
uc_sc_retcode_d = uc_sc_retcode_q;
wbuf_flush_all_o = 1'b0;
lrsc_uc_set = 1'b0;
lrsc_uc_reset = 1'b0;
uc_fsm_d = uc_fsm_q;
case (uc_fsm_q)
// Wait for a request
// {{{
UC_IDLE: begin
if (req_valid_i) begin
// assert the write buffer flush command while a request is accepted
wbuf_flush_all_o = 1'b1;
unique case (1'b1)
// plain load/store: go to memory as soon as the write buffer, MSHR,
// replay table and controller report empty
req_op_i.is_ld,
req_op_i.is_st: begin
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
uc_fsm_d = UC_MEM_REQ;
end else begin
uc_fsm_d = UC_WAIT_PENDING;
end
end
req_op_i.is_amo_swap,
req_op_i.is_amo_add,
req_op_i.is_amo_and,
req_op_i.is_amo_or,
req_op_i.is_amo_xor,
req_op_i.is_amo_max,
req_op_i.is_amo_maxu,
req_op_i.is_amo_min,
req_op_i.is_amo_minu,
req_op_i.is_amo_lr: begin
// Reset LR/SC reservation if AMO matches its address
// (an LR does not reset it here: it sets a new one on completion)
lrsc_uc_reset = ~req_op_i.is_amo_lr & lrsc_uc_hit;
// AMO on a cacheable address while configured to report an error:
// respond immediately with the error flag set
if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin
rsp_error_set = 1'b1;
uc_fsm_d = UC_CORE_RSP;
end else begin
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
uc_fsm_d = UC_MEM_REQ;
end else begin
uc_fsm_d = UC_WAIT_PENDING;
end
end
end
req_op_i.is_amo_sc: begin
if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin
rsp_error_set = 1'b1;
uc_fsm_d = UC_CORE_RSP;
end else begin
// Reset previous reservation (if any)
lrsc_uc_reset = 1'b1;
// SC with valid reservation
if (lrsc_uc_hit) begin
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
uc_fsm_d = UC_MEM_REQ;
end else begin
uc_fsm_d = UC_WAIT_PENDING;
end
end
// SC with no valid reservation, thus respond with the failure code
else begin
uc_sc_retcode_d = AMO_SC_FAILURE;
uc_fsm_d = UC_CORE_RSP;
end
end
end
// unsupported operation: respond with an error if a response is
// expected, otherwise stay idle
default: begin
if (req_need_rsp_i) begin
rsp_error_set = 1'b1;
uc_fsm_d = UC_CORE_RSP;
end
end
endcase
end
end
// }}}
// Wait for the write buffer, MSHR, replay table and controller to be empty
// {{{
UC_WAIT_PENDING: begin
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin
uc_fsm_d = UC_MEM_REQ;
end else begin
uc_fsm_d = UC_WAIT_PENDING;
end
end
// }}}
// Send request to memory
// {{{
UC_MEM_REQ: begin
uc_fsm_d = UC_MEM_REQ;
// clear the "response already received" flags before a new transaction
mem_resp_write_valid_d = 1'b0;
mem_resp_read_valid_d = 1'b0;
case (1'b1)
// loads and LR use the read request channel
req_op_q.is_ld,
req_op_q.is_amo_lr: begin
if (mem_req_read_ready_i) begin
uc_fsm_d = UC_MEM_WAIT_RSP;
end
end
// stores, SC and the other AMOs use the write request and write data
// channels, which may be accepted in different cycles
req_op_q.is_st,
req_op_q.is_amo_sc,
req_op_q.is_amo_swap,
req_op_q.is_amo_add,
req_op_q.is_amo_and,
req_op_q.is_amo_or,
req_op_q.is_amo_xor,
req_op_q.is_amo_max,
req_op_q.is_amo_maxu,
req_op_q.is_amo_min,
req_op_q.is_amo_minu: begin
if (mem_req_write_ready_i && mem_req_write_data_ready_i) begin
// both channels accepted in the same cycle
uc_fsm_d = UC_MEM_WAIT_RSP;
end else if (mem_req_write_ready_i) begin
// request accepted, the data still needs to be sent
uc_fsm_d = UC_MEM_WDATA_REQ;
end else if (mem_req_write_data_ready_i) begin
// data accepted, the request still needs to be sent
uc_fsm_d = UC_MEM_W_REQ;
end
end
endcase
end
// }}}
// Send write address
// {{{
UC_MEM_W_REQ: begin
// record any response arriving while waiting
mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i;
mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i;
if (mem_req_write_ready_i) begin
uc_fsm_d = UC_MEM_WAIT_RSP;
end else begin
uc_fsm_d = UC_MEM_W_REQ;
end
end
// }}}
// Send write data
// {{{
UC_MEM_WDATA_REQ: begin
// record any response arriving while waiting
mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i;
mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i;
if (mem_req_write_data_ready_i) begin
uc_fsm_d = UC_MEM_WAIT_RSP;
end else begin
uc_fsm_d = UC_MEM_WDATA_REQ;
end
end
// }}}
// Wait for the response from the memory
// {{{
UC_MEM_WAIT_RSP: begin
// error reported by the read (resp. write) response received this cycle
automatic bit rd_error;
automatic bit wr_error;
uc_fsm_d = UC_MEM_WAIT_RSP;
mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i;
mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i;
rd_error = mem_resp_read_valid_i &&
( mem_resp_read_i.mem_resp_r_error == HPDCACHE_MEM_RESP_NOK);
wr_error = mem_resp_write_valid_i &&
(mem_resp_write_i.mem_resp_w_error == HPDCACHE_MEM_RESP_NOK);
// the error is only recorded when the requester expects a response
rsp_error_set = req_need_rsp_q & (rd_error | wr_error);
case (1'b1)
req_op_q.is_ld: begin
if (mem_resp_read_valid_i) begin
if (req_need_rsp_q) begin
uc_fsm_d = UC_CORE_RSP;
end else begin
uc_fsm_d = UC_IDLE;
end
end
end
req_op_q.is_st: begin
if (mem_resp_write_valid_i) begin
if (req_need_rsp_q) begin
uc_fsm_d = UC_CORE_RSP;
end else begin
uc_fsm_d = UC_IDLE;
end
end
end
req_op_q.is_amo_lr: begin
if (mem_resp_read_valid_i) begin
// set a new reservation
if (!rd_error)
begin
lrsc_uc_set = 1'b1;
lrsc_rsrv_addr_d = req_addr_q;
end
// in case of a memory error, do not make the reservation and
// invalidate an existing one (if valid)
else begin
lrsc_uc_reset = 1'b1;
end
// uncacheable request or error: respond directly; otherwise go
// through the directory lookup states first
if (req_uc_q || rd_error) begin
uc_fsm_d = UC_CORE_RSP;
end else begin
uc_fsm_d = UC_AMO_READ_DIR;
end
end
end
req_op_q.is_amo_sc: begin
if (mem_resp_write_valid_i) begin
// the SC succeeds only if the memory reports the write as atomic
// and without error
automatic bit is_atomic;
is_atomic = mem_resp_write_i.mem_resp_w_is_atomic && !wr_error;
uc_sc_retcode_d = is_atomic ? AMO_SC_SUCCESS : AMO_SC_FAILURE;
if (req_uc_q || !is_atomic) begin
uc_fsm_d = UC_CORE_RSP;
end else begin
uc_fsm_d = UC_AMO_READ_DIR;
end
end
end
req_op_q.is_amo_swap,
req_op_q.is_amo_add,
req_op_q.is_amo_and,
req_op_q.is_amo_or,
req_op_q.is_amo_xor,
req_op_q.is_amo_max,
req_op_q.is_amo_maxu,
req_op_q.is_amo_min,
req_op_q.is_amo_minu: begin
// wait until both the old data (read response) and the write
// acknowledgement have been received, in this cycle or a previous one
if ((mem_resp_read_valid_i && mem_resp_write_valid_i) ||
(mem_resp_read_valid_i && mem_resp_write_valid_q) ||
(mem_resp_read_valid_q && mem_resp_write_valid_i))
begin
// rsp_error_q covers an error flagged by the response received in a
// previous cycle
if (req_uc_q || rsp_error_q || rd_error || wr_error) begin
uc_fsm_d = UC_CORE_RSP;
end else begin
uc_fsm_d = UC_AMO_READ_DIR;
end
end
end
endcase
end
// }}}
// Send the response to the requester
// {{{
UC_CORE_RSP: begin
if (core_rsp_ready_i) begin
// clear the error flag once the response has been accepted
rsp_error_rst = 1'b1;
uc_fsm_d = UC_IDLE;
end else begin
uc_fsm_d = UC_CORE_RSP;
end
end
// }}}
// Check for a cache hit on the AMO target address
// {{{
UC_AMO_READ_DIR: begin
uc_fsm_d = UC_AMO_WRITE_DATA;
end
// }}}
// Write the locally computed AMO result in the cache
// {{{
UC_AMO_WRITE_DATA: begin
uc_fsm_d = UC_CORE_RSP;
end
// }}}
endcase
end
// }}}
// AMO unit
// {{{
// Number of address bits needed to select one 64-bit word within the
// request data (HPDCACHE_REQ_DATA_WIDTH bits)
localparam hpdcache_uint AMO_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_DATA_WIDTH/64);
generate
// The request data is wider than 64 bits: select the 64-bit word (and its
// 8-bit byte-enable) addressed by the bits of req_addr_q starting at bit 3
if (AMO_WORD_INDEX_WIDTH > 0) begin : amo_operand_mux_gen
// old value read from memory
hpdcache_mux #(
.NINPUT (HPDCACHE_REQ_DATA_WIDTH/64),
.DATA_WIDTH (64),
.ONE_HOT_SEL (1'b0)
) amo_ld_data_mux_i (
.data_i (rsp_rdata_q),
.sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]),
.data_o (amo_req_ld_data)
);
// store operand provided by the requester
hpdcache_mux #(
.NINPUT (HPDCACHE_REQ_DATA_WIDTH/64),
.DATA_WIDTH (64),
.ONE_HOT_SEL (1'b0)
) amo_st_data_mux_i (
.data_i (req_data_q),
.sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]),
.data_o (amo_req_st_data)
);
// byte-enable provided by the requester
hpdcache_mux #(
.NINPUT (HPDCACHE_REQ_DATA_WIDTH/64),
.DATA_WIDTH (8),
.ONE_HOT_SEL (1'b0)
) amo_st_be_mux_i (
.data_i (req_be_q),
.sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]),
.data_o (amo_st_be)
);
// The request data is a single 64-bit word: no selection needed
end else begin
assign amo_req_ld_data = rsp_rdata_q;
assign amo_req_st_data = req_data_q;
assign amo_st_be = req_be_q;
end
endgenerate
// Both AMO operands (old memory value and store operand) go through
// prepare_amo_data_operand() with the size, address and sign-extension
// requirement of the registered request
assign amo_ld_data = prepare_amo_data_operand(
    amo_req_ld_data,
    req_size_q,
    req_addr_q,
    amo_need_sign_extend(req_op_q)
);
assign amo_st_data = prepare_amo_data_operand(
    amo_req_st_data,
    req_size_q,
    req_addr_q,
    amo_need_sign_extend(req_op_q)
);

// Local computation of the AMO result
hpdcache_amo amo_unit_i (
    .op_i      (req_op_q),
    .ld_data_i (amo_ld_data),
    .st_data_i (amo_st_data),
    .result_o  (amo_result)
);

// Directory lookup, issued while the FSM is in the UC_AMO_READ_DIR state
assign dir_amo_match_o       = (uc_fsm_q == UC_AMO_READ_DIR);
assign dir_amo_match_set_o   = hpdcache_get_req_addr_set(req_addr_q);
assign dir_amo_match_tag_o   = hpdcache_get_req_addr_tag(req_addr_q);
assign dir_amo_update_plru_o = dir_amo_match_o;

// Cache data update, issued while the FSM is in the UC_AMO_WRITE_DATA state.
// The write is only enabled when the directory reports a hit in some way.
assign data_amo_write_o        = (uc_fsm_q == UC_AMO_WRITE_DATA);
assign data_amo_write_enable_o = |dir_amo_hit_way_i;
assign data_amo_write_set_o    = hpdcache_get_req_addr_set(req_addr_q);
assign data_amo_write_size_o   = req_size_q;
assign data_amo_write_word_o   = hpdcache_get_req_addr_word(req_addr_q);
assign data_amo_write_data_o   = prepare_amo_data_result(amo_result, req_size_q);
assign data_amo_write_be_o     = amo_st_be;
// }}}
// Core response outputs
// {{{
// A new request is only accepted while the FSM is idle
assign req_ready_o      = (uc_fsm_q == UC_IDLE);
// The response is presented for as long as the FSM stays in UC_CORE_RSP
assign core_rsp_valid_o = (uc_fsm_q == UC_CORE_RSP);
// }}}
// Memory read request outputs
// {{{
// Memory read request channel: only loads and LR use it, and the request is
// only valid while the FSM is in the UC_MEM_REQ state.
always_comb
begin : mem_req_read_comb
    // Defaults: no request, non-cacheable plain read of the registered address
    mem_req_read_valid_o             = 1'b0;
    mem_req_read_o.mem_req_command   = HPDCACHE_MEM_READ;
    mem_req_read_o.mem_req_atomic    = HPDCACHE_MEM_ATOMIC_ADD;
    mem_req_read_o.mem_req_cacheable = 1'b0;
    mem_req_read_o.mem_req_id        = mem_read_id_i;
    mem_req_read_o.mem_req_size      = req_size_q;
    mem_req_read_o.mem_req_len       = 0;
    mem_req_read_o.mem_req_addr      = req_addr_q;

    unique case (1'b1)
        // plain load
        req_op_q.is_ld: begin
            mem_req_read_valid_o = (uc_fsm_q == UC_MEM_REQ);
        end
        // LR: atomic command with the LDEX sub-command
        req_op_q.is_amo_lr: begin
            mem_req_read_valid_o           = (uc_fsm_q == UC_MEM_REQ);
            mem_req_read_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
            mem_req_read_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_LDEX;
        end
        // any other operation does not use the read channel
        default: begin
            /* keep the defaults */
        end
    endcase
end
// }}}
// Memory write request outputs
// {{{
// Memory write request and write data channels: used by stores, SC and all
// AMOs except LR.
always_comb
begin : mem_req_write_comb
    // Fields common to every operation
    mem_req_write_o.mem_req_addr      = req_addr_q;
    mem_req_write_o.mem_req_len       = 0;
    mem_req_write_o.mem_req_size      = req_size_q;
    mem_req_write_o.mem_req_id        = mem_write_id_i;
    mem_req_write_o.mem_req_cacheable = 1'b0;

    // Defaults for the atomic operations: ATOMIC command and unmodified data.
    // The default branch below turns the request into a plain write.
    mem_req_write_data              = req_data_q;
    mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC;
    mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_ADD;

    // Select the atomic sub-command
    unique case (1'b1)
        req_op_q.is_amo_sc: begin
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_STEX;
        end
        req_op_q.is_amo_swap: begin
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SWAP;
        end
        req_op_q.is_amo_add: begin
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD;
        end
        // AND is sent as a CLR command with the bitwise-inverted operand
        req_op_q.is_amo_and: begin
            mem_req_write_data             = ~req_data_q;
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_CLR;
        end
        req_op_q.is_amo_or: begin
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SET;
        end
        req_op_q.is_amo_xor: begin
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_EOR;
        end
        req_op_q.is_amo_max: begin
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SMAX;
        end
        req_op_q.is_amo_maxu: begin
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_UMAX;
        end
        req_op_q.is_amo_min: begin
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SMIN;
        end
        req_op_q.is_amo_minu: begin
            mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_UMIN;
        end
        // not an atomic operation: plain write
        default: begin
            mem_req_write_o.mem_req_command = HPDCACHE_MEM_WRITE;
            mem_req_write_o.mem_req_atomic  = HPDCACHE_MEM_ATOMIC_ADD;
        end
    endcase

    // Valid signals of the request and data channels: both are deasserted
    // unless the FSM state says otherwise
    mem_req_write_valid_o      = 1'b0;
    mem_req_write_data_valid_o = 1'b0;
    unique case (uc_fsm_q)
        // first attempt: present the request and the data together
        UC_MEM_REQ: begin
            unique case (1'b1)
                req_op_q.is_st,
                req_op_q.is_amo_sc,
                req_op_q.is_amo_swap,
                req_op_q.is_amo_add,
                req_op_q.is_amo_and,
                req_op_q.is_amo_or,
                req_op_q.is_amo_xor,
                req_op_q.is_amo_max,
                req_op_q.is_amo_maxu,
                req_op_q.is_amo_min,
                req_op_q.is_amo_minu: begin
                    mem_req_write_valid_o      = 1'b1;
                    mem_req_write_data_valid_o = 1'b1;
                end
                default: begin
                    /* operation does not use the write channels */
                end
            endcase
        end
        // the data was already accepted: only the request remains
        UC_MEM_W_REQ: begin
            mem_req_write_valid_o = 1'b1;
        end
        // the request was already accepted: only the data remains
        UC_MEM_WDATA_REQ: begin
            mem_req_write_data_valid_o = 1'b1;
        end
        default: begin
            /* nothing to send */
        end
    endcase
end
// Adapt the write data and byte-enable to the width of the memory interface
generate
// memory data width is bigger than the width of the core's interface
if (MEM_REQ_RATIO > 1) begin : mem_req_data_gen
// replicate data on every core-width slice of the memory data bus
assign mem_req_write_data_o.mem_req_w_data = {MEM_REQ_RATIO{mem_req_write_data}};
// demultiplex the byte-enable: the slice is selected with the address bits
// located just above the byte offset within the core interface width
hpdcache_demux #(
.NOUTPUT (MEM_REQ_RATIO),
.DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH/8)
) mem_write_be_demux_i (
.data_i (req_be_q),
.sel_i (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]),
.data_o (mem_req_write_data_o.mem_req_w_be)
);
end
// memory data width is equal to the width of the core's interface
else begin
assign mem_req_write_data_o.mem_req_w_data = mem_req_write_data;
assign mem_req_write_data_o.mem_req_w_be = req_be_q;
end
// the "last" flag of the write data is always set
assign mem_req_write_data_o.mem_req_w_last = 1'b1;
endgenerate
// }}}
// Response handling
// {{{
// Return code of an SC: the registered code zero-extended to 64 bits, then
// passed through prepare_amo_data_result() with the request size
logic [63:0] sc_retcode;
logic [63:0] sc_rdata;
assign sc_retcode = {{63{1'b0}}, uc_sc_retcode_q};
assign sc_rdata   = prepare_amo_data_result(sc_retcode, req_size_q);

// Core response: an SC returns its (replicated) return code, any other
// operation returns the registered memory read data
assign core_rsp_o.rdata   = req_op_q.is_amo_sc ? {HPDCACHE_REQ_WORDS{sc_rdata}} : rsp_rdata_q;
assign core_rsp_o.sid     = req_sid_q;
assign core_rsp_o.tid     = req_tid_q;
assign core_rsp_o.error   = rsp_error_q;
assign core_rsp_o.aborted = 1'b0;
// Resize the memory response data to the core response width
generate
// memory data width is bigger than the width of the core's interface:
// select the core-width slice with the address bits located just above the
// byte offset within the core interface width
if (MEM_REQ_RATIO > 1) begin : core_rsp_data_gen
hpdcache_mux #(
.NINPUT (MEM_REQ_RATIO),
.DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH)
) data_read_rsp_mux_i(
.data_i (mem_resp_read_i.mem_resp_r_data),
.sel_i (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]),
.data_o (rsp_rdata_d)
);
end
// memory data width is equal to the width of the core's interface
else begin
assign rsp_rdata_d = mem_resp_read_i.mem_resp_r_data;
end
endgenerate
// Memory responses are never back-pressured: both response channels are
// permanently ready
assign mem_resp_write_ready_o = 1'b1;
assign mem_resp_read_ready_o  = 1'b1;
// }}}
// Set cache request registers
// {{{
// Capture the request when it is accepted (valid/ready handshake). These
// registers have no reset.
always_ff @(posedge clk_i)
begin : req_ff
    if (req_ready_o && req_valid_i) begin
        // operation and addressing
        req_op_q       <= req_op_i;
        req_uc_q       <= req_uc_i;
        req_addr_q     <= req_addr_i;
        req_size_q     <= req_size_i;
        // write payload
        req_be_q       <= req_be_i;
        req_data_q     <= req_data_i;
        // response routing
        req_need_rsp_q <= req_need_rsp_i;
        req_tid_q      <= req_tid_i;
        req_sid_q      <= req_sid_i;
    end
end
// }}}
// Uncacheable request FSM set state
// {{{
// Set/reset commands of the reservation valid flag: it is set by the FSM
// only, and reset either by the FSM or by a snooped write hitting it
logic lrsc_rsrv_valid_set, lrsc_rsrv_valid_reset;
assign lrsc_rsrv_valid_reset = lrsc_uc_reset | lrsc_snoop_reset;
assign lrsc_rsrv_valid_set   = lrsc_uc_set;

// FSM state and reservation valid flag (asynchronous active-low reset)
always_ff @(posedge clk_i or negedge rst_ni)
begin : uc_fsm_ff
    if (!rst_ni) begin
        lrsc_rsrv_valid_q <= 1'b0;
        uc_fsm_q          <= UC_IDLE;
    end else begin
        // when cleared, the flag takes the set command; when set, it holds
        // until a reset command
        lrsc_rsrv_valid_q <= (~lrsc_rsrv_valid_q & lrsc_rsrv_valid_set ) |
                             ( lrsc_rsrv_valid_q & ~lrsc_rsrv_valid_reset);
        uc_fsm_q          <= uc_fsm_d;
    end
end

// Reserved address and SC return code (no reset)
always_ff @(posedge clk_i)
begin : uc_amo_ff
    uc_sc_retcode_q  <= uc_sc_retcode_d;
    lrsc_rsrv_addr_q <= lrsc_rsrv_addr_d;
end
// }}}
// Response registers
// {{{
// Response bookkeeping registers (no reset)
always_ff @(posedge clk_i)
begin
    // flags recording the memory responses already received
    mem_resp_read_valid_q  <= mem_resp_read_valid_d;
    mem_resp_write_valid_q <= mem_resp_write_valid_d;
    // the read data is captured whenever a read response arrives
    if (mem_resp_read_valid_i) begin
        rsp_rdata_q <= rsp_rdata_d;
    end
end

// Sticky response error flag (asynchronous active-low reset)
always_ff @(posedge clk_i or negedge rst_ni)
begin
    if (!rst_ni) begin
        rsp_error_q <= 1'b0;
    end else begin
        // when cleared, the flag takes the set command; when set, it holds
        // until the reset command
        rsp_error_q <= (~rsp_error_q & rsp_error_set) |
                       ( rsp_error_q & ~rsp_error_rst);
    end
end
// }}}
// Assertions
// {{{
// pragma translate_off
// The incoming request carries one of the AMO operations (LR and SC included).
// Simulation-only helper shared by the AMO checks below.
logic uc_assert_req_is_amo;
assign uc_assert_req_is_amo = req_op_i.is_amo_lr   ||
                              req_op_i.is_amo_sc   ||
                              req_op_i.is_amo_swap ||
                              req_op_i.is_amo_add  ||
                              req_op_i.is_amo_and  ||
                              req_op_i.is_amo_or   ||
                              req_op_i.is_amo_xor  ||
                              req_op_i.is_amo_max  ||
                              req_op_i.is_amo_maxu ||
                              req_op_i.is_amo_min  ||
                              req_op_i.is_amo_minu;

// Plain loads and stores shall only target uncacheable addresses
assert property (@(posedge clk_i) disable iff (!rst_ni)
        (req_valid_i && req_op_i.is_ld) -> req_uc_i) else
    $error("uc_handler: unexpected load request on cacheable region");

assert property (@(posedge clk_i) disable iff (!rst_ni)
        (req_valid_i && req_op_i.is_st) -> req_uc_i) else
    $error("uc_handler: unexpected store request on cacheable region");

// AMOs always expect a response
assert property (@(posedge clk_i) disable iff (!rst_ni)
        (req_valid_i && uc_assert_req_is_amo) -> req_need_rsp_i) else
    $error("uc_handler: amo requests shall need a response");

// AMOs are restricted to sizes 2 and 3
assert property (@(posedge clk_i) disable iff (!rst_ni)
        (req_valid_i && uc_assert_req_is_amo) -> (req_size_i inside {2,3})) else
    $error("uc_handler: amo requests shall be 4 or 8 bytes wide");

// Memory responses are only expected while waiting for them
assert property (@(posedge clk_i) disable iff (!rst_ni)
        (mem_resp_write_valid_i || mem_resp_read_valid_i) -> (uc_fsm_q == UC_MEM_WAIT_RSP)) else
    $error("uc_handler: unexpected response from memory");
// pragma translate_on
// }}}
endmodule

View File

@ -0,0 +1,678 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache Write Buffer
* History :
*/
module hpdcache_wbuf
// Parameters
// {{{
#(
// Number of entries in the directory part of the Write Buffer
parameter int unsigned WBUF_DIR_ENTRIES = 0,
// Number of entries in the data part of the Write Buffer
parameter int unsigned WBUF_DATA_ENTRIES = 0,
// Width in bits of the write words
parameter int unsigned WBUF_WORD_WIDTH = 0,
// Number of words per line in the write buffer
parameter int unsigned WBUF_WORDS = 0,
// Width in bits of the physical address
parameter int unsigned WBUF_PA_WIDTH = 0,
// Maximum value of the time counter
parameter int unsigned WBUF_TIMECNT_MAX = 8,
// Number of most significant bits to check for read conflicts
parameter int unsigned WBUF_READ_MATCH_WIDTH = 0,
// Use a feedthrough FIFO on the send interface
parameter bit WBUF_SEND_FEEDTHROUGH = 0,
localparam int unsigned WBUF_OFFSET_WIDTH = $clog2((WBUF_WORD_WIDTH*WBUF_WORDS)/8),
localparam int unsigned WBUF_TAG_WIDTH = WBUF_PA_WIDTH - WBUF_OFFSET_WIDTH,
localparam int unsigned WBUF_WORD_OFFSET = $clog2(WBUF_WORD_WIDTH/8),
localparam int unsigned WBUF_DATA_PTR_WIDTH = $clog2(WBUF_DATA_ENTRIES),
localparam int unsigned WBUF_DIR_PTR_WIDTH = $clog2(WBUF_DIR_ENTRIES),
localparam int unsigned WBUF_TIMECNT_WIDTH = $clog2(WBUF_TIMECNT_MAX),
localparam type wbuf_addr_t = logic unsigned [ WBUF_PA_WIDTH-1:0],
localparam type wbuf_dir_ptr_t = logic unsigned [ WBUF_DIR_PTR_WIDTH-1:0],
localparam type wbuf_data_ptr_t = logic unsigned [ WBUF_DATA_PTR_WIDTH-1:0],
localparam type wbuf_data_t = logic [ WBUF_WORD_WIDTH-1:0],
localparam type wbuf_be_t = logic [ WBUF_WORD_WIDTH/8-1:0],
localparam type wbuf_data_buf_t = wbuf_data_t [ WBUF_WORDS-1:0],
localparam type wbuf_be_buf_t = wbuf_be_t [ WBUF_WORDS-1:0],
localparam type wbuf_tag_t = logic unsigned [ WBUF_TAG_WIDTH-1:0],
localparam type wbuf_match_t = logic unsigned [WBUF_READ_MATCH_WIDTH-1:0],
localparam type wbuf_timecnt_t = logic unsigned [ WBUF_TIMECNT_WIDTH-1:0]
)
// }}}
// Ports
// {{{
(
// Clock and reset signals
input logic clk_i,
input logic rst_ni,
// Global control signals
output logic empty_o,
output logic full_o,
input logic flush_all_i,
// Configuration signals
// Timer threshold
input wbuf_timecnt_t cfg_threshold_i,
// Reset timer on write
input logic cfg_reset_timecnt_on_write_i,
// Sequentialize write-after-write hazards
input logic cfg_sequential_waw_i,
// Inhibit write coalescing
input logic cfg_inhibit_write_coalescing_i,
// Write interface
input logic write_i,
output logic write_ready_o,
input wbuf_addr_t write_addr_i,
input wbuf_data_t write_data_i,
input wbuf_be_t write_be_i, // byte-enable
input logic write_uc_i, // uncacheable write
// Read hit interface
input wbuf_addr_t read_addr_i,
output logic read_hit_o,
input logic read_flush_hit_i,
// Replay hit interface
input wbuf_addr_t replay_addr_i,
input logic replay_is_read_i,
output logic replay_open_hit_o,
output logic replay_pend_hit_o,
output logic replay_sent_hit_o,
output logic replay_not_ready_o,
// Send interface
input logic send_meta_ready_i,
output logic send_meta_valid_o,
output wbuf_addr_t send_addr_o,
output wbuf_dir_ptr_t send_id_o,
output logic send_uc_o,
input logic send_data_ready_i,
output logic send_data_valid_o,
output wbuf_addr_t send_data_tag_o,
output wbuf_data_buf_t send_data_o,
output wbuf_be_buf_t send_be_o,
// Acknowledge interface
input logic ack_i,
input wbuf_dir_ptr_t ack_id_i,
input logic ack_error_i
);
// }}}
// Definition of constants, types and functions
// {{{
// Depth of the two send FIFOs (data pointers and meta-data): one slot per
// data entry
localparam int WBUF_SEND_FIFO_DEPTH = WBUF_DATA_ENTRIES;
typedef logic unsigned [31:0] wbuf_uint;
// State of a directory entry
typedef enum logic [1:0] {
WBUF_FREE = 2'b00, // unused/free slot
WBUF_OPEN = 2'b01, // there are pending writes in this slot
WBUF_PEND = 2'b10, // the slot is waiting to be sent
WBUF_SENT = 2'b11 // the slot is sent and waits for the memory acknowledge
} wbuf_state_e;
// Directory entry
typedef struct packed {
wbuf_data_ptr_t ptr; // index of the associated data entry
wbuf_timecnt_t cnt; // time counter, compared against cfg_threshold_i
wbuf_tag_t tag; // address tag (address without the offset bits)
logic uc; // uncacheable write
} wbuf_dir_entry_t;
// Data entry: one line of write data with its byte-enables
typedef struct packed {
wbuf_data_buf_t data;
wbuf_be_buf_t be;
} wbuf_data_entry_t;
// Item of the send data FIFO
typedef struct packed {
wbuf_data_ptr_t send_data_ptr;
wbuf_tag_t send_data_tag;
} wbuf_send_data_t;
// Item of the send meta-data FIFO
typedef struct packed {
wbuf_tag_t send_meta_tag;
wbuf_dir_ptr_t send_meta_id;
logic send_meta_uc;
} wbuf_send_meta_t;
// Circular search in the directory: starting from the entry that follows
// curr_ptr (wrapping around, curr_ptr itself being examined last), return the
// index of the first entry whose state equals "state". When no entry
// matches, curr_ptr is returned unchanged.
function automatic wbuf_dir_ptr_t wbuf_dir_find_next(
        input wbuf_dir_ptr_t                      curr_ptr,
        input wbuf_state_e [WBUF_DIR_ENTRIES-1:0] dir_state,
        input wbuf_state_e                        state);
    automatic wbuf_dir_ptr_t candidate;
    for (int unsigned step = 1; step <= WBUF_DIR_ENTRIES; step++) begin
        candidate = wbuf_dir_ptr_t'((step + int'(curr_ptr)) % WBUF_DIR_ENTRIES);
        if (dir_state[candidate] == state) begin
            return candidate;
        end
    end
    return curr_ptr;
endfunction
// Circular search in the data buffer: starting from the entry that follows
// curr_ptr (wrapping around, curr_ptr itself being examined last), return the
// index of the first entry whose valid bit equals "state". When no entry
// matches, curr_ptr is returned unchanged.
function automatic wbuf_data_ptr_t wbuf_data_find_next(
        input wbuf_data_ptr_t               curr_ptr,
        input logic [WBUF_DATA_ENTRIES-1:0] data_valid,
        input logic                         state);
    automatic wbuf_data_ptr_t candidate;
    for (int unsigned step = 1; step <= WBUF_DATA_ENTRIES; step++) begin
        candidate = wbuf_data_ptr_t'((step + int'(curr_ptr)) % WBUF_DATA_ENTRIES);
        if (data_valid[candidate] == state) begin
            return candidate;
        end
    end
    return curr_ptr;
endfunction
// Merge a new write into the content of a data entry.
//   wbuf_ret_data / wbuf_ret_be : merged data and byte-enables (outputs)
//   wbuf_old_data / wbuf_old_be : current content of the entry
//   wbuf_new_data / wbuf_new_be : data and byte-enables of the new write
// For each byte, the new data wins when its byte-enable is set; otherwise
// the old data is kept. The resulting byte-enable is the union of both.
function automatic void wbuf_data_write(
        output wbuf_data_buf_t wbuf_ret_data,
        output wbuf_be_buf_t   wbuf_ret_be,
        input  wbuf_data_buf_t wbuf_old_data,
        input  wbuf_be_buf_t   wbuf_old_be,
        input  wbuf_data_buf_t wbuf_new_data,
        input  wbuf_be_buf_t   wbuf_new_be);
    for (int unsigned word_idx = 0; word_idx < WBUF_WORDS; word_idx++) begin
        wbuf_ret_be[word_idx] = wbuf_old_be[word_idx] | wbuf_new_be[word_idx];
        for (int unsigned byte_idx = 0; byte_idx < WBUF_WORD_WIDTH/8; byte_idx++) begin
            wbuf_ret_data[word_idx][byte_idx*8 +: 8] =
                wbuf_new_be[word_idx][byte_idx] ? wbuf_new_data[word_idx][byte_idx*8 +: 8]
                                                : wbuf_old_data[word_idx][byte_idx*8 +: 8];
        end
    end
endfunction
// Return the WBUF_READ_MATCH_WIDTH most significant bits of a tag
function automatic wbuf_match_t wbuf_tag_to_match_addr(wbuf_tag_t tag);
    return tag[WBUF_TAG_WIDTH - 1 -: WBUF_READ_MATCH_WIDTH];
endfunction
// }}}
// Definition of internal wires and registers
// {{{
// Directory: state and content of each entry
wbuf_state_e [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_state_q, wbuf_dir_state_d;
wbuf_dir_entry_t [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_q, wbuf_dir_d;
// Data buffer: valid bit and content of each entry
logic [WBUF_DATA_ENTRIES-1:0] wbuf_data_valid_q, wbuf_data_valid_d;
wbuf_data_entry_t [WBUF_DATA_ENTRIES-1:0] wbuf_data_q, wbuf_data_d;
// Pointer to the next directory entry to allocate; wbuf_dir_free tells
// whether the pointed entry is actually free
wbuf_dir_ptr_t wbuf_dir_free_ptr_q, wbuf_dir_free_ptr_d;
logic wbuf_dir_free;
// Pointer to the next directory entry to send
wbuf_dir_ptr_t wbuf_dir_send_ptr_q, wbuf_dir_send_ptr_d;
// Pointer to the next data entry to allocate; wbuf_data_free tells whether
// the pointed entry is actually free
wbuf_data_ptr_t wbuf_data_free_ptr_q, wbuf_data_free_ptr_d;
logic wbuf_data_free;
// The incoming write can allocate a new entry
logic wbuf_write_free;
// The incoming write matches the tag of an entry in the given state
logic wbuf_write_hit_open;
logic wbuf_write_hit_pend;
logic wbuf_write_hit_sent;
// Index of the matching entry in the OPEN (resp. PEND) state
wbuf_dir_ptr_t wbuf_write_hit_open_dir_ptr;
wbuf_dir_ptr_t wbuf_write_hit_pend_dir_ptr;
// Write side of the send meta-data FIFO
logic send_meta_valid;
logic send_meta_ready;
wbuf_send_meta_t send_meta_wdata, send_meta_rdata;
// Write side (wok, w, d) and read data (q) of the send data FIFO
logic send_data_wok;
logic send_data_w;
wbuf_send_data_t send_data_d;
wbuf_send_data_t send_data_q;
// Incoming write expanded to the format of a buffer line
wbuf_tag_t write_tag;
wbuf_data_buf_t write_data;
wbuf_be_buf_t write_be;
// Per-entry results of the replay address comparison
logic [WBUF_DIR_ENTRIES-1:0] replay_match;
logic [WBUF_DIR_ENTRIES-1:0] replay_open_hit;
logic [WBUF_DIR_ENTRIES-1:0] replay_pend_hit;
logic [WBUF_DIR_ENTRIES-1:0] replay_sent_hit;
genvar gen_i;
// }}}
// Global control signals
// {{{
// The write buffer is empty when every directory entry is free
always_comb
begin : empty_comb
    automatic logic [WBUF_DIR_ENTRIES-1:0] slot_is_free;
    for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
        slot_is_free[i] = (wbuf_dir_state_q[i] == WBUF_FREE);
    end
    empty_o = &slot_is_free;
end

// The write buffer is full when no directory entry is free
always_comb
begin : full_comb
    automatic logic [WBUF_DIR_ENTRIES-1:0] slot_is_used;
    for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
        slot_is_used[i] = (wbuf_dir_state_q[i] != WBUF_FREE);
    end
    full_o = &slot_is_used;
end
// }}}
// Write control
// {{{
// Tag of the incoming write: the address without its offset bits
assign write_tag = write_addr_i[WBUF_PA_WIDTH-1:WBUF_OFFSET_WIDTH];

// The write data word is replicated on every word of the line; the
// byte-enables select the word(s) actually written
always_comb
begin : wbuf_write_data_comb
    write_data = {WBUF_WORDS{write_data_i}};
end
// Expand the byte-enable of the incoming write to the width of a line
generate
    // A line is wider than a word: only the word selected by the address
    // bits [WBUF_OFFSET_WIDTH-1:WBUF_WORD_OFFSET] gets the byte-enable
    if (WBUF_OFFSET_WIDTH > WBUF_WORD_OFFSET) begin : wbuf_write_be_gt_gen
        always_comb
        begin : wbuf_write_be_comb
            for (int unsigned w = 0; w < WBUF_WORDS; w++) begin
                write_be[w] = '0;
                if (w == int'(write_addr_i[WBUF_OFFSET_WIDTH-1:WBUF_WORD_OFFSET])) begin
                    write_be[w] = write_be_i;
                end
            end
        end
    // Otherwise the byte-enable is copied on every word
    end else begin : wbuf_write_be_le_gen
        always_comb
        begin : wbuf_write_be_comb
            write_be = {WBUF_WORDS{write_be_i}};
        end
    end
endgenerate
// Next value of the allocation pointers
always_comb
begin : wbuf_free_comb
    // Data pointer: reuse the entry that is being sent; otherwise, when the
    // pointed entry gets allocated, move on to the next free one
    if (send_data_valid_o && send_data_ready_i) begin
        wbuf_data_free_ptr_d = send_data_q.send_data_ptr;
    end else if (write_i && wbuf_write_free) begin
        wbuf_data_free_ptr_d = wbuf_data_find_next(wbuf_data_free_ptr_q, wbuf_data_valid_q, 1'b0);
    end else begin
        wbuf_data_free_ptr_d = wbuf_data_free_ptr_q;
    end

    // Directory pointer: reuse the entry that is being acknowledged;
    // otherwise, when the pointed entry gets allocated, move on to the next
    // free one
    if (ack_i) begin
        wbuf_dir_free_ptr_d = ack_id_i;
    end else if (write_i && wbuf_write_free) begin
        wbuf_dir_free_ptr_d = wbuf_dir_find_next(wbuf_dir_free_ptr_q, wbuf_dir_state_q, WBUF_FREE);
    end else begin
        wbuf_dir_free_ptr_d = wbuf_dir_free_ptr_q;
    end
end

// The entries designated by the allocation pointers are actually free
assign wbuf_data_free = ~wbuf_data_valid_q[wbuf_data_free_ptr_q];
assign wbuf_dir_free  = (wbuf_dir_state_q[wbuf_dir_free_ptr_q] == WBUF_FREE);
// Compare the tag of the incoming write with every directory entry, and
// report the state of the matching entries. When several entries in the same
// state match, the pointer of the highest index is kept.
always_comb
begin : wbuf_write_hit_comb
    wbuf_write_hit_open_dir_ptr = 0;
    wbuf_write_hit_pend_dir_ptr = 0;
    wbuf_write_hit_open         = 1'b0;
    wbuf_write_hit_pend         = 1'b0;
    wbuf_write_hit_sent         = 1'b0;
    for (int unsigned slot = 0; slot < WBUF_DIR_ENTRIES; slot++) begin
        if (wbuf_dir_q[slot].tag == write_tag) begin
            unique case (wbuf_dir_state_q[slot])
                WBUF_SENT: begin
                    wbuf_write_hit_sent = 1'b1;
                end
                WBUF_PEND: begin
                    wbuf_write_hit_pend_dir_ptr = wbuf_dir_ptr_t'(slot);
                    wbuf_write_hit_pend         = 1'b1;
                end
                WBUF_OPEN: begin
                    wbuf_write_hit_open_dir_ptr = wbuf_dir_ptr_t'(slot);
                    wbuf_write_hit_open         = 1'b1;
                end
                default: begin
                    /* free slot: its tag is not meaningful */
                end
            endcase
        end
    end
end
// Read hit detection: the WBUF_READ_MATCH_WIDTH most significant bits of the
// read address are compared with those of the address of every used slot
// (open, pending or sent) of the write buffer directory
always_comb
begin : read_hit_comb
    automatic logic [WBUF_DIR_ENTRIES-1:0] read_hit;
    automatic wbuf_match_t                 read_tag;
    read_tag = read_addr_i[WBUF_PA_WIDTH-1 -: WBUF_READ_MATCH_WIDTH];
    read_hit = '0;
    for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
        unique case (wbuf_dir_state_q[i])
            WBUF_OPEN, WBUF_PEND, WBUF_SENT: begin
                automatic wbuf_addr_t  wbuf_addr;
                automatic wbuf_match_t wbuf_tag;
                // rebuild the full address of the slot from its tag
                wbuf_addr   = wbuf_addr_t'(wbuf_dir_q[i].tag) << WBUF_OFFSET_WIDTH;
                wbuf_tag    = wbuf_addr[WBUF_PA_WIDTH-1 -: WBUF_READ_MATCH_WIDTH];
                read_hit[i] = (read_tag == wbuf_tag);
            end
            default: begin
                /* free slot: no hit */
            end
        endcase
    end
    read_hit_o = |read_hit;
end
// Replay hit detection: compare the replay address with the tag of every
// slot of the write buffer directory, and qualify the match with the state
// of the slot
generate
    for (gen_i = 0; gen_i < WBUF_DIR_ENTRIES; gen_i++) begin : replay_match_gen
        assign replay_match[gen_i] = replay_is_read_i ?
                /* replay is read: compare address block tag (e.g. cache line) */
                (wbuf_tag_to_match_addr(wbuf_dir_q[gen_i].tag) ==
                        replay_addr_i[WBUF_PA_WIDTH - 1 -: WBUF_READ_MATCH_WIDTH]) :
                /* replay is write: compare wbuf tag */
                (wbuf_dir_q[gen_i].tag ==
                        replay_addr_i[WBUF_PA_WIDTH - 1 -: WBUF_TAG_WIDTH]);

        assign replay_open_hit[gen_i] =
                replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_OPEN);
        assign replay_pend_hit[gen_i] =
                replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_PEND);
        assign replay_sent_hit[gen_i] =
                replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_SENT);
    end
endgenerate

// A hit is reported when at least one slot in the given state matches
assign replay_open_hit_o = |replay_open_hit;
assign replay_pend_hit_o = |replay_pend_hit;
assign replay_sent_hit_o = |replay_sent_hit;
// The write buffer is reported as not ready for the replayed request when:
// - it hits a pending slot; or
// - it hits a sent slot and write-after-write hazards are sequentialized; or
// - it does not hit an open slot and a new slot cannot be allocated
always_comb
begin : replay_wbuf_not_ready_comb
    if (replay_pend_hit_o) begin
        replay_not_ready_o = 1'b1;
    end else if (replay_sent_hit_o && cfg_sequential_waw_i) begin
        replay_not_ready_o = 1'b1;
    end else if (!replay_open_hit_o && !(wbuf_dir_free && wbuf_data_free)) begin
        replay_not_ready_o = 1'b1;
    end else begin
        replay_not_ready_o = 1'b0;
    end
end

// A new slot can be allocated for the incoming write when a directory entry
// and a data entry are free, the write does not hit an open or pending slot,
// and it does not hit a sent slot while write-after-write hazards are
// sequentialized
assign wbuf_write_free = wbuf_dir_free
                       & wbuf_data_free
                       & ~(wbuf_write_hit_open | wbuf_write_hit_pend)
                       & ~(wbuf_write_hit_sent & cfg_sequential_waw_i);

// The write is accepted when it can allocate a new slot, or when it can be
// coalesced into an open or pending slot
assign write_ready_o = wbuf_write_free
                     | ((wbuf_write_hit_open | wbuf_write_hit_pend)
                             & ~cfg_inhibit_write_coalescing_i);
// }}}
// Update control
// {{{
// Next state of the directory and of the data buffer.
// Every directory entry is examined according to its current state:
//   FREE -> OPEN/PEND when allocated by a write
//   OPEN -> PEND      on flush, timeout, read hit or when coalescing is inhibited
//   PEND -> SENT      when pushed into the send FIFOs
//   SENT -> FREE      when acknowledged
// This process also drives the write commands of the two send FIFOs.
always_comb
begin : wbuf_update_comb
// temporaries recomputed for each entry
automatic bit timeout;
automatic bit write_hit;
automatic bit read_hit;
automatic bit match_open_ptr;
automatic bit match_pend_ptr;
automatic bit match_free;
automatic bit send;
timeout = 1'b0;
write_hit = 1'b0;
read_hit = 1'b0;
match_open_ptr = 1'b0;
match_pend_ptr = 1'b0;
match_free = 1'b0;
send = 1'b0;
// by default, everything keeps its value and nothing is pushed
wbuf_dir_state_d = wbuf_dir_state_q;
wbuf_dir_d = wbuf_dir_q;
wbuf_data_d = wbuf_data_q;
send_data_w = 1'b0;
send_meta_valid = 1'b0;
for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin
case (wbuf_dir_state_q[i])
WBUF_FREE: begin
// only the entry designated by the free pointer can be allocated
match_free = wbuf_write_free && (i == int'(wbuf_dir_free_ptr_q));
if (write_i && match_free) begin
// the new entry skips the OPEN state (no coalescing window) when the
// threshold is zero, the write is uncacheable, a flush is requested
// or coalescing is inhibited
send = (cfg_threshold_i == 0)
| write_uc_i
| flush_all_i
| cfg_inhibit_write_coalescing_i;
wbuf_dir_state_d[i] = send ? WBUF_PEND : WBUF_OPEN;
wbuf_dir_d[i].tag = write_tag;
wbuf_dir_d[i].cnt = 0;
wbuf_dir_d[i].ptr = wbuf_data_free_ptr_q;
wbuf_dir_d[i].uc = write_uc_i;
// first write into the data entry: old data and byte-enables are zero
wbuf_data_write(
wbuf_data_d[wbuf_data_free_ptr_q].data,
wbuf_data_d[wbuf_data_free_ptr_q].be,
'0,
'0,
write_data,
write_be
);
end
end
WBUF_OPEN: begin
match_open_ptr = (i == int'(wbuf_write_hit_open_dir_ptr));
// the counter reached the last value before the threshold
timeout = (wbuf_dir_q[i].cnt == (cfg_threshold_i - 1));
// read_flush_hit_i closes the entry hit by the incoming write address
read_hit = read_flush_hit_i & wbuf_write_hit_open & match_open_ptr;
write_hit = write_i
& wbuf_write_hit_open
& match_open_ptr
& ~cfg_inhibit_write_coalescing_i;
if (!flush_all_i) begin
// optionally restart the counter on a coalesced write, otherwise
// count up until the timeout
if (write_hit && cfg_reset_timecnt_on_write_i) begin
timeout = 1'b0;
wbuf_dir_d[i].cnt = 0;
end else if (!timeout) begin
wbuf_dir_d[i].cnt = wbuf_dir_q[i].cnt + 1;
end
if (read_hit | timeout | cfg_inhibit_write_coalescing_i) begin
wbuf_dir_state_d[i] = WBUF_PEND;
end
end else begin
// flush: every open entry becomes pending
wbuf_dir_state_d[i] = WBUF_PEND;
end
// coalesce the incoming write into the data entry
if (write_hit) begin
wbuf_data_write(
wbuf_data_d[wbuf_dir_q[i].ptr].data,
wbuf_data_d[wbuf_dir_q[i].ptr].be,
wbuf_data_q[wbuf_dir_q[i].ptr].data,
wbuf_data_q[wbuf_dir_q[i].ptr].be,
write_data,
write_be
);
end
end
WBUF_PEND: begin
match_pend_ptr = (i == int'(wbuf_write_hit_pend_dir_ptr));
write_hit = write_i
& wbuf_write_hit_pend
& match_pend_ptr
& ~cfg_inhibit_write_coalescing_i;
// a pending entry still accepts coalesced writes
if (write_hit) begin
wbuf_data_write(
wbuf_data_d[wbuf_dir_q[i].ptr].data,
wbuf_data_d[wbuf_dir_q[i].ptr].be,
wbuf_data_q[wbuf_dir_q[i].ptr].data,
wbuf_data_q[wbuf_dir_q[i].ptr].be,
write_data,
write_be
);
end
// only the entry designated by the send pointer is pushed; each FIFO
// is written only when the other one can accept the item too, so that
// both are written in the same cycle
if (i == int'(wbuf_dir_send_ptr_q)) begin
send_data_w = send_meta_ready;
send_meta_valid = send_data_wok;
if (send_meta_ready && send_data_wok) begin
wbuf_dir_state_d[i] = WBUF_SENT;
end
end
end
WBUF_SENT: begin
// the acknowledge carries the index of the directory entry to release
if (ack_i && (i == int'(ack_id_i))) begin
wbuf_dir_state_d[i] = WBUF_FREE;
end
end
endcase
end
end
// Next value of the valid bit of each data entry
always_comb
begin : wbuf_data_valid_comb
    for (int unsigned entry = 0; entry < WBUF_DATA_ENTRIES; entry++) begin
        wbuf_data_valid_d[entry] = wbuf_data_valid_q[entry];
        // allocate a free data buffer on new write
        if (write_i && wbuf_write_free && (entry == wbuf_data_free_ptr_q)) begin
            wbuf_data_valid_d[entry] = 1'b1;
        end
        // de-allocate a data buffer as soon as it is sent (this takes
        // precedence over the allocation)
        if (send_data_valid_o && send_data_ready_i &&
                (entry == send_data_q.send_data_ptr)) begin
            wbuf_data_valid_d[entry] = 1'b0;
        end
    end
end
// }}}
// Send control
// {{{
// Data channel
// Data-channel FIFO of wbuf_send_data_t items.
// Write side: send_data_w / send_data_wok / send_data_d.
// Read side: rok_o drives send_data_valid_o and r_i is send_data_ready_i.
hpdcache_fifo_reg #(
.FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH),
.FEEDTHROUGH (WBUF_SEND_FEEDTHROUGH),
.fifo_data_t (wbuf_send_data_t)
) send_data_ptr_fifo_i (
.clk_i,
.rst_ni,
.w_i (send_data_w),
.wok_o (send_data_wok),
.wdata_i (send_data_d),
.r_i (send_data_ready_i),
.rok_o (send_data_valid_o),
.rdata_o (send_data_q)
);
// Item written into the FIFO: data-buffer pointer and tag of the directory
// entry designated by the send pointer.
assign send_data_d.send_data_ptr = wbuf_dir_q[wbuf_dir_send_ptr_q].ptr,
send_data_d.send_data_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag;
// Item read from the FIFO: the tag goes out directly, the pointer indexes the
// registered data buffers to produce the data and byte-enable outputs.
assign send_data_tag_o = wbuf_addr_t'(send_data_q.send_data_tag),
send_data_o = wbuf_data_q[send_data_q.send_data_ptr].data,
send_be_o = wbuf_data_q[send_data_q.send_data_ptr].be;
// Meta-data channel
// Meta-data-channel FIFO of wbuf_send_meta_t items.
// Write side: send_meta_valid / send_meta_ready / send_meta_wdata.
// Read side: rok_o drives send_meta_valid_o and r_i is send_meta_ready_i.
hpdcache_fifo_reg #(
.FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH),
.FEEDTHROUGH (WBUF_SEND_FEEDTHROUGH),
.fifo_data_t (wbuf_send_meta_t)
) send_meta_fifo_i (
.clk_i,
.rst_ni,
.w_i (send_meta_valid),
.wok_o (send_meta_ready),
.wdata_i (send_meta_wdata),
.r_i (send_meta_ready_i),
.rok_o (send_meta_valid_o),
.rdata_o (send_meta_rdata)
);
// Item written into the FIFO: tag and uncacheable flag of the directory entry
// designated by the send pointer; the send pointer itself is used as the ID.
assign send_meta_wdata.send_meta_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag,
send_meta_wdata.send_meta_id = wbuf_dir_send_ptr_q,
send_meta_wdata.send_meta_uc = wbuf_dir_q[wbuf_dir_send_ptr_q].uc;
// Item read from the FIFO: the address is the tag followed by
// WBUF_OFFSET_WIDTH zero bits.
assign send_addr_o = { send_meta_rdata.send_meta_tag, {WBUF_OFFSET_WIDTH{1'b0}} },
send_id_o = send_meta_rdata.send_meta_id,
send_uc_o = send_meta_rdata.send_meta_uc;
// Send pointer
always_comb
begin : wbuf_send_comb
    // Keep the send pointer where it is while the entry it designates is in
    // the PEND state and the meta-data FIFO write did not happen this cycle
    // (valid and ready not both high). In every other case, take the value
    // returned by wbuf_dir_find_next() for the PEND state.
    if ((wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND) &&
        !(send_meta_valid && send_meta_ready)) begin
        wbuf_dir_send_ptr_d = wbuf_dir_send_ptr_q;
    end else begin
        wbuf_dir_send_ptr_d = wbuf_dir_find_next(wbuf_dir_send_ptr_q, wbuf_dir_state_q, WBUF_PEND);
    end
end
// }}}
// Internal state assignment
// {{{
// Data buffers: registered on every clock edge, no reset (only the clock is in
// the sensitivity list).
always_ff @(posedge clk_i) wbuf_data_q <= wbuf_data_d;
// Directory entries, entry states, data-valid bits and the three pointers:
// registered with an asynchronous, active-low reset.
always_ff @(posedge clk_i or negedge rst_ni)
begin : wbuf_state_ff
if (!rst_ni) begin
// after reset every directory entry is FREE, no data buffer is valid and
// all pointers designate entry 0
wbuf_dir_q <= '0;
wbuf_dir_state_q <= {WBUF_DIR_ENTRIES{WBUF_FREE}};
wbuf_data_valid_q <= '0;
wbuf_dir_free_ptr_q <= 0;
wbuf_dir_send_ptr_q <= 0;
wbuf_data_free_ptr_q <= 0;
end else begin
wbuf_dir_q <= wbuf_dir_d;
wbuf_dir_state_q <= wbuf_dir_state_d;
wbuf_data_valid_q <= wbuf_data_valid_d;
wbuf_dir_free_ptr_q <= wbuf_dir_free_ptr_d;
wbuf_dir_send_ptr_q <= wbuf_dir_send_ptr_d;
wbuf_data_free_ptr_q <= wbuf_data_free_ptr_d;
end
end
// }}}
// Assertions
// {{{
// pragma translate_off
// Elaboration-time check: WBUF_WORDS must be one of 1, 2, 4, 8 or 16 (powers of
// two larger than 16 are rejected as well).
initial assert(WBUF_WORDS inside {1, 2, 4, 8, 16}) else
$error("WBUF: width of data buffers must be a power of 2");
// An acknowledgement must designate a directory entry in the SENT state.
ack_sent_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
(ack_i -> (wbuf_dir_state_q[ack_id_i] == WBUF_SENT))) else
$error("WBUF: acknowledging a not SENT slot");
// A meta-data FIFO write is only requested for an entry in the PEND state.
send_pend_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
(send_meta_valid -> (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND))) else
$error("WBUF: sending a not PEND slot");
// The data buffer at the head of the data FIFO must still be marked valid.
send_valid_data_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
(send_data_valid_o -> (wbuf_data_valid_q[send_data_q.send_data_ptr] == 1'b1))) else
$error("WBUF: sending a not valid data");
// pragma translate_on
// }}}
endmodule

View File

@ -0,0 +1,228 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : HPDcache Write Buffer Wrapper
* History :
*/
/* This wrapper adapts the send interface of the write buffer to the memory
* interface of the cache.
*/
module hpdcache_wbuf_wrapper
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    //  Width (in bits) of the transaction identifier on the memory interface
    parameter int  HPDcacheMemIdWidth    = 8,
    //  Width (in bits) of the data bus on the memory interface
    parameter int  HPDcacheMemDataWidth  = 512,
    //  Types of the memory request, write-data and write-response channels
    parameter type hpdcache_mem_req_t    = logic,
    parameter type hpdcache_mem_req_w_t  = logic,
    parameter type hpdcache_mem_resp_w_t = logic,

    localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
)
    //  }}}
    //  Ports
    //  {{{
(
    //  Clock and reset signals
    input  logic                  clk_i,
    input  logic                  rst_ni,

    //  Global control signals
    output logic                  empty_o,
    output logic                  full_o,
    input  logic                  flush_all_i,

    //  Configuration signals
    //    Timer threshold
    input  wbuf_timecnt_t         cfg_threshold_i,
    //    Reset timer on write
    input  logic                  cfg_reset_timecnt_on_write_i,
    //    Sequentialize write-after-write hazards
    input  logic                  cfg_sequential_waw_i,
    //    Inhibit write coalescing
    input  logic                  cfg_inhibit_write_coalescing_i,

    //  Write interface
    input  logic                  write_i,
    output logic                  write_ready_o,
    input  wbuf_addr_t            write_addr_i,
    input  wbuf_data_t            write_data_i,
    input  wbuf_be_t              write_be_i, // byte-enable
    input  logic                  write_uc_i, // uncacheable write

    //  Read hit interface
    input  wbuf_addr_t            read_addr_i,
    output logic                  read_hit_o,
    input  logic                  read_flush_hit_i,

    //  Replay hit interface
    input  wbuf_addr_t            replay_addr_i,
    input  logic                  replay_is_read_i,
    output logic                  replay_open_hit_o,
    output logic                  replay_pend_hit_o,
    output logic                  replay_sent_hit_o,
    output logic                  replay_not_ready_o,

    //  Memory interface
    input  logic                  mem_req_write_ready_i,
    output logic                  mem_req_write_valid_o,
    output hpdcache_mem_req_t     mem_req_write_o,

    input  logic                  mem_req_write_data_ready_i,
    output logic                  mem_req_write_data_valid_o,
    output hpdcache_mem_req_w_t   mem_req_write_data_o,

    output logic                  mem_resp_write_ready_o,
    input  logic                  mem_resp_write_valid_i,
    input  hpdcache_mem_resp_w_t  mem_resp_write_i
);
    //  }}}

    //  Internal signals
    //  {{{
    //  Send interface of the wrapped write buffer (meta-data channel)
    wbuf_addr_t     send_addr;
    wbuf_dir_ptr_t  send_id;
    logic           send_uc;
    //  Send interface of the wrapped write buffer (data channel)
    wbuf_addr_t     send_data_tag;
    wbuf_data_buf_t send_data;
    wbuf_be_buf_t   send_be;
    //  Acknowledgement extracted from the memory write response
    wbuf_dir_ptr_t  ack_id;
    logic           ack_error;
    //  }}}

    //  Wrapped write buffer
    //  {{{
    //  The control, write, read-hit and replay ports are passed through
    //  unchanged; only the send/ack ports are adapted below.
    hpdcache_wbuf #(
        .WBUF_DIR_ENTRIES      (HPDCACHE_WBUF_DIR_ENTRIES),
        .WBUF_DATA_ENTRIES     (HPDCACHE_WBUF_DATA_ENTRIES),
        .WBUF_WORD_WIDTH       (HPDCACHE_REQ_DATA_WIDTH),
        .WBUF_WORDS            (HPDCACHE_WBUF_WORDS),
        .WBUF_PA_WIDTH         (HPDCACHE_PA_WIDTH),
        .WBUF_TIMECNT_MAX      ((2**HPDCACHE_WBUF_TIMECNT_WIDTH) - 1),
        .WBUF_READ_MATCH_WIDTH (HPDCACHE_NLINE_WIDTH),
        .WBUF_SEND_FEEDTHROUGH (HPDCACHE_WBUF_SEND_FEEDTHROUGH)
    ) hpdcache_wbuf_i (
        .clk_i,
        .rst_ni,
        .empty_o,
        .full_o,
        .flush_all_i,
        .cfg_threshold_i,
        .cfg_reset_timecnt_on_write_i,
        .cfg_sequential_waw_i,
        .cfg_inhibit_write_coalescing_i,
        .write_i,
        .write_ready_o,
        .write_addr_i,
        .write_data_i,
        .write_be_i,
        .write_uc_i,
        .read_addr_i,
        .read_hit_o,
        .read_flush_hit_i,
        .replay_addr_i,
        .replay_is_read_i,
        .replay_open_hit_o,
        .replay_pend_hit_o,
        .replay_sent_hit_o,
        .replay_not_ready_o,
        .send_meta_ready_i     (mem_req_write_ready_i),
        .send_meta_valid_o     (mem_req_write_valid_o),
        .send_addr_o           (send_addr),
        .send_id_o             (send_id),
        .send_uc_o             (send_uc),
        .send_data_ready_i     (mem_req_write_data_ready_i),
        .send_data_valid_o     (mem_req_write_data_valid_o),
        .send_data_tag_o       (send_data_tag),
        .send_data_o           (send_data),
        .send_be_o             (send_be),
        .ack_i                 (mem_resp_write_valid_i),
        .ack_id_i              (ack_id),
        .ack_error_i           (ack_error)
    );
    //  }}}

    //  Memory interface
    //  {{{
    //  Request channel: the write-buffer directory pointer is used as the
    //  transaction ID (zero-extended/cast to the memory ID type).
    //  NOTE(review): mem_req_len = 0 together with w_last tied high presumably
    //  encodes a single-beat transfer, and mem_req_atomic is presumably ignored
    //  for HPDCACHE_MEM_WRITE -- confirm against the memory interface spec.
    assign mem_req_write_o.mem_req_addr      = send_addr,
           mem_req_write_o.mem_req_len       = 0,
           mem_req_write_o.mem_req_size      = get_hpdcache_mem_size(HPDCACHE_WBUF_DATA_WIDTH/8),
           mem_req_write_o.mem_req_id        = hpdcache_mem_id_t'(send_id),
           mem_req_write_o.mem_req_command   = HPDCACHE_MEM_WRITE,
           mem_req_write_o.mem_req_atomic    = HPDCACHE_MEM_ATOMIC_ADD,
           mem_req_write_o.mem_req_cacheable = ~send_uc;

    generate
        //  Number of write-buffer data words that fit in one memory data word
        localparam int unsigned WBUF_MEM_DATA_RATIO = HPDcacheMemDataWidth/HPDCACHE_WBUF_DATA_WIDTH;
        localparam int unsigned WBUF_MEM_DATA_WORD_INDEX_WIDTH = $clog2(WBUF_MEM_DATA_RATIO);

        assign mem_req_write_data_o.mem_req_w_last = 1'b1;

        if (WBUF_MEM_DATA_RATIO > 1)
        begin : wbuf_data_upsizing_gen
            //  One byte-enable vector per write-buffer-sized lane of the
            //  memory data word (lane index first, then byte index). The
            //  flattened bit order is what is connected below.
            logic [WBUF_MEM_DATA_RATIO-1:0][HPDCACHE_WBUF_DATA_WIDTH/8-1:0] mem_req_be;

            //  demux send BE: the low bits of the data tag select the lane
            //  that receives the byte enables
            hpdcache_demux #(
                .NOUTPUT     (WBUF_MEM_DATA_RATIO),
                .DATA_WIDTH  (HPDCACHE_WBUF_DATA_WIDTH/8),
                .ONE_HOT_SEL (1'b0)
            ) mem_write_be_demux_i (
                .data_i      (send_be),
                .sel_i       (send_data_tag[0 +: WBUF_MEM_DATA_WORD_INDEX_WIDTH]),
                .data_o      (mem_req_be)
            );

            //  the data is replicated on every lane; the byte enables tell
            //  which lane is actually written
            assign mem_req_write_data_o.mem_req_w_data = {WBUF_MEM_DATA_RATIO{send_data}},
                   mem_req_write_data_o.mem_req_w_be   = mem_req_be;
        end else if (WBUF_MEM_DATA_RATIO == 1)
        begin : wbuf_data_forwarding_gen
            //  same width on both sides: forward data and byte enables as is
            assign mem_req_write_data_o.mem_req_w_data = send_data,
                   mem_req_write_data_o.mem_req_w_be   = send_be;
        end

        //  Assertions
        //  {{{
        //  pragma translate_off
        initial assert(WBUF_MEM_DATA_RATIO > 0) else
            $error($sformatf("WBUF: data width of mem interface (%d) shall be g.e. to wbuf data width(%d)",
                HPDcacheMemDataWidth, HPDCACHE_WBUF_DATA_WIDTH));
        //  pragma translate_on
        //  }}}
    endgenerate

    //  Response channel: always ready; the low bits of the response ID give
    //  back the directory pointer sent with the request.
    assign mem_resp_write_ready_o = 1'b1,
           ack_id                 = mem_resp_write_i.mem_resp_w_id[0 +: HPDCACHE_WBUF_DIR_PTR_WIDTH],
           ack_error              = (mem_resp_write_i.mem_resp_w_error != HPDCACHE_MEM_RESP_OK);
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    initial assert (HPDCACHE_WBUF_DIR_PTR_WIDTH <= HPDcacheMemIdWidth) else
        $fatal("HPDcacheMemIdWidth is not wide enough to fit all possible write buffer transactions");
    //  pragma translate_on
    //  }}}
endmodule

View File

@ -0,0 +1,374 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Riccardo Alidori, Cesar Fuguet
* Maintainers(s): Cesar Fuguet
* Creation Date : June, 2021
* Description : HPDcache Linear Hardware Memory Prefetcher.
* History :
*/
module hwpf_stride
import hwpf_stride_pkg::*;
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    //  Not referenced inside this module (kept for interface compatibility)
    parameter int CACHE_LINE_BYTES = 64
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic                  clk_i,
    input  logic                  rst_ni,

    //  CSR
    //    *_set_i: write strobe that loads the matching *_i value
    //    *_o:     current value of the user-visible register
    input  logic                  csr_base_set_i,
    input  hwpf_stride_base_t     csr_base_i,
    input  logic                  csr_param_set_i,
    input  hwpf_stride_param_t    csr_param_i,
    input  logic                  csr_throttle_set_i,
    input  hwpf_stride_throttle_t csr_throttle_i,
    output hwpf_stride_base_t     csr_base_o,
    output hwpf_stride_param_t    csr_param_o,
    output hwpf_stride_throttle_t csr_throttle_o,

    //  If high, the prefetcher is enabled and active
    output logic                  busy_o,

    //  Snooping
    //    Address to snoop on requests ports
    output hpdcache_nline_t       snoop_nline_o,
    //    If set to one, the snoop address matched one of the requests
    input  logic                  snoop_match_i,

    //  D-Cache interface
    output logic                  hpdcache_req_valid_o,
    input  logic                  hpdcache_req_ready_i,
    output hpdcache_req_t         hpdcache_req_o,
    input  logic                  hpdcache_rsp_valid_i,
    input  hpdcache_rsp_t         hpdcache_rsp_i
);
    //  }}}

    import hpdcache_pkg::hpdcache_req_addr_t;

    //  Definition of constants
    //  {{{
    localparam int STRIDE_WIDTH     = $bits(csr_param_i.stride);
    localparam int NBLOCKS_WIDTH    = $bits(csr_param_i.nblocks);
    localparam int NLINES_WIDTH     = $bits(csr_param_i.nlines);
    localparam int NWAIT_WIDTH      = $bits(csr_throttle_i.nwait);
    localparam int INFLIGHT_WIDTH   = $bits(csr_throttle_i.ninflight);
    localparam int NLINES_CNT_WIDTH = NLINES_WIDTH;
    //  }}}

    //  Internal registers and signals
    //  {{{
    //  FSM
    //    IDLE     : wait for the enable bit, then latch the CSRs into the shadows
    //    SNOOP    : wait for a snoop match on the base cacheline
    //    SEND_REQ : present one prefetch request to the cache
    //    WAIT     : throttle (nwait cycles and/or max inflight requests)
    //    DONE     : all requests sent, wait for the responses, then rearm/disarm
    //    ABORT    : enable bit cleared while running, drain inflight requests
    enum {
        IDLE,
        SNOOP,
        SEND_REQ,
        WAIT,
        DONE,
        ABORT
    } state_d, state_q;

    logic [NBLOCKS_WIDTH-1:0]    nblocks_cnt_d, nblocks_cnt_q;
    logic [NLINES_CNT_WIDTH-1:0] nlines_cnt_d, nlines_cnt_q;
    logic [NWAIT_WIDTH-1:0]      nwait_cnt_d, nwait_cnt_q;
    logic [INFLIGHT_WIDTH-1:0]   inflight_cnt_d, inflight_cnt_q;
    logic                        inflight_inc, inflight_dec;

    //  csr_*_q are the user-visible registers; shadow_*_q are the working
    //  copies used while a prefetch sequence is running.
    hwpf_stride_base_t           csr_base_q;
    hwpf_stride_base_t           shadow_base_q, shadow_base_d;
    hwpf_stride_param_t          csr_param_q;
    hwpf_stride_param_t          shadow_param_q, shadow_param_d;
    hwpf_stride_throttle_t       csr_throttle_q;
    hwpf_stride_throttle_t       shadow_throttle_q, shadow_throttle_d;
    hpdcache_nline_t             request_nline_q, request_nline_d;
    hpdcache_set_t               hpdcache_req_set;
    hpdcache_tag_t               hpdcache_req_tag;
    logic                        csr_base_update;
    hpdcache_nline_t             increment_stride;
    logic                        is_inflight_max;

    //  Default assignment
    //    distance (in cachelines) between the start of two consecutive blocks
    assign increment_stride = hpdcache_nline_t'(shadow_param_q.stride) + 1'b1;
    //    every response from the cache retires one inflight request
    assign inflight_dec     = hpdcache_rsp_valid_i;
    assign snoop_nline_o    = shadow_base_q.base_cline;
    //    ninflight == 0 disables the limit on inflight requests
    assign is_inflight_max  = ( shadow_throttle_q.ninflight == '0 ) ?
                              1'b0 : ( inflight_cnt_q >= shadow_throttle_q.ninflight );
    assign csr_base_o       = csr_base_q;
    assign csr_param_o      = csr_param_q;
    assign csr_throttle_o   = csr_throttle_q;
    //  }}}

    //  Dcache outputs
    //  {{{
    //  The requested cacheline number is split into set and tag; the offset
    //  inside the cacheline is always zero.
    assign hpdcache_req_set = request_nline_q[0 +: HPDCACHE_SET_WIDTH],
           hpdcache_req_tag = request_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];

    assign hpdcache_req_o.addr_offset     = { hpdcache_req_set, {HPDCACHE_OFFSET_WIDTH{1'b0}} },
           hpdcache_req_o.wdata           = '0,
           hpdcache_req_o.op              = HPDCACHE_REQ_CMO,
           hpdcache_req_o.be              = '1,
           hpdcache_req_o.size            = HPDCACHE_REQ_CMO_PREFETCH,
           hpdcache_req_o.sid             = '0, // this is set when connecting to the dcache
           hpdcache_req_o.tid             = '0, // this is set by the wrapper of the prefetcher
           hpdcache_req_o.need_rsp        = 1'b1,
           hpdcache_req_o.phys_indexed    = 1'b1,
           hpdcache_req_o.addr_tag        = hpdcache_req_tag,
           hpdcache_req_o.pma.uncacheable = 1'b0,
           hpdcache_req_o.pma.io          = 1'b0;
    //  }}}

    //  Set state of internal registers
    //  {{{
    always_ff @(posedge clk_i or negedge rst_ni)
    begin
        if (!rst_ni) begin
            csr_base_q        <= '0;
            csr_param_q       <= '0;
            //  the throttle CSR must be reset like the other CSRs, otherwise
            //  csr_throttle_o is undefined until the first CSR write
            csr_throttle_q    <= '0;
            shadow_base_q     <= '0;
            shadow_param_q    <= '0;
            shadow_throttle_q <= '0;
            request_nline_q   <= '0;
            state_q           <= IDLE;
        end else begin
            //  a CSR write has priority over the FSM copy-back of the base
            if (csr_base_set_i)       csr_base_q <= csr_base_i;
            else if (csr_base_update) csr_base_q <= shadow_base_d;

            if (csr_param_set_i)      csr_param_q <= csr_param_i;
            if (csr_throttle_set_i)   csr_throttle_q <= csr_throttle_i;

            shadow_base_q     <= shadow_base_d;
            shadow_param_q    <= shadow_param_d;
            shadow_throttle_q <= shadow_throttle_d;
            request_nline_q   <= request_nline_d;
            state_q           <= state_d;
        end
    end
    //  }}}

    //  Update internal counters
    //  {{{
    always_comb begin : inflight_cnt
        inflight_cnt_d = inflight_cnt_q;

        //  Every time we send a dcache request, increment the counter
        if ( inflight_inc ) begin
            inflight_cnt_d++;
        end

        //  Every time we got a response from the cache, decrement the counter
        //  (never below zero; a simultaneous send and response cancel out)
        if ( inflight_dec && ( inflight_cnt_q > 0 )) begin
            inflight_cnt_d--;
        end
    end

    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            nblocks_cnt_q  <= '0;
            nlines_cnt_q   <= '0;
            nwait_cnt_q    <= '0;
            inflight_cnt_q <= '0;
        end else begin
            nblocks_cnt_q  <= nblocks_cnt_d;
            nlines_cnt_q   <= nlines_cnt_d;
            nwait_cnt_q    <= nwait_cnt_d;
            inflight_cnt_q <= inflight_cnt_d;
        end
    end
    //  }}}

    //  FSM
    //  {{{
    always_comb begin : fsm_control
        //  default assignments
        hpdcache_req_valid_o = 1'b0;
        nblocks_cnt_d        = nblocks_cnt_q;
        nlines_cnt_d         = nlines_cnt_q;
        nwait_cnt_d          = nwait_cnt_q;
        inflight_inc         = 1'b0;
        busy_o               = 1'b0;
        csr_base_update      = 1'b0;
        shadow_base_d        = shadow_base_q;
        shadow_param_d       = shadow_param_q;
        shadow_throttle_d    = shadow_throttle_q;
        request_nline_d      = request_nline_q;
        state_d              = state_q;

        case ( state_q )
            IDLE: begin
                //  If enabled, go snooping the dcache ports
                if ( csr_base_q.enable ) begin
                    shadow_base_d = csr_base_q;
                    if (( csr_param_q.nlines > 0 ) || ( csr_param_q.nblocks > 0 )) begin
                        shadow_param_d    = csr_param_q;
                        shadow_throttle_d = csr_throttle_q;
                        state_d           = SNOOP;
                    end else begin
                        //  no prefetch needed, disarm immediately
                        shadow_base_d.enable = 1'b0;
                        csr_base_update      = 1'b1;
                    end
                end
            end

            SNOOP: begin
                if ( csr_base_q.enable ) begin
                    //  If a snooper matched an address, send the request
                    if ( snoop_match_i ) begin
                        state_d = SEND_REQ;

                        if ( shadow_param_q.nlines == 0 ) begin
                            //  skip the first block
                            request_nline_d = shadow_base_q.base_cline +
                                              hpdcache_nline_t'(increment_stride);
                            nblocks_cnt_d   = ( shadow_param_q.nblocks > 0 ) ?
                                              shadow_param_q.nblocks - 1 : 0;
                            nlines_cnt_d    = 0;

                            //  update the base cacheline to the first one of the next block
                            shadow_base_d.base_cline = request_nline_d;
                        end else begin
                            //  skip the first cacheline (of the first block)
                            request_nline_d = shadow_base_q.base_cline + 1'b1;
                            nblocks_cnt_d   = shadow_param_q.nblocks;
                            nlines_cnt_d    = shadow_param_q.nlines - 1;
                        end
                    end
                end else begin
                    state_d = IDLE;
                end
            end

            SEND_REQ: begin
                busy_o = 1'b1;

                //  make the prefetch request to memory
                hpdcache_req_valid_o = 1'b1;

                //  we've got a grant, so we can move to the next request
                if ( hpdcache_req_ready_i ) begin
                    inflight_inc = 1'b1;

                    if ( nlines_cnt_q == 0 ) begin
                        //  go to the first cacheline of the next block
                        request_nline_d = shadow_base_q.base_cline +
                                          hpdcache_nline_t'(increment_stride);
                        nblocks_cnt_d   = ( nblocks_cnt_q > 0 ) ? nblocks_cnt_q - 1 : 0;
                        nlines_cnt_d    = shadow_param_q.nlines;

                        //  update the base cacheline to the first one of the next block
                        shadow_base_d.base_cline = request_nline_d;
                    end else begin
                        //  go to the next cacheline (within the same block)
                        request_nline_d = request_nline_q + 1'b1;
                        nlines_cnt_d    = nlines_cnt_q - 1;
                    end

                    if (( nblocks_cnt_q == 0 ) && ( nlines_cnt_q == 0 )) begin
                        //  this was the last cacheline of the last block
                        state_d = DONE;
                    end else if ( shadow_throttle_q.nwait == 0 ) begin
                        //  if the NWAIT parameter is equal 0, we can issue a
                        //  request every cycle.
                        //  Wait if the number of inflight requests is greater than
                        //  the maximum indicated. Otherwise, send the next request
                        state_d = is_inflight_max ? WAIT : SEND_REQ;
                    end else begin
                        //  Wait the indicated cycles before sending the next request
                        nwait_cnt_d = shadow_throttle_q.nwait;
                        state_d     = WAIT;
                    end

                    //  the enable bit was cleared: stop after this request
                    if ( !csr_base_q.enable ) state_d = ABORT;
                end
            end

            WAIT: begin
                //  Wait until:
                //  - the indicated number of wait cycles between requests is reached (nwait)
                //  - the number of inflight requests is below the indicated maximum (ninflight)
                busy_o = 1'b1;
                if ( csr_base_q.enable ) begin
                    if ( !is_inflight_max && ( nwait_cnt_q == 0 )) begin
                        state_d = SEND_REQ;
                    end

                    if ( nwait_cnt_q > 0 ) begin
                        nwait_cnt_d = nwait_cnt_q - 1;
                    end
                end else begin
                    state_d = ABORT;
                end
            end

            DONE: begin
                busy_o = 1'b1;
                if ( csr_base_q.enable ) begin
                    //  wait for all the responses before finishing the sequence
                    if (( inflight_cnt_q == 0 ) && !is_inflight_max && ( nwait_cnt_q == 0 )) begin
                        //  Copy back shadow base register into the user visible one
                        csr_base_update = 1'b1;

                        //  Check the rearm bit
                        if ( shadow_base_q.rearm ) begin
                            state_d = SNOOP;
                        end else begin
                            state_d = IDLE;

                            //  disarm the prefetcher
                            shadow_base_d.enable = 1'b0;
                        end

                        //  Check the cycle bit
                        if ( shadow_base_q.cycle ) begin
                            //  restore the base address
                            shadow_base_d.base_cline = csr_base_q.base_cline;
                        end
                    end

                    if ( nwait_cnt_q > 0 ) begin
                        nwait_cnt_d = nwait_cnt_q - 1;
                    end
                end else begin
                    state_d = ABORT;
                end
            end

            ABORT: begin
                //  stay busy until every inflight request got its response
                busy_o = 1'b1;
                if ( inflight_cnt_q == 0 ) begin
                    state_d = IDLE;
                end
            end
        endcase
    end
    //  }}}
endmodule

View File

@ -0,0 +1,117 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Author(s) : Riccardo Alidori, Cesar Fuguet
* Creation Date : June, 2021
* Description : Hw prefetchers arbiter
* History :
*/
module hwpf_stride_arb
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    parameter NUM_HW_PREFETCH = 4
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic                                 clk_i,
    input  logic                                 rst_ni,

    //  Dcache input interface (one port per prefetcher engine)
    input  logic          [NUM_HW_PREFETCH-1:0]  hwpf_stride_req_valid_i,
    output logic          [NUM_HW_PREFETCH-1:0]  hwpf_stride_req_ready_o,
    input  hpdcache_req_t [NUM_HW_PREFETCH-1:0]  hwpf_stride_req_i,
    output logic          [NUM_HW_PREFETCH-1:0]  hwpf_stride_rsp_valid_o,
    output hpdcache_rsp_t [NUM_HW_PREFETCH-1:0]  hwpf_stride_rsp_o, // Not used

    //  Dcache output interface (single port towards the cache)
    output logic                                 hpdcache_req_valid_o,
    input  logic                                 hpdcache_req_ready_i,
    output hpdcache_req_t                        hpdcache_req_o,
    input  logic                                 hpdcache_rsp_valid_i,
    input  hpdcache_rsp_t                        hpdcache_rsp_i // Not used
);
    //  }}}

    //  Internal signals
    //  {{{
    //  Grant vector produced by the arbiter
    logic [NUM_HW_PREFETCH-1:0] arb_req_gnt;
    //  }}}

    //  Requesters arbiter
    //  {{{
    //  Arbiter: chooses among the engines presenting a valid request
    hpdcache_rrarb #(
        .N       (NUM_HW_PREFETCH)
    ) hwpf_stride_req_arbiter_i (
        .clk_i,
        .rst_ni,
        .req_i   (hwpf_stride_req_valid_i),
        .gnt_o   (arb_req_gnt),
        .ready_i (hpdcache_req_ready_i)
    );

    //  Request multiplexor: forwards the request of the granted engine
    hpdcache_mux #(
        .NINPUT      (NUM_HW_PREFETCH),
        .DATA_WIDTH  ($bits(hpdcache_req_t)),
        .ONE_HOT_SEL (1'b1)
    ) hwpf_stride_req_mux_i (
        .data_i      (hwpf_stride_req_i),
        .sel_i       (arb_req_gnt),
        .data_o      (hpdcache_req_o)
    );

    //  An engine sees ready when it holds the grant and the cache is ready
    assign hwpf_stride_req_ready_o = arb_req_gnt & {NUM_HW_PREFETCH{hpdcache_req_ready_i}};

    //  A request goes out as soon as any engine is granted
    assign hpdcache_req_valid_o = |arb_req_gnt;
    //  }}}

    //  Response demultiplexor
    //  {{{
    //  The TID field of the request is not needed by the HW prefetcher
    //  itself, so it carries the index of the engine that issued the
    //  request. All the engines therefore share one SID: separate SIDs
    //  would require separate ports to the cache, which we want to avoid.
    always_comb
    begin : resp_demux
        //  the payload is broadcast, only the valid bit is routed
        hwpf_stride_rsp_o = {NUM_HW_PREFETCH{hpdcache_rsp_i}};
        for (int unsigned pf = 0; pf < NUM_HW_PREFETCH; pf++) begin
            hwpf_stride_rsp_valid_o[pf] = (pf == int'(hpdcache_rsp_i.tid)) && hpdcache_rsp_valid_i;
        end
    end
    //  }}}
endmodule

View File

@ -0,0 +1,68 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : January, 2023
* Description : High-Performance, Data-cache (HPDcache) HW memory
* prefetcher package
* History :
*/
// Packed register layouts shared by the hardware memory prefetcher modules.
// Field ranges are written with the bit positions they occupy in the register.
package hwpf_stride_pkg;
// Base address configuration register of the hardware memory prefetcher
// {{{
typedef struct packed {
// base cacheline number (bits 63:6 of the register)
logic [63:6] base_cline;
logic [5:3] unused;
// when set, the base cacheline is restored at the end of a prefetch sequence
logic cycle;
// when set, the engine goes back to snooping at the end of a prefetch sequence
logic rearm;
// engine enable; cleared by the engine itself when it disarms
logic enable;
} hwpf_stride_base_t;
// }}}
// Parameters configuration register of the hardware memory prefetcher
// {{{
typedef struct packed {
// block count of a prefetch sequence
logic [63:48] nblocks;
// cacheline count within a block
logic [47:32] nlines;
// the engine advances the base by (stride + 1) cachelines between blocks
logic [31:0] stride;
} hwpf_stride_param_t;
// }}}
// Throttle configuration register of the hardware memory prefetcher
// {{{
typedef struct packed {
// maximum number of inflight requests; 0 disables the limit
logic [31:16] ninflight;
// number of cycles to wait between two requests; 0 means no wait
logic [15:0] nwait;
} hwpf_stride_throttle_t;
// }}}
// Status register of the hardware memory prefetcher
// {{{
typedef struct packed {
logic [63:48] unused1;
// busy flags, one bit per engine
logic [47:32] busy;
// set when at least one engine is free
logic free;
logic [30:20] unused0;
// index of a free engine (lowest index first)
logic [19:16] free_index;
// enable flags, one bit per engine
logic [15:0] enabled;
} hwpf_stride_status_t;
// }}}
endpackage

View File

@ -0,0 +1,38 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Riccardo Alidori, Cesar Fuguet
* Creation Date : June, 2021
* Description : Snooper used by the hardware memory prefetcher
* History :
*/
module hwpf_stride_snooper
import hpdcache_pkg::*;
(
    input  logic            en_i,          // Snooper enable bit.
    input  hpdcache_nline_t base_nline_i,  // Address to check
    input  hpdcache_nline_t snoop_addr_i,  // Input address to snoop
    output                  snoop_match_o  // If high, the Snoopers matched the snoop_address
);
    //  High when the snooped address equals the address to check
    logic nline_equal;

    assign nline_equal   = ( snoop_addr_i == base_nline_i );

    //  A match is only reported while the snooper is enabled
    assign snoop_match_o = nline_equal && en_i;
endmodule

View File

@ -0,0 +1,265 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Riccardo Alidori, Cesar Fuguet
* Creation Date : June, 2021
* Description : Linear Hardware Memory Prefetcher wrapper.
* History :
*/
module hwpf_stride_wrapper
import hwpf_stride_pkg::*;
import hpdcache_pkg::*;
// Parameters
// {{{
#(
parameter NUM_HW_PREFETCH = 4,
parameter NUM_SNOOP_PORTS = 1
)
// }}}
// Ports
// {{{
(
input logic clk_i,
input logic rst_ni,
// CSR
// {{{
input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_base_set_i,
input hwpf_stride_base_t [NUM_HW_PREFETCH-1:0] hwpf_stride_base_i,
output hwpf_stride_base_t [NUM_HW_PREFETCH-1:0] hwpf_stride_base_o,
input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_param_set_i,
input hwpf_stride_param_t [NUM_HW_PREFETCH-1:0] hwpf_stride_param_i,
output hwpf_stride_param_t [NUM_HW_PREFETCH-1:0] hwpf_stride_param_o,
input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_set_i,
input hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_i,
output hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_o,
output hwpf_stride_status_t hwpf_stride_status_o,
// }}}
// Snooping
// {{{
input logic [NUM_SNOOP_PORTS-1:0] snoop_valid_i,
input logic [NUM_SNOOP_PORTS-1:0] snoop_abort_i,
input hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_i,
input hpdcache_tag_t [NUM_SNOOP_PORTS-1:0] snoop_addr_tag_i,
input logic [NUM_SNOOP_PORTS-1:0] snoop_phys_indexed_i,
// }}}
// Dcache interface
// {{{
input hpdcache_req_sid_t hpdcache_req_sid_i,
output logic hpdcache_req_valid_o,
input logic hpdcache_req_ready_i,
output hpdcache_req_t hpdcache_req_o,
output logic hpdcache_req_abort_o,
output hpdcache_tag_t hpdcache_req_tag_o,
output hpdcache_pma_t hpdcache_req_pma_o,
input logic hpdcache_rsp_valid_i,
input hpdcache_rsp_t hpdcache_rsp_i
// }}}
);
// }}}
// Internal registers
// {{{
logic [NUM_SNOOP_PORTS-1:0] snoop_valid_q;
hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_q;
// }}}
// Internal signals
// {{{
logic [NUM_HW_PREFETCH-1:0] hwpf_stride_enable;
logic [NUM_HW_PREFETCH-1:0] hwpf_stride_free;
logic [NUM_HW_PREFETCH-1:0] hwpf_stride_status_busy;
logic [3:0] hwpf_stride_status_free_idx;
hpdcache_nline_t [NUM_HW_PREFETCH-1:0] hwpf_snoop_nline;
logic [NUM_HW_PREFETCH-1:0] hwpf_snoop_match;
logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid;
logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready;
hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req;
logic [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_valid;
logic [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_ready;
hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req;
logic [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp_valid;
hpdcache_rsp_t [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp;
// }}}
// Assertions
// {{{
// pragma translate_off
initial
begin
max_hwpf_stride_assert: assert (NUM_HW_PREFETCH <= 16) else
$error("hwpf_stride: maximum number of HW prefetchers is 16");
end
// pragma translate_on
// }}}
// Compute the status information
// {{{
always_comb begin: hwpf_stride_priority_encoder
hwpf_stride_status_free_idx = '0;
for (int unsigned i = 0; i < NUM_HW_PREFETCH; i++) begin
if (hwpf_stride_free[i]) begin
hwpf_stride_status_free_idx = i;
break;
end
end
end
// Free flag of engines
assign hwpf_stride_free = ~(hwpf_stride_enable | hwpf_stride_status_busy);
// Busy flags
assign hwpf_stride_status_o[63:32] = {{32-NUM_HW_PREFETCH{1'b0}}, hwpf_stride_status_busy};
// Global free flag
assign hwpf_stride_status_o[31] = |hwpf_stride_free;
// Free Index
assign hwpf_stride_status_o[30:16] = {11'b0, hwpf_stride_status_free_idx};
// Enable flags
assign hwpf_stride_status_o[15:0] = {{16-NUM_HW_PREFETCH{1'b0}}, hwpf_stride_enable};
// }}}
// Hardware prefetcher engines
// {{{
generate
for (genvar j = 0; j < NUM_SNOOP_PORTS; j++) begin
always_ff @(posedge clk_i or negedge rst_ni)
begin : snoop_ff
if (!rst_ni) begin
snoop_valid_q[j] <= 1'b0;
snoop_addr_offset_q[j] <= '0;
end else begin
if (snoop_phys_indexed_i[j]) begin
snoop_valid_q[j] <= snoop_valid_i[j];
snoop_addr_offset_q[j] <= snoop_addr_offset_i[j];
end
end
end
end
for (genvar i = 0; i < NUM_HW_PREFETCH; i++) begin
assign hwpf_stride_enable[i] = hwpf_stride_base_o[i].enable;
// Compute snoop match signals
// {{{
always_comb
begin : snoop_comb
hwpf_snoop_match[i] = 1'b0;
for (int j = 0; j < NUM_SNOOP_PORTS; j++) begin
automatic logic snoop_valid;
automatic hpdcache_req_offset_t snoop_offset;
automatic hpdcache_nline_t snoop_nline;
if (snoop_phys_indexed_i[j]) begin
snoop_valid = snoop_valid_i[j];
snoop_offset = snoop_addr_offset_i[j];
end else begin
snoop_valid = snoop_valid_q[j];
snoop_offset = snoop_addr_offset_q[j];
end
snoop_nline = {snoop_addr_tag_i[j], snoop_offset};
hwpf_snoop_match[i] |= (snoop_valid && !snoop_abort_i[j] &&
(hwpf_snoop_nline[i] == snoop_nline));
end
end
// }}}
hwpf_stride #(
.CACHE_LINE_BYTES ( HPDCACHE_CL_WIDTH/8 )
) hwpf_stride_i(
.clk_i,
.rst_ni,
.csr_base_set_i ( hwpf_stride_base_set_i[i] ),
.csr_base_i ( hwpf_stride_base_i[i] ),
.csr_param_set_i ( hwpf_stride_param_set_i[i] ),
.csr_param_i ( hwpf_stride_param_i[i] ),
.csr_throttle_set_i ( hwpf_stride_throttle_set_i[i] ),
.csr_throttle_i ( hwpf_stride_throttle_i[i] ),
.csr_base_o ( hwpf_stride_base_o[i] ),
.csr_param_o ( hwpf_stride_param_o[i] ),
.csr_throttle_o ( hwpf_stride_throttle_o[i] ),
.busy_o ( hwpf_stride_status_busy[i] ),
.snoop_nline_o ( hwpf_snoop_nline[i] ),
.snoop_match_i ( hwpf_snoop_match[i] ),
.hpdcache_req_valid_o ( hwpf_stride_req_valid[i] ),
.hpdcache_req_ready_i ( hwpf_stride_req_ready[i] ),
.hpdcache_req_o ( hwpf_stride_req[i] ),
.hpdcache_rsp_valid_i ( hwpf_stride_arb_in_rsp_valid[i] ),
.hpdcache_rsp_i ( hwpf_stride_arb_in_rsp[i] )
);
assign hwpf_stride_req_ready[i] = hwpf_stride_arb_in_req_ready[i],
hwpf_stride_arb_in_req_valid[i] = hwpf_stride_req_valid[i],
hwpf_stride_arb_in_req[i].addr_offset = hwpf_stride_req[i].addr_offset,
hwpf_stride_arb_in_req[i].wdata = hwpf_stride_req[i].wdata,
hwpf_stride_arb_in_req[i].op = hwpf_stride_req[i].op,
hwpf_stride_arb_in_req[i].be = hwpf_stride_req[i].be,
hwpf_stride_arb_in_req[i].size = hwpf_stride_req[i].size,
hwpf_stride_arb_in_req[i].sid = hpdcache_req_sid_i,
hwpf_stride_arb_in_req[i].tid = hpdcache_req_tid_t'(i),
hwpf_stride_arb_in_req[i].need_rsp = hwpf_stride_req[i].need_rsp,
hwpf_stride_arb_in_req[i].phys_indexed = hwpf_stride_req[i].phys_indexed,
hwpf_stride_arb_in_req[i].addr_tag = '0,
hwpf_stride_arb_in_req[i].pma = '0;
end
endgenerate
// }}}
// Hardware prefetcher arbiter between engines
// {{{
hwpf_stride_arb #(
.NUM_HW_PREFETCH ( NUM_HW_PREFETCH )
) hwpf_stride_arb_i (
.clk_i,
.rst_ni,
// DCache input interface
.hwpf_stride_req_valid_i ( hwpf_stride_arb_in_req_valid ),
.hwpf_stride_req_ready_o ( hwpf_stride_arb_in_req_ready ),
.hwpf_stride_req_i ( hwpf_stride_arb_in_req ),
.hwpf_stride_rsp_valid_o ( hwpf_stride_arb_in_rsp_valid ),
.hwpf_stride_rsp_o ( hwpf_stride_arb_in_rsp ),
// DCache output interface
.hpdcache_req_valid_o,
.hpdcache_req_ready_i,
.hpdcache_req_o,
.hpdcache_rsp_valid_i,
.hpdcache_rsp_i
);
assign hpdcache_req_abort_o = 1'b0, // unused on physically indexed requests
hpdcache_req_tag_o = '0, // unused on physically indexed requests
hpdcache_req_pma_o = '0; // unused on physically indexed requests
// }}}
endmodule

View File

@ -0,0 +1,103 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : Dcache Memory Read Request Channel Arbiter
* History :
*/
module hpdcache_mem_req_read_arbiter
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    //  Number of requesters sharing the read request channel
    parameter hpdcache_uint N = 0,
    //  Type of the request payload forwarded to the memory
    parameter type hpdcache_mem_req_t = logic
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic              clk_i,
    input  logic              rst_ni,

    //  Requesters side (one entry per requester)
    output logic              mem_req_read_ready_o [N-1:0],
    input  logic              mem_req_read_valid_i [N-1:0],
    input  hpdcache_mem_req_t mem_req_read_i       [N-1:0],

    //  Memory side (single arbitrated channel)
    input  logic              mem_req_read_ready_i,
    output logic              mem_req_read_valid_o,
    output hpdcache_mem_req_t mem_req_read_o
);
    //  }}}

    //  Internal signals
    //  {{{
    //  Packed views of the unpacked per-requester inputs
    logic              [N-1:0] mem_read_arb_req_valid;
    hpdcache_mem_req_t [N-1:0] mem_read_arb_req;
    //  Grant vector returned by the arbiter
    logic              [N-1:0] mem_read_arb_req_gnt;
    //  Set when a granted requester is presenting a valid request
    logic                      req_valid;
    //  }}}

    //  Copy the unpacked input arrays into packed vectors
    generate
        for (genvar i = 0; i < int'(N); i++) begin : pack_inputs_gen
            assign mem_read_arb_req_valid[i] = mem_req_read_valid_i[i];
            assign mem_read_arb_req[i]       = mem_req_read_i[i];
        end
    endgenerate

    //  Fixed-priority arbiter
    //  (the instance name is kept as-is so that hierarchical paths do not change)
    hpdcache_fxarb #(
        .N       (N)
    ) hpdcache_fxarb_mem_req_write_i (
        .clk_i   (clk_i),
        .rst_ni  (rst_ni),
        .req_i   (mem_read_arb_req_valid),
        .gnt_o   (mem_read_arb_req_gnt),
        .ready_i (mem_req_read_ready_i)
    );

    //  The output request is valid when a granted requester has a valid request
    assign req_valid            = |(mem_read_arb_req_valid & mem_read_arb_req_gnt);
    assign mem_req_read_valid_o = req_valid;

    //  Ready is only reported to a requester that is both valid and granted,
    //  and only while the memory side is ready
    generate
        for (genvar i = 0; i < int'(N); i++) begin : req_ready_gen
            assign mem_req_read_ready_o[i] = mem_read_arb_req_valid[i] &
                                             mem_read_arb_req_gnt[i]   &
                                             mem_req_read_ready_i;
        end
    endgenerate

    //  Select the payload of the granted requester (one-hot select)
    hpdcache_mux #(
        .NINPUT      (N),
        .DATA_WIDTH  ($bits(hpdcache_mem_req_t)),
        .ONE_HOT_SEL (1'b1)
    ) mem_read_req_mux_i (
        .data_i      (mem_read_arb_req),
        .sel_i       (mem_read_arb_req_gnt),
        .data_o      (mem_req_read_o)
    );
endmodule

View File

@ -0,0 +1,193 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : Dcache Memory Write Channels Arbiter
* History :
*/
module hpdcache_mem_req_write_arbiter
import hpdcache_pkg::*;
    //  Parameters
    //  {{{
#(
    //  Number of requesters sharing the write channels
    parameter hpdcache_uint N = 0,
    //  Type of the write request metadata
    parameter type hpdcache_mem_req_t = logic,
    //  Type of the write request data
    parameter type hpdcache_mem_req_w_t = logic
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic                clk_i,
    input  logic                rst_ni,

    //  Requesters side: metadata channel (one entry per requester)
    output logic                mem_req_write_ready_o      [N-1:0],
    input  logic                mem_req_write_valid_i      [N-1:0],
    input  hpdcache_mem_req_t   mem_req_write_i            [N-1:0],

    //  Requesters side: data channel (one entry per requester)
    output logic                mem_req_write_data_ready_o [N-1:0],
    input  logic                mem_req_write_data_valid_i [N-1:0],
    input  hpdcache_mem_req_w_t mem_req_write_data_i       [N-1:0],

    //  Memory side: metadata channel
    input  logic                mem_req_write_ready_i,
    output logic                mem_req_write_valid_o,
    output hpdcache_mem_req_t   mem_req_write_o,

    //  Memory side: data channel
    input  logic                mem_req_write_data_ready_i,
    output logic                mem_req_write_data_valid_o,
    output hpdcache_mem_req_w_t mem_req_write_data_o
);
    //  }}}

    //  Tracks which half (metadata / data) of the current request has
    //  already been accepted by the memory side
    typedef enum {
        REQ_IDLE,
        REQ_META_SENT,
        REQ_DATA_SENT
    } req_send_fsm_t;

    req_send_fsm_t               req_send_fsm_q, req_send_fsm_d;
    logic                        req_valid;
    logic                        req_data_valid;

    //  Packed views of the unpacked per-requester inputs
    logic                [N-1:0] mem_write_arb_req_valid;
    hpdcache_mem_req_t   [N-1:0] mem_write_arb_req;
    logic                [N-1:0] mem_write_arb_req_data_valid;
    hpdcache_mem_req_w_t [N-1:0] mem_write_arb_req_data;
    logic                [N-1:0] mem_write_arb_req_gnt;
    logic                        mem_write_arb_req_ready;

    genvar                       gen_i;

    //  Pack inputs
    generate
        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen
            assign mem_write_arb_req_valid     [gen_i] = mem_req_write_valid_i[gen_i],
                   mem_write_arb_req           [gen_i] = mem_req_write_i[gen_i],
                   mem_write_arb_req_data_valid[gen_i] = mem_req_write_data_valid_i[gen_i],
                   mem_write_arb_req_data      [gen_i] = mem_req_write_data_i[gen_i];
        end
    endgenerate

    //  Fixed-priority arbiter
    //
    //  The arbiter is sized with the N parameter so that its request and grant
    //  vectors match the [N-1:0] signals connected below (it was previously
    //  hard-coded to 2, which only matched when N == 2).
    hpdcache_fxarb #(
        .N                   (N)
    ) hpdcache_fxarb_mem_req_write_i (
        .clk_i,
        .rst_ni,
        .req_i               (mem_write_arb_req_valid),
        .gnt_o               (mem_write_arb_req_gnt),
        .ready_i             (mem_write_arb_req_ready)
    );

    //  Metadata / data valid of the currently granted requester
    assign req_valid      = |(mem_write_arb_req_gnt & mem_write_arb_req_valid);
    assign req_data_valid = |(mem_write_arb_req_gnt & mem_write_arb_req_data_valid);

    //  Request sent FSM
    //
    //  This FSM allows to make sure that the request and its corresponding
    //  data are sent in order. This is, when a requester sends a request, this
    //  FSM keeps the grant signal on this requester until it has sent the
    //  corresponding data.
    //
    //  {{{
    always_comb
    begin : req_send_fsm_comb
        req_send_fsm_d = req_send_fsm_q;
        //  The arbiter is only released once both halves have been accepted
        mem_write_arb_req_ready = 1'b0;

        case (req_send_fsm_q)
            REQ_IDLE:
                if (req_valid && mem_req_write_ready_i) begin
                    //  NOTE(review): when the metadata is accepted while the
                    //  granted requester has no valid data, the FSM stays in
                    //  REQ_IDLE. Presumably requesters always present the data
                    //  together with the metadata - confirm against callers.
                    if (req_data_valid) begin
                        if (mem_req_write_data_ready_i) begin
                            //  Metadata and data accepted in the same cycle
                            mem_write_arb_req_ready = 1'b1;
                            req_send_fsm_d = REQ_IDLE;
                        end else begin
                            //  Metadata accepted, data still pending
                            req_send_fsm_d = REQ_META_SENT;
                        end
                    end
                end else if (req_data_valid && mem_req_write_data_ready_i) begin
                    //  Data accepted, metadata still pending
                    req_send_fsm_d = REQ_DATA_SENT;
                end

            REQ_META_SENT:
                if (req_data_valid && mem_req_write_data_ready_i) begin
                    mem_write_arb_req_ready = 1'b1;
                    req_send_fsm_d = REQ_IDLE;
                end

            REQ_DATA_SENT:
                if (req_valid && mem_req_write_ready_i) begin
                    mem_write_arb_req_ready = 1'b1;
                    req_send_fsm_d = REQ_IDLE;
                end
        endcase
    end

    always_ff @(posedge clk_i or negedge rst_ni)
    begin : req_send_fsm_ff
        if (!rst_ni) begin
            req_send_fsm_q <= REQ_IDLE;
        end else begin
            req_send_fsm_q <= req_send_fsm_d;
        end
    end
    //  }}}

    //  Ready signals towards the requesters
    //
    //  Only the granted requester sees a ready, and a channel whose half of
    //  the request has already been sent is masked until the other half is
    //  accepted.
    generate
        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen
            assign mem_req_write_ready_o[gen_i] =
                    (mem_write_arb_req_gnt[gen_i] & mem_req_write_ready_i) &
                    (req_send_fsm_q != REQ_META_SENT);

            assign mem_req_write_data_ready_o[gen_i] =
                    (mem_write_arb_req_gnt[gen_i] & mem_req_write_data_ready_i) &
                    (req_send_fsm_q != REQ_DATA_SENT);
        end
    endgenerate

    //  Output assignments
    //  {{{
    //  A channel is not presented as valid again once its half has been sent
    assign mem_req_write_valid_o      = req_valid      & (req_send_fsm_q != REQ_META_SENT);
    assign mem_req_write_data_valid_o = req_data_valid & (req_send_fsm_q != REQ_DATA_SENT);

    //  Metadata of the granted requester (one-hot select)
    hpdcache_mux #(
        .NINPUT              (N),
        .DATA_WIDTH          ($bits(hpdcache_mem_req_t)),
        .ONE_HOT_SEL         (1'b1)
    ) mem_write_req_mux_i (
        .data_i              (mem_write_arb_req),
        .sel_i               (mem_write_arb_req_gnt),
        .data_o              (mem_req_write_o)
    );

    //  Data of the granted requester (one-hot select)
    hpdcache_mux #(
        .NINPUT              (N),
        .DATA_WIDTH          ($bits(hpdcache_mem_req_w_t)),
        .ONE_HOT_SEL         (1'b1)
    ) mem_write_data_req_mux_i (
        .data_i              (mem_write_arb_req_data),
        .sel_i               (mem_write_arb_req_gnt),
        .data_o              (mem_req_write_data_o)
    );
    //  }}}
endmodule

View File

@ -0,0 +1,108 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : June, 2022
* Description : Dcache Memory Response Demultiplexer
* History :
*/
module hpdcache_mem_resp_demux
    //  Parameters
    //  {{{
#(
    //  Number of output ports
    parameter  int  N         = 0,
    //  Type of the response payload
    parameter  type resp_t    = logic,
    //  Type of the response identifier
    parameter  type resp_id_t = logic,

    //  The routing table has one entry per possible response identifier
    localparam int  RT_DEPTH  = (1 << $bits(resp_id_t)),
    localparam type rt_t      = resp_id_t [RT_DEPTH-1:0]
)
    //  }}}
    //  Ports
    //  {{{
(
    input  logic     clk_i,
    input  logic     rst_ni,

    //  Memory side (single incoming response channel)
    output logic     mem_resp_ready_o,
    input  logic     mem_resp_valid_i,
    input  resp_id_t mem_resp_id_i,
    input  resp_t    mem_resp_i,

    //  Requesters side (one entry per output port)
    input  logic     mem_resp_ready_i [N-1:0],
    output logic     mem_resp_valid_o [N-1:0],
    output resp_t    mem_resp_o       [N-1:0],

    //  Routing table, indexed by the response identifier
    input  rt_t      mem_resp_rt_i
);
    //  }}}

    //  Binary index of an output port
    typedef logic [$clog2(N)-1:0] sel_t;

    //  Packed views of the unpacked per-port signals
    logic  [N-1:0] mem_resp_demux_valid;
    resp_t [N-1:0] mem_resp_demux;
    logic  [N-1:0] mem_resp_demux_ready;
    sel_t          mem_resp_demux_sel;

    //  Look up the destination port in the routing table using the response ID
    assign mem_resp_demux_sel = mem_resp_rt_i[int'(mem_resp_id_i)];

    //  Steer the valid flag to the selected output port
    hpdcache_demux #(
        .NOUTPUT     (N),
        .DATA_WIDTH  (1),
        .ONE_HOT_SEL (0)
    ) i_resp_valid_demux (
        .data_i      (mem_resp_valid_i),
        .sel_i       (mem_resp_demux_sel),
        .data_o      (mem_resp_demux_valid)
    );

    //  Steer the response payload to the selected output port
    hpdcache_demux #(
        .NOUTPUT     (N),
        .DATA_WIDTH  ($bits(resp_t)),
        .ONE_HOT_SEL (0)
    ) i_resp_demux (
        .data_i      (mem_resp_i),
        .sel_i       (mem_resp_demux_sel),
        .data_o      (mem_resp_demux)
    );

    //  Return the ready flag of the selected output port to the memory side
    hpdcache_mux #(
        .NINPUT      (N),
        .DATA_WIDTH  (1),
        .ONE_HOT_SEL (0)
    ) i_resp_ready_mux (
        .data_i      (mem_resp_demux_ready),
        .sel_i       (mem_resp_demux_sel),
        .data_o      (mem_resp_ready_o)
    );

    //  Convert between the packed internal vectors and the unpacked ports
    generate
        for (genvar port = 0; port < int'(N); port++) begin : pack_unpack_resp_gen
            assign mem_resp_demux_ready[port] = mem_resp_ready_i[port];
            assign mem_resp_valid_o[port]     = mem_resp_demux_valid[port];
            assign mem_resp_o[port]           = mem_resp_demux[port];
        end
    endgenerate
endmodule : hpdcache_mem_resp_demux

View File

@ -0,0 +1,95 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : Dcache memory request to axi read channels
* History :
*/
module hpdcache_mem_to_axi_read
import hpdcache_pkg::*;
#(
    //  Type of the cache memory request
    parameter type hpdcache_mem_req_t = logic,
    //  Type of the cache memory read response
    parameter type hpdcache_mem_resp_r_t = logic,
    //  Types of the AXI AR and R channel payloads
    parameter type ar_chan_t = logic,
    parameter type r_chan_t = logic
)
(
    //  Cache memory read request
    output logic                 req_ready_o,
    input  logic                 req_valid_i,
    input  hpdcache_mem_req_t    req_i,

    //  Cache memory read response
    input  logic                 resp_ready_i,
    output logic                 resp_valid_o,
    output hpdcache_mem_resp_r_t resp_o,

    //  AXI read address channel
    output logic                 axi_ar_valid_o,
    output ar_chan_t             axi_ar_o,
    input  logic                 axi_ar_ready_i,

    //  AXI read data channel
    input  logic                 axi_r_valid_i,
    input  r_chan_t              axi_r_i,
    output logic                 axi_r_ready_o
);

    logic                lock;
    axi_pkg::cache_t     cache;
    hpdcache_mem_error_e resp;

    //  ARLOCK is set for the LDEX atomic command
    assign lock = (req_i.mem_req_command == HPDCACHE_MEM_ATOMIC) &&
                  (req_i.mem_req_atomic  == HPDCACHE_MEM_ATOMIC_LDEX);

    //  ARCACHE attributes. A locked (exclusive) access is never flagged as
    //  cacheable, consistently with the AWCACHE generation in
    //  hpdcache_mem_to_axi_write.
    assign cache = (req_i.mem_req_cacheable && !lock) ?
                   axi_pkg::CACHE_BUFFERABLE |
                   axi_pkg::CACHE_MODIFIABLE |
                   axi_pkg::CACHE_RD_ALLOC   |
                   axi_pkg::CACHE_WR_ALLOC   : '0;

    //  SLVERR and DECERR are reported as errors; any other RRESP value is OK
    always_comb
    begin : resp_decode_comb
        case (axi_r_i.resp)
            axi_pkg::RESP_SLVERR,
            axi_pkg::RESP_DECERR: resp = HPDCACHE_MEM_RESP_NOK;
            default:              resp = HPDCACHE_MEM_RESP_OK;
        endcase
    end

    //  Request -> AR channel (handshake signals are forwarded combinationally)
    assign req_ready_o      = axi_ar_ready_i,
           axi_ar_valid_o   = req_valid_i,
           axi_ar_o.id      = req_i.mem_req_id,
           axi_ar_o.addr    = req_i.mem_req_addr,
           axi_ar_o.len     = req_i.mem_req_len,
           axi_ar_o.size    = req_i.mem_req_size,
           axi_ar_o.burst   = axi_pkg::BURST_INCR,
           axi_ar_o.lock    = lock,
           axi_ar_o.cache   = cache,
           axi_ar_o.prot    = '0,
           axi_ar_o.qos     = '0,
           axi_ar_o.region  = '0,
           axi_ar_o.user    = '0;

    //  R channel -> response (handshake signals are forwarded combinationally)
    assign axi_r_ready_o           = resp_ready_i,
           resp_valid_o            = axi_r_valid_i,
           resp_o.mem_resp_r_error = resp,
           resp_o.mem_resp_r_id    = axi_r_i.id,
           resp_o.mem_resp_r_data  = axi_r_i.data,
           resp_o.mem_resp_r_last  = axi_r_i.last;
endmodule

View File

@ -0,0 +1,148 @@
/*
* Copyright 2023 CEA*
* *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
*
* SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
*
* Licensed under the Solderpad Hardware License v 2.1 (the License); you
* may not use this file except in compliance with the License, or, at your
* option, the Apache License version 2.0. You may obtain a copy of the
* License at
*
* https://solderpad.org/licenses/SHL-2.1/
*
* Unless required by applicable law or agreed to in writing, any work
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
/*
* Authors : Cesar Fuguet
* Creation Date : April, 2021
* Description : Dcache memory request to axi write channels
* History :
*/
module hpdcache_mem_to_axi_write
import hpdcache_pkg::*;
#(
    //  Types of the cache memory write request (metadata and data)
    parameter type hpdcache_mem_req_t = logic,
    parameter type hpdcache_mem_req_w_t = logic,
    //  Type of the cache memory write response
    parameter type hpdcache_mem_resp_w_t = logic,
    //  Types of the AXI AW, W and B channel payloads
    parameter type aw_chan_t = logic,
    parameter type w_chan_t = logic,
    parameter type b_chan_t = logic
)
(
    //  Cache memory write request: metadata
    output logic                 req_ready_o,
    input  logic                 req_valid_i,
    input  hpdcache_mem_req_t    req_i,

    //  Cache memory write request: data
    output logic                 req_data_ready_o,
    input  logic                 req_data_valid_i,
    input  hpdcache_mem_req_w_t  req_data_i,

    //  Cache memory write response
    input  logic                 resp_ready_i,
    output logic                 resp_valid_o,
    output hpdcache_mem_resp_w_t resp_o,

    //  AXI write address channel
    output logic                 axi_aw_valid_o,
    output aw_chan_t             axi_aw_o,
    input  logic                 axi_aw_ready_i,

    //  AXI write data channel
    output logic                 axi_w_valid_o,
    output w_chan_t              axi_w_o,
    input  logic                 axi_w_ready_i,

    //  AXI write response channel
    input  logic                 axi_b_valid_i,
    input  b_chan_t              axi_b_i,
    output logic                 axi_b_ready_o
);

    logic                lock;
    axi_pkg::atop_t      atop;
    axi_pkg::cache_t     cache;
    hpdcache_mem_error_e resp;

    //  AWLOCK / AWATOP generation
    //
    //  Both stay at zero unless the request is an atomic command: STEX sets
    //  the lock bit, the other listed atomic operations select an ATOP code.
    always_comb
    begin : atop_comb
        lock = 1'b0;
        atop = '0;

        if (req_i.mem_req_command == HPDCACHE_MEM_ATOMIC) begin
            case (req_i.mem_req_atomic)
                HPDCACHE_MEM_ATOMIC_STEX:
                    lock = 1'b1;

                HPDCACHE_MEM_ATOMIC_ADD:
                    atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
                HPDCACHE_MEM_ATOMIC_CLR:
                    atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
                HPDCACHE_MEM_ATOMIC_SET:
                    atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
                HPDCACHE_MEM_ATOMIC_EOR:
                    atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
                HPDCACHE_MEM_ATOMIC_SMAX:
                    atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
                HPDCACHE_MEM_ATOMIC_SMIN:
                    atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
                HPDCACHE_MEM_ATOMIC_UMAX:
                    atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
                HPDCACHE_MEM_ATOMIC_UMIN:
                    atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};

                HPDCACHE_MEM_ATOMIC_SWAP:
                    atop = axi_pkg::ATOP_ATOMICSWAP;
            endcase
        end
    end

    //  AWCACHE attributes: only cacheable, non-locked requests are flagged
    assign cache = (req_i.mem_req_cacheable && !lock)
                 ? (axi_pkg::CACHE_BUFFERABLE |
                    axi_pkg::CACHE_MODIFIABLE |
                    axi_pkg::CACHE_RD_ALLOC   |
                    axi_pkg::CACHE_WR_ALLOC)
                 : '0;

    //  SLVERR and DECERR are reported as errors; any other BRESP value is OK
    always_comb
    begin : resp_decode_comb
        if ((axi_b_i.resp == axi_pkg::RESP_SLVERR) ||
            (axi_b_i.resp == axi_pkg::RESP_DECERR)) begin
            resp = HPDCACHE_MEM_RESP_NOK;
        end else begin
            resp = HPDCACHE_MEM_RESP_OK;
        end
    end

    //  Request metadata -> AW channel
    assign req_ready_o     = axi_aw_ready_i;
    assign axi_aw_valid_o  = req_valid_i;
    assign axi_aw_o.id     = req_i.mem_req_id;
    assign axi_aw_o.addr   = req_i.mem_req_addr;
    assign axi_aw_o.len    = req_i.mem_req_len;
    assign axi_aw_o.size   = req_i.mem_req_size;
    assign axi_aw_o.burst  = axi_pkg::BURST_INCR;
    assign axi_aw_o.lock   = lock;
    assign axi_aw_o.cache  = cache;
    assign axi_aw_o.prot   = '0;
    assign axi_aw_o.qos    = '0;
    assign axi_aw_o.region = '0;
    assign axi_aw_o.atop   = atop;
    assign axi_aw_o.user   = '0;

    //  Request data -> W channel
    assign req_data_ready_o = axi_w_ready_i;
    assign axi_w_valid_o    = req_data_valid_i;
    assign axi_w_o.data     = req_data_i.mem_req_w_data;
    assign axi_w_o.strb     = req_data_i.mem_req_w_be;
    assign axi_w_o.last     = req_data_i.mem_req_w_last;
    assign axi_w_o.user     = '0;

    //  B channel -> response; an EXOKAY response sets the is_atomic flag
    assign axi_b_ready_o               = resp_ready_i;
    assign resp_valid_o                = axi_b_valid_i;
    assign resp_o.mem_resp_w_error     = resp;
    assign resp_o.mem_resp_w_id        = axi_b_i.id;
    assign resp_o.mem_resp_w_is_atomic = (axi_b_i.resp == axi_pkg::RESP_EXOKAY);
endmodule

View File

@ -0,0 +1,826 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 12.11.2017
// Description: Handles cache misses.
// --------------
// MISS Handler
// --------------
module miss_handler
import ariane_pkg::*;
import std_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NR_PORTS = 4,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i, // flush request
output logic flush_ack_o, // acknowledge successful flush
output logic miss_o,
input logic busy_i, // dcache is busy with something
// Bypass or miss
input logic [NR_PORTS-1:0][$bits(miss_req_t)-1:0] miss_req_i,
// Bypass handling
output logic [NR_PORTS-1:0] bypass_gnt_o,
output logic [NR_PORTS-1:0] bypass_valid_o,
output logic [NR_PORTS-1:0][63:0] bypass_data_o,
// AXI port
output axi_req_t axi_bypass_o,
input axi_rsp_t axi_bypass_i,
// Miss handling (~> cacheline refill)
output logic [NR_PORTS-1:0] miss_gnt_o,
output logic [NR_PORTS-1:0] active_serving_o,
output logic [63:0] critical_word_o,
output logic critical_word_valid_o,
output axi_req_t axi_data_o,
input axi_rsp_t axi_data_i,
input logic [NR_PORTS-1:0][55:0] mshr_addr_i,
output logic [NR_PORTS-1:0] mshr_addr_matches_o,
output logic [NR_PORTS-1:0] mshr_index_matches_o,
// AMO
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// Port to SRAMs, for refill and eviction
output logic [DCACHE_SET_ASSOC-1:0] req_o,
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
output cache_line_t data_o,
output cl_be_t be_o,
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
output logic we_o
);
// Three MSHR ports + AMO port
parameter NR_BYPASS_PORTS = NR_PORTS + 1;
// FSM states
enum logic [3:0] {
IDLE, // 0
FLUSHING, // 1
FLUSH, // 2
WB_CACHELINE_FLUSH, // 3
FLUSH_REQ_STATUS, // 4
WB_CACHELINE_MISS, // 5
WAIT_GNT_SRAM, // 6
MISS, // 7
REQ_CACHELINE, // 8
MISS_REPL, // 9
SAVE_CACHELINE, // A
INIT, // B
AMO_REQ, // C
AMO_WAIT_RESP // D
}
state_d, state_q;
// Registers
mshr_t mshr_d, mshr_q;
logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
// cache line to evict
cache_line_t evict_cl_d, evict_cl_q;
logic serve_amo_d, serve_amo_q;
// Request from one FSM
logic [ NR_PORTS-1:0] miss_req_valid;
logic [ NR_PORTS-1:0] miss_req_bypass;
logic [ NR_PORTS-1:0][63:0] miss_req_addr;
logic [ NR_PORTS-1:0][63:0] miss_req_wdata;
logic [ NR_PORTS-1:0] miss_req_we;
logic [ NR_PORTS-1:0][ 7:0] miss_req_be;
logic [ NR_PORTS-1:0][ 1:0] miss_req_size;
// Bypass AMO port
bypass_req_t amo_bypass_req;
bypass_rsp_t amo_bypass_rsp;
// Bypass ports <-> Arbiter
bypass_req_t [ NR_BYPASS_PORTS-1:0] bypass_ports_req;
bypass_rsp_t [ NR_BYPASS_PORTS-1:0] bypass_ports_rsp;
// Arbiter <-> Bypass AXI adapter
bypass_req_t bypass_adapter_req;
bypass_rsp_t bypass_adapter_rsp;
// Cache Line Refill <-> AXI
logic req_fsm_miss_valid;
logic [ 63:0] req_fsm_miss_addr;
logic [ DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata;
logic req_fsm_miss_we;
logic [ (DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be;
ariane_pkg::ad_req_t req_fsm_miss_req;
logic [ 1:0] req_fsm_miss_size;
logic gnt_miss_fsm;
logic valid_miss_fsm;
logic [ (DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm;
// Cache Management <-> LFSR
logic lfsr_enable;
logic [ DCACHE_SET_ASSOC-1:0] lfsr_oh;
logic [$clog2(DCACHE_SET_ASSOC-1)-1:0] lfsr_bin;
// AMOs
ariane_pkg::amo_t amo_op;
logic [ 63:0] amo_operand_b;
// ------------------------------
// Cache Management
// ------------------------------
always_comb begin : cache_management
automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way;
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin
evict_way[i] = data_i[i].valid & data_i[i].dirty;
valid_way[i] = data_i[i].valid;
end
// ----------------------
// Default Assignments
// ----------------------
// memory array
req_o = '0;
addr_o = '0;
data_o = '0;
be_o = '0;
we_o = '0;
// Cache controller
miss_gnt_o = '0;
active_serving_o = '0;
// LFSR replacement unit
lfsr_enable = 1'b0;
// to AXI refill
req_fsm_miss_valid = 1'b0;
req_fsm_miss_addr = '0;
req_fsm_miss_wdata = '0;
req_fsm_miss_we = 1'b0;
req_fsm_miss_be = '0;
req_fsm_miss_req = ariane_pkg::CACHE_LINE_REQ;
req_fsm_miss_size = 2'b11;
// to AXI bypass
amo_bypass_req.req = 1'b0;
amo_bypass_req.reqtype = ariane_pkg::SINGLE_REQ;
amo_bypass_req.amo = ariane_pkg::AMO_NONE;
amo_bypass_req.addr = '0;
amo_bypass_req.we = 1'b0;
amo_bypass_req.wdata = '0;
amo_bypass_req.be = '0;
amo_bypass_req.size = 2'b11;
amo_bypass_req.id = 4'b1011;
// core
flush_ack_o = 1'b0;
miss_o = 1'b0; // to performance counter
serve_amo_d = serve_amo_q;
// --------------------------------
// Flush and Miss operation
// --------------------------------
state_d = state_q;
cnt_d = cnt_q;
evict_way_d = evict_way_q;
evict_cl_d = evict_cl_q;
mshr_d = mshr_q;
// communicate to the requester which unit we are currently serving
active_serving_o[mshr_q.id] = mshr_q.valid;
// AMOs
amo_resp_o.ack = 1'b0;
amo_resp_o.result = '0;
amo_operand_b = '0;
case (state_q)
IDLE: begin
// lowest priority are AMOs, wait until everything else is served before going for the AMOs
if (amo_req_i.req && !busy_i) begin
// 1. Flush the cache
state_d = FLUSH_REQ_STATUS;
serve_amo_d = 1'b1;
cnt_d = '0;
end
// check if we want to flush and can flush e.g.: we are not busy anymore
// TODO: Check that the busy flag is indeed needed
if (flush_i && !busy_i) begin
state_d = FLUSH_REQ_STATUS;
cnt_d = '0;
end
// check if one of the state machines missed
for (int unsigned i = 0; i < NR_PORTS; i++) begin
// here comes the refill portion of code
if (miss_req_valid[i] && !miss_req_bypass[i]) begin
state_d = MISS;
// we are taking another request so don't take the AMO
serve_amo_d = 1'b0;
// save to MSHR
mshr_d.valid = 1'b1;
mshr_d.we = miss_req_we[i];
mshr_d.id = i;
mshr_d.addr = miss_req_addr[i][DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:0];
mshr_d.wdata = miss_req_wdata[i];
mshr_d.be = miss_req_be[i];
break;
end
end
end
// ~> we missed on the cache
MISS: begin
// 1. Check if there is an empty cache-line
// 2. If not -> evict one
req_o = '1;
addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
state_d = MISS_REPL;
miss_o = 1'b1;
end
// ~> second miss cycle
MISS_REPL: begin
// if all are valid we need to evict one, pseudo random from LFSR
if (&valid_way) begin
lfsr_enable = 1'b1;
evict_way_d = lfsr_oh;
// do we need to write back the cache line?
if (data_i[lfsr_bin].dirty) begin
state_d = WB_CACHELINE_MISS;
evict_cl_d.tag = data_i[lfsr_bin].tag;
evict_cl_d.data = data_i[lfsr_bin].data;
cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
// no - we can request a cache line now
end else state_d = REQ_CACHELINE;
// we have at least one free way
end else begin
// get victim cache-line by looking for the first non-valid bit
evict_way_d = get_victim_cl(~valid_way);
state_d = REQ_CACHELINE;
end
end
// ~> we can just load the cache-line, the way is stored in evict_way_q
REQ_CACHELINE: begin
req_fsm_miss_valid = 1'b1;
req_fsm_miss_addr = mshr_q.addr;
if (gnt_miss_fsm) begin
state_d = SAVE_CACHELINE;
miss_gnt_o[mshr_q.id] = 1'b1;
end
end
// ~> replace the cacheline
SAVE_CACHELINE: begin
// calculate cacheline offset
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
cl_offset = mshr_q.addr[DCACHE_BYTE_OFFSET-1:3] << 6;
// we've got a valid response from refill unit
if (valid_miss_fsm) begin
addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
req_o = evict_way_q;
we_o = 1'b1;
be_o = '1;
be_o.vldrty = evict_way_q;
data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
data_o.data = data_miss_fsm;
data_o.valid = 1'b1;
data_o.dirty = 1'b0;
// is this a write?
if (mshr_q.we) begin
// Yes, so save the updated data now
for (int i = 0; i < 8; i++) begin
// check if we really want to write the corresponding byte
if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i];
end
// its immediately dirty if we write
data_o.dirty = 1'b1;
end
// reset MSHR
mshr_d.valid = 1'b0;
// go back to idle
state_d = IDLE;
end
end
// ------------------------------
// Write Back Operation
// ------------------------------
// ~> evict a cache line from way saved in evict_way_q
WB_CACHELINE_FLUSH, WB_CACHELINE_MISS: begin
req_fsm_miss_valid = 1'b1;
req_fsm_miss_addr = {
evict_cl_q.tag,
cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET],
{{DCACHE_BYTE_OFFSET} {1'b0}}
};
req_fsm_miss_be = '1;
req_fsm_miss_we = 1'b1;
req_fsm_miss_wdata = evict_cl_q.data;
// we've got a grant --> this is timing critical, think about it
if (gnt_miss_fsm) begin
// write status array
addr_o = cnt_q;
req_o = 1'b1;
we_o = 1'b1;
data_o.valid = INVALIDATE_ON_FLUSH ? 1'b0 : 1'b1;
// invalidate
be_o.vldrty = evict_way_q;
// go back to handling the miss or flushing, depending on where we came from
state_d = (state_q == WB_CACHELINE_MISS) ? MISS : FLUSH_REQ_STATUS;
end
end
// ------------------------------
// Flushing & Initialization
// ------------------------------
// ~> make another request to check the same cache-line if there are still some valid entries
FLUSH_REQ_STATUS: begin
req_o = '1;
addr_o = cnt_q;
state_d = FLUSHING;
end
FLUSHING: begin
// this has priority
// at least one of the cache lines is dirty
if (|evict_way) begin
// evict cache line, look for the first cache-line which is dirty
evict_way_d = get_victim_cl(evict_way);
evict_cl_d = data_i[one_hot_to_bin(evict_way)];
state_d = WB_CACHELINE_FLUSH;
// not dirty ~> increment and continue
end else begin
// increment and re-request
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
state_d = FLUSH_REQ_STATUS;
addr_o = cnt_q;
req_o = 1'b1;
be_o.vldrty = INVALIDATE_ON_FLUSH ? '1 : '0;
we_o = 1'b1;
// finished with flushing operation, go back to idle
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) begin
// only acknowledge if the flush wasn't triggered by an atomic
flush_ack_o = ~serve_amo_q;
// if we are flushing because of an AMO go to serve it
if (serve_amo_q) begin
state_d = AMO_REQ;
serve_amo_d = 1'b0;
end else begin
state_d = IDLE;
end
end
end
end
// ~> only called after reset
INIT: begin
// initialize status array
addr_o = cnt_q;
req_o = 1'b1;
we_o = 1'b1;
// only write the dirty array
be_o.vldrty = '1;
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
// finished initialization
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) state_d = IDLE;
end
// ----------------------
// AMOs
// ----------------------
// ~> we are here because we need to do the AMO, the cache is clean at this point
AMO_REQ: begin
amo_bypass_req.req = 1'b1;
amo_bypass_req.reqtype = ariane_pkg::SINGLE_REQ;
amo_bypass_req.amo = amo_req_i.amo_op;
// address is in operand a
amo_bypass_req.addr = amo_req_i.operand_a;
if (amo_req_i.amo_op != AMO_LR) begin
amo_bypass_req.we = 1'b1;
end
amo_bypass_req.size = amo_req_i.size;
// AXI implements CLR op instead of AND, negate operand
if (amo_req_i.amo_op == AMO_AND) begin
amo_operand_b = ~amo_req_i.operand_b;
end else begin
amo_operand_b = amo_req_i.operand_b;
end
// align data and byte-enable to correct byte lanes
amo_bypass_req.wdata = amo_operand_b;
if (amo_req_i.size == 2'b11) begin
// 64b transfer
amo_bypass_req.be = 8'b11111111;
end else begin
// 32b transfer
if (amo_req_i.operand_a[2:0] == '0) begin
// 64b aligned -> activate lower 4 byte lanes
amo_bypass_req.be = 8'b00001111;
end else begin
// 64b unaligned -> activate upper 4 byte lanes
amo_bypass_req.be = 8'b11110000;
amo_bypass_req.wdata = amo_operand_b[31:0] << 32;
end
end
// when request is accepted, wait for response
if (amo_bypass_rsp.gnt) begin
if (amo_bypass_rsp.valid) begin
state_d = IDLE;
amo_resp_o.ack = 1'b1;
amo_resp_o.result = amo_bypass_rsp.rdata;
end else begin
state_d = AMO_WAIT_RESP;
end
end
end
AMO_WAIT_RESP: begin
if (amo_bypass_rsp.valid) begin
state_d = IDLE;
amo_resp_o.ack = 1'b1;
// Request is assumed to be still valid (ack not granted yet)
if (amo_req_i.size == 2'b10) begin
// 32b request
logic [31:0] halfword;
if (amo_req_i.operand_a[2:0] == '0) begin
// 64b aligned -> activate lower 4 byte lanes
halfword = amo_bypass_rsp.rdata[31:0];
end else begin
// 64b unaligned -> activate upper 4 byte lanes
halfword = amo_bypass_rsp.rdata[63:32];
end
// Sign-extend 32b requests as per RISC-V spec
amo_resp_o.result = {{32{halfword[31]}}, halfword};
end else begin
// 64b request
amo_resp_o.result = amo_bypass_rsp.rdata;
end
end
end
endcase
end
// check MSHR for aliasing
always_comb begin
mshr_addr_matches_o = 'b0;
mshr_index_matches_o = 'b0;
for (int i = 0; i < NR_PORTS; i++) begin
// check mshr for potential matching of other units, exclude the unit currently being served
if (mshr_q.valid && mshr_addr_i[i][55:DCACHE_BYTE_OFFSET] == mshr_q.addr[55:DCACHE_BYTE_OFFSET]) begin
mshr_addr_matches_o[i] = 1'b1;
end
// same as previous, but checking only the index
if (mshr_q.valid && mshr_addr_i[i][DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == mshr_q.addr[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]) begin
mshr_index_matches_o[i] = 1'b1;
end
end
end
// --------------------
// Sequential Process
// --------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mshr_q <= '0;
state_q <= INIT;
cnt_q <= '0;
evict_way_q <= '0;
evict_cl_q <= '0;
serve_amo_q <= 1'b0;
end else begin
mshr_q <= mshr_d;
state_q <= state_d;
cnt_q <= cnt_d;
evict_way_q <= evict_way_d;
evict_cl_q <= evict_cl_d;
serve_amo_q <= serve_amo_d;
end
end
//pragma translate_off
`ifndef VERILATOR
// Simulation-only check (compiled out under Verilator): at every rising clock
// edge evict_way_q must be all-zero or one-hot, i.e. at most one way may be
// selected for eviction. A violation only emits a warning.
assert property (@(posedge clk_i) $onehot0(evict_way_q))
else $warning("Evict-way should be one-hot encoded");
`endif
//pragma translate_on
// ----------------------
// Pack bypass ports
// ----------------------
// Build the request array for the bypass arbiter and fan its responses back out.
// Slots 0..NR_PORTS-1 carry the per-port miss requests that are marked as bypass,
// the slot after the loop (index NR_PORTS) carries the AMO request.
always_comb begin
// Loop index; it is deliberately declared outside the for statement because its
// final value (NR_PORTS) is used after the loop to address the AMO slot.
// NOTE(review): presumably NR_BYPASS_PORTS == NR_PORTS + 1 - confirm at the declaration.
logic [$clog2(NR_BYPASS_PORTS)-1:0] id;
// Pack the miss-request ports first
for (id = 0; id < NR_PORTS; id++) begin
// only requests flagged as bypass are forwarded to the bypass path
bypass_ports_req[id].req = miss_req_valid[id] & miss_req_bypass[id];
bypass_ports_req[id].reqtype = ariane_pkg::SINGLE_REQ;
bypass_ports_req[id].amo = AMO_NONE;
// transaction ID: top bit of the 4-bit ID set, port number in the low bits
bypass_ports_req[id].id = 4'b1000 | 4'(id);
bypass_ports_req[id].addr = miss_req_addr[id];
bypass_ports_req[id].wdata = miss_req_wdata[id];
bypass_ports_req[id].we = miss_req_we[id];
bypass_ports_req[id].be = miss_req_be[id];
bypass_ports_req[id].size = miss_req_size[id];
// response side: hand grant/valid/data of this slot back to the port
bypass_gnt_o[id] = bypass_ports_rsp[id].gnt;
bypass_valid_o[id] = bypass_ports_rsp[id].valid;
bypass_data_o[id] = bypass_ports_rsp[id].rdata;
end
// AMO port has lowest priority: it occupies the highest slot index
// (id == NR_PORTS here) and the arbiter picks the lowest requesting index first
bypass_ports_req[id] = amo_bypass_req;
amo_bypass_rsp = bypass_ports_rsp[id];
end
// ----------------------
// Arbitrate bypass ports
// ----------------------
// Funnels all bypass request slots (miss ports plus the AMO slot) into the single
// request/response pair that feeds the bypass AXI adapter.
axi_adapter_arbiter #(
.NR_PORTS (NR_BYPASS_PORTS),
.MAX_OUTSTANDING_REQ(CVA6Cfg.MaxOutstandingStores),
.req_t (bypass_req_t),
.rsp_t (bypass_rsp_t)
) i_bypass_arbiter (
.clk_i (clk_i),
.rst_ni(rst_ni),
// Master Side
.req_i (bypass_ports_req),
.rsp_o (bypass_ports_rsp),
// Slave Side
.req_o (bypass_adapter_req),
.rsp_i (bypass_adapter_rsp)
);
// ----------------------
// Bypass AXI Interface
// ----------------------
// Cast bypass_adapter_req.addr to axi_adapter port size
// Address of the arbitrated bypass request, resized to XLEN bits by the assignment
logic [riscv::XLEN-1:0] bypass_addr;
assign bypass_addr = bypass_adapter_req.addr;
// AXI adapter for the bypass path: issues the arbitrated request on
// axi_bypass_o / axi_bypass_i with a 64-bit data width.
axi_adapter #(
.CVA6Cfg (CVA6Cfg),
.DATA_WIDTH (64),
.CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET),
.axi_req_t (axi_req_t),
.axi_rsp_t (axi_rsp_t)
) i_bypass_axi_adapter (
.clk_i(clk_i),
.rst_ni(rst_ni),
.req_i(bypass_adapter_req.req),
.type_i(bypass_adapter_req.reqtype),
.amo_i(bypass_adapter_req.amo),
// request ID is zero-padded on the left up to the AXI ID width
// NOTE(review): the "- 4" presumes bypass_adapter_req.id is 4 bits wide - confirm in bypass_req_t
.id_i(({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, bypass_adapter_req.id})),
.addr_i(bypass_addr),
.wdata_i(bypass_adapter_req.wdata),
.we_i(bypass_adapter_req.we),
.be_i(bypass_adapter_req.be),
.size_i(bypass_adapter_req.size),
.gnt_o(bypass_adapter_rsp.gnt),
.valid_o(bypass_adapter_rsp.valid),
.rdata_o(bypass_adapter_rsp.rdata),
.id_o(), // not used, single outstanding request in arbiter
.critical_word_o(), // not used for single requests
.critical_word_valid_o(), // not used for single requests
.axi_req_o(axi_bypass_o),
.axi_resp_i(axi_bypass_i)
);
// ----------------------
// Cache Line AXI Refill
// ----------------------
// Cast req_fsm_miss_addr to axi_adapter port size
// Miss address from the request FSM, resized to XLEN bits by the assignment
logic [riscv::XLEN-1:0] miss_addr;
assign miss_addr = req_fsm_miss_addr;
// AXI adapter for the cache-line path: data width is a full cache line
// (DCACHE_LINE_WIDTH) and traffic goes out on axi_data_o / axi_data_i.
axi_adapter #(
.CVA6Cfg (CVA6Cfg),
.DATA_WIDTH (DCACHE_LINE_WIDTH),
.CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET),
.axi_req_t (axi_req_t),
.axi_rsp_t (axi_rsp_t)
) i_miss_axi_adapter (
.clk_i,
.rst_ni,
.req_i (req_fsm_miss_valid),
.type_i (req_fsm_miss_req),
// this path never issues atomics
.amo_i (AMO_NONE),
.gnt_o (gnt_miss_fsm),
.addr_i (miss_addr),
.we_i (req_fsm_miss_we),
.wdata_i (req_fsm_miss_wdata),
.be_i (req_fsm_miss_be),
.size_i (req_fsm_miss_size),
// fixed transaction ID 4'b0111, zero-padded on the left to the AXI ID width
.id_i ({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, 4'b0111}),
.valid_o (valid_miss_fsm),
.rdata_o (data_miss_fsm),
// returned ID is left unconnected
.id_o (),
.critical_word_o (critical_word_o),
.critical_word_valid_o(critical_word_valid_o),
.axi_req_o (axi_data_o),
.axi_resp_i (axi_data_i)
);
// -----------------
// Replacement LFSR
// -----------------
// LFSR that supplies the way to refill, both one-hot (lfsr_oh) and binary
// (lfsr_bin); it is enabled through lfsr_enable.
lfsr_8bit #(
.WIDTH(DCACHE_SET_ASSOC)
) i_lfsr (
.en_i (lfsr_enable),
.refill_way_oh (lfsr_oh),
.refill_way_bin(lfsr_bin),
// remaining ports are connected implicitly to same-named signals of this module
.*
);
// -----------------
// Struct Split
// -----------------
// Hack as system verilog support in modelsim seems to be buggy here
// Unpack every entry of miss_req_i into individual per-field arrays.
// Each entry is cast to miss_req_t through a temporary and its fields are
// then copied one by one into miss_req_valid/bypass/addr/wdata/we/be/size.
always_comb begin
// scratch variable holding the entry of the port currently being unpacked
automatic miss_req_t miss_req;
for (int unsigned i = 0; i < NR_PORTS; i++) begin
miss_req = miss_req_t'(miss_req_i[i]);
miss_req_valid[i] = miss_req.valid;
miss_req_bypass[i] = miss_req.bypass;
miss_req_addr[i] = miss_req.addr;
miss_req_wdata[i] = miss_req.wdata;
miss_req_we[i] = miss_req.we;
miss_req_be[i] = miss_req.be;
miss_req_size[i] = miss_req.size;
end
end
endmodule
// --------------
// AXI Arbiter
// --------------
//
// Description: Arbitrates access to AXI refill/bypass
//
// Arbitrates NR_PORTS request ports (req_i/rsp_o) onto one downstream port
// (req_o/rsp_i).
//
// Parameters:
//   NR_PORTS            - number of upstream request ports
//   MAX_OUTSTANDING_REQ - used to size the outstanding-transaction counter and
//                         to limit back-to-back requests from the selected port
//   req_t / rsp_t       - request / response struct types
//
// Behaviour:
//   IDLE    - the lowest-indexed requesting port is selected and granted
//             immediately; its request is forwarded and also latched in req_q
//             so it can be replayed until the downstream side grants it.
//   SERVING - the arbiter stays on the selected port. Further requests of that
//             port are accepted only while no other port is requesting. The
//             FSM returns to IDLE once all granted transactions have been
//             answered and no request is left pending.
module axi_adapter_arbiter #(
parameter NR_PORTS = 4,
parameter MAX_OUTSTANDING_REQ = 0,
parameter type req_t = std_cache_pkg::bypass_req_t,
parameter type rsp_t = std_cache_pkg::bypass_rsp_t
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// Master ports
input req_t [NR_PORTS-1:0] req_i,
output rsp_t [NR_PORTS-1:0] rsp_o,
// Slave port
output req_t req_o,
input rsp_t rsp_i
);
// Counter width: enough bits to hold MAX_OUTSTANDING_REQ, but at least one bit
localparam MAX_OUTSTANDING_CNT_WIDTH = $clog2(
MAX_OUTSTANDING_REQ + 1
) > 0 ? $clog2(
MAX_OUTSTANDING_REQ + 1
) : 1;
typedef logic [MAX_OUTSTANDING_CNT_WIDTH-1:0] outstanding_cnt_t;
enum logic {
IDLE,
SERVING
}
state_d, state_q;
// latched copy of the request currently being presented downstream
req_t req_d, req_q;
// index of the selected port; it is used as a binary index below even though
// the vector is declared NR_PORTS bits wide
logic [NR_PORTS-1:0] sel_d, sel_q;
// number of requests granted downstream whose response has not arrived yet
outstanding_cnt_t outstanding_cnt_d, outstanding_cnt_q;
// one request-valid bit per port
logic [NR_PORTS-1:0] req_flat;
logic any_unselected_port_valid;
for (genvar i = 0; i < NR_PORTS; i++) begin : gen_req_flat
assign req_flat[i] = req_i[i].req;
end
// high when any port other than the currently selected one is requesting
assign any_unselected_port_valid = |(req_flat & ~(1 << sel_q));
// Next-state and output logic. Statement order matters here: req_o and
// outstanding_cnt_d are read further down after they may have been updated.
always_comb begin
sel_d = sel_q;
outstanding_cnt_d = outstanding_cnt_q;
state_d = state_q;
req_d = req_q;
// by default the latched request is replayed downstream
req_o = req_q;
// by default no port sees gnt/valid; read data is always routed to the selected port
rsp_o = '0;
rsp_o[sel_q].rdata = rsp_i.rdata;
case (state_q)
IDLE: begin
// wait for incoming requests
// fixed priority: the first (lowest-index) requesting port wins
for (int unsigned i = 0; i < NR_PORTS; i++) begin
if (req_i[i].req == 1'b1) begin
sel_d = i[$bits(sel_d)-1:0];
state_d = SERVING;
break;
end
end
// forward and latch the request of the chosen port; if nobody requests,
// sel_d still equals sel_q
req_d = req_i[sel_d];
req_o = req_i[sel_d];
// the chosen port is granted right away, independent of rsp_i.gnt
rsp_o[sel_d].gnt = req_i[sel_d].req;
// Count outstanding transactions, i.e. requests which have been
// granted but response hasn't arrived yet
if (req_o.req && rsp_i.gnt) begin
// accepted downstream in this cycle: do not replay it from req_q
req_d.req = 1'b0;
outstanding_cnt_d += 1;
end
end
SERVING: begin
// We can accept multiple outstanding transactions from same port.
// To ensure fairness, we allow this only if all other ports are idle
// (req_o still equals req_q at this point, so !req_o.req means that no
// latched request is waiting for a downstream grant)
if ((!req_o.req) && !any_unselected_port_valid &&
(outstanding_cnt_q != (MAX_OUTSTANDING_REQ - 1))) begin
if (req_i[sel_q].req) begin
req_d = req_i[sel_q];
req_o = req_i[sel_q];
rsp_o[sel_q].gnt = 1'b1;
state_d = SERVING;
end
end
// Count outstanding transactions, i.e. requests which have been
// granted but response hasn't arrived yet
if (req_o.req && rsp_i.gnt) begin
req_d.req = 1'b0;
outstanding_cnt_d += 1;
end
if (rsp_i.valid) begin
// a response arrived: one transaction less in flight, pass valid to the port
outstanding_cnt_d -= 1;
rsp_o[sel_q].valid = 1'b1;
// leave SERVING only when nothing is in flight any more and no request
// is left waiting for a downstream grant
if ((outstanding_cnt_d == 0) && (!req_o.req || rsp_i.gnt)) state_d = IDLE;
end
end
default: /* default */;
endcase
end
// State registers, cleared by the asynchronous active-low reset
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
sel_q <= '0;
req_q <= '0;
outstanding_cnt_q <= '0;
end else begin
state_q <= state_d;
sel_q <= sel_d;
req_q <= req_d;
outstanding_cnt_q <= outstanding_cnt_d;
end
end
// ------------
// Assertions
// ------------
// Simulation-only checks (compiled out under Verilator); each failure stops the simulation.
//pragma translate_off
`ifndef VERILATOR
// make sure that we eventually get an rvalid after we received a grant
assert property (@(posedge clk_i) rsp_i.gnt |-> ##[1:$] rsp_i.valid)
else begin
$error("There was a grant without a rvalid");
$stop();
end
// assert that there is no grant without a request or outstanding transactions
// (this one is sampled on the falling clock edge)
assert property (@(negedge clk_i) rsp_i.gnt |-> req_o.req)
else begin
$error("There was a grant without a request.");
$stop();
end
// assert that the address does not contain X when request is sent
assert property (@(posedge clk_i) (req_o.req) |-> (!$isunknown(req_o.addr)))
else begin
$error("address contains X when request is set");
$stop();
end
`endif
//pragma translate_on
endmodule

View File

@ -0,0 +1,315 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Standard Ariane cache subsystem with instruction cache and
// write-back data cache.
// Cache subsystem wrapper: instantiates the instruction cache (with its AXI
// wrapper) and the non-blocking data cache, and merges their three AXI
// request streams (icache, dcache bypass, dcache refill/write-back) onto the
// single memory-side AXI port axi_req_o / axi_resp_i.
//
// Requests are merged per channel (AR, AW via stream arbiters, W via a mux
// steered by the order of accepted AW requests). Responses (R, B) are routed
// back by transaction ID:
//   4'b0000 -> icache, 4'b0111 -> dcache, 4'b1??? -> dcache bypass
//
// Parameters:
//   CVA6Cfg        - core configuration struct passed on to the caches
//   NumPorts       - number of data cache request ports
//   axi_*_chan_t   - AXI AR/AW/W channel struct types
//   axi_req_t/rsp_t - AXI request/response struct types
module std_cache_subsystem
  import ariane_pkg::*;
  import std_cache_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned NumPorts = 4,
    parameter type axi_ar_chan_t = logic,
    parameter type axi_aw_chan_t = logic,
    parameter type axi_w_chan_t = logic,
    parameter type axi_req_t = logic,
    parameter type axi_rsp_t = logic
) (
    input logic clk_i,
    input logic rst_ni,
    input riscv::priv_lvl_t priv_lvl_i,
    // I$
    input logic icache_en_i,  // enable icache (or bypass e.g: in debug mode)
    input logic icache_flush_i,  // flush the icache, flush and kill have to be asserted together
    output logic icache_miss_o,  // to performance counter
    // address translation requests
    input icache_areq_t icache_areq_i,  // to/from frontend
    output icache_arsp_t icache_areq_o,
    // data requests
    input icache_dreq_t icache_dreq_i,  // to/from frontend
    output icache_drsp_t icache_dreq_o,
    // AMOs
    input amo_req_t amo_req_i,
    output amo_resp_t amo_resp_o,
    // D$
    // Cache management
    input logic dcache_enable_i,  // from CSR
    input logic dcache_flush_i,  // high until acknowledged
    output logic dcache_flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
    output logic dcache_miss_o,  // we missed on a ld/st
    output logic wbuffer_empty_o,  // statically set to 1, as there is no wbuffer in this cache system
    // Request ports
    input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i,  // to/from LSU
    output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o,  // to/from LSU
    // memory side
    output axi_req_t axi_req_o,
    input axi_rsp_t axi_resp_i
);

  assign wbuffer_empty_o = 1'b1;

  // per-source AXI request/response bundles before arbitration
  axi_req_t axi_req_icache;
  axi_rsp_t axi_resp_icache;
  axi_req_t axi_req_bypass;
  axi_rsp_t axi_resp_bypass;
  axi_req_t axi_req_data;
  axi_rsp_t axi_resp_data;

  cva6_icache_axi_wrapper #(
      .CVA6Cfg  (CVA6Cfg),
      .axi_req_t(axi_req_t),
      .axi_rsp_t(axi_rsp_t)
  ) i_cva6_icache_axi_wrapper (
      .clk_i     (clk_i),
      .rst_ni    (rst_ni),
      .priv_lvl_i(priv_lvl_i),
      .flush_i   (icache_flush_i),
      .en_i      (icache_en_i),
      .miss_o    (icache_miss_o),
      .areq_i    (icache_areq_i),
      .areq_o    (icache_areq_o),
      .dreq_i    (icache_dreq_i),
      .dreq_o    (icache_dreq_o),
      .axi_req_o (axi_req_icache),
      .axi_resp_i(axi_resp_icache)
  );

  // decreasing priority
  // Port 0: PTW
  // Port 1: Load Unit
  // Port 2: Accelerator
  // Port 3: Store Unit
  std_nbdcache #(
      .CVA6Cfg  (CVA6Cfg),
      .NumPorts (NumPorts),
      .axi_req_t(axi_req_t),
      .axi_rsp_t(axi_rsp_t)
  ) i_nbdcache (
      .clk_i,
      .rst_ni,
      .enable_i    (dcache_enable_i),
      .flush_i     (dcache_flush_i),
      .flush_ack_o (dcache_flush_ack_o),
      .miss_o      (dcache_miss_o),
      .axi_bypass_o(axi_req_bypass),
      .axi_bypass_i(axi_resp_bypass),
      .axi_data_o  (axi_req_data),
      .axi_data_i  (axi_resp_data),
      .req_ports_i (dcache_req_ports_i),
      .req_ports_o (dcache_req_ports_o),
      .amo_req_i,
      .amo_resp_o
  );

  // -----------------------
  // Arbitrate AXI Ports
  // -----------------------
  logic [1:0] w_select, w_select_fifo, w_select_arbiter;
  logic [1:0] w_fifo_usage;
  logic w_fifo_empty, w_fifo_full;

  // AR Channel
  // concatenation order: index 2 = icache, 1 = bypass, 0 = dcache
  stream_arbiter #(
      .DATA_T(axi_ar_chan_t),
      .N_INP (3)
  ) i_stream_arbiter_ar (
      .clk_i,
      .rst_ni,
      .inp_data_i ({axi_req_icache.ar, axi_req_bypass.ar, axi_req_data.ar}),
      .inp_valid_i({axi_req_icache.ar_valid, axi_req_bypass.ar_valid, axi_req_data.ar_valid}),
      .inp_ready_o({axi_resp_icache.ar_ready, axi_resp_bypass.ar_ready, axi_resp_data.ar_ready}),
      .oup_data_o (axi_req_o.ar),
      .oup_valid_o(axi_req_o.ar_valid),
      .oup_ready_i(axi_resp_i.ar_ready)
  );

  // AW Channel
  stream_arbiter #(
      .DATA_T(axi_aw_chan_t),
      .N_INP (3)
  ) i_stream_arbiter_aw (
      .clk_i,
      .rst_ni,
      .inp_data_i ({axi_req_icache.aw, axi_req_bypass.aw, axi_req_data.aw}),
      .inp_valid_i({axi_req_icache.aw_valid, axi_req_bypass.aw_valid, axi_req_data.aw_valid}),
      .inp_ready_o({axi_resp_icache.aw_ready, axi_resp_bypass.aw_ready, axi_resp_data.aw_ready}),
      .oup_data_o (axi_req_o.aw),
      .oup_valid_o(axi_req_o.aw_valid),
      .oup_ready_i(axi_resp_i.aw_ready)
  );

  // WID has been removed in AXI 4 so we need to keep track which AW request has been accepted
  // to forward the correct write data.
  // w_select indexes the W-channel mux below, whose inputs are ordered
  // {dcache, bypass, icache}: 2 = dcache, 1 = bypass, 0 = icache.
  always_comb begin
    w_select = 0;
    unique casez (axi_req_o.aw.id)
      4'b0111: w_select = 2;  // dcache
      4'b1???: w_select = 1;  // bypass
      default: w_select = 0;  // icache
    endcase
  end

  // W Channel
  // FIFO remembering, in acceptance order, which source each granted AW belongs to
  fifo_v3 #(
      .DATA_WIDTH  (2),
      // we can have a maximum of 4 outstanding transactions as each port is blocking
      .DEPTH       (4),
      .FALL_THROUGH(1'b1)
  ) i_fifo_w_channel (
      .clk_i     (clk_i),
      .rst_ni    (rst_ni),
      .flush_i   (1'b0),
      .testmode_i(1'b0),
      .full_o    (w_fifo_full),
      .empty_o   (),  // leave open
      .usage_o   (w_fifo_usage),
      .data_i    (w_select),
      // a new transaction was requested and granted
      .push_i    (axi_req_o.aw_valid & axi_resp_i.aw_ready),
      // write ID to select the output MUX
      .data_o    (w_select_fifo),
      // transaction has finished
      .pop_i     (axi_req_o.w_valid & axi_resp_i.w_ready & axi_req_o.w.last)
  );

  // In fall-through mode, the empty_o will be low when push_i is high (on zero usage).
  // We do not want this here. Also, usage_o is missing the MSB, so on full fifo, usage_o is zero.
  assign w_fifo_empty = w_fifo_usage == 0 && !w_fifo_full;

  // icache will never write so select it as default (e.g.: when no arbitration is active)
  // this is equal to setting it to zero
  assign w_select_arbiter = w_fifo_empty ? (axi_req_o.aw_valid ? w_select : 0) : w_select_fifo;

  stream_mux #(
      .DATA_T(axi_w_chan_t),
      .N_INP (3)
  ) i_stream_mux_w (
      .inp_data_i ({axi_req_data.w, axi_req_bypass.w, axi_req_icache.w}),
      .inp_valid_i({axi_req_data.w_valid, axi_req_bypass.w_valid, axi_req_icache.w_valid}),
      .inp_ready_o({axi_resp_data.w_ready, axi_resp_bypass.w_ready, axi_resp_icache.w_ready}),
      .inp_sel_i  (w_select_arbiter),
      .oup_data_o (axi_req_o.w),
      .oup_valid_o(axi_req_o.w_valid),
      .oup_ready_i(axi_resp_i.w_ready)
  );

  // Route responses based on ID
  // 0000 -> I$
  // 0111 -> D$
  // 1??? -> Bypass
  // R Channel
  // the payload is broadcast to all three sources, only the valid is demultiplexed
  assign axi_resp_icache.r = axi_resp_i.r;
  assign axi_resp_bypass.r = axi_resp_i.r;
  assign axi_resp_data.r   = axi_resp_i.r;

  // r_select indexes the demux outputs, ordered {icache, bypass, dcache}:
  // 0 = dcache, 1 = bypass, 2 = icache (note: opposite order to w_select)
  logic [1:0] r_select;

  always_comb begin
    r_select = 0;
    unique casez (axi_resp_i.r.id)
      4'b0111: r_select = 0;  // dcache
      4'b1???: r_select = 1;  // bypass
      4'b0000: r_select = 2;  // icache
      default: r_select = 0;
    endcase
  end

  stream_demux #(
      .N_OUP(3)
  ) i_stream_demux_r (
      .inp_valid_i(axi_resp_i.r_valid),
      .inp_ready_o(axi_req_o.r_ready),
      .oup_sel_i  (r_select),
      .oup_valid_o({axi_resp_icache.r_valid, axi_resp_bypass.r_valid, axi_resp_data.r_valid}),
      .oup_ready_i({axi_req_icache.r_ready, axi_req_bypass.r_ready, axi_req_data.r_ready})
  );

  // B Channel
  logic [1:0] b_select;

  assign axi_resp_icache.b = axi_resp_i.b;
  assign axi_resp_bypass.b = axi_resp_i.b;
  assign axi_resp_data.b   = axi_resp_i.b;

  // same index convention as r_select
  always_comb begin
    b_select = 0;
    unique casez (axi_resp_i.b.id)
      4'b0111: b_select = 0;  // dcache
      4'b1???: b_select = 1;  // bypass
      4'b0000: b_select = 2;  // icache
      default: b_select = 0;
    endcase
  end

  stream_demux #(
      .N_OUP(3)
  ) i_stream_demux_b (
      .inp_valid_i(axi_resp_i.b_valid),
      .inp_ready_o(axi_req_o.b_ready),
      .oup_sel_i  (b_select),
      .oup_valid_o({axi_resp_icache.b_valid, axi_resp_bypass.b_valid, axi_resp_data.b_valid}),
      .oup_ready_i({axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready})
  );

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////
  // Simulation-only X checks (compiled out under Verilator). Unlike $fatal, the
  // $warning task takes no leading finish number: its first argument is already
  // the format string.

  //pragma translate_off
`ifndef VERILATOR
  // a valid instruction-fetch response must not carry unknown data
  a_invalid_instruction_fetch :
  assert property (
    @(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
  else
    $warning(
        "[l1 icache] reading invalid instructions: vaddr=%08X, data=%08X",
        icache_dreq_o.vaddr,
        icache_dreq_o.data
    );

  // a request on the last data port with any byte enable set must not carry unknown write data
  a_invalid_write_data :
  assert property (
    @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> |dcache_req_ports_i[NumPorts-1].data_be |-> (|dcache_req_ports_i[NumPorts-1].data_wdata) !== 1'hX)
  else
    $warning(
        "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
        {
          dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
        },
        dcache_req_ports_i[NumPorts-1].data_be,
        dcache_req_ports_i[NumPorts-1].data_wdata
    );

  // read data returned on every port except the last one must not be unknown
  generate
    for (genvar j = 0; j < NumPorts - 1; j++) begin
      a_invalid_read_data :
      assert property (
        @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
      else
        $warning(
            "[l1 dcache] reading invalid data on port %01d: data=%016X",
            j,
            dcache_req_ports_o[j].data_rdata
        );
    end
  endgenerate
`endif
  //pragma translate_on
endmodule  // std_cache_subsystem

View File

@ -0,0 +1,279 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 13.10.2017
// Description: Nonblocking private L1 dcache
// Non-blocking L1 data cache top level. It wires together:
//   - one cache_ctrl per request port (NumPorts of them),
//   - the miss_handler (also handles AMOs and the two AXI ports),
//   - per-way data and tag SRAMs plus one shared valid/dirty SRAM,
//   - tag_cmp, which arbitrates SRAM access between the miss handler and the
//     controllers and reports the hit way.
// Index 0 of the req/addr/gnt/tag/wdata/we/be arrays belongs to the miss
// handler, index i+1 to cache controller i.
module std_nbdcache
import std_cache_pkg::*;
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 4,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// Cache management
input logic enable_i, // from CSR
input logic flush_i, // high until acknowledged
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a LD/ST
// AMOs
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// Request ports
input dcache_req_i_t [NumPorts-1:0] req_ports_i, // request ports
output dcache_req_o_t [NumPorts-1:0] req_ports_o, // request ports
// Cache AXI refill port
output axi_req_t axi_data_o,
input axi_rsp_t axi_data_i,
output axi_req_t axi_bypass_o,
input axi_rsp_t axi_bypass_i
);
// NOTE(review): std_cache_pkg is already imported in the module header above,
// so this second import looks redundant.
import std_cache_pkg::*;
// -------------------------------
// Controller <-> Arbiter
// -------------------------------
// 1. Miss handler
// 2. PTW
// 3. Load Unit
// 4. Accelerator
// 5. Store unit
// (arrays are NumPorts+1 wide: entry 0 is the miss handler, entries 1.. the controllers)
logic [ NumPorts:0][ DCACHE_SET_ASSOC-1:0] req;
logic [ NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr;
logic [ NumPorts:0] gnt;
cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata;
logic [ NumPorts:0][ DCACHE_TAG_WIDTH-1:0] tag;
cache_line_t [ NumPorts:0] wdata;
logic [ NumPorts:0] we;
cl_be_t [ NumPorts:0] be;
logic [ DCACHE_SET_ASSOC-1:0] hit_way;
// -------------------------------
// Controller <-> Miss unit
// -------------------------------
logic [ NumPorts-1:0] busy;
logic [ NumPorts-1:0][ 55:0] mshr_addr;
logic [ NumPorts-1:0] mshr_addr_matches;
logic [ NumPorts-1:0] mshr_index_matches;
logic [ 63:0] critical_word;
logic critical_word_valid;
// miss requests are carried as flat bit vectors of the size of miss_req_t
logic [ NumPorts-1:0][ $bits(miss_req_t)-1:0] miss_req;
logic [ NumPorts-1:0] miss_gnt;
logic [ NumPorts-1:0] active_serving;
logic [ NumPorts-1:0] bypass_gnt;
logic [ NumPorts-1:0] bypass_valid;
logic [ NumPorts-1:0][ 63:0] bypass_data;
// -------------------------------
// Arbiter <-> Datram,
// -------------------------------
logic [ DCACHE_SET_ASSOC-1:0] req_ram;
logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
logic we_ram;
cache_line_t wdata_ram;
cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram;
// ------------------
// Cache Controller
// ------------------
// one controller per request port; the controller bypasses the cache while
// enable_i is low
generate
for (genvar i = 0; i < NumPorts; i++) begin : master_ports
cache_ctrl #(
.CVA6Cfg(CVA6Cfg)
) i_cache_ctrl (
.bypass_i (~enable_i),
.busy_o (busy[i]),
// from core
.req_port_i(req_ports_i[i]),
.req_port_o(req_ports_o[i]),
// to SRAM array
// (slot i+1, because slot 0 is used by the miss handler)
.req_o (req[i+1]),
.addr_o (addr[i+1]),
.gnt_i (gnt[i+1]),
.data_i (rdata),
.tag_o (tag[i+1]),
.data_o (wdata[i+1]),
.we_o (we[i+1]),
.be_o (be[i+1]),
.hit_way_i (hit_way),
.miss_req_o (miss_req[i]),
.miss_gnt_i (miss_gnt[i]),
.active_serving_i (active_serving[i]),
.critical_word_i (critical_word),
.critical_word_valid_i(critical_word_valid),
.bypass_gnt_i (bypass_gnt[i]),
.bypass_valid_i (bypass_valid[i]),
.bypass_data_i (bypass_data[i]),
.mshr_addr_o (mshr_addr[i]),
.mshr_addr_matches_i (mshr_addr_matches[i]),
.mshr_index_matches_i(mshr_index_matches[i]),
// remaining ports are connected implicitly to same-named signals
.*
);
end
endgenerate
// ------------------
// Miss Handling Unit
// ------------------
miss_handler #(
.CVA6Cfg (CVA6Cfg),
.NR_PORTS (NumPorts),
.axi_req_t(axi_req_t),
.axi_rsp_t(axi_rsp_t)
) i_miss_handler (
.flush_i (flush_i),
// busy as soon as any controller is busy
.busy_i (|busy),
// AMOs
.amo_req_i (amo_req_i),
.amo_resp_o (amo_resp_o),
.miss_req_i (miss_req),
.miss_gnt_o (miss_gnt),
.bypass_gnt_o (bypass_gnt),
.bypass_valid_o (bypass_valid),
.bypass_data_o (bypass_data),
.critical_word_o (critical_word),
.critical_word_valid_o(critical_word_valid),
.mshr_addr_i (mshr_addr),
.mshr_addr_matches_o (mshr_addr_matches),
.mshr_index_matches_o (mshr_index_matches),
.active_serving_o (active_serving),
// SRAM access of the miss handler uses slot 0
.req_o (req[0]),
.addr_o (addr[0]),
.data_i (rdata),
.be_o (be[0]),
.data_o (wdata[0]),
.we_o (we[0]),
.axi_bypass_o,
.axi_bypass_i,
.axi_data_o,
.axi_data_i,
// remaining ports are connected implicitly to same-named signals
.*
);
// the miss handler does not drive a tag for comparison, so slot 0 is tied to zero
assign tag[0] = '0;
// --------------
// Memory Arrays
// --------------
// one data SRAM and one tag SRAM per way; all ways share address, write data,
// write enable and byte enables and are selected individually through req_ram[i]
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
sram #(
.DATA_WIDTH(DCACHE_LINE_WIDTH),
.NUM_WORDS (DCACHE_NUM_WORDS)
) data_sram (
.req_i (req_ram[i]),
.rst_ni (rst_ni),
.we_i (we_ram),
// the byte-offset bits are dropped: the SRAM is addressed per cache line
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
.wuser_i('0),
.wdata_i(wdata_ram.data),
.be_i (be_ram.data),
.ruser_o(),
.rdata_o(rdata_ram[i].data),
.*
);
sram #(
.DATA_WIDTH(DCACHE_TAG_WIDTH),
.NUM_WORDS (DCACHE_NUM_WORDS)
) tag_sram (
.req_i (req_ram[i]),
.rst_ni (rst_ni),
.we_i (we_ram),
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
.wuser_i('0),
.wdata_i(wdata_ram.tag),
.be_i (be_ram.tag),
.ruser_o(),
.rdata_o(rdata_ram[i].tag),
.*
);
end
// ----------------
// Valid/Dirty Regs
// ----------------
// align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
// note: if you have an SRAM that supports flat bit enables for your target technology,
// you can use it here to save the extra 4x overhead introduced by this workaround.
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
// per way i: bit 8*i holds dirty, bit 8*i+1 holds valid.
// NOTE(review): the other bits of dirty_wdata are not driven anywhere in this module.
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign dirty_wdata[8*i] = wdata_ram.dirty;
assign dirty_wdata[8*i+1] = wdata_ram.valid;
assign rdata_ram[i].dirty = dirty_rdata[8*i];
assign rdata_ram[i].valid = dirty_rdata[8*i+1];
end
// single SRAM shared by all ways; it is accessed whenever any way is requested
sram #(
.USER_WIDTH(1),
.DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH),
.NUM_WORDS (DCACHE_NUM_WORDS)
) valid_dirty_sram (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (|req_ram),
.we_i (we_ram),
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
.wuser_i('0),
.wdata_i(dirty_wdata),
.be_i (be_ram.vldrty),
.ruser_o(),
.rdata_o(dirty_rdata)
);
// ------------------------------------------------
// Tag Comparison and memory arbitration
// ------------------------------------------------
// NumPorts + 1 requesters: the miss handler (slot 0) plus all cache controllers
tag_cmp #(
.CVA6Cfg (CVA6Cfg),
.NR_PORTS (NumPorts + 1),
.ADDR_WIDTH (DCACHE_INDEX_WIDTH),
.DCACHE_SET_ASSOC(DCACHE_SET_ASSOC)
) i_tag_cmp (
.req_i (req),
.gnt_o (gnt),
.addr_i (addr),
.wdata_i (wdata),
.we_i (we),
.be_i (be),
.rdata_o (rdata),
.tag_i (tag),
.hit_way_o(hit_way),
.req_o (req_ram),
.addr_o (addr_ram),
.wdata_o(wdata_ram),
.we_o (we_ram),
.be_o (be_ram),
.rdata_i(rdata_ram),
.*
);
// Elaboration-time sanity check: a cache line must be 2, 4, 8 or 16 AXI data
// words wide, otherwise simulation is aborted.
//pragma translate_off
initial begin
assert (DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16})
else $fatal(1, "Cache line size needs to be a power of two multiple of AxiDataWidth");
end
//pragma translate_on
endmodule

View File

@ -0,0 +1,106 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// --------------
// Tag Compare
// --------------
//
// Description: Arbitrates access to cache memories, simplified request grant protocol
// checks for hit or miss on cache
//
// Cache memory arbiter and tag comparator.
//
// Request side: the ports are scanned from index 0 upwards and the scan stops
// at the first port with a non-zero request vector. The signals of the port
// the scan stopped at (or of the last port if nobody requests) are driven to
// the memories. Every port visited by the scan, including the one it stops
// at, sees its grant asserted.
//
// Response side (one cycle later): the tag supplied by the port remembered in
// id_q is compared against the tag read from every way; a way hits when the
// tags are equal and the way's valid bit is set.
module tag_cmp #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned NR_PORTS = 3,
    parameter int unsigned ADDR_WIDTH = 64,
    parameter type l_data_t = std_cache_pkg::cache_line_t,
    parameter type l_be_t = std_cache_pkg::cl_be_t,
    parameter int unsigned DCACHE_SET_ASSOC = 8
) (
    input logic clk_i,
    input logic rst_ni,
    input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i,
    output logic [NR_PORTS-1:0] gnt_o,
    input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i,
    input l_data_t [NR_PORTS-1:0] wdata_i,
    input logic [NR_PORTS-1:0] we_i,
    input l_be_t [NR_PORTS-1:0] be_i,
    output l_data_t [DCACHE_SET_ASSOC-1:0] rdata_o,
    input logic [NR_PORTS-1:0][ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_i,  // tag in - comes one cycle later
    output logic [DCACHE_SET_ASSOC-1:0] hit_way_o,  // we've got a hit on the corresponding way
    output logic [DCACHE_SET_ASSOC-1:0] req_o,
    output logic [ ADDR_WIDTH-1:0] addr_o,
    output l_data_t wdata_o,
    output logic we_o,
    output l_be_t be_o,
    input l_data_t [DCACHE_SET_ASSOC-1:0] rdata_i
);

  // memory read data is handed back to all requesters unmodified
  assign rdata_o = rdata_i;

  // one-hot marker of the port selected in the current (d) / previous (q) cycle
  logic [NR_PORTS-1:0] id_d, id_q;
  // tag of the port that was selected in the previous cycle
  logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] sel_tag;

  always_comb begin : tag_sel
    sel_tag = '0;
    for (int unsigned port = 0; port < NR_PORTS; port++) begin
      if (id_q[port]) begin
        sel_tag = tag_i[port];
      end
    end
  end

  // per-way hit detection: equal tag and valid entry
  for (genvar way = 0; way < DCACHE_SET_ASSOC; way++) begin : tag_cmp
    assign hit_way_o[way] = (sel_tag == rdata_i[way].tag) ? rdata_i[way].valid : 1'b0;
  end

  always_comb begin
    // defaults, only relevant if the scan below does not execute at all
    req_o   = '0;
    addr_o  = '0;
    we_o    = '0;
    be_o    = '0;
    wdata_o = '0;
    gnt_o   = '0;
    id_d    = '0;
    // Request Side
    // priority scan, lowest port index first
    for (int unsigned port = 0; port < NR_PORTS; port++) begin
      // remember (one-hot) which port currently drives the memories
      id_d       = '0;
      id_d[port] = 1'b1;
      gnt_o[port] = 1'b1;
      // forward this port's signals to the memories
      req_o   = req_i[port];
      addr_o  = addr_i[port];
      we_o    = we_i[port];
      be_o    = be_i[port];
      wdata_o = wdata_i[port];
      // stop at the first port that actually requests something
      if (req_i[port]) break;
    end
`ifndef SYNTHESIS
`ifndef VERILATOR
    // assert that cache only hits on one way
    // this only needs to be checked one cycle after all ways have been requested
    onehot :
    assert property (@(posedge clk_i) disable iff (!rst_ni) &req_i |=> $onehot0(hit_way_o))
    else begin
      $fatal(1, "Hit should be one-hot encoded");
    end
`endif
`endif
  end

  // remember the selected port for the tag comparison in the following cycle
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      id_q <= '0;
    end else begin
      id_q <= id_d;
    end
  end
endmodule

View File

@ -0,0 +1,712 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 08.08.2018
// Description: adapter module to connect the L1D$ and L1I$ to a 64bit AXI bus.
//
module wt_axi_adapter
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ReqFifoDepth = 2,
parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
input logic clk_i,
input logic rst_ni,
// icache
input logic icache_data_req_i,
output logic icache_data_ack_o,
input icache_req_t icache_data_i,
// returning packets must be consumed immediately
output logic icache_rtrn_vld_o,
output icache_rtrn_t icache_rtrn_o,
// dcache
input logic dcache_data_req_i,
output logic dcache_data_ack_o,
input dcache_req_t dcache_data_i,
// returning packets must be consumed immediately
output logic dcache_rtrn_vld_o,
output dcache_rtrn_t dcache_rtrn_o,
// AXI port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i,
// Invalidations
input logic [63:0] inval_addr_i,
input logic inval_valid_i,
output logic inval_ready_o
);
// support up to 512bit cache lines
localparam AxiNumWords = (ariane_pkg::ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH > ariane_pkg::DCACHE_LINE_WIDTH) +
(ariane_pkg::DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH <= ariane_pkg::DCACHE_LINE_WIDTH) ;
localparam MaxNumWords = $clog2(CVA6Cfg.AxiDataWidth / 8);
localparam AxiRdBlenIcache = ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
localparam AxiRdBlenDcache = ariane_pkg::DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
///////////////////////////////////////////////////////
// request path
///////////////////////////////////////////////////////
icache_req_t icache_data;
logic icache_data_full, icache_data_empty;
dcache_req_t dcache_data;
logic dcache_data_full, dcache_data_empty;
logic [1:0] arb_req, arb_ack;
logic arb_idx, arb_gnt;
logic axi_rd_req, axi_rd_gnt;
logic axi_wr_req, axi_wr_gnt;
logic axi_wr_valid, axi_rd_valid, axi_rd_rdy, axi_wr_rdy;
logic axi_rd_lock, axi_wr_lock, axi_rd_exokay, axi_wr_exokay, wr_exokay;
logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr, axi_wr_addr;
logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen;
logic [2:0] axi_rd_size, axi_wr_size;
logic [CVA6Cfg.AxiIdWidth-1:0]
axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out, wr_id_out;
logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] axi_wr_data;
logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] axi_wr_user;
logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
logic [CVA6Cfg.AxiUserWidth-1:0] axi_rd_user;
logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] axi_wr_be;
logic [5:0] axi_wr_atop;
logic invalidate;
logic [$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] amo_off_d, amo_off_q;
// AMO generates r beat
logic amo_gen_r_d, amo_gen_r_q;
logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] icache_rtrn_tid_d, icache_rtrn_tid_q;
logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_tid_d, dcache_rtrn_tid_q;
logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_rd_tid, dcache_rtrn_wr_tid;
logic dcache_rd_pop, dcache_wr_pop;
logic icache_rd_full, icache_rd_empty;
logic dcache_rd_full, dcache_rd_empty;
logic dcache_wr_full, dcache_wr_empty;
assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
// arbiter
assign arb_req = {
~(dcache_data_empty | dcache_wr_full | dcache_rd_full), ~(icache_data_empty | icache_rd_full)
};
assign arb_gnt = axi_rd_gnt | axi_wr_gnt;
rr_arb_tree #(
.NumIn (2),
.DataWidth(1),
.AxiVldRdy(1'b1),
.LockIn (1'b1)
) i_rr_arb_tree (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.rr_i ('0),
.req_i (arb_req),
.gnt_o (arb_ack),
.data_i ('0),
.gnt_i (arb_gnt),
.req_o (),
.data_o (),
.idx_o (arb_idx)
);
// request side
// Combinational translation of the arbitrated cache request into AXI shim
// read/write requests. arb_idx == 0 selects the icache request, arb_idx == 1
// the dcache request. All outputs get defaults first; the decoding further
// down overrides them, so statement order matters in this block.
always_comb begin : p_axi_req
// write channel
// ID is the zero-extended arbiter index
axi_wr_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx};
// replicate the store data so that it fills the whole AXI data word
axi_wr_data[0] = {(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.data}};
axi_wr_user[0] = dcache_data.user;
// Cast to AXI address width
axi_wr_addr = {{CVA6Cfg.AxiAddrWidth-riscv::PLEN{1'b0}}, dcache_data.paddr};
axi_wr_size = dcache_data.size;
axi_wr_req = 1'b0;
axi_wr_blen = '0;// single word writes
axi_wr_be = '0;
axi_wr_lock = '0;
axi_wr_atop = '0;
// hold the AMO bookkeeping registers unless an atomic is decoded below
amo_off_d = amo_off_q;
amo_gen_r_d = amo_gen_r_q;
// read channel
axi_rd_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx};
axi_rd_req = 1'b0;
axi_rd_lock = '0;
axi_rd_blen = '0;
// this assigns the whole axi_wr_user array and therefore overrides the
// axi_wr_user[0] default above: the user bits are placed in the lower or
// the upper part depending on paddr[2]
// NOTE(review): the literal 64 looks like an assumed 64 bit wide layout - confirm
if (dcache_data.paddr[2] == 1'b0) begin
axi_wr_user = {{64 - CVA6Cfg.AxiUserWidth{1'b0}}, dcache_data.user};
end else begin
axi_wr_user = {dcache_data.user, {64 - CVA6Cfg.AxiUserWidth{1'b0}}};
end
// arbiter mux
if (arb_idx) begin
// dcache read: address and size come from the dcache request
// Cast to AXI address width
axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, dcache_data.paddr};
// If dcache_data.size MSB is set, we want to read as much as possible
axi_rd_size = dcache_data.size[2] ? MaxNumWords[2:0] : dcache_data.size;
if (dcache_data.size[2]) begin
axi_rd_blen = AxiRdBlenDcache[$clog2(AxiNumWords)-1:0];
end
end else begin
// icache read
// Cast to AXI address width
axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, icache_data.paddr};
axi_rd_size = MaxNumWords[2:0]; // always request max number of words in case of ifill
// burst length is only set when the nc flag of the request is clear
if (!icache_data.nc) begin
axi_rd_blen = AxiRdBlenIcache[$clog2(AxiNumWords)-1:0];
end
end
// signal that an invalidation message
// needs to be generated
invalidate = 1'b0;
// decode message type
// requests are only raised while at least one source is requesting
if (|arb_req) begin
if (arb_idx == 0) begin
//////////////////////////////////////
// IMISS
axi_rd_req = 1'b1;
//////////////////////////////////////
end else begin
unique case (dcache_data.rtype)
//////////////////////////////////////
wt_cache_pkg::DCACHE_LOAD_REQ: begin
axi_rd_req = 1'b1;
end
//////////////////////////////////////
wt_cache_pkg::DCACHE_STORE_REQ: begin
axi_wr_req = 1'b1;
axi_wr_be = '0;
// set the byte enables of the first word according to access size and
// byte offset of the address within the AXI data word
unique case (dcache_data.size[1:0])
2'b00:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
2'b01:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = '1; // hword
2'b10:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = '1; // word
default:
if (riscv::IS_XLEN64)
axi_wr_be[0][dcache_data.paddr[$clog2(
CVA6Cfg.AxiDataWidth/8
)-1:0]+:8] = '1; // dword
endcase
end
//////////////////////////////////////
wt_cache_pkg::DCACHE_ATOMIC_REQ: begin
if (CVA6Cfg.RVA) begin
// default
// push back an invalidation here.
// since we only keep one read tx in flight, and since
// the dcache drains all writes/reads before executing
// an atomic, this is safe.
invalidate = arb_gnt;
axi_wr_req = 1'b1;
axi_wr_be = '0;
// same byte enable decoding as for stores, but the dword case is not
// guarded by riscv::IS_XLEN64 here
unique case (dcache_data.size[1:0])
2'b00:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
2'b01:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] =
'1; // hword
2'b10:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] =
'1; // word
default:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:8] =
'1; // dword
endcase
// by default an atomic is expected to return a read (r) beat; SC clears this below
amo_gen_r_d = 1'b1;
// need to use a separate ID here, so concat an additional bit
axi_wr_id_in[1] = 1'b1;
unique case (dcache_data.amo_op)
AMO_LR: begin
// LR is issued as an exclusive read instead of a write
axi_rd_lock = 1'b1;
axi_rd_req = 1'b1;
axi_rd_id_in[1] = 1'b1;
// tie to zero in this special case
axi_wr_req = 1'b0;
axi_wr_be = '0;
end
AMO_SC: begin
// SC is issued as an exclusive write and does not generate an r beat
axi_wr_lock = 1'b1;
amo_gen_r_d = 1'b0;
// needed to properly encode success. store the result at offset within the returned
// AXI data word aligned with the requested word size.
amo_off_d = dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-
1:0] & ~((1 << dcache_data.size[1:0]) - 1);
end
// RISC-V atops have a load semantic
AMO_SWAP: axi_wr_atop = axi_pkg::ATOP_ATOMICSWAP;
AMO_ADD:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD
};
AMO_AND: begin
// in this case we need to invert the data to get a "CLR"
// (the user bits are inverted as well)
axi_wr_data[0] = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.data}};
axi_wr_user = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.user}};
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR
};
end
AMO_OR:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET
};
AMO_XOR:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR
};
AMO_MAX:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX
};
AMO_MAXU:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX
};
AMO_MIN:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN
};
AMO_MINU:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN
};
default: ; // Do nothing
endcase
end
end
default: ; // Do nothing
//////////////////////////////////////
endcase
end
end
end
// Request fifos: one per cache. A request is pushed in the cycle it is
// acknowledged towards the cache and popped when the arbiter acknowledges
// the corresponding input (bit 0: icache, bit 1: dcache).
fifo_v3 #(
    .dtype(icache_req_t),
    .DEPTH(ReqFifoDepth)
) i_icache_data_fifo (
    .clk_i     (clk_i),
    .rst_ni    (rst_ni),
    .flush_i   (1'b0),
    .testmode_i(1'b0),
    // status
    .full_o    (icache_data_full),
    .empty_o   (icache_data_empty),
    .usage_o   (),
    // write side
    .push_i    (icache_data_ack_o),
    .data_i    (icache_data_i),
    // read side
    .pop_i     (arb_ack[0]),
    .data_o    (icache_data)
);

fifo_v3 #(
    .dtype(dcache_req_t),
    .DEPTH(ReqFifoDepth)
) i_dcache_data_fifo (
    .clk_i     (clk_i),
    .rst_ni    (rst_ni),
    .flush_i   (1'b0),
    .testmode_i(1'b0),
    // status
    .full_o    (dcache_data_full),
    .empty_o   (dcache_data_empty),
    .usage_o   (),
    // write side
    .push_i    (dcache_data_ack_o),
    .data_i    (dcache_data_i),
    // read side
    .pop_i     (arb_ack[1]),
    .data_o    (dcache_data)
);
///////////////////////////////////////////////////////
// meta info feedback fifos
///////////////////////////////////////////////////////
// These fifos remember the transaction id (tid) of every request handed to
// the AXI shim, so that it can be returned together with the response.
logic icache_rtrn_rd_en;
logic dcache_rtrn_rd_en;
logic icache_rtrn_vld_d, icache_rtrn_vld_q;
logic dcache_rtrn_vld_d, dcache_rtrn_vld_q;

// tids of icache reads
fifo_v3 #(
    .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
    .DEPTH     (MetaFifoDepth)
) i_rd_icache_id (
    .clk_i     (clk_i),
    .rst_ni    (rst_ni),
    .flush_i   (1'b0),
    .testmode_i(1'b0),
    .full_o    (icache_rd_full),
    .empty_o   (icache_rd_empty),
    .usage_o   (),
    // pushed when the icache request is granted on the read channel
    .push_i    (arb_ack[0] & axi_rd_gnt),
    .data_i    (icache_data.tid),
    // popped when a complete icache response is returned
    .pop_i     (icache_rtrn_vld_d),
    .data_o    (icache_rtrn_tid_d)
);

// tids of dcache requests issued over the read channel
fifo_v3 #(
    .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
    .DEPTH     (MetaFifoDepth)
) i_rd_dcache_id (
    .clk_i     (clk_i),
    .rst_ni    (rst_ni),
    .flush_i   (1'b0),
    .testmode_i(1'b0),
    .full_o    (dcache_rd_full),
    .empty_o   (dcache_rd_empty),
    .usage_o   (),
    .push_i    (arb_ack[1] & axi_rd_gnt),
    .data_i    (dcache_data.tid),
    .pop_i     (dcache_rd_pop),
    .data_o    (dcache_rtrn_rd_tid)
);

// tids of dcache requests issued over the write channel
fifo_v3 #(
    .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
    .DEPTH     (MetaFifoDepth)
) i_wr_dcache_id (
    .clk_i     (clk_i),
    .rst_ni    (rst_ni),
    .flush_i   (1'b0),
    .testmode_i(1'b0),
    .full_o    (dcache_wr_full),
    .empty_o   (dcache_wr_empty),
    .usage_o   (),
    .push_i    (arb_ack[1] & axi_wr_gnt),
    .data_i    (dcache_data.tid),
    .pop_i     (dcache_wr_pop),
    .data_o    (dcache_rtrn_wr_tid)
);

// select correct tid to return: the write fifo entry whenever it is being
// popped, the read fifo entry otherwise
assign dcache_rtrn_tid_d = dcache_wr_pop ? dcache_rtrn_wr_tid : dcache_rtrn_rd_tid;
///////////////////////////////////////////////////////
// return path
///////////////////////////////////////////////////////
// Write responses are buffered in a fall-through fifo that stores the
// EXOKAY flag together with the response ID.
logic b_full, b_empty;
logic b_push, b_pop;

// responses are accepted from the shim as long as the buffer has space
assign axi_wr_rdy = ~b_full;
assign b_push     = axi_wr_valid & axi_wr_rdy;

fifo_v3 #(
    .DATA_WIDTH  (CVA6Cfg.AxiIdWidth + 1),
    .DEPTH       (MetaFifoDepth),
    .FALL_THROUGH(1'b1)
) i_b_fifo (
    .clk_i     (clk_i),
    .rst_ni    (rst_ni),
    .flush_i   (1'b0),
    .testmode_i(1'b0),
    .full_o    (b_full),
    .empty_o   (b_empty),
    .usage_o   (),
    // write side: response coming from the AXI shim
    .push_i    (b_push),
    .data_i    ({axi_wr_exokay, axi_wr_id_out}),
    // read side: consumed by the return path decoder
    .pop_i     (b_pop),
    .data_o    ({wr_exokay, wr_id_out})
);
// buffer read responses in shift regs
// *_first_q is 1 when the next accepted read beat is the first beat of a
// response (it is loaded with axi_rd_last on every accepted beat)
logic icache_first_d, icache_first_q, dcache_first_d, dcache_first_q;
// shift registers collecting the user bits and the data of a cache line,
// one entry per AXI beat
logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
icache_rd_shift_user_d, icache_rd_shift_user_q;
logic [DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
dcache_rd_shift_user_d, dcache_rd_shift_user_q;
logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
icache_rd_shift_d, icache_rd_shift_q;
logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
dcache_rd_shift_d, dcache_rd_shift_q;
// type and invalidation vector of the message returned to the dcache
wt_cache_pkg::dcache_in_t dcache_rtrn_type_d, dcache_rtrn_type_q;
wt_cache_pkg::dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q;
// dcache_sc_rtrn: an SC result is returned in this cycle (set by the return decoder)
logic dcache_sc_rtrn, axi_rd_last;
// Drives the return ports towards both caches from registers and computes
// the next state of the read shift registers.
always_comb begin : p_axi_rtrn_shift
// output directly from regs
icache_rtrn_o = '0;
icache_rtrn_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
icache_rtrn_o.tid = icache_rtrn_tid_q;
icache_rtrn_o.data = icache_rd_shift_q;
icache_rtrn_o.user = icache_rd_shift_user_q;
icache_rtrn_vld_o = icache_rtrn_vld_q;
dcache_rtrn_o = '0;
dcache_rtrn_o.rtype = dcache_rtrn_type_q;
dcache_rtrn_o.inv = dcache_rtrn_inv_q;
dcache_rtrn_o.tid = dcache_rtrn_tid_q;
dcache_rtrn_o.data = dcache_rd_shift_q;
dcache_rtrn_o.user = dcache_rd_shift_user_q;
dcache_rtrn_vld_o = dcache_rtrn_vld_q;
// read shift registers
// hold by default
icache_rd_shift_d = icache_rd_shift_q;
icache_rd_shift_user_d = icache_rd_shift_user_q;
dcache_rd_shift_d = dcache_rd_shift_q;
dcache_rd_shift_user_d = dcache_rd_shift_user_q;
icache_first_d = icache_first_q;
dcache_first_d = dcache_first_q;
// icache beat accepted
if (icache_rtrn_rd_en) begin
icache_first_d = axi_rd_last;
if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
// the line fits into a single beat, nothing to shift
icache_rd_shift_d[0] = axi_rd_data;
end else begin
// the new beat enters at the highest index, older beats move down by one
icache_rd_shift_d = {
axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
};
end
icache_rd_shift_user_d = {
axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
};
// if this is a single word transaction, we need to make sure that word is placed at offset 0
// (done for every first beat; later beats of a burst shift it further down)
if (icache_first_q) begin
icache_rd_shift_d[0] = axi_rd_data;
icache_rd_shift_user_d[0] = axi_rd_user;
end
end
// dcache beat accepted, same scheme as for the icache
if (dcache_rtrn_rd_en) begin
dcache_first_d = axi_rd_last;
if (DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
dcache_rd_shift_d[0] = axi_rd_data;
end else begin
dcache_rd_shift_d = {
axi_rd_data, dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
};
end
dcache_rd_shift_user_d = {
axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
};
// if this is a single word transaction, we need to make sure that word is placed at offset 0
if (dcache_first_q) begin
dcache_rd_shift_d[0] = axi_rd_data;
dcache_rd_shift_user_d[0] = axi_rd_user;
end
end else if (CVA6Cfg.RVA && dcache_sc_rtrn) begin
// encode lr/sc success
// word 0 is cleared and a single bit at byte offset amo_off_q is written:
// 0 if the write response was EXOKAY, 1 otherwise
dcache_rd_shift_d[0] = '0;
dcache_rd_shift_user_d[0] = '0;
dcache_rd_shift_d[0][amo_off_q*8] = (wr_exokay) ? '0 : 1'b1;
dcache_rd_shift_user_d[0][amo_off_q*8] = (wr_exokay) ? '0 : 1'b1;
end
end
// decode virtual read channels of icache
// Combinational decoder of the return path. It decides which response is
// forwarded in this cycle and generates the capture enables, valid flags and
// fifo pop strobes. Read ID 0 belongs to the icache, IDs with bit 0 set belong
// to the dcache, and bit 1 of an ID marks an atomic transaction.
always_comb begin : p_axi_rtrn_decode
// we are not ready when invalidating
// note: b's are buffered separately
axi_rd_rdy = ~invalidate;
icache_rtrn_rd_en = 1'b0;
icache_rtrn_vld_d = 1'b0;
// decode virtual icache channel,
// this is independent on dcache decoding below
if (axi_rd_valid && axi_rd_id_out == 0 && axi_rd_rdy) begin
icache_rtrn_rd_en = 1'b1;
// the response becomes valid with the last beat
icache_rtrn_vld_d = axi_rd_last;
end
// defaults of the dcache return channel
dcache_rtrn_rd_en = 1'b0;
dcache_rtrn_vld_d = 1'b0;
dcache_rd_pop = 1'b0;
dcache_wr_pop = 1'b0;
dcache_rtrn_inv_d = '0;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_LOAD_ACK;
b_pop = 1'b0;
dcache_sc_rtrn = 1'b0;
// the chain below is prioritized: external invalidation, self invalidation
// of an atomic, read response, buffered write response
// External invalidation requests (from coprocessor). This is safe as
// there are no other transactions when a coprocessor has pending stores.
// NOTE(review): axi_rd_rdy is not deasserted for inval_valid_i, so a dcache read
// beat presented in the same cycle would not be captured by this decoder; this
// relies on the assumption stated above - confirm
inval_ready_o = 1'b0;
if (inval_valid_i) begin
inval_ready_o = 1'b1;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ;
dcache_rtrn_vld_d = 1'b1;
dcache_rtrn_inv_d.all = 1'b1;
dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
//////////////////////////////////////
// dcache needs some special treatment
// for arbitration and decoding of atomics
//////////////////////////////////////
// this is safe, there is no other read tx in flight than this atomic.
// note that this self invalidation is handled in this way due to the
// write-through cache architecture, which is aligned with the openpiton
// cache subsystem.
end else if (CVA6Cfg.RVA && invalidate) begin
// self invalidation with the index of the atomic request that is being granted
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ;
dcache_rtrn_vld_d = 1'b1;
dcache_rtrn_inv_d.all = 1'b1;
dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
//////////////////////////////////////
// read responses
// note that in case of atomics, the dcache sequentializes requests and
// guarantees that there are no other pending transactions in flight
end else if (axi_rd_valid && axi_rd_id_out[0] && axi_rd_rdy) begin
dcache_rtrn_rd_en = 1'b1;
dcache_rtrn_vld_d = axi_rd_last;
// if this was an atomic op
if (CVA6Cfg.RVA && axi_rd_id_out[1]) begin
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK;
// check if transaction was issued over write channel and pop that ID
if (!dcache_wr_empty) begin
dcache_wr_pop = axi_rd_last;
// if this is not the case, there MUST be an id in the read channel (LR)
end else begin
dcache_rd_pop = axi_rd_last;
end
end else begin
// regular load: the tid is taken from the read tid fifo with the last beat
dcache_rd_pop = axi_rd_last;
end
//////////////////////////////////////
// write responses, check b fifo
end else if (!b_empty) begin
b_pop = 1'b1;
// this was an atomic
if (CVA6Cfg.RVA && wr_id_out[1]) begin
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK;
// silently discard b response if we already popped the fifo
// with a R beat (iff the amo transaction generated an R beat)
if (!amo_gen_r_q) begin
// no r beat was generated: the b response itself is returned to the dcache
// and the shift register logic encodes the SC result
dcache_rtrn_vld_d = 1'b1;
dcache_wr_pop = 1'b1;
dcache_sc_rtrn = 1'b1;
end
end else begin
// regular response
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_STORE_ACK;
dcache_rtrn_vld_d = 1'b1;
dcache_wr_pop = 1'b1;
end
end
//////////////////////////////////////
end
// remote invalidations are not supported yet (this needs a cache coherence protocol)
// note that the atomic transactions would also need a "master exclusive monitor" in that case
// assign icache_rtrn_o.inv.idx = '0;
// assign icache_rtrn_o.inv.way = '0;
// assign icache_rtrn_o.inv.vld = '0;
// assign icache_rtrn_o.inv.all = '0;
// assign dcache_rtrn_o.inv.idx = '0;
// assign dcache_rtrn_o.inv.way = '0;
// assign dcache_rtrn_o.inv.vld = '0;
// assign dcache_rtrn_o.inv.all = '0;
// Registers of the return path. Every *_q register simply follows its *_d
// counterpart; on reset both "first beat" flags start at 1, the dcache return
// type starts as DCACHE_LOAD_ACK and everything else is cleared.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
  if (!rst_ni) begin
    // icache return channel
    icache_first_q         <= 1'b1;
    icache_rtrn_vld_q      <= '0;
    icache_rtrn_tid_q      <= '0;
    icache_rd_shift_q      <= '0;
    icache_rd_shift_user_q <= '0;
    // dcache return channel
    dcache_first_q         <= 1'b1;
    dcache_rtrn_vld_q      <= '0;
    dcache_rtrn_tid_q      <= '0;
    dcache_rtrn_type_q     <= wt_cache_pkg::DCACHE_LOAD_ACK;
    dcache_rtrn_inv_q      <= '0;
    dcache_rd_shift_q      <= '0;
    dcache_rd_shift_user_q <= '0;
    // AMO bookkeeping
    amo_off_q              <= '0;
    amo_gen_r_q            <= 1'b0;
  end else begin
    // icache return channel
    icache_first_q         <= icache_first_d;
    icache_rtrn_vld_q      <= icache_rtrn_vld_d;
    icache_rtrn_tid_q      <= icache_rtrn_tid_d;
    icache_rd_shift_q      <= icache_rd_shift_d;
    icache_rd_shift_user_q <= icache_rd_shift_user_d;
    // dcache return channel
    dcache_first_q         <= dcache_first_d;
    dcache_rtrn_vld_q      <= dcache_rtrn_vld_d;
    dcache_rtrn_tid_q      <= dcache_rtrn_tid_d;
    dcache_rtrn_type_q     <= dcache_rtrn_type_d;
    dcache_rtrn_inv_q      <= dcache_rtrn_inv_d;
    dcache_rd_shift_q      <= dcache_rd_shift_d;
    dcache_rd_shift_user_q <= dcache_rd_shift_user_d;
    // AMO bookkeeping
    amo_off_q              <= amo_off_d;
    amo_gen_r_q            <= amo_gen_r_d;
  end
end
///////////////////////////////////////////////////////
// axi protocol shim
///////////////////////////////////////////////////////
// Converts the simple read/write request interface used in this adapter
// into the AXI request/response structs.
axi_shim #(
    .CVA6Cfg    (CVA6Cfg),
    .AxiNumWords(AxiNumWords),
    .axi_req_t  (axi_req_t),
    .axi_rsp_t  (axi_rsp_t)
) i_axi_shim (
    .clk_i      (clk_i),
    .rst_ni     (rst_ni),
    // read request
    .rd_req_i   (axi_rd_req),
    .rd_gnt_o   (axi_rd_gnt),
    .rd_addr_i  (axi_rd_addr),
    .rd_blen_i  (axi_rd_blen),
    .rd_size_i  (axi_rd_size),
    .rd_id_i    (axi_rd_id_in),
    .rd_lock_i  (axi_rd_lock),
    // read response
    .rd_rdy_i   (axi_rd_rdy),
    .rd_valid_o (axi_rd_valid),
    .rd_last_o  (axi_rd_last),
    .rd_data_o  (axi_rd_data),
    .rd_user_o  (axi_rd_user),
    .rd_id_o    (axi_rd_id_out),
    .rd_exokay_o(axi_rd_exokay),
    // write request
    .wr_req_i   (axi_wr_req),
    .wr_gnt_o   (axi_wr_gnt),
    .wr_addr_i  (axi_wr_addr),
    .wr_data_i  (axi_wr_data),
    .wr_user_i  (axi_wr_user),
    .wr_be_i    (axi_wr_be),
    .wr_blen_i  (axi_wr_blen),
    .wr_size_i  (axi_wr_size),
    .wr_id_i    (axi_wr_id_in),
    .wr_lock_i  (axi_wr_lock),
    .wr_atop_i  (axi_wr_atop),
    // write response
    .wr_rdy_i   (axi_wr_rdy),
    .wr_valid_o (axi_wr_valid),
    .wr_id_o    (axi_wr_id_out),
    .wr_exokay_o(axi_wr_exokay),
    // AXI port
    .axi_req_o  (axi_req_o),
    .axi_resp_i (axi_resp_i)
);
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
// (no assertions are defined here at the moment)
`endif
//pragma translate_on
endmodule // wt_axi_adapter

View File

@ -0,0 +1,233 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Ariane cache subsystem that is compatible with the OpenPiton
// coherent memory system.
//
// Define PITON_ARIANE if you want to use this cache.
// Define DCACHE_TYPE if you want to use this cache
// with a standard 64 bit AXI interface instead of the OpenPiton
// L1.5 interface.
// Write-through cache subsystem wrapper.
//
// Instantiates the instruction cache (cva6_icache), the write-through data
// cache (wt_dcache) and one memory side adapter that connects both caches to
// the single noc_req_o / noc_resp_i port. The adapter is wt_l15_adapter when
// PITON_ARIANE is defined and wt_axi_adapter otherwise.
//
// Parameters:
//   CVA6Cfg    - core configuration, forwarded to all sub-modules
//   NumPorts   - number of dcache request ports; also forwarded to wt_dcache
//   noc_req_t  - type of the memory side request port
//   noc_resp_t - type of the memory side response port
module wt_cache_subsystem
  import ariane_pkg::*;
  import wt_cache_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned NumPorts = 4,
    parameter type noc_req_t = logic,
    parameter type noc_resp_t = logic
) (
    input logic clk_i,
    input logic rst_ni,
    // I$
    input logic icache_en_i,  // enable icache (or bypass e.g: in debug mode)
    input logic icache_flush_i,  // flush the icache, flush and kill have to be asserted together
    output logic icache_miss_o,  // to performance counter
    // address translation requests
    input icache_areq_t icache_areq_i,  // to/from frontend
    output icache_arsp_t icache_areq_o,
    // data requests
    input icache_dreq_t icache_dreq_i,  // to/from frontend
    output icache_drsp_t icache_dreq_o,
    // D$
    // Cache management
    input logic dcache_enable_i,  // from CSR
    input logic dcache_flush_i,  // high until acknowledged
    output logic dcache_flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
    output logic dcache_miss_o,  // we missed on a ld/st
    // For Performance Counter
    output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
    // AMO interface
    input amo_req_t dcache_amo_req_i,
    output amo_resp_t dcache_amo_resp_o,
    // Request ports
    input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i,  // to/from LSU
    output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o,  // to/from LSU
    // writebuffer status
    output logic wbuffer_empty_o,
    output logic wbuffer_not_ni_o,
    // memory side
    output noc_req_t noc_req_o,
    input noc_resp_t noc_resp_i,
    // Invalidations
    input logic [63:0] inval_addr_i,
    input logic inval_valid_i,
    output logic inval_ready_o
    // TODO: interrupt interface
);

  // icache <-> adapter
  logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld;
  wt_cache_pkg::icache_req_t  icache_adapter;
  wt_cache_pkg::icache_rtrn_t adapter_icache;

  // dcache <-> adapter
  logic dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld;
  wt_cache_pkg::dcache_req_t  dcache_adapter;
  wt_cache_pkg::dcache_rtrn_t adapter_dcache;

  cva6_icache #(
      // use ID 0 for icache reads
      .CVA6Cfg(CVA6Cfg),
      .RdTxId (0)
  ) i_cva6_icache (
      .clk_i         (clk_i),
      .rst_ni        (rst_ni),
      .flush_i       (icache_flush_i),
      .en_i          (icache_en_i),
      .miss_o        (icache_miss_o),
      .areq_i        (icache_areq_i),
      .areq_o        (icache_areq_o),
      .dreq_i        (icache_dreq_i),
      .dreq_o        (icache_dreq_o),
      .mem_rtrn_vld_i(adapter_icache_rtrn_vld),
      .mem_rtrn_i    (adapter_icache),
      .mem_data_req_o(icache_adapter_data_req),
      .mem_data_ack_i(adapter_icache_data_ack),
      .mem_data_o    (icache_adapter)
  );

  // Note:
  // Ports 0/1 for PTW and LD unit are read only.
  // they have equal prio and are RR arbited
  // Port 2 is write only and goes into the merging write buffer
  wt_dcache #(
      .CVA6Cfg  (CVA6Cfg),
      // forward the port count so that the request port arrays of the dcache
      // always have the same size as the ports of this module
      .NumPorts (NumPorts),
      // use ID 1 for dcache reads and amos. note that the writebuffer
      // uses all IDs up to DCACHE_MAX_TX-1 for write transactions.
      .RdAmoTxId(1)
  ) i_wt_dcache (
      .clk_i           (clk_i),
      .rst_ni          (rst_ni),
      .enable_i        (dcache_enable_i),
      .flush_i         (dcache_flush_i),
      .flush_ack_o     (dcache_flush_ack_o),
      .miss_o          (dcache_miss_o),
      .wbuffer_empty_o (wbuffer_empty_o),
      .wbuffer_not_ni_o(wbuffer_not_ni_o),
      .amo_req_i       (dcache_amo_req_i),
      .amo_resp_o      (dcache_amo_resp_o),
      .req_ports_i     (dcache_req_ports_i),
      .req_ports_o     (dcache_req_ports_o),
      .miss_vld_bits_o (miss_vld_bits_o),
      .mem_rtrn_vld_i  (adapter_dcache_rtrn_vld),
      .mem_rtrn_i      (adapter_dcache),
      .mem_data_req_o  (dcache_adapter_data_req),
      .mem_data_ack_i  (adapter_dcache_data_ack),
      .mem_data_o      (dcache_adapter)
  );

  ///////////////////////////////////////////////////////
  // memory plumbing, either use 64bit AXI port or native
  // L15 cache interface (derived from OpenSPARC CCX).
  ///////////////////////////////////////////////////////
`ifdef PITON_ARIANE
  // NOTE(review): no invalidation ports are connected to this adapter, so
  // inval_ready_o is not driven in the PITON_ARIANE configuration - confirm
  // that this is intended.
  wt_l15_adapter #(
      .CVA6Cfg(CVA6Cfg)
  ) i_adapter (
      .clk_i            (clk_i),
      .rst_ni           (rst_ni),
      .icache_data_req_i(icache_adapter_data_req),
      .icache_data_ack_o(adapter_icache_data_ack),
      .icache_data_i    (icache_adapter),
      .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
      .icache_rtrn_o    (adapter_icache),
      .dcache_data_req_i(dcache_adapter_data_req),
      .dcache_data_ack_o(adapter_dcache_data_ack),
      .dcache_data_i    (dcache_adapter),
      .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
      .dcache_rtrn_o    (adapter_dcache),
      .l15_req_o        (noc_req_o),
      .l15_rtrn_i       (noc_resp_i)
  );
`else
  wt_axi_adapter #(
      .CVA6Cfg  (CVA6Cfg),
      .axi_req_t(noc_req_t),
      .axi_rsp_t(noc_resp_t)
  ) i_adapter (
      .clk_i            (clk_i),
      .rst_ni           (rst_ni),
      .icache_data_req_i(icache_adapter_data_req),
      .icache_data_ack_o(adapter_icache_data_ack),
      .icache_data_i    (icache_adapter),
      .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
      .icache_rtrn_o    (adapter_icache),
      .dcache_data_req_i(dcache_adapter_data_req),
      .dcache_data_ack_o(adapter_dcache_data_ack),
      .dcache_data_i    (dcache_adapter),
      .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
      .dcache_rtrn_o    (adapter_dcache),
      .axi_req_o        (noc_req_o),
      .axi_resp_i       (noc_resp_i),
      .inval_addr_i     (inval_addr_i),
      .inval_valid_i    (inval_valid_i),
      .inval_ready_o    (inval_ready_o)
  );
`endif

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////
  // The checks below warn about unknown (X) values in fetched instructions,
  // in enabled bytes of store data and in returned load data. $warning takes
  // no finish number (only $fatal does), so the format string is its first
  // argument.
  //pragma translate_off
`ifndef VERILATOR
  a_invalid_instruction_fetch :
  assert property (
    @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
  else
    $warning(
        "[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
        icache_dreq_o.vaddr,
        icache_dreq_o.data
    );

  // one assertion per byte lane of the store port (the last request port)
  for (genvar j = 0; j < riscv::XLEN / 8; j++) begin : gen_invalid_write_assertion
    a_invalid_write_data :
    assert property (
      @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> dcache_req_ports_i[NumPorts-1].data_be[j] |-> (|dcache_req_ports_i[NumPorts-1].data_wdata[j*8+:8] !== 1'hX))
    else
      $warning(
          "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X, databe=%016X",
          {
            dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
          },
          dcache_req_ports_i[NumPorts-1].data_be,
          dcache_req_ports_i[NumPorts-1].data_wdata,
          dcache_req_ports_i[NumPorts-1].data_be & dcache_req_ports_i[NumPorts-1].data_wdata
      );
  end

  // one assertion per read port (all request ports except the last one)
  for (genvar j = 0; j < NumPorts - 1; j++) begin : gen_assertion
    a_invalid_read_data :
    assert property (
      @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
    else
      $warning(
          "[l1 dcache] reading invalid data on port %01d: data=%016X",
          j,
          dcache_req_ports_o[j].data_rdata
      );
  end
`endif
  //pragma translate_on
endmodule  // wt_cache_subsystem

View File

@ -0,0 +1,360 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: Write-Through Data cache that is compatible with openpiton.
module wt_dcache
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 4, // number of miss ports
// ID to be used for read and AMO transactions.
// note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions
parameter logic [CACHE_ID_WIDTH-1:0] RdAmoTxId = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// Cache management
input logic enable_i, // from CSR
input logic flush_i, // high until acknowledged
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
output logic wbuffer_empty_o,
output logic wbuffer_not_ni_o,
// AMO interface
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// Request ports
input dcache_req_i_t [NumPorts-1:0] req_ports_i,
output dcache_req_o_t [NumPorts-1:0] req_ports_o,
output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
input logic mem_rtrn_vld_i,
input dcache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output dcache_req_t mem_data_o
);
// miss unit <-> read controllers
logic cache_en;
// miss unit <-> memory
logic wr_cl_vld;
logic wr_cl_nc;
logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we;
logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag;
logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx;
logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off;
logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data;
logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user;
logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be;
logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits;
logic [ DCACHE_SET_ASSOC-1:0] wr_req;
logic wr_ack;
logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_idx;
logic [ DCACHE_OFFSET_WIDTH-1:0] wr_off;
riscv::xlen_t wr_data;
logic [ (riscv::XLEN/8)-1:0] wr_data_be;
logic [ DCACHE_USER_WIDTH-1:0] wr_user;
// miss unit <-> controllers/wbuffer
logic [ NumPorts-1:0] miss_req;
logic [ NumPorts-1:0] miss_ack;
logic [ NumPorts-1:0] miss_nc;
logic [ NumPorts-1:0] miss_we;
logic [ NumPorts-1:0][ riscv::XLEN-1:0] miss_wdata;
logic [ NumPorts-1:0][ DCACHE_USER_WIDTH-1:0] miss_wuser;
logic [ NumPorts-1:0][ riscv::PLEN-1:0] miss_paddr;
logic [ NumPorts-1:0][ 2:0] miss_size;
logic [ NumPorts-1:0][ CACHE_ID_WIDTH-1:0] miss_id;
logic [ NumPorts-1:0] miss_replay;
logic [ NumPorts-1:0] miss_rtrn_vld;
logic [ CACHE_ID_WIDTH-1:0] miss_rtrn_id;
// memory <-> read controllers/miss unit
logic [ NumPorts-1:0] rd_prio;
logic [ NumPorts-1:0] rd_tag_only;
logic [ NumPorts-1:0] rd_req;
logic [ NumPorts-1:0] rd_ack;
logic [ NumPorts-1:0][ DCACHE_TAG_WIDTH-1:0] rd_tag;
logic [ NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx;
logic [ NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off;
riscv::xlen_t rd_data;
logic [ DCACHE_USER_WIDTH-1:0] rd_user;
logic [ DCACHE_SET_ASSOC-1:0] rd_vld_bits;
logic [ DCACHE_SET_ASSOC-1:0] rd_hit_oh;
// miss unit <-> wbuffer
logic [ DCACHE_MAX_TX-1:0][ riscv::PLEN-1:0] tx_paddr;
logic [ DCACHE_MAX_TX-1:0] tx_vld;
// wbuffer <-> memory
wbuffer_t [ DCACHE_WBUF_DEPTH-1:0] wbuffer_data;
///////////////////////////////////////////////////////
// miss handling unit
///////////////////////////////////////////////////////
// Connects the miss requests of all controllers and of the write buffer to
// the memory interface and drives the cache line write port of the memory.
wt_dcache_missunit #(
    .CVA6Cfg (CVA6Cfg),
    .AmoTxId (RdAmoTxId),
    .NumPorts(NumPorts)
) i_wt_dcache_missunit (
    .clk_i          (clk_i),
    .rst_ni         (rst_ni),
    // cache management
    .enable_i       (enable_i),
    .flush_i        (flush_i),
    .flush_ack_o    (flush_ack_o),
    .miss_o         (miss_o),
    .wbuffer_empty_i(wbuffer_empty_o),
    .cache_en_o     (cache_en),
    // amo interface
    .amo_req_i      (amo_req_i),
    .amo_resp_o     (amo_resp_o),
    // miss handling interface: requests
    .miss_req_i     (miss_req),
    .miss_ack_o     (miss_ack),
    .miss_nc_i      (miss_nc),
    .miss_we_i      (miss_we),
    .miss_wdata_i   (miss_wdata),
    .miss_wuser_i   (miss_wuser),
    .miss_paddr_i   (miss_paddr),
    .miss_vld_bits_i(miss_vld_bits_o),
    .miss_size_i    (miss_size),
    .miss_id_i      (miss_id),
    // miss handling interface: responses
    .miss_replay_o  (miss_replay),
    .miss_rtrn_vld_o(miss_rtrn_vld),
    .miss_rtrn_id_o (miss_rtrn_id),
    // from writebuffer
    .tx_paddr_i     (tx_paddr),
    .tx_vld_i       (tx_vld),
    // cache memory interface
    .wr_cl_vld_o    (wr_cl_vld),
    .wr_cl_nc_o     (wr_cl_nc),
    .wr_cl_we_o     (wr_cl_we),
    .wr_cl_tag_o    (wr_cl_tag),
    .wr_cl_idx_o    (wr_cl_idx),
    .wr_cl_off_o    (wr_cl_off),
    .wr_cl_data_o   (wr_cl_data),
    .wr_cl_user_o   (wr_cl_user),
    .wr_cl_data_be_o(wr_cl_data_be),
    .wr_vld_bits_o  (wr_vld_bits),
    // memory interface
    .mem_data_req_o (mem_data_req_o),
    .mem_data_ack_i (mem_data_ack_i),
    .mem_data_o     (mem_data_o),
    .mem_rtrn_vld_i (mem_rtrn_vld_i),
    .mem_rtrn_i     (mem_rtrn_i)
);
///////////////////////////////////////////////////////
// read controllers (LD unit and PTW/MMU)
///////////////////////////////////////////////////////
// 0 is used by MMU, 1 by READ access requests
// One read controller is generated for every port except the last one.
// Ports that are not used in the selected configuration are tied off.
for (genvar k = 0; k < NumPorts - 1; k++) begin : gen_rd_ports
  // set these to high prio ports
  if ((k == 0 && MMU_PRESENT) || (k == 1) || (k == 2 && CVA6Cfg.EnableAccelerator)) begin
    assign rd_prio[k] = 1'b1;

    wt_dcache_ctrl #(
        .CVA6Cfg(CVA6Cfg),
        .RdTxId (RdAmoTxId)
    ) i_wt_dcache_ctrl (
        .clk_i          (clk_i),
        .rst_ni         (rst_ni),
        .cache_en_i     (cache_en),
        // reqs from core
        .req_port_i     (req_ports_i[k]),
        .req_port_o     (req_ports_o[k]),
        // miss interface
        .miss_req_o     (miss_req[k]),
        .miss_ack_i     (miss_ack[k]),
        .miss_we_o      (miss_we[k]),
        .miss_wdata_o   (miss_wdata[k]),
        .miss_wuser_o   (miss_wuser[k]),
        .miss_vld_bits_o(miss_vld_bits_o[k]),
        .miss_paddr_o   (miss_paddr[k]),
        .miss_nc_o      (miss_nc[k]),
        .miss_size_o    (miss_size[k]),
        .miss_id_o      (miss_id[k]),
        .miss_replay_i  (miss_replay[k]),
        .miss_rtrn_vld_i(miss_rtrn_vld[k]),
        // used to detect readout mux collisions
        .wr_cl_vld_i    (wr_cl_vld),
        // cache mem interface
        .rd_tag_o       (rd_tag[k]),
        .rd_idx_o       (rd_idx[k]),
        .rd_off_o       (rd_off[k]),
        .rd_req_o       (rd_req[k]),
        .rd_tag_only_o  (rd_tag_only[k]),
        .rd_ack_i       (rd_ack[k]),
        .rd_data_i      (rd_data),
        .rd_user_i      (rd_user),
        .rd_vld_bits_i  (rd_vld_bits),
        .rd_hit_oh_i    (rd_hit_oh)
    );
  end else begin
    // unused port: low priority, no requests, all outputs driven to zero
    assign rd_prio[k]         = 1'b0;
    assign req_ports_o[k]     = '0;
    // miss interface
    assign miss_req[k]        = 1'b0;
    assign miss_we[k]         = 1'b0;
    assign miss_wdata[k]      = '0;
    assign miss_wuser[k]      = '0;
    assign miss_vld_bits_o[k] = '0;
    assign miss_paddr[k]      = '0;
    assign miss_nc[k]         = 1'b0;
    assign miss_size[k]       = '0;
    assign miss_id[k]         = '0;
    // cache mem interface
    assign rd_tag[k]          = '0;
    assign rd_idx[k]          = '0;
    assign rd_off[k]          = '0;
    assign rd_req[k]          = 1'b0;
    assign rd_tag_only[k]     = 1'b0;
  end
end
///////////////////////////////////////////////////////
// store unit controller
///////////////////////////////////////////////////////
// The last request port is served by the write buffer. Its read port into
// the cache memory has low priority.
// set read port to low priority
assign rd_prio[NumPorts-1] = 1'b0;

wt_dcache_wbuffer #(
    .CVA6Cfg(CVA6Cfg)
) i_wt_dcache_wbuffer (
    .clk_i          (clk_i),
    .rst_ni         (rst_ni),
    // status
    .empty_o        (wbuffer_empty_o),
    .not_ni_o       (wbuffer_not_ni_o),
    // TODO: fix this
    .cache_en_i     (cache_en),
    // .cache_en_i ( '0 ),
    // request ports from core (store unit)
    .req_port_i     (req_ports_i[NumPorts-1]),
    .req_port_o     (req_ports_o[NumPorts-1]),
    // miss unit interface
    .miss_req_o     (miss_req[NumPorts-1]),
    .miss_ack_i     (miss_ack[NumPorts-1]),
    .miss_we_o      (miss_we[NumPorts-1]),
    .miss_wdata_o   (miss_wdata[NumPorts-1]),
    .miss_wuser_o   (miss_wuser[NumPorts-1]),
    .miss_vld_bits_o(miss_vld_bits_o[NumPorts-1]),
    .miss_paddr_o   (miss_paddr[NumPorts-1]),
    .miss_nc_o      (miss_nc[NumPorts-1]),
    .miss_size_o    (miss_size[NumPorts-1]),
    .miss_id_o      (miss_id[NumPorts-1]),
    .miss_rtrn_vld_i(miss_rtrn_vld[NumPorts-1]),
    .miss_rtrn_id_i (miss_rtrn_id),
    // cache read interface
    .rd_tag_o       (rd_tag[NumPorts-1]),
    .rd_idx_o       (rd_idx[NumPorts-1]),
    .rd_off_o       (rd_off[NumPorts-1]),
    .rd_req_o       (rd_req[NumPorts-1]),
    .rd_tag_only_o  (rd_tag_only[NumPorts-1]),
    .rd_ack_i       (rd_ack[NumPorts-1]),
    .rd_data_i      (rd_data),
    .rd_vld_bits_i  (rd_vld_bits),
    .rd_hit_oh_i    (rd_hit_oh),
    // incoming invalidations/cache refills
    .wr_cl_vld_i    (wr_cl_vld),
    .wr_cl_idx_i    (wr_cl_idx),
    // single word write interface
    .wr_req_o       (wr_req),
    .wr_ack_i       (wr_ack),
    .wr_idx_o       (wr_idx),
    .wr_off_o       (wr_off),
    .wr_data_o      (wr_data),
    .wr_user_o      (wr_user),
    .wr_data_be_o   (wr_data_be),
    // write buffer forwarding
    .wbuffer_data_o (wbuffer_data),
    .tx_paddr_o     (tx_paddr),
    .tx_vld_o       (tx_vld)
);
///////////////////////////////////////////////////////
// memory arrays, arbitration and tag comparison
///////////////////////////////////////////////////////
wt_dcache_mem #(
.CVA6Cfg (CVA6Cfg),
.NumPorts(NumPorts)
) i_wt_dcache_mem (
.clk_i (clk_i),
.rst_ni (rst_ni),
// read ports
.rd_prio_i (rd_prio),
.rd_tag_i (rd_tag),
.rd_idx_i (rd_idx),
.rd_off_i (rd_off),
.rd_req_i (rd_req),
.rd_tag_only_i (rd_tag_only),
.rd_ack_o (rd_ack),
.rd_vld_bits_o (rd_vld_bits),
.rd_hit_oh_o (rd_hit_oh),
.rd_data_o (rd_data),
.rd_user_o (rd_user),
// cacheline write port
.wr_cl_vld_i (wr_cl_vld),
.wr_cl_nc_i (wr_cl_nc),
.wr_cl_we_i (wr_cl_we),
.wr_cl_tag_i (wr_cl_tag),
.wr_cl_idx_i (wr_cl_idx),
.wr_cl_off_i (wr_cl_off),
.wr_cl_data_i (wr_cl_data),
.wr_cl_user_i (wr_cl_user),
.wr_cl_data_be_i(wr_cl_data_be),
.wr_vld_bits_i (wr_vld_bits),
// single word write port
.wr_req_i (wr_req),
.wr_ack_o (wr_ack),
.wr_idx_i (wr_idx),
.wr_off_i (wr_off),
.wr_data_i (wr_data),
.wr_user_i (wr_user),
.wr_data_be_i (wr_data_be),
// write buffer forwarding
.wbuffer_data_i (wbuffer_data)
);
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
// check for concurrency issues
//pragma translate_off
`ifndef VERILATOR
flush :
assert property (
@(posedge clk_i) disable iff (!rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o)
else $fatal(1, "[l1 dcache] flushed cache implies flushed wbuffer");
initial begin
// assert wrong parameterizations
assert (DCACHE_INDEX_WIDTH <= 12)
else $fatal(1, "[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
endmodule // wt_dcache

View File

@ -0,0 +1,299 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: DCache controller for read port
// Read-port controller of the write-through data cache.
//
// For each load arriving on req_port_i the controller
//   1) requests a read of the cache arrays (rd_* interface) and grants the
//      request once that read has been acknowledged,
//   2) evaluates the hit vector in the following READ state; a hit (with the
//      cache enabled) completes the load with data_rvalid,
//   3) otherwise hands the request to the miss handler (miss_* interface) and
//      completes the load when miss_rtrn_vld_i is seen.
// Lookups that cannot be evaluated (cacheline write in progress, read not
// acknowledged, replay requested by the miss handler) are re-issued through
// the REPLAY_REQ / REPLAY_READ states.
//
// Parameters:
//   CVA6Cfg - core configuration, passed to is_inside_cacheable_regions()
//   RdTxId  - constant transaction ID driven on miss_id_o
module wt_dcache_ctrl
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic cache_en_i,
// core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// interface to miss handler
output logic miss_req_o,
input logic miss_ack_i,
output logic miss_we_o, // unused (set to 0)
output riscv::xlen_t miss_wdata_o, // unused (set to 0)
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, // unused (set to 0)
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index
output logic [riscv::PLEN-1:0] miss_paddr_o,
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, // 000: 1byte, 001: 2byte, 010: 4byte, 011: 8byte, 111: cacheline
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // set to constant ID
input logic miss_replay_i, // request collided with pending miss - have to replay the request
input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
// used to detect readout mux collisions
input logic wr_cl_vld_i,
// cache memory interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to zero here
input logic rd_ack_i,
input riscv::xlen_t rd_data_i,
input logic [DCACHE_USER_WIDTH-1:0] rd_user_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
);
// controller FSM
//   IDLE          - wait for a request, issue the cache read
//   READ          - evaluate hit/miss of the granted request
//   MISS_REQ      - request pending at the miss handler (miss_req_o high)
//   MISS_WAIT     - miss accepted, wait for miss_rtrn_vld_i
//   KILL_MISS     - request was killed, still wait for miss_rtrn_vld_i
//   KILL_MISS_ACK - request was killed before the miss handler answered
//   REPLAY_REQ    - re-issue the cache read of the saved address
//   REPLAY_READ   - like READ, but the tag is taken from the saved copy
typedef enum logic [2:0] {
IDLE,
READ,
MISS_REQ,
MISS_WAIT,
KILL_MISS,
KILL_MISS_ACK,
REPLAY_REQ,
REPLAY_READ
} state_e;
state_e state_d, state_q;
// registered copy of the request (address split into tag/index/offset, ID, size)
logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q;
logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q;
logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q;
logic [DCACHE_TID_WIDTH-1:0] id_d, id_q;
logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q;
// save_tag: capture req_port_i.address_tag this cycle
// rd_req_q / rd_ack_q: rd_req_o / rd_ack_i delayed by one cycle
logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q;
logic [1:0] data_size_d, data_size_q;
///////////////////////////////////////////////////////
// misc
///////////////////////////////////////////////////////
// map address to tag/idx/offset and save
// valid bits are captured in the cycle after a read request was issued
assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q;
// the tag is captured when the FSM raises save_tag, whereas index, offset,
// ID and size are captured when the request is granted
assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q;
assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q;
assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q;
assign id_d = (req_port_o.data_gnt) ? req_port_i.data_id : id_q;
assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q;
// the cache memory is driven with the *_d values, i.e. a newly captured
// address is visible on the rd_* outputs in the same cycle
assign rd_tag_o = address_tag_d;
assign rd_idx_o = address_idx_d;
assign rd_off_o = address_off_d;
// read data and user bits are passed through unregistered; the ID is the saved one
assign req_port_o.data_rdata = rd_data_i;
assign req_port_o.data_ruser = rd_user_i;
assign req_port_o.data_rid = id_q;
// to miss unit
assign miss_vld_bits_o = vld_data_q;
assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q};
// noncacheable requests use the (zero extended) request size, all others 3'b111 (cacheline)
assign miss_size_o = (miss_nc_o) ? {1'b0, data_size_q} : 3'b111;
// noncacheable if request goes to I/O space, or if cache is disabled
// (the checked address is the saved tag, zero padded to 64 bit with the index bits cleared)
assign miss_nc_o = (~cache_en_i) | (~config_pkg::is_inside_cacheable_regions(
CVA6Cfg,
{{{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {DCACHE_INDEX_WIDTH{1'b0}}}
));
// this port never writes: write enable, write data and write user are tied to zero
assign miss_we_o = '0;
assign miss_wdata_o = '0;
assign miss_wuser_o = '0;
assign miss_id_o = RdTxId;
assign rd_req_d = rd_req_o;
assign rd_ack_d = rd_ack_i;
// always read tag and data
assign rd_tag_only_o = '0;
///////////////////////////////////////////////////////
// main control logic
///////////////////////////////////////////////////////
// Next-state and output logic. All outputs default to 0 and the state holds
// unless a branch below overrides them; later assignments win.
always_comb begin : p_fsm
// default assignment
state_d = state_q;
save_tag = 1'b0;
rd_req_o = 1'b0;
miss_req_o = 1'b0;
req_port_o.data_rvalid = 1'b0;
req_port_o.data_gnt = 1'b0;
// interfaces
unique case (state_q)
//////////////////////////////////
// wait for an incoming request
IDLE: begin
if (req_port_i.data_req) begin
rd_req_o = 1'b1;
// if read ack then ack the `req_port_o`, and goto `READ` state
if (rd_ack_i) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
end
end
//////////////////////////////////
// check whether we have a hit
// in case the cache is disabled,
// or in case the address is NC, we
// reuse the miss mechanism to handle
// the request
READ, REPLAY_READ: begin
// speculatively request cache line
rd_req_o = 1'b1;
// kill -> go back to IDLE
// (a killed request is still answered with data_rvalid)
if (req_port_i.kill_req) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
// READ waits for tag_valid, REPLAY_READ proceeds immediately
end else if (req_port_i.tag_valid | state_q == REPLAY_READ) begin
// the tag is only captured in READ; REPLAY_READ keeps the saved tag
save_tag = (state_q != REPLAY_READ);
// a cacheline write is in progress, or the read issued in the previous
// cycle was not acknowledged -> replay the lookup
if (wr_cl_vld_i || !rd_ack_q) begin
state_d = REPLAY_REQ;
// we've got a hit
end else if ((|rd_hit_oh_i) && cache_en_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
// we can handle another request
if (rd_ack_i && req_port_i.data_req) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
// we've got a miss
end else begin
state_d = MISS_REQ;
end
end
end
//////////////////////////////////
// issue request
MISS_REQ: begin
miss_req_o = 1'b1;
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
// if the miss was accepted in this very cycle we must wait for its
// return, otherwise we keep requesting until the miss unit answers
if (miss_ack_i) begin
state_d = KILL_MISS;
end else begin
state_d = KILL_MISS_ACK;
end
end else if (miss_replay_i) begin
state_d = REPLAY_REQ;
end else if (miss_ack_i) begin
state_d = MISS_WAIT;
end
end
//////////////////////////////////
// wait until the memory transaction
// returns.
MISS_WAIT: begin
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (miss_rtrn_vld_i) begin
state_d = IDLE;
end else begin
state_d = KILL_MISS;
end
end else if (miss_rtrn_vld_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end
end
//////////////////////////////////
// replay read request
REPLAY_REQ: begin
rd_req_o = 1'b1;
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
state_d = IDLE;
end else if (rd_ack_i) begin
state_d = REPLAY_READ;
end
end
//////////////////////////////////
// killed while the miss request was still pending:
// keep miss_req_o asserted until the miss unit responds
KILL_MISS_ACK: begin
miss_req_o = 1'b1;
// in this case the miss handler did not issue
// a transaction and we can safely go to idle
if (miss_replay_i) begin
state_d = IDLE;
end else if (miss_ack_i) begin
state_d = KILL_MISS;
end
end
//////////////////////////////////
// killed miss,
// wait until miss unit responds and
// go back to idle
// (no data_rvalid here - it was already signalled when the kill was seen)
KILL_MISS: begin
if (miss_rtrn_vld_i) begin
state_d = IDLE;
end
end
default: begin
// we should never get here
state_d = IDLE;
end
endcase // state_q
end
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
// state and request registers, asynchronous active-low reset
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (!rst_ni) begin
state_q <= IDLE;
address_tag_q <= '0;
address_idx_q <= '0;
address_off_q <= '0;
id_q <= '0;
vld_data_q <= '0;
data_size_q <= '0;
rd_req_q <= '0;
rd_ack_q <= '0;
end else begin
state_q <= state_d;
address_tag_q <= address_tag_d;
address_idx_q <= address_idx_d;
address_off_q <= address_off_d;
id_q <= id_d;
vld_data_q <= vld_data_d;
data_size_q <= data_size_d;
rd_req_q <= rd_req_d;
rd_ack_q <= rd_ack_d;
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
// at most one way may hit while the cache is enabled
// NOTE(review): the antecedent is (!rd_ack_i), i.e. the check is armed in the
// cycle after a read was NOT acknowledged - confirm that this is intended
// rather than checking the cycle after an acknowledged read.
hot1 :
assert property (@(posedge clk_i) disable iff (!rst_ni) (!rd_ack_i) |=> cache_en_i |-> $onehot0(
rd_hit_oh_i
))
else $fatal(1, "[l1 dcache ctrl] rd_hit_oh_i signal must be hot1");
initial begin
// assert wrong parameterizations
assert (DCACHE_INDEX_WIDTH <= 12)
else
$fatal(1, "[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
endmodule // wt_dcache_ctrl

View File

@ -0,0 +1,428 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: Memory arrays, arbiter and tag comparison for WT dcache.
//
//
// Notes: 1) all ports can trigger a readout of all ways, and the way where the tag hits is selected
//
// 2) only port0 can write full cache lines. higher ports are read only. also, port0 can only read the tag array,
// and does not trigger a cache line readout.
//
// 3) the single word write port is a separate port without access to the tag memory.
// these single word writes can interleave with read operations if they go to different
// cacheline offsets, since each word offset is placed into a different SRAM bank.
//
// 4) Read ports with same priority are RR arbitrated, but high prio ports (rd_prio_i[port_nr] = 1'b1) will stall
// low prio ports (rd_prio_i[port_nr] = 1'b0)
// Memory arrays (data banks + tag/valid SRAMs), read-port arbitration, tag
// comparison and read-data muxing of the write-through data cache.
//
// Parameters:
//   CVA6Cfg  - core configuration (AXI data width and NoC type are used here)
//   NumPorts - number of read ports competing for the arrays
module wt_dcache_mem
  import ariane_pkg::*;
  import wt_cache_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg  = config_pkg::cva6_cfg_empty,
    parameter int unsigned           NumPorts = 3
) (
    input logic clk_i,
    input logic rst_ni,
    // ports
    input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i,  // tag in - comes one cycle later
    input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i,
    input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i,
    input logic [NumPorts-1:0] rd_req_i,  // read the word at offset off_i[:3] in all ways
    input logic [NumPorts-1:0] rd_tag_only_i,  // only do a tag/valid lookup, no access to data arrays
    input logic [NumPorts-1:0] rd_prio_i,  // 0: low prio, 1: high prio
    output logic [NumPorts-1:0] rd_ack_o,
    output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o,
    output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o,
    output riscv::xlen_t rd_data_o,
    output logic [DCACHE_USER_WIDTH-1:0] rd_user_o,
    // only available on port 0, uses address signals of port 0
    input logic wr_cl_vld_i,
    input logic wr_cl_nc_i,  // noncacheable access
    input logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_i,  // writes a full cacheline
    input logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i,
    input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
    input logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i,
    input logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_i,
    input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i,
    input logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i,
    input logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_i,
    // separate port for single word write, no tag access
    input logic [DCACHE_SET_ASSOC-1:0] wr_req_i,  // write a single word to offset off_i[:3]
    output logic wr_ack_o,
    input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i,
    input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i,
    input riscv::xlen_t wr_data_i,
    input logic [DCACHE_USER_WIDTH-1:0] wr_user_i,
    input logic [(riscv::XLEN/8)-1:0] wr_data_be_i,
    // forwarded wbuffer
    input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i
);

  // functions

  // Converts a binary bank number into a one-hot bank select vector.
  function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh(
      input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in);
    logic [DCACHE_NUM_BANKS-1:0] out;
    out     = '0;
    out[in] = 1'b1;
    return out;
  endfunction

  // number of bits needed to address AXI data. If AxiDataWidth equals XLEN this parameter
  // is not needed. Therefore, increment it by one to avoid reverse range select during elaboration.
  localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? $clog2(
      CVA6Cfg.AxiDataWidth / 8
  ) + 1 : $clog2(
      CVA6Cfg.AxiDataWidth / 8
  );

  // per-bank SRAM control
  logic [DCACHE_NUM_BANKS-1:0] bank_req;
  logic [DCACHE_NUM_BANKS-1:0] bank_we;
  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be;
  logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx;
  // index/offset of the access performed in the previous cycle (used for readout)
  logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q;
  logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q;

  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata;
  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata;
  logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl;  // selected word from each cacheline
  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser;
  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser;
  logic [DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] ruser_cl;  // selected word from each cacheline

  logic [DCACHE_TAG_WIDTH-1:0] rd_tag;
  logic [DCACHE_SET_ASSOC-1:0] vld_req;  // bit enable for valid regs
  logic vld_we;  // valid bits write enable
  logic [DCACHE_SET_ASSOC-1:0] vld_wdata;  // valid bits to write
  logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata;  // these are the tags coming from the tagmem
  logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr;  // valid bit
  // index of the read port selected by the arbiter (current / previous cycle)
  logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q;

  logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh;
  logic [(riscv::XLEN/8)-1:0] wbuffer_be;
  riscv::xlen_t wbuffer_rdata, rdata;
  logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser;
  logic [riscv::PLEN-1:0] wbuffer_cmp_addr;

  // tag comparison enable: set in the cycle after a tag read
  logic cmp_en_d, cmp_en_q;
  logic rd_acked;
  logic [NumPorts-1:0] bank_collision, rd_req_masked, rd_req_prio;

  ///////////////////////////////////////////////////////
  // arbiter
  ///////////////////////////////////////////////////////
  // Read requests are priority-masked (rd_req_masked) and then arbitrated by
  // the rr_arb_tree instance below.
  // NOTE(review): an earlier comment here stated "Priority is highest for lowest
  // read port index"; the rr_arb_tree instance suggests round-robin instead - confirm.
  //
  // SRAM bank mapping:
  //
  // Bank 0 Bank 2
  // [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] ..

  // byte enable mapping
  // a full cacheline write takes precedence over an acknowledged single word write
  for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_bank
    for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : gen_bank_way
      assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*(riscv::XLEN/8) +: (riscv::XLEN/8)] :
                             (wr_req_i[j] & wr_ack_o)      ? wr_data_be_i :
                                                             '0;
      assign bank_wdata[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_i[k*riscv::XLEN +: riscv::XLEN] :
                                                                wr_data_i;
      assign bank_wuser[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_user_i[k*DCACHE_USER_WIDTH +: DCACHE_USER_WIDTH] :
                                                                wr_user_i;
    end
  end

  assign vld_wdata  = wr_vld_bits_i;
  // a cacheline write overrides the address of the selected read port
  assign vld_addr   = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
  assign rd_tag     = rd_tag_i[vld_sel_q];  //delayed by one cycle
  assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d];
  assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
  // tag SRAMs: written per way on a cacheline write, all ways read on an acked read
  assign vld_req    = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0;

  // priority masking
  // disable low prio requests when any of the high prio reqs is present
  assign rd_req_prio   = rd_req_i & rd_prio_i;
  assign rd_req_masked = (|rd_req_prio) ? rd_req_prio : rd_req_i;

  logic rd_req;
  // grants are withheld (gnt_i low) while a cacheline write is in progress
  rr_arb_tree #(
      .NumIn    (NumPorts),
      .DataWidth(1)
  ) i_rr_arb_tree (
      .clk_i  (clk_i),
      .rst_ni (rst_ni),
      .flush_i('0),
      .rr_i   ('0),
      .req_i  (rd_req_masked),
      .gnt_o  (rd_ack_o),
      .data_i ('0),
      .gnt_i  (~wr_cl_vld_i),
      .req_o  (rd_req),
      .data_o (),
      .idx_o  (vld_sel_d)
  );

  assign rd_acked = rd_req & ~wr_cl_vld_i;

  // Generates the per-bank request/write-enable/index signals and the
  // acknowledge of the single word write port.
  always_comb begin : p_bank_req
    vld_we   = wr_cl_vld_i;
    bank_req = '0;
    wr_ack_o = '0;
    bank_we  = '0;
    bank_idx = '{default: wr_idx_i};

    // a read port collides with the word write if both address the same bank
    for (int k = 0; k < NumPorts; k++) begin
      bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] == wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
    end

    if (wr_cl_vld_i & |wr_cl_we_i) begin
      // full cacheline write: all banks are written at the same index
      bank_req = '1;
      bank_we  = '1;
      bank_idx = '{default: wr_cl_idx_i};
    end else begin
      if (rd_acked) begin
        if (!rd_tag_only_i[vld_sel_d]) begin
          // read only the bank that holds the requested word
          bank_req =
              dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
          bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d];
        end
      end

      if (|wr_req_i) begin
        // the word write proceeds unless the granted read accesses the same bank
        if (rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin
          wr_ack_o = 1'b1;
          bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
          bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
        end
      end
    end
  end

  ///////////////////////////////////////////////////////
  // tag comparison, hit generation, readout muxes
  ///////////////////////////////////////////////////////
  logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off;
  logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off;  // not referenced in this module
  logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx;
  logic [$clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx;

  // compare tags in the cycle after the tag SRAMs were read (not written)
  assign cmp_en_d = (|vld_req) & ~vld_we;

  // word tag comparison in write buffer
  assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} :
                                            {rd_tag, bank_idx_q, bank_off_q};

  // hit generation
  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
    // tag comparison of ways >0
    assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q;
    // byte offset mux of ways >0
    assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
    assign ruser_cl[i] = bank_ruser[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
  end

  // a write buffer entry hits if it holds valid bytes and its word tag matches
  for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit
    assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & ({{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_data_i[k].wtag} == (wbuffer_cmp_addr >> riscv::XLEN_ALIGN_BYTES));
  end

  // the hit vectors are turned into indices with lzc (defined elsewhere)
  lzc #(
      .WIDTH(DCACHE_WBUF_DEPTH)
  ) i_lzc_wbuffer_hit (
      .in_i   (wbuffer_hit_oh),
      .cnt_o  (wbuffer_hit_idx),
      .empty_o()
  );

  lzc #(
      .WIDTH(DCACHE_SET_ASSOC)
  ) i_lzc_rd_hit (
      .in_i   (rd_hit_oh_o),
      .cnt_o  (rd_hit_idx),
      .empty_o()
  );

  assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data;
  assign wbuffer_ruser = wbuffer_data_i[wbuffer_hit_idx].user;
  // no byte is overlaid if nothing hits in the write buffer
  assign wbuffer_be    = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0;

  if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
    // In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read
    assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? '0 :
                              {{DCACHE_OFFSET_WIDTH-AXI_OFFSET_WIDTH{1'b0}}, wr_cl_off_i[AXI_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]} :
                              wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
  end else begin : gen_piton_offset
    // word offset within the cacheline; the lower bound follows XLEN so that the
    // slice matches the width of wr_cl_off (previously hard-coded to bit 3, which
    // is only correct for XLEN == 64)
    assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
  end

  // while a cacheline is being written, the read data is taken directly from
  // the incoming line, otherwise from the way that hit
  always_comb begin
    if (wr_cl_vld_i) begin
      rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN+:riscv::XLEN];
      ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH+:DCACHE_USER_WIDTH];
    end else begin
      rdata = rdata_cl[rd_hit_idx];
      ruser = ruser_cl[rd_hit_idx];
    end
  end

  // overlay bytes that hit in the write buffer
  for (genvar k = 0; k < (riscv::XLEN / 8); k++) begin : gen_rd_data
    assign rd_data_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k+:8] : rdata[8*k+:8];
  end

  // NOTE(review): only whole bytes of rd_user_o are driven here; this assumes that
  // DCACHE_USER_WIDTH is a multiple of 8 and not wider than XLEN - confirm.
  for (genvar k = 0; k < DCACHE_USER_WIDTH / 8; k++) begin : gen_rd_user
    assign rd_user_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_ruser[8*k+:8] : ruser[8*k+:8];
  end

  ///////////////////////////////////////////////////////
  // memory arrays and regs
  ///////////////////////////////////////////////////////
  logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata[DCACHE_SET_ASSOC-1:0];

  // one data SRAM per bank, holding the words of all ways side by side
  for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_data_banks
    // Data RAM
    sram #(
        .USER_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH),
        .DATA_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN),
        .USER_EN   (ariane_pkg::DATA_USER_EN),
        .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
    ) i_data_sram (
        .clk_i  (clk_i),
        .rst_ni (rst_ni),
        .req_i  (bank_req[k]),
        .we_i   (bank_we[k]),
        .addr_i (bank_idx[k]),
        .wuser_i(bank_wuser[k]),
        .wdata_i(bank_wdata[k]),
        .be_i   (bank_be[k]),
        .ruser_o(bank_ruser[k]),
        .rdata_o(bank_rdata[k])
    );
  end

  // one tag SRAM per way; the valid bit is stored above the tag
  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_srams
    assign tag_rdata[i]     = vld_tag_rdata[i][DCACHE_TAG_WIDTH-1:0];
    assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH];
    // Tag RAM
    sram #(
        // tag + valid bit
        .DATA_WIDTH(ariane_pkg::DCACHE_TAG_WIDTH + 1),
        .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
    ) i_tag_sram (
        .clk_i  (clk_i),
        .rst_ni (rst_ni),
        .req_i  (vld_req[i]),
        .we_i   (vld_we),
        .addr_i (vld_addr),
        .wuser_i('0),
        .wdata_i({vld_wdata[i], wr_cl_tag_i}),
        .be_i   ('1),
        .ruser_o(),
        .rdata_o(vld_tag_rdata[i])
    );
  end

  // pipeline registers for the readout stage, asynchronous active-low reset
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
    if (!rst_ni) begin
      bank_idx_q <= '0;
      bank_off_q <= '0;
      vld_sel_q  <= '0;
      cmp_en_q   <= '0;
    end else begin
      bank_idx_q <= bank_idx_d;
      bank_off_q <= bank_off_d;
      vld_sel_q  <= vld_sel_d;
      cmp_en_q   <= cmp_en_d;
    end
  end

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////
  //pragma translate_off
`ifndef VERILATOR
  initial begin
    cach_line_width_axi :
    assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth)
    else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width");
  end

  initial begin
    axi_xlen :
    assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN)
    else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal XLEN");
  end

  initial begin
    cach_line_width_xlen :
    assert (DCACHE_LINE_WIDTH > riscv::XLEN)
    else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN");
  end

  // after a tag read of all ways at most one way may hit
  hit_hot1 :
  assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
      rd_hit_oh_o
  ))
  else $fatal(1, "[l1 dcache] rd_hit_oh_o signal must be hot1");

  // an acknowledged single word write may target at most one way
  word_write_hot1 :
  assert property (@(posedge clk_i) disable iff (!rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
  else $fatal(1, "[l1 dcache] wr_req_i signal must be hot1");

  // after a tag read of all ways at most one write buffer entry may hit
  wbuffer_hit_hot1 :
  assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
      wbuffer_hit_oh
  ))
  else $fatal(1, "[l1 dcache] wbuffer_hit_oh signal must be hot1");

  // this is only used for verification!
  // shadow copy of the valid bits and tags written to the tag SRAMs
  logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
  logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
  logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test;

  always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
    if (!rst_ni) begin
      vld_mirror <= '{default: '0};
      tag_mirror <= '{default: '0};
    end else begin
      for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin
        if (vld_req[i] & vld_we) begin
          vld_mirror[vld_addr][i] <= vld_wdata[i];
          tag_mirror[vld_addr][i] <= wr_cl_tag_i;
        end
      end
    end
  end

  // flags ways that already hold a valid line with the tag that is being written
  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test
    assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata);
  end

  tag_write_duplicate :
  assert property (
      @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
  else $fatal(1, "[l1 dcache] cannot allocate a CL that is already present in the cache");
`endif
  //pragma translate_on
endmodule  // wt_dcache_mem

View File

@ -0,0 +1,645 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: miss controller for WT dcache. Note that the current assumption
// is that the port with the highest index issues writes instead of reads.
module wt_dcache_missunit
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs
parameter int unsigned NumPorts = 4 // number of miss ports
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// cache management, signals from/to core
input logic enable_i, // from CSR
input logic flush_i, // flush request, this waits for pending tx (write, read) to finish and will clear the cache
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
// local cache management signals
input logic wbuffer_empty_i,
output logic cache_en_o, // local cache enable signal
// AMO interface
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// miss handling interface (ld, ptw, wbuffer)
input logic [NumPorts-1:0] miss_req_i,
output logic [NumPorts-1:0] miss_ack_o,
input logic [NumPorts-1:0] miss_nc_i,
input logic [NumPorts-1:0] miss_we_i,
input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i,
input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i,
input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i,
input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
input logic [NumPorts-1:0][2:0] miss_size_i,
input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID
// signals that the request collided with a pending read
output logic [NumPorts-1:0] miss_replay_o,
// signals response from memory
output logic [NumPorts-1:0] miss_rtrn_vld_o,
output logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set to zero fro reads
// from writebuffer
input logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_i, // used to check for address collisions with read operations
input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations
// write interface to cache memory
output logic wr_cl_vld_o, // writes a full cacheline
output logic wr_cl_nc_o, // writes a full cacheline
output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline
output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o,
output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o,
output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o,
output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o,
output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o,
// memory interface
input logic mem_rtrn_vld_i,
input dcache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output dcache_req_t mem_data_o
);
// functions
// Converts a binary way index into a one-hot way select vector:
// all bits are cleared, then the bit addressed by `in` is set.
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh(
    input logic [L1D_WAY_WIDTH-1:0] in);
  // the result is built directly in the implicit function return variable
  dcache_way_bin2oh     = '0;
  dcache_way_bin2oh[in] = 1'b1;
endfunction
// align the physical address to the specified size:
// 000: bytes
// 001: hword
// 010: word
// 011: dword
// 111: DCACHE line
// Returns `paddr` with its low address bits cleared according to `size`:
//   3'b001 clears bit 0, 3'b010 clears bits [1:0], 3'b011 clears bits [2:0],
//   3'b111 clears the complete cacheline offset; any other size leaves the
//   address untouched.
function automatic logic [riscv::PLEN-1:0] paddrSizeAlign(input logic [riscv::PLEN-1:0] paddr,
                                                          input logic [2:0] size);
  // start from the unmodified address and clear bits in the implicit return variable
  paddrSizeAlign = paddr;
  if (size == 3'b111) begin
    paddrSizeAlign[DCACHE_OFFSET_WIDTH-1:0] = '0;
  end else if (size == 3'b011) begin
    paddrSizeAlign[2:0] = '0;
  end else if (size == 3'b010) begin
    paddrSizeAlign[1:0] = '0;
  end else if (size == 3'b001) begin
    paddrSizeAlign[0:0] = '0;
  end
endfunction : paddrSizeAlign
// controller FSM
typedef enum logic [2:0] {
IDLE,
DRAIN,
AMO,
FLUSH,
STORE_WAIT,
LOAD_WAIT,
AMO_WAIT
} state_e;
state_e state_d, state_q;
// MSHR for reads
typedef struct packed {
logic [riscv::PLEN-1:0] paddr;
logic [2:0] size;
logic [DCACHE_SET_ASSOC-1:0] vld_bits;
logic [CACHE_ID_WIDTH-1:0] id;
logic nc;
logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way;
logic [$clog2(NumPorts)-1:0] miss_port_idx;
} mshr_t;
mshr_t mshr_d, mshr_q;
logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way, inv_way, rnd_way;
logic mshr_vld_d, mshr_vld_q, mshr_vld_q1;
logic mshr_allocate;
logic update_lfsr, all_ways_valid;
logic enable_d, enable_q;
logic flush_ack_d, flush_ack_q;
logic flush_en, flush_done;
logic mask_reads, lock_reqs;
logic amo_sel, miss_is_write;
logic amo_req_d, amo_req_q;
logic [63:0] amo_rtrn_mux;
riscv::xlen_t amo_data, amo_data_a, amo_data_b;
riscv::xlen_t amo_user; //DCACHE USER ? DATA_USER_WIDTH
logic [riscv::PLEN-1:0] tmp_paddr;
logic [$clog2(NumPorts)-1:0] miss_port_idx;
logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q;
logic [NumPorts-1:0] miss_req_masked_d, miss_req_masked_q;
logic inv_vld, inv_vld_all, cl_write_en;
logic load_ack, store_ack, amo_ack;
logic [NumPorts-1:0] mshr_rdrd_collision_d, mshr_rdrd_collision_q;
logic [NumPorts-1:0] mshr_rdrd_collision;
logic tx_rdwr_collision, mshr_rdwr_collision;
///////////////////////////////////////////////////////
// input arbitration and general control sigs
///////////////////////////////////////////////////////
// the cache-enable output mirrors the registered enable state
assign cache_en_o = enable_q;
// flush counter: increments while flush_en is set, otherwise held at zero
assign cnt_d = (flush_en) ? cnt_q + 1 : '0;
// the flush is complete once the counter reaches its last value
assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS - 1);
// request vector presented to the arbiter:
// - lock_reqs : reuse the vector registered in the previous cycle
// - mask_reads: only requests with their write-enable set pass
// - otherwise : all requests pass
assign miss_req_masked_d = (lock_reqs) ? miss_req_masked_q :
(mask_reads) ? miss_we_i & miss_req_i : miss_req_i;
// write-enable flag of the currently selected port
assign miss_is_write = miss_we_i[miss_port_idx];
// read port arbiter
// NOTE(review): lzc is defined outside this file section; presumably cnt_o is
// the index of the lowest set request bit (fixed priority) - confirm in lzc
lzc #(
.WIDTH(NumPorts)
) i_lzc_reqs (
.in_i (miss_req_masked_d),
.cnt_o (miss_port_idx),
.empty_o()
);
// Acknowledge the arbitrated miss port in the cycle in which the memory
// interface accepts its request. No port is acknowledged while amo_sel is
// set, since the request on the memory interface then belongs to the AMO.
always_comb begin : p_ack
  miss_ack_o = '0;
  if (!amo_sel) miss_ack_o[miss_port_idx] = mem_data_req_o & mem_data_ack_i;
end
///////////////////////////////////////////////////////
// MSHR and way replacement logic (only for read ops)
///////////////////////////////////////////////////////
// find invalid cache line
// (the inverted valid bits of the selected port are searched; empty_o is
// used as "all ways valid")
lzc #(
.WIDTH(ariane_pkg::DCACHE_SET_ASSOC)
) i_lzc_inv (
.in_i (~miss_vld_bits_i[miss_port_idx]),
.cnt_o (inv_way),
.empty_o(all_ways_valid)
);
// generate random way index (only advanced when update_lfsr is set)
lfsr #(
.LfsrWidth(8),
.OutWidth ($clog2(ariane_pkg::DCACHE_SET_ASSOC))
) i_lfsr_inv (
.clk_i (clk_i),
.rst_ni(rst_ni),
.en_i (update_lfsr),
.out_o (rnd_way)
);
// prefer an invalid way; fall back to the random way if all ways are valid
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
// MSHR fields: capture the selected port's request on allocation, hold otherwise
assign mshr_d.size = (mshr_allocate) ? miss_size_i[miss_port_idx] : mshr_q.size;
assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i[miss_port_idx] : mshr_q.paddr;
assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
assign mshr_d.id = (mshr_allocate) ? miss_id_i[miss_port_idx] : mshr_q.id;
assign mshr_d.nc = (mshr_allocate) ? miss_nc_i[miss_port_idx] : mshr_q.nc;
assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way;
assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx;
// currently we only have one outstanding read TX, hence an incoming load clears the MSHR
// (allocation has priority over the clear)
assign mshr_vld_d = (mshr_allocate) ? 1'b1 : (load_ack) ? 1'b0 : mshr_vld_q;
// miss_o is raised in the allocation cycle for cacheable requests only
assign miss_o = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;
// read/read collision per port: the port's request targets the same cache line
// as the MSHR while the MSHR is valid or was valid in the previous cycle.
// The flag is sticky while the port keeps requesting and clears once the
// request is dropped.
for (genvar k = 0; k < NumPorts; k++) begin : gen_rdrd_collision
assign mshr_rdrd_collision[k] = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1);
assign mshr_rdrd_collision_d[k] = (!miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k];
end
// read/write collision, stalls the corresponding request
// write port[NumPorts-1] collides with MSHR_Q
assign mshr_rdwr_collision = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[NumPorts-1][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && mshr_vld_q;
// Read vs. in-flight write collision: set if the cache-line address of the
// selected miss port matches the address of any valid in-flight transaction
// reported via tx_paddr_i / tx_vld_i.
always_comb begin : p_tx_coll
  tx_rdwr_collision = 1'b0;
  for (int slot = 0; slot < DCACHE_MAX_TX; slot++) begin
    tx_rdwr_collision |= tx_vld_i[slot] &&
        (tx_paddr_i[slot][riscv::PLEN-1:DCACHE_OFFSET_WIDTH] ==
         miss_paddr_i[miss_port_idx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]);
  end
end
///////////////////////////////////////////////////////
// to memory
///////////////////////////////////////////////////////
// if size = 32bit word, select appropriate offset, replicate for openpiton...
// amo_data_a: lower 32 bit of operand_b (replicated into both halves on 64 bit)
// amo_data_b: full operand_b (only driven in the 64 bit configuration)
// neither signal is driven when CVA6Cfg.RVA is disabled
if (CVA6Cfg.RVA) begin
if (riscv::IS_XLEN64) begin : gen_amo_64b_data
assign amo_data_a = {amo_req_i.operand_b[0+:32], amo_req_i.operand_b[0+:32]};
assign amo_data_b = amo_req_i.operand_b;
end else begin : gen_amo_32b_data
assign amo_data_a = amo_req_i.operand_b[0+:32];
end
end
// Select the AMO write data (and user bits) that are sent to memory.
// - 64 bit core: word-sized AMOs (size == 2'b10) use the replicated 32 bit
//   operand, all other sizes use the full operand
// - 32 bit core: always the 32 bit operand
// amo_user mirrors amo_data if user bits are enabled and is zero otherwise.
always_comb begin
  // defaults: keep both signals driven in every configuration. Without them
  // nothing is assigned when CVA6Cfg.RVA is disabled, which leaves the
  // signals undriven (X in simulation, latch warnings in lint/synthesis).
  amo_data = '0;
  amo_user = '0;
  if (CVA6Cfg.RVA) begin
    if (riscv::IS_XLEN64) begin
      if (amo_req_i.size == 2'b10) begin
        amo_data = amo_data_a;
      end else begin
        amo_data = amo_data_b;
      end
    end else begin
      amo_data = amo_data_a;
    end
    if (ariane_pkg::DATA_USER_EN) begin
      amo_user = amo_data;
    end
  end
end
// AMO response path (only generated when atomics are enabled):
// pick the 64 bit lane addressed by operand_a out of the returned data and
// form the AMO result from it.
if (CVA6Cfg.RVA) begin
// note: openpiton returns a full cacheline!
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_rtrn_mux
if (CVA6Cfg.AxiDataWidth > 64) begin
// bus wider than 64 bit: the address bits above bit 2 select the 64 bit lane
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[$clog2(
CVA6Cfg.AxiDataWidth/8
)-1:3]*64+:64];
end else begin
assign amo_rtrn_mux = mem_rtrn_i.data[0+:64];
end
end else begin : gen_piton_rtrn_mux
// the line offset bits above bit 2 select the 64 bit lane within the cacheline
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64+:64];
end
// always sign extend 32bit values
// (operand_a[2] selects the upper or lower 32 bit half of the 64 bit lane)
assign amo_resp_o.result = (amo_req_i.size==2'b10) ? {{32{amo_rtrn_mux[amo_req_i.operand_a[2]*32 + 31]}},amo_rtrn_mux[amo_req_i.operand_a[2]*32 +: 32]} :
amo_rtrn_mux ;
// registered copy of the AMO request flag, used to qualify atomic responses
assign amo_req_d = amo_req_i.req;
end
// outgoing memory requests (AMOs are always uncached)
// every field is a 2:1 mux: the AMO request is forwarded while amo_sel is set
// (and atomics are enabled), otherwise the request of the arbitrated miss port
assign mem_data_o.tid = (CVA6Cfg.RVA && amo_sel) ? AmoTxId : miss_id_i[miss_port_idx];
assign mem_data_o.nc = (CVA6Cfg.RVA && amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx];
assign mem_data_o.way = (CVA6Cfg.RVA && amo_sel) ? '0 : repl_way;
assign mem_data_o.data = (CVA6Cfg.RVA && amo_sel) ? amo_data : miss_wdata_i[miss_port_idx];
assign mem_data_o.user = (CVA6Cfg.RVA && amo_sel) ? amo_user : miss_wuser_i[miss_port_idx];
assign mem_data_o.size = (CVA6Cfg.RVA && amo_sel) ? {1'b0, amo_req_i.size} : miss_size_i [miss_port_idx];
assign mem_data_o.amo_op = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.amo_op : AMO_NONE;
// the address is aligned to the access size before it leaves the miss unit
assign tmp_paddr = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx];
assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size);
///////////////////////////////////////////////////////
// back-off mechanism for LR/SC completion guarantee
///////////////////////////////////////////////////////
// sc_fail / sc_pass are pulsed by p_rtrn_logic when an SC response returns;
// sc_backoff_over gates the issue of LR requests in the AMO state.
// NOTE(review): exp_backoff is defined elsewhere; presumably set_i starts or
// extends a back-off period and clr_i resets it - confirm in exp_backoff
logic sc_fail, sc_pass, sc_backoff_over;
exp_backoff #(
.Seed (3),
.MaxExp(16)
) i_exp_backoff (
.clk_i,
.rst_ni,
.set_i (sc_fail),
.clr_i (sc_pass),
.is_zero_o(sc_backoff_over)
);
///////////////////////////////////////////////////////
// responses from memory
///////////////////////////////////////////////////////
// keep track of pending stores
// store_sent: a store request is accepted by memory in this cycle
logic store_sent;
logic [$clog2(wt_cache_pkg::DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q;
assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ);
// up/down counter: +1 on a sent store, -1 on a store ack, unchanged if both
// (or neither) happen in the same cycle
assign stores_inflight_d = (store_ack && store_sent) ? stores_inflight_q :
(store_ack) ? stores_inflight_q - 1 :
(store_sent) ? stores_inflight_q + 1 :
stores_inflight_q;
// incoming responses
// Decodes the response type of a valid memory return into the internal
// ack / invalidation strobes and the per-port return-valid outputs.
// All outputs default to zero when no response is valid.
always_comb begin : p_rtrn_logic
load_ack = 1'b0;
store_ack = 1'b0;
amo_ack = 1'b0;
inv_vld = 1'b0;
inv_vld_all = 1'b0;
sc_fail = 1'b0;
sc_pass = 1'b0;
miss_rtrn_vld_o = '0;
if (mem_rtrn_vld_i) begin
unique case (mem_rtrn_i.rtype)
DCACHE_LOAD_ACK: begin
// only accepted while a read is outstanding; routed to the port stored in the MSHR
if (mshr_vld_q) begin
load_ack = 1'b1;
miss_rtrn_vld_o[mshr_q.miss_port_idx] = 1'b1;
end
end
DCACHE_STORE_ACK: begin
// only accepted while stores are in flight; always routed to the last port
if (stores_inflight_q > 0) begin
store_ack = 1'b1;
miss_rtrn_vld_o[NumPorts-1] = 1'b1;
end
end
DCACHE_ATOMIC_ACK: begin
if (CVA6Cfg.RVA) begin
// only accepted if an AMO request was pending in the previous cycle
if (amo_req_q) begin
amo_ack = 1'b1;
// need to set SC backoff counter if
// this op failed
// (a non-zero SC result is treated as failure)
if (amo_req_i.amo_op == AMO_SC) begin
if (amo_resp_o.result > 0) begin
sc_fail = 1'b1;
end else begin
sc_pass = 1'b1;
end
end
end
end
end
DCACHE_INV_REQ: begin
// an "invalidate all" request also counts as a valid invalidation
inv_vld = mem_rtrn_i.inv.vld | mem_rtrn_i.inv.all;
inv_vld_all = mem_rtrn_i.inv.all;
end
// TODO:
// DCACHE_INT_REQ: begin
// end
default: begin
end
endcase
end
end
// to write buffer
// the transaction ID of the response is forwarded unmodified
assign miss_rtrn_id_o = mem_rtrn_i.tid;
///////////////////////////////////////////////////////
// writes to cache memory
///////////////////////////////////////////////////////
// cacheline write port
assign wr_cl_nc_o = mshr_q.nc;
// asserted for every load response and whenever at least one way is written
assign wr_cl_vld_o = load_ack | (|wr_cl_we_o);
// way write enables, in priority order:
// flush -> all ways, invalidate-all -> all ways, single invalidation -> the
// invalidated way, refill -> the replacement way stored in the MSHR
assign wr_cl_we_o = (flush_en) ? '1 : (inv_vld_all) ? '1 : (inv_vld) ? dcache_way_bin2oh(
mem_rtrn_i.inv.way
) : (cl_write_en) ? dcache_way_bin2oh(
mshr_q.repl_way
) : '0;
// valid bits to write: cleared for flush and invalidations, set for the refilled way
assign wr_vld_bits_o = (flush_en) ? '0 : (inv_vld) ? '0 : (cl_write_en) ? dcache_way_bin2oh(
mshr_q.repl_way
) : '0;
// line index: flush counter, invalidation index or MSHR address (same priority as above)
assign wr_cl_idx_o = (flush_en) ? cnt_q :
(inv_vld) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] :
mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_cl_data_o = mem_rtrn_i.data;
assign wr_cl_user_o = mem_rtrn_i.user;
assign wr_cl_data_be_o = (cl_write_en) ? '1 : '0;// we only write complete cachelines into the memory
// only non-NC responses write to the cache
assign cl_write_en = load_ack & ~mshr_q.nc;
///////////////////////////////////////////////////////
// main control logic for generating tx
///////////////////////////////////////////////////////
// Controller of the miss unit. Depending on state_q it
// - issues load / store / atomic requests on the memory interface,
// - allocates the MSHR for reads (mshr_allocate),
// - drains pending stores before a flush or an AMO (DRAIN),
// - runs the flush counter (FLUSH), and
// - acknowledges AMOs and flushes.
// Priority in IDLE: flush / cache enable, then AMO, then regular misses.
always_comb begin : p_fsm
// default assignment
state_d = state_q;
flush_ack_o = 1'b0;
mem_data_o.rtype = DCACHE_LOAD_REQ;
mem_data_req_o = 1'b0;
amo_resp_o.ack = 1'b0;
miss_replay_o = '0;
// disabling cache is possible anytime, enabling goes via flush
enable_d = enable_q & enable_i;
flush_ack_d = flush_ack_q;
flush_en = 1'b0;
amo_sel = 1'b0;
update_lfsr = 1'b0;
mshr_allocate = 1'b0;
lock_reqs = 1'b0;
// while a read is outstanding, only writes are arbitrated
mask_reads = mshr_vld_q;
// interfaces
unique case (state_q)
//////////////////////////////////
// wait for misses / amo ops
IDLE: begin
if (flush_i || (enable_i && !enable_q)) begin
// flush can start right away if nothing is pending, otherwise drain first
if (wbuffer_empty_i && !mshr_vld_q) begin
// remember whether this flush has to be acknowledged (not the case for a pure enable)
flush_ack_d = flush_i;
state_d = FLUSH;
end else begin
state_d = DRAIN;
end
end else if (CVA6Cfg.RVA && amo_req_i.req) begin
// AMOs are only issued once the write buffer is empty and no read is outstanding
if (wbuffer_empty_i && !mshr_vld_q) begin
state_d = AMO;
end else begin
state_d = DRAIN;
end
// we've got a miss to handle
end else if (|miss_req_masked_d) begin
// this is a write miss, just pass through (but check whether write collides with MSHR)
if (miss_is_write) begin
// stall in case this write collides with the MSHR address
if (!mshr_rdwr_collision) begin
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ;
if (!mem_data_ack_i) begin
state_d = STORE_WAIT;
end
end
// this is a read miss, can only allocate 1 MSHR
// in case of a load_ack we can accept a new miss, since the MSHR is being cleared
end else if (!mshr_vld_q || load_ack) begin
// replay the read request in case the address has collided with MSHR during the time the request was pending
// i.e., the cache state may have been updated in the mean time due to a refill at the same CL address
if (mshr_rdrd_collision_d[miss_port_idx]) begin
miss_replay_o[miss_port_idx] = 1'b1;
// stall in case this CL address overlaps with a write TX that is in flight
end else if (!tx_rdwr_collision) begin
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_LOAD_REQ;
update_lfsr = all_ways_valid & mem_data_ack_i; // need to evict a random way
mshr_allocate = mem_data_ack_i;
if (!mem_data_ack_i) begin
state_d = LOAD_WAIT;
end
end
end
end
end
//////////////////////////////////
// wait until this request is acked
STORE_WAIT: begin
// keep the arbitration result stable while the request is pending
lock_reqs = 1'b1;
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ;
if (mem_data_ack_i) begin
state_d = IDLE;
end
end
//////////////////////////////////
// wait until this request is acked
LOAD_WAIT: begin
// keep the arbitration result stable while the request is pending
lock_reqs = 1'b1;
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_LOAD_REQ;
if (mem_data_ack_i) begin
update_lfsr = all_ways_valid; // need to evict a random way
mshr_allocate = 1'b1;
state_d = IDLE;
end
end
//////////////////////////////////
// only handle stores, do not accept new read requests
// wait until MSHR is cleared and wbuffer is empty
DRAIN: begin
mask_reads = 1'b1;
// these are writes, check whether they collide with MSHR
if (|miss_req_masked_d && !mshr_rdwr_collision) begin
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ;
end
// back to IDLE, where the pending flush / AMO request is evaluated again
if (wbuffer_empty_i && !mshr_vld_q) begin
state_d = IDLE;
end
end
//////////////////////////////////
// flush the cache
FLUSH: begin
// internal flush signal
flush_en = 1'b1;
if (flush_done) begin
state_d = IDLE;
// acknowledge only if the flush was requested via flush_i
flush_ack_o = flush_ack_q;
flush_ack_d = 1'b0;
// the cache may only become enabled here, i.e. after a complete flush
enable_d = enable_i;
end
end
//////////////////////////////////
// send out amo op request
AMO: begin
if (CVA6Cfg.RVA) begin
mem_data_o.rtype = DCACHE_ATOMIC_REQ;
amo_sel = 1'b1;
// if this is an LR, we need to consult the backoff counter
if ((amo_req_i.amo_op != AMO_LR) || sc_backoff_over) begin
mem_data_req_o = 1'b1;
if (mem_data_ack_i) begin
state_d = AMO_WAIT;
end
end
end
end
//////////////////////////////////
// block and wait until AMO OP returns
AMO_WAIT: begin
if (CVA6Cfg.RVA) begin
amo_sel = 1'b1;
if (amo_ack) begin
amo_resp_o.ack = 1'b1;
state_d = IDLE;
end
end
end
//////////////////////////////////
default: begin
// we should never get here
state_d = IDLE;
end
endcase // state_q
end
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
// State registers of the miss unit (asynchronous, active-low reset).
// Out of reset the controller starts in FLUSH with the cache disabled, so
// the flush counter runs once before the FSM reaches IDLE.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (!rst_ni) begin
state_q <= FLUSH;
cnt_q <= '0;
enable_q <= '0;
flush_ack_q <= '0;
mshr_vld_q <= '0;
mshr_vld_q1 <= '0;
mshr_q <= '0;
mshr_rdrd_collision_q <= '0;
miss_req_masked_q <= '0;
amo_req_q <= '0;
stores_inflight_q <= '0;
end else begin
state_q <= state_d;
cnt_q <= cnt_d;
enable_q <= enable_d;
flush_ack_q <= flush_ack_d;
mshr_vld_q <= mshr_vld_d;
// one additional cycle of delay on the MSHR valid flag (used by the rd/rd collision check)
mshr_vld_q1 <= mshr_vld_q;
mshr_q <= mshr_d;
mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
miss_req_masked_q <= miss_req_masked_d;
amo_req_q <= amo_req_d;
stores_inflight_q <= stores_inflight_d;
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
// an accepted load response must carry the transaction ID stored in the MSHR
read_tid :
assert property (
@(posedge clk_i) disable iff (!rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
else $fatal(1, "[l1 dcache missunit] TID of load response doesn't match");
// ports 0 .. NumPorts-2 are read ports and must never set their write enable
read_ports :
assert property (
@(posedge clk_i) disable iff (!rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0)
else $fatal(1, "[l1 dcache missunit] only last port can issue write requests");
// port NumPorts-1 is the write port and must set its write enable with every request
write_port :
assert property (
@(posedge clk_i) disable iff (!rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1])
else $fatal(1, "[l1 dcache missunit] last port can only issue write requests");
// elaboration-time parameter check
initial begin
// assert wrong parameterizations
assert (NumPorts >= 2)
else
$fatal(
1, "[l1 dcache missunit] at least two ports are required (one read port, one write port)"
);
end
`endif
//pragma translate_on
endmodule // wt_dcache_missunit

View File

@ -0,0 +1,635 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: coalescing write buffer for WT dcache
//
// A couple of notes:
//
// 1) the write buffer behaves as a fully-associative cache, and is therefore coalescing.
// this cache is used by the cache readout logic to forward data to the load unit.
//
// each byte can be in the following states (valid/dirty/txblock):
//
// 0/0/0: invalid -> free entry in the buffer
// 1/1/0: valid and dirty, Byte is hence not part of TX in-flight
// 1/0/1: valid and not dirty, Byte is part of a TX in-flight
// 1/1/1: valid, part of a TX in-flight, and dirty. this means that the byte has been
// overwritten while in TX and needs to be retransmitted once the write of that byte returns.
// 1/0/0: this would represent a clean state, but is never reached in the wbuffer in the current implementation.
// this is because when a TX returns, and the byte is in state [1/0/1], it is written to cache if needed and
// its state is immediately cleared to 0/x/x.
//
// this state is used to distinguish between bytes that have been written and not
// yet sent to the memory subsystem, and bytes that are part of a transaction.
//
// 2) further, each word in the write buffer has a cache state (checked, hit_oh)
//
// checked == 0: unknown cache state
// checked == 1: cache state has been looked up, valid way is stored in "hit_oh"
//
// cache invalidations/refills affecting a particular word will clear its word state to 0,
// so another lookup has to be done. note that these lookups are triggered as soon as there is
// a valid word with checked == 0 in the write buffer.
//
// 3) returning write ACKs trigger a cache update if the word is present in the cache, and evict that
// word from the write buffer. if the word is not allocated to the cache, it is just evicted from the write buffer.
// if the word cache state is VOID, the pipeline is stalled until it is clear whether that word is in the cache or not.
//
// 4) we handle NC writes using the writebuffer circuitry. upon an NC request, the writebuffer will first be drained.
// then, only the NC word is written into the write buffer and no further write requests are acknowledged until that
// word has been evicted from the write buffer.
module wt_dcache_wbuffer
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic cache_en_i, // writes are treated as NC if disabled
output logic empty_o, // asserted if no data is present in write buffer
output logic not_ni_o, // asserted if no ni data is present in write buffer
// core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// interface to miss handler
input logic miss_ack_i,
output logic [riscv::PLEN-1:0] miss_paddr_o,
output logic miss_req_o,
output logic miss_we_o, // always 1 here
output riscv::xlen_t miss_wdata_o,
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o,
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0)
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, //
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
// write responses from memory
input logic miss_rtrn_vld_i,
input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to clear
// cache read interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays
input logic rd_ack_i,
input riscv::xlen_t rd_data_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i,
// cacheline writes
input logic wr_cl_vld_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
// cache word write interface
output logic [DCACHE_SET_ASSOC-1:0] wr_req_o,
input logic wr_ack_i,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
output riscv::xlen_t wr_data_o,
output logic [(riscv::XLEN/8)-1:0] wr_data_be_o,
output logic [DCACHE_USER_WIDTH-1:0] wr_user_o,
// to forwarding logic and miss unit
output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o,
output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_o, // used to check for address collisions with read operations
output logic [DCACHE_MAX_TX-1:0] tx_vld_o
);
// internal state and signals of the write buffer (_d = next, _q = registered)
// status of the in-flight write transactions, indexed by transaction ID
tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q;
// the buffer entries themselves
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q;
// per-entry summary flags, derived in gen_flags
logic [DCACHE_WBUF_DEPTH-1:0] valid;
logic [DCACHE_WBUF_DEPTH-1:0] dirty;
logic [DCACHE_WBUF_DEPTH-1:0] tocheck;
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit;
//logic [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty;
logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty;
// entry pointers used by the different ports of the buffer
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0]
next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
// tx_id: ID for the next transaction, rtrn_id: ID of the oldest returned transaction
logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id;
logic [riscv::XLEN_ALIGN_BYTES-1:0] bdirty_off;
logic [(riscv::XLEN/8)-1:0] tx_be;
logic [riscv::PLEN-1:0] wr_paddr, rd_paddr, extract_tag;
logic [DCACHE_TAG_WIDTH-1:0] rd_tag_d, rd_tag_q;
logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_d, rd_hit_oh_q;
logic check_en_d, check_en_q, check_en_q1;
logic full, dirty_rd_en, rdy;
logic rtrn_empty, evict;
// marks entries that hold a write to a non-idempotent region
logic [DCACHE_WBUF_DEPTH-1:0] ni_pending_d, ni_pending_q;
logic wbuffer_wren;
logic free_tx_slots;
// cacheline write strobe / index, delayed by one cycle
logic wr_cl_vld_q, wr_cl_vld_d;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d;
logic [riscv::PLEN-1:0] debug_paddr[DCACHE_WBUF_DEPTH-1:0];
// entries selected by the check arbiter and the dirty arbiter
wbuffer_t wbuffer_check_mux, wbuffer_dirty_mux;
///////////////////////////////////////////////////////
// misc
///////////////////////////////////////////////////////
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] miss_tag;
logic is_nc_miss;
logic is_ni;
// tag part of the address that is sent to the miss unit
assign miss_tag = miss_paddr_o[ariane_pkg::DCACHE_INDEX_WIDTH+:ariane_pkg::DCACHE_TAG_WIDTH];
// the outgoing write is non-cacheable if its (index-aligned, zero-extended)
// address lies outside the configured cacheable regions
assign is_nc_miss = !config_pkg::is_inside_cacheable_regions(
CVA6Cfg,
{
{64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}}
}
);
// with the cache disabled every write is treated as non-cacheable
assign miss_nc_o = !cache_en_i || is_nc_miss;
// Non-idempotent if request goes to NI region
// (evaluated on the tag of the incoming core request, not on the outgoing write)
assign is_ni = config_pkg::is_inside_nonidempotent_regions(
CVA6Cfg,
{
{64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}},
req_port_i.address_tag,
{DCACHE_INDEX_WIDTH{1'b0}}
}
);
// the write buffer only issues writes and does not use the valid bits
assign miss_we_o = 1'b1;
assign miss_vld_bits_o = '0;
// expose the registered buffer contents
assign wbuffer_data_o = wbuffer_q;
// Export valid flag and physical address of every TX slot so that the miss
// unit can check reads for collisions with in-flight writes.
for (genvar k = 0; k < DCACHE_MAX_TX; k++) begin : gen_tx_vld
  assign tx_vld_o[k] = tx_stat_q[k].vld;
  // wtag stores the address without its XLEN_ALIGN_BYTES byte-offset bits, so
  // the word address is rebuilt by appending zeros. A shift placed inside a
  // concatenation is evaluated at the width of wtag and would therefore drop
  // the topmost XLEN_ALIGN_BYTES tag bits instead of widening the value.
  assign tx_paddr_o[k] = {wbuffer_q[tx_stat_q[k].ptr].wtag, {riscv::XLEN_ALIGN_BYTES{1'b0}}};
end
///////////////////////////////////////////////////////
// openpiton does not understand byte enable sigs
// need to convert to the four cases:
// 00: byte
// 01: halfword
// 10: word
// 11: dword
// non-contiguous writes need to be serialized!
// e.g. merged dwords with BE like this: 8'b01001100
///////////////////////////////////////////////////////
// get byte offset
// (searched over the transmittable dirty bytes of the entry selected by dirty_ptr)
lzc #(
.WIDTH(riscv::XLEN / 8)
) i_vld_bdirty (
.in_i (bdirty[dirty_ptr]),
.cnt_o (bdirty_off),
.empty_o()
);
// add the offset to the physical base address of this buffer entry
assign miss_paddr_o = {wbuffer_dirty_mux.wtag, bdirty_off};
assign miss_id_o = tx_id;
// is there any dirty word to be transmitted, and is there a free TX slot?
assign miss_req_o = (|dirty) && free_tx_slots;
// get size of aligned words, and the corresponding byte enables
// note: openpiton can only handle aligned offsets + size, and hence
// we have to split unaligned data into multiple transfers (see toSize64)
// e.g. if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000
if (riscv::IS_XLEN64) begin : gen_size_64b
assign miss_size_o = {1'b0, toSize64(bdirty[dirty_ptr])};
end else begin : gen_size_32b
assign miss_size_o = {1'b0, toSize32(bdirty[dirty_ptr])};
end
// replicate transfers shorter than a dword
// NOTE(review): repData64/repData32 are defined elsewhere; presumably they
// replicate the selected bytes across the data word - confirm in the package
assign miss_wdata_o = riscv::IS_XLEN64 ? repData64(
wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
) : repData32(
wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
);
// user bits follow the same replication as the data, or are tied off if disabled
if (ariane_pkg::DATA_USER_EN) begin
assign miss_wuser_o = riscv::IS_XLEN64 ? repData64(
wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
) : repData32(
wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
);
end else begin
assign miss_wuser_o = '0;
end
// byte enables covered by this transfer; used to mark the transmitted bytes
// in the buffer and stored in the TX status entry
assign tx_be = riscv::IS_XLEN64 ? to_byte_enable8(
bdirty_off, miss_size_o[1:0]
) : to_byte_enable4(
bdirty_off, miss_size_o[1:0]
);
///////////////////////////////////////////////////////
// TX status registers and ID counters
///////////////////////////////////////////////////////
// FIFO of returned transaction IDs: an ID is pushed with every write
// response and popped once the corresponding entry has been handled (evict).
// NOTE(review): DATA_WIDTH is $clog2(DCACHE_MAX_TX) while miss_rtrn_id_i and
// rtrn_id are CACHE_ID_WIDTH wide - confirm that the port width mismatch is intended.
// TODO: make this fall through if timing permits it
fifo_v3 #(
.FALL_THROUGH(1'b0),
.DATA_WIDTH ($clog2(DCACHE_MAX_TX)),
.DEPTH (DCACHE_MAX_TX)
) i_rtrn_id_fifo (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (),
.empty_o (rtrn_empty),
.usage_o (),
.data_i (miss_rtrn_id_i),
.push_i (miss_rtrn_vld_i),
.data_o (rtrn_id),
.pop_i (evict)
);
// TX status bookkeeping.
// - retires the transaction at the head of the return FIFO once the cache
//   state of its buffer entry is known (checked), writing the data to the
//   cache first if the line is present
// - allocates a TX status entry for a write that is sent out in this cycle
always_comb begin : p_tx_stat
tx_stat_d = tx_stat_q;
evict = 1'b0;
wr_req_o = '0;
// clear entry if it is clear whether it can be pushed to the cache or not
if ((!rtrn_empty) && wbuffer_q[rtrn_ptr].checked) begin
// check if data is clean and can be written, otherwise skip
// check if CL is present, otherwise skip
if ((|wr_data_be_o) && (|wbuffer_q[rtrn_ptr].hit_oh)) begin
wr_req_o = wbuffer_q[rtrn_ptr].hit_oh;
// the entry is only retired once the cache has accepted the write
if (wr_ack_i) begin
evict = 1'b1;
tx_stat_d[rtrn_id].vld = 1'b0;
end
end else begin
evict = 1'b1;
tx_stat_d[rtrn_id].vld = 1'b0;
end
end
// allocate a new entry
if (dirty_rd_en) begin
tx_stat_d[tx_id].vld = 1'b1;
tx_stat_d[tx_id].ptr = dirty_ptr;
tx_stat_d[tx_id].be = tx_be;
end
end
// at least one TX slot is currently unused
assign free_tx_slots = |(~tx_vld_o);
// pick the ID of the next free TX slot: the free slots are the requesters,
// the selected index is used as transaction ID and the arbiter is granted
// whenever a write is sent out (dirty_rd_en)
rr_arb_tree #(
.NumIn (DCACHE_MAX_TX),
.LockIn (1'b1),
.DataWidth(1)
) i_tx_id_rr (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.rr_i ('0),
.req_i (~tx_vld_o),
.gnt_o (),
.data_i ('0),
.gnt_i (dirty_rd_en),
.req_o (),
.data_o (),
.idx_o (tx_id)
);
///////////////////////////////////////////////////////
// cache readout & update
///////////////////////////////////////////////////////
// tag bits of the word whose cache state is looked up
assign extract_tag = rd_paddr >> DCACHE_INDEX_WIDTH;
assign rd_tag_d = extract_tag[DCACHE_TAG_WIDTH-1:0];
// trigger TAG readout in cache
assign rd_tag_only_o = 1'b1;
// wtag stores the address without its XLEN_ALIGN_BYTES byte-offset bits, so
// the word address is rebuilt by appending zeros. A shift placed inside a
// concatenation is evaluated at the width of wtag and would drop the topmost
// XLEN_ALIGN_BYTES tag bits, which end up in rd_tag_d / rd_tag_o.
assign rd_paddr = {wbuffer_check_mux.wtag, {riscv::XLEN_ALIGN_BYTES{1'b0}}};
// a lookup is requested as long as any valid word has an unknown cache state
assign rd_req_o = |tocheck;
assign rd_tag_o = rd_tag_q;  //delay by one cycle
assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0];
// a lookup has been accepted by the cache in this cycle
assign check_en_d = rd_req_o & rd_ack_i;
// cache update port
// buffer entry that belongs to the oldest returned transaction
assign rtrn_ptr = tx_stat_q[rtrn_id].ptr;
// if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache
// when the TX returns
assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty);
// word address of the returned entry (same reconstruction as for rd_paddr)
assign wr_paddr = {wbuffer_q[rtrn_ptr].wtag, {riscv::XLEN_ALIGN_BYTES{1'b0}}};
assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_data_o = wbuffer_q[rtrn_ptr].data;
assign wr_user_o = wbuffer_q[rtrn_ptr].user;
///////////////////////////////////////////////////////
// readout of status bits, index calculation
///////////////////////////////////////////////////////
// cache-line index part of every buffered word address
logic [DCACHE_WBUF_DEPTH-1:0][DCACHE_CL_IDX_WIDTH-1:0] wtag_comp;
assign wr_cl_vld_d = wr_cl_vld_i;
assign wr_cl_idx_d = wr_cl_idx_i;
for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_flags
  // only for debug, will be pruned
  if (CVA6Cfg.DebugEn) begin
    // full word address of the entry; the zeros are appended rather than
    // shifted in, because a shift inside a concatenation is evaluated at the
    // width of wtag and would drop the topmost XLEN_ALIGN_BYTES address bits
    assign debug_paddr[k] = {wbuffer_q[k].wtag, {riscv::XLEN_ALIGN_BYTES{1'b0}}};
  end
  // dirty bytes that are ready for transmission.
  // note that we cannot retransmit a word that is already in-flight
  // since the multiple transactions might overtake each other in the memory system!
  assign bdirty[k] = (|wbuffer_q[k].txblock) ? '0 : wbuffer_q[k].dirty & wbuffer_q[k].valid;
  assign dirty[k] = |bdirty[k];
  assign valid[k] = |wbuffer_q[k].valid;
  // the incoming core request addresses the word held by this entry
  assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]});
  // checks if an invalidation/cache refill hits a particular word
  // note: an invalidation can hit multiple words!
  // need to respect previous cycle, too, since we add a cycle of latency to the rd_hit_oh_i signal...
  assign wtag_comp[k] = wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES-1:DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES];
  assign inval_hit[k] = (wr_cl_vld_d & valid[k] & (wtag_comp[k] == wr_cl_idx_d)) |
                        (wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q));
  // these word have to be looked up in the cache
  assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k];
end
// entry to write: coalesce into the hitting entry, otherwise use a free one
assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr;
// a write can be accepted if it hits an existing entry or a free entry exists
assign rdy = (|wbuffer_hit_oh) | (~full);
// next free entry in the buffer
// (searched over the inverted valid flags; empty_o means no free entry is left)
lzc #(
.WIDTH(DCACHE_WBUF_DEPTH)
) i_vld_lzc (
.in_i (~valid),
.cnt_o (next_ptr),
.empty_o(full)
);
// get index of hit
lzc #(
.WIDTH(DCACHE_WBUF_DEPTH)
) i_hit_lzc (
.in_i (wbuffer_hit_oh),
.cnt_o (hit_ptr),
.empty_o()
);
// next dirty word to serve
// (granted when the selected word is sent to the miss unit)
rr_arb_tree #(
.NumIn (DCACHE_WBUF_DEPTH),
.LockIn (1'b1),
.DataType(wbuffer_t)
) i_dirty_rr (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.rr_i ('0),
.req_i (dirty),
.gnt_o (),
.data_i (wbuffer_q),
.gnt_i (dirty_rd_en),
.req_o (),
.data_o (wbuffer_dirty_mux),
.idx_o (dirty_ptr)
);
// next word to lookup in the cache
// (granted when the cache accepts the tag lookup)
rr_arb_tree #(
.NumIn (DCACHE_WBUF_DEPTH),
.DataType(wbuffer_t)
) i_clean_rr (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.rr_i ('0),
.req_i (tocheck),
.gnt_o (),
.data_i (wbuffer_q),
.gnt_i (check_en_d),
.req_o (),
.data_o (wbuffer_check_mux),
.idx_o (check_ptr_d)
);
///////////////////////////////////////////////////////
// update logic
///////////////////////////////////////////////////////
// this port only accepts writes, hence the read response channel is tied off
assign req_port_o.data_rvalid = '0;
assign req_port_o.data_rdata = '0;
assign req_port_o.data_ruser = '0;
assign req_port_o.data_rid = '0;
assign rd_hit_oh_d = rd_hit_oh_i;
logic ni_inside, ni_conflict;
// at least one buffered write targets a non-idempotent region
assign ni_inside = |ni_pending_q;
// an incoming NI write has to wait while another NI write is still buffered
assign ni_conflict = CVA6Cfg.NonIdemPotenceEn && is_ni && ni_inside;
assign not_ni_o = !ni_inside;
// the buffer is empty if no entry holds a valid byte
assign empty_o = !(|valid);
// TODO: rewrite and separate into MUXES and write strobe logic
// Next-state logic of the buffer entries. The updates are applied in the
// order below, so a later step overrides an earlier one on the same field:
// 1) record the result of a returned tag lookup
// 2) discard the cache state of entries hit by an invalidation / refill
// 3) retire the bytes of a returned write transaction
// 4) mark bytes that are sent to memory in this cycle
// 5) merge a new write from the core
always_comb begin : p_buffer
wbuffer_d = wbuffer_q;
ni_pending_d = ni_pending_q;
dirty_rd_en = 1'b0;
req_port_o.data_gnt = 1'b0;
wbuffer_wren = 1'b0;
// TAG lookup returns, mark corresponding word
if (check_en_q1) begin
// ignore the result if the entry has been evicted in the meantime
if (|wbuffer_q[check_ptr_q1].valid) begin
wbuffer_d[check_ptr_q1].checked = 1'b1;
wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q;
end
end
// if an invalidation or cache line refill comes in and hits on the write buffer,
// we have to discard our knowledge of the corresponding cacheline state
for (int k = 0; k < DCACHE_WBUF_DEPTH; k++) begin
if (inval_hit[k]) begin
wbuffer_d[k].checked = 1'b0;
end
end
// once TX write response came back, we can clear the TX block. if it was not dirty, we
// can completely evict it - otherwise we have to leave it there for retransmission
if (evict) begin
for (int k = 0; k < (riscv::XLEN / 8); k++) begin
if (tx_stat_q[rtrn_id].be[k]) begin
wbuffer_d[rtrn_ptr].txblock[k] = 1'b0;
if (!wbuffer_q[rtrn_ptr].dirty[k]) begin
wbuffer_d[rtrn_ptr].valid[k] = 1'b0;
// NOTE: this is not strictly needed, but makes it much
// easier to debug, since no invalid data remains in the buffer
// wbuffer_d[rtrn_ptr].data[k*8 +:8] = '0;
end
end
end
// if all bytes are evicted, clear the cache status flag
if (wbuffer_d[rtrn_ptr].valid == 0) begin
wbuffer_d[rtrn_ptr].checked = 1'b0;
ni_pending_d[rtrn_ptr] = 1'b0;
end
end
// mark bytes sent out to the memory system
if (miss_req_o && miss_ack_i) begin
dirty_rd_en = 1'b1;
for (int k = 0; k < (riscv::XLEN / 8); k++) begin
if (tx_be[k]) begin
wbuffer_d[dirty_ptr].dirty[k] = 1'b0;
wbuffer_d[dirty_ptr].txblock[k] = 1'b1;
end
end
end
// write new word into the buffer
if (req_port_i.data_req && rdy) begin
// in case we have an NI address, need to drain the buffer first
// in case we are serving an NI address, we block until it is written to memory
if (!ni_conflict) begin //empty of NI operations
wbuffer_wren = 1'b1;
req_port_o.data_gnt = 1'b1;
ni_pending_d[wr_ptr] = is_ni;
// the cache state of the (possibly new) word has to be looked up again
wbuffer_d[wr_ptr].checked = 1'b0;
wbuffer_d[wr_ptr].wtag = {
req_port_i.address_tag,
req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]
};
// mark bytes as dirty
for (int k = 0; k < (riscv::XLEN / 8); k++) begin
if (req_port_i.data_be[k]) begin
wbuffer_d[wr_ptr].valid[k] = 1'b1;
wbuffer_d[wr_ptr].dirty[k] = 1'b1;
wbuffer_d[wr_ptr].data[k*8+:8] = req_port_i.data_wdata[k*8+:8];
if (ariane_pkg::DATA_USER_EN) begin
wbuffer_d[wr_ptr].user[k*8+:8] = req_port_i.data_wuser[k*8+:8];
end else begin
wbuffer_d[wr_ptr].user[k*8+:8] = '0;
end
end
end
end
end
end
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
// State registers of the write buffer.
// Asynchronous active-low reset clears every register; otherwise each *_q
// register samples its *_d counterpart on the rising clock edge. The two *_q1
// registers are fed from the matching *_q register, i.e. they are a second
// delay stage of the check pointer / check enable.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
  if (!rst_ni) begin
    // buffer contents and transaction bookkeeping
    wbuffer_q    <= '{default: '0};
    tx_stat_q    <= '{default: '0};
    ni_pending_q <= '0;
    // tag-check pipeline (stage 0 and stage 1)
    check_en_q   <= '0;
    check_en_q1  <= '0;
    check_ptr_q  <= '0;
    check_ptr_q1 <= '0;
    // tag lookup results
    rd_tag_q     <= '0;
    rd_hit_oh_q  <= '0;
    // cache line write tracking
    wr_cl_vld_q  <= '0;
    wr_cl_idx_q  <= '0;
  end else begin
    // buffer contents and transaction bookkeeping
    wbuffer_q    <= wbuffer_d;
    tx_stat_q    <= tx_stat_d;
    ni_pending_q <= ni_pending_d;
    // tag-check pipeline: *_q1 lags *_q by one cycle
    check_en_q   <= check_en_d;
    check_en_q1  <= check_en_q;
    check_ptr_q  <= check_ptr_d;
    check_ptr_q1 <= check_ptr_q;
    // tag lookup results
    rd_tag_q     <= rd_tag_d;
    rd_hit_oh_q  <= rd_hit_oh_d;
    // cache line write tracking
    wr_cl_vld_q  <= wr_cl_vld_d;
    wr_cl_idx_q  <= wr_cl_idx_d;
  end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
// Simulation-only checks. All properties are sampled on the rising edge of
// clk_i and disabled while rst_ni is low; every failure ends the simulation
// through $fatal.

// A write request may match at most one write buffer entry.
hot1 :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    req_port_i.data_req |-> $onehot0(wbuffer_hit_oh))
else $fatal(1, "[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1");

// A transaction slot must not be allocated and released in the same cycle.
tx_status :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    (evict && miss_req_o && miss_ack_i) |-> (rtrn_id != tx_id))
else $fatal(1, "[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle");

// An eviction must refer to a transaction slot that is marked valid ...
tx_valid0 :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    evict |-> tx_stat_q[rtrn_id].vld)
else $fatal(1, "[l1 dcache wbuffer] evicting invalid transaction slot");

// ... and to a buffer entry that holds at least one valid byte.
tx_valid1 :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    evict |-> |wbuffer_q[rtrn_ptr].valid)
else $fatal(1, "[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid");

// A granted write needs either free space or a hit on an existing entry.
write_full :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    (req_port_i.data_req && req_port_o.data_gnt) |-> ((|wbuffer_hit_oh) || (!full)))
else $fatal(1, "[l1 dcache wbuffer] cannot write if full or no hit");

// tag_valid and kill_req of the request port are expected to stay low.
unused0 :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    !req_port_i.tag_valid)
else $fatal(1, "[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted");

unused1 :
assert property (@(posedge clk_i) disable iff (!rst_ni)
    !req_port_i.kill_req)
else $fatal(1, "[l1 dcache wbuffer] req_port_i.kill_req should not be asserted");

// Per-byte state check: {valid, dirty, txblock} of every byte in every entry
// must be one of 000, 101, 110 or 111.
for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_assert1
  for (genvar j = 0; j < (riscv::XLEN / 8); j++) begin : gen_assert2
    byteStates :
    assert property (@(posedge clk_i) disable iff (!rst_ni)
        {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]}
        inside {3'b000, 3'b101, 3'b110, 3'b111})
    else
      $fatal(
          1,
          "[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b",
          j, k,
          wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]
      );
  end
end
`endif
//pragma translate_on
endmodule // wt_dcache_wbuffer

View File

@ -0,0 +1,298 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 15.04.2017
// Description: Commits to the architectural state resulting from the scoreboard.
// Commit stage: retires the instructions presented by the scoreboard on up to
// CVA6Cfg.NrCommitPorts ports and drives the register file write ports, the
// CSR interface, the LSU/CSR commit strobes and the fence/flush requests.
//
// The module body is purely combinational (continuous assignments and three
// always_comb processes); clk_i and rst_ni are not referenced by active code.
//
// Structure:
//   - gen_waddr / pc_o / commit_tran_id_o : plain wiring from commit_instr_i
//   - dirty_fp_state     : flags FP state as dirty
//   - commit             : commit decisions for port 0 and (optionally) port 1
//   - exception_handling : selects the exception reported to the controller
module commit_stage
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic halt_i, // request to halt the core
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
output exception_t exception_o, // take exception to controller
output logic dirty_fp_state_o, // mark the F state as dirty
input logic single_step_i, // we are in single step debug mode
// from scoreboard
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit
output logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_o, // acknowledge that we are indeed committing
// to register file
output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o, // register file write address
output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o, // register file write data
output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o, // register file write enable
output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o, // floating point register enable
// Atomic memory operations
input amo_resp_t amo_resp_i, // result of AMO operation
// to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
output logic [riscv::VLEN-1:0] pc_o,
// to/from CSR file
output fu_op csr_op_o, // decoded CSR operation
output riscv::xlen_t csr_wdata_o, // data to write to CSR
input riscv::xlen_t csr_rdata_i, // data to read from CSR
input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit)
output logic csr_write_fflags_o, // write the fflags CSR
// commit signals to ex
output logic commit_lsu_o, // commit the pending store
input logic commit_lsu_ready_i, // commit buffer of LSU is ready
output logic [TRANS_ID_BITS-1:0] commit_tran_id_o, // transaction id of first commit port
output logic amo_valid_commit_o, // valid AMO in commit stage
input logic no_st_pending_i, // there is no store pending
output logic commit_csr_o, // commit the pending CSR instruction
output logic fence_i_o, // flush I$ and pipeline
output logic fence_o, // flush D$ and pipeline
output logic flush_commit_o, // request a pipeline flush
output logic sfence_vma_o // flush TLBs and pipeline
);
// Disabled debug probe (ILA) instance, kept for reference only.
// ila_0 i_ila_commit (
// .clk(clk_i), // input wire clk
// .probe0(commit_instr_i[0].pc), // input wire [63:0] probe0
// .probe1(commit_instr_i[1].pc), // input wire [63:0] probe1
// .probe2(commit_instr_i[0].valid), // input wire [0:0] probe2
// .probe3(commit_instr_i[1].valid), // input wire [0:0] probe3
// .probe4(commit_ack_o[0]), // input wire [0:0] probe4
// .probe5(commit_ack_o[0]), // input wire [0:0] probe5
// .probe6(1'b0), // input wire [0:0] probe6
// .probe7(1'b0), // input wire [0:0] probe7
// .probe8(1'b0), // input wire [0:0] probe8
// .probe9(1'b0) // input wire [0:0] probe9
// );
// The register file write address of every port is the low five bits of the
// destination register of the instruction on that port.
for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_waddr
assign waddr_o[i] = commit_instr_i[i].rd[4:0];
end
// The PC handed to the CSR file / PC Gen is always the one of commit port 0.
assign pc_o = commit_instr_i[0].pc;
// Dirty the FP state if we are committing anything related to the FPU
always_comb begin : dirty_fp_state
dirty_fp_state_o = 1'b0;
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
// an acknowledged instruction that ran on the FPU or that writes an FP register
dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
commit_instr_i[i].op
)));
// Check if we issued a vector floating-point instruction to the accelerator
// NOTE(review): unlike the term above, this one is not qualified by commit_ack_o[i].
dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp;
end
end
assign commit_tran_id_o = commit_instr_i[0].trans_id;
// high if the instruction on port 0 is an atomic memory operation
logic instr_0_is_amo;
assign instr_0_is_amo = is_amo(commit_instr_i[0].op);
// -------------------
// Commit Instruction
// -------------------
// write register file or commit instruction in LSU or CSR Buffer
//
// NOTE: this process relies on procedural ordering. Several sections below
// assign the same outputs (commit_ack_o[0], we_gpr_o[0], wdata_o[0],
// csr_wdata_o); the last assignment that executes wins, so the sections must
// not be reordered.
always_comb begin : commit
// default assignments
commit_ack_o[0] = 1'b0;
amo_valid_commit_o = 1'b0;
we_gpr_o[0] = 1'b0;
we_fpr_o = '{default: 1'b0};
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
// amos will commit on port 0
wdata_o[0] = (CVA6Cfg.RVA && amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = {riscv::XLEN{1'b0}};
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
csr_write_fflags_o = 1'b0;
flush_commit_o = 1'b0;
// we will not commit the instruction if we took an exception
// and we do not commit the instruction if we requested a halt
if (commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i) begin
// we can definitely write the register file
// if the instruction is not committing anything the destination
// (original comment ends here; presumably the destination is then x0 - TODO confirm)
commit_ack_o[0] = 1'b1;
// route the write enable to the FP or the integer register file
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[0].op)) begin
we_fpr_o[0] = 1'b1;
end else begin
we_gpr_o[0] = 1'b1;
end
// check whether the instruction we retire was a store
// (with the A extension enabled, AMOs are excluded here and handled in the AMO section below)
if ((!CVA6Cfg.RVA && commit_instr_i[0].fu == STORE) || (CVA6Cfg.RVA && commit_instr_i[0].fu == STORE && !instr_0_is_amo)) begin
// check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
if (commit_lsu_ready_i) begin
commit_ack_o[0] = 1'b1;
commit_lsu_o = 1'b1;
// stall in case the store buffer is not able to accept anymore instructions
end else begin
commit_ack_o[0] = 1'b0;
end
end
// ---------
// FPU Flags
// ---------
if (CVA6Cfg.FpPresent) begin
if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
// write the CSR with potential exception flags from retiring floating point instruction
// (the flags are taken from the low five bits of ex.cause)
csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
commit_ack_o[0] = 1'b1;
end
end
// ---------
// CSR Logic
// ---------
// check whether the instruction we retire was a CSR instruction and it did not
// throw an exception
if (commit_instr_i[0].fu == CSR) begin
// write the CSR file
csr_op_o = commit_instr_i[0].op;
csr_wdata_o = commit_instr_i[0].result;
if (!csr_exception_i.valid) begin
commit_csr_o = 1'b1;
// the value written back to the register file is the CSR read data
wdata_o[0] = csr_rdata_i;
commit_ack_o[0] = 1'b1;
end else begin
// the CSR access raised an exception: neither acknowledge nor write the GPR
commit_ack_o[0] = 1'b0;
we_gpr_o[0] = 1'b0;
end
end
// ------------------
// SFENCE.VMA Logic
// ------------------
// sfence.vma is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// check if this instruction was a SFENCE_VMA
if (CVA6Cfg.RVS && commit_instr_i[0].op == SFENCE_VMA) begin
// no store pending so we can flush the TLBs and pipeline
sfence_vma_o = no_st_pending_i;
// wait for the store buffer to drain until flushing the pipeline
commit_ack_o[0] = no_st_pending_i;
end
// ------------------
// FENCE.I Logic
// ------------------
// fence.i is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// Fence synchronizes data and instruction streams. That means that we need to flush the private icache
// and the private dcache. This is the most expensive instruction.
// The same path is taken when a dcache flush is requested on a write-back
// cache configuration and the instruction on port 0 is not a store.
if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && DCACHE_TYPE == int'(config_pkg::WB) && commit_instr_i[0].fu != STORE)) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the I$
fence_i_o = no_st_pending_i;
end
// ------------------
// FENCE Logic
// ------------------
// fence is idempotent so we can safely re-execute it after returning
// from interrupt service routine
if (commit_instr_i[0].op == FENCE) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the D$
fence_o = no_st_pending_i;
end
// ------------------
// AMO
// ------------------
if (CVA6Cfg.RVA && instr_0_is_amo) begin
// AMO finished
commit_ack_o[0] = amo_resp_i.ack;
// flush the pipeline
flush_commit_o = amo_resp_i.ack;
amo_valid_commit_o = 1'b1;
// only write the GPR once the AMO response has arrived
we_gpr_o[0] = amo_resp_i.ack;
end
end
// NOTE(review): only ports 0 and 1 are handled by this process; apart from the
// we_fpr_o default, ports with a higher index are not driven here.
if (CVA6Cfg.NrCommitPorts > 1) begin
commit_ack_o[1] = 1'b0;
we_gpr_o[1] = 1'b0;
wdata_o[1] = commit_instr_i[1].result;
// -----------------
// Commit Port 2
// -----------------
// check if the second instruction can be committed as well and the first wasn't a CSR instruction
// also if we are in single step mode don't retire the second instruction
if (commit_ack_o[0] && commit_instr_i[1].valid
&& !halt_i
&& !(commit_instr_i[0].fu inside {CSR})
&& !flush_dcache_i
&& !instr_0_is_amo
&& !single_step_i) begin
// only if the first instruction didn't throw an exception and this instruction won't throw an exception
// and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC
if (!exception_o.valid && !commit_instr_i[1].ex.valid
&& (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[1].op)) we_fpr_o[1] = 1'b1;
else we_gpr_o[1] = 1'b1;
commit_ack_o[1] = 1'b1;
// additionally check if we are retiring an FPU instruction because we need to make sure that we write all
// exception flags
if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
// port 0 already writes fflags: merge (OR) the flags of both instructions
if (csr_write_fflags_o)
csr_wdata_o = {
{riscv::XLEN - 5{1'b0}},
(commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])
};
// otherwise only the flags of the instruction on port 1 are written
else csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[1].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
end
end
end
end
end
// -----------------------------
// Exception & Interrupt Logic
// -----------------------------
// here we know for sure that we are taking the exception
always_comb begin : exception_handling
// Multiple simultaneous interrupts and traps at the same privilege level are handled in the following decreasing
// priority order: external interrupts, software interrupts, timer interrupts, then finally any synchronous traps. (1.10 p.30)
// interrupts are correctly prioritized in the CSR reg file, exceptions are prioritized here
exception_o.valid = 1'b0;
exception_o.cause = '0;
exception_o.tval = '0;
// we need a valid instruction in the commit stage
if (commit_instr_i[0].valid) begin
// ------------------------
// check for CSR exception
// ------------------------
if (csr_exception_i.valid) begin
exception_o = csr_exception_i;
// if no earlier exception happened the commit instruction will still contain
// the instruction bits from the ID stage. If an earlier exception happened we don't care
// as we will overwrite it anyway in the next if-block
exception_o.tval = commit_instr_i[0].ex.tval;
end
// ------------------------
// Earlier Exceptions
// ------------------------
// but we give precedence to exceptions which happened earlier e.g.: instruction page
// faults for example
if (commit_instr_i[0].ex.valid) begin
exception_o = commit_instr_i[0].ex;
end
end
// Don't take any exceptions iff:
// - If we halted the processor
// (only the valid bit is cleared; cause and tval keep the values selected above)
if (halt_i) begin
exception_o.valid = 1'b0;
end
end
endmodule

View File

@ -0,0 +1,935 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License. //
//
// Author: Florian Zaruba - zarubaf@iis.ee.ethz.ch
// Engineer: Sven Stucki - svstucki@student.ethz.ch
//
// Design Name: Compressed instruction decoder
// Project Name: zero-riscy
// Language: SystemVerilog
//
// Description: Decodes RISC-V compressed instructions into their RV32
// equivalent. This module is fully combinatorial.
// Compressed (RVC) instruction expander.
//
// Purely combinational: one always_comb process maps a 16-bit compressed
// instruction found in the low half of instr_i onto its 32-bit equivalent.
//
// Parameters:
//   CVA6Cfg - core configuration; FpPresent, RVZCB and RVB gate the optional
//             floating-point, Zcb and bit-manipulation expansions.
// Ports:
//   instr_i         - fetched instruction word
//   instr_o         - expanded instruction; equal to instr_i when the input
//                     is not compressed or is flagged illegal
//   illegal_instr_o - set for reserved, unsupported or disabled encodings
//   is_compressed_o - set whenever instr_i[1:0] selects one of the
//                     riscv::OpcodeC0/C1/C2 quadrants, cleared otherwise
module compressed_decoder #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic [31:0] instr_i,
    output logic [31:0] instr_o,
    output logic        illegal_instr_o,
    output logic        is_compressed_o
);

  // -------------------
  // Compressed Decoder
  // -------------------
  always_comb begin
    // defaults: legal, compressed, instruction passed through unchanged
    illegal_instr_o = 1'b0;
    is_compressed_o = 1'b1;
    instr_o         = instr_i;

    // I: |   imm[11:0]   | rs1 | funct3 |    rd    | opcode |
    // S: | imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode |
    // Compressed register fields (rd', rs1', rs2') are three bits wide and
    // are expanded by prefixing 2'b01.
    unique case (instr_i[1:0])
      // C0
      riscv::OpcodeC0: begin
        unique case (instr_i[15:13])
          riscv::OpcodeC0Addi4spn: begin
            // c.addi4spn -> addi rd', x2, imm
            instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], riscv::OpcodeOpImm};
            // an all-zero immediate is a reserved encoding
            if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1;
          end

          riscv::OpcodeC0Fld: begin
            if (CVA6Cfg.FpPresent) begin
              // c.fld -> fld rd', imm(rs1')
              // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
              instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], riscv::OpcodeLoadFp};
            end else begin
              illegal_instr_o = 1'b1;
            end
          end

          riscv::OpcodeC0Lw: begin
            // c.lw -> lw rd', imm(rs1')
            instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], riscv::OpcodeLoad};
          end

          riscv::OpcodeC0Ld: begin
            // RV64
            //   c.ld -> ld rd', imm(rs1')
            // RV32
            //   c.flw -> flw fprd', imm(rs1')
            if (riscv::IS_XLEN64) begin
              // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
              instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], riscv::OpcodeLoad};
            end else begin
              if (CVA6Cfg.FpPresent) begin
                // CFLW: | funct3 (change to LW) | imm[5:3] | rs1' | imm[2|6] | rd' | C0 |
                instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], riscv::OpcodeLoadFp};
              end else begin
                illegal_instr_o = 1'b1;
              end
            end
          end

          riscv::OpcodeC0Zcb: begin
            if (CVA6Cfg.RVZCB) begin
              unique case (instr_i[12:10])
                3'b000: begin
                  // c.lbu -> lbu rd', uimm(rs1')
                  instr_o = {10'b0, instr_i[5], instr_i[6], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[4:2], riscv::OpcodeLoad};
                end
                3'b001: begin
                  if (instr_i[6]) begin
                    // c.lh -> lh rd', uimm(rs1')
                    instr_o = {10'b0, instr_i[5], 1'b0, 2'b01, instr_i[9:7], 3'b001, 2'b01, instr_i[4:2], riscv::OpcodeLoad};
                  end else begin
                    // c.lhu -> lhu rd', uimm(rs1')
                    instr_o = {10'b0, instr_i[5], 1'b0, 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[4:2], riscv::OpcodeLoad};
                  end
                end
                3'b010: begin
                  // c.sb -> sb rs2', uimm(rs1')
                  instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 3'b0, instr_i[5], instr_i[6], riscv::OpcodeStore};
                end
                3'b011: begin
                  // c.sh -> sh rs2', uimm(rs1')
                  instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b001, 3'b0, instr_i[5], 1'b0, riscv::OpcodeStore};
                  // Zcb defines c.sh only with bit 6 cleared; the encoding
                  // with bit 6 set is reserved and must not decode as sh
                  if (instr_i[6]) illegal_instr_o = 1'b1;
                end
                default: begin
                  illegal_instr_o = 1'b1;
                end
              endcase
            end else begin
              instr_o         = instr_i;
              illegal_instr_o = 1'b1;
            end
          end

          riscv::OpcodeC0Fsd: begin
            if (CVA6Cfg.FpPresent) begin
              // c.fsd -> fsd rs2', imm(rs1')
              instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStoreFp};
            end else begin
              illegal_instr_o = 1'b1;
            end
          end

          riscv::OpcodeC0Sw: begin
            // c.sw -> sw rs2', imm(rs1')
            instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, riscv::OpcodeStore};
          end

          riscv::OpcodeC0Sd: begin
            // RV64
            //   c.sd -> sd rs2', imm(rs1')
            // RV32
            //   c.fsw -> fsw fprs2', imm(rs1')
            if (riscv::IS_XLEN64) begin
              instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStore};
            end else begin
              if (CVA6Cfg.FpPresent) begin
                instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, riscv::OpcodeStoreFp};
              end else begin
                illegal_instr_o = 1'b1;
              end
            end
          end

          default: begin
            illegal_instr_o = 1'b1;
          end
        endcase
      end

      // C1
      riscv::OpcodeC1: begin
        unique case (instr_i[15:13])
          riscv::OpcodeC1Addi: begin
            // c.addi -> addi rd, rd, nzimm
            // c.nop  -> addi 0, 0, 0
            instr_o = {{6{instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpImm};
          end

          riscv::OpcodeC1Addiw: begin  // or riscv::OpcodeC1Jal for RV32IC
            if (riscv::IS_XLEN64) begin
              // c.addiw -> addiw rd, rd, nzimm for RV64IC
              if (instr_i[11:7] != 5'h0) begin  // only valid if the destination is not r0
                instr_o = {{6{instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpImm32};
              end else begin
                illegal_instr_o = 1'b1;
              end
            end else begin
              // c.jal -> jal x1, imm for RV32IC only
              instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9{instr_i[12]}}, 5'b1, riscv::OpcodeJal};
            end
          end

          riscv::OpcodeC1Li: begin
            // c.li -> addi rd, x0, nzimm
            instr_o = {{6{instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOpImm};
          end

          riscv::OpcodeC1LuiAddi16sp: begin
            // c.lui -> lui rd, imm
            instr_o = {{15{instr_i[12]}}, instr_i[6:2], instr_i[11:7], riscv::OpcodeLui};
            if (instr_i[11:7] == 5'h02) begin
              // c.addi16sp -> addi x2, x2, nzimm
              instr_o = {{3{instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, riscv::OpcodeOpImm};
            end
            // a zero immediate is reserved for both c.lui and c.addi16sp
            if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
          end

          riscv::OpcodeC1MiscAlu: begin
            unique case (instr_i[11:10])
              2'b00, 2'b01: begin
                // 00: c.srli -> srli rd, rd, shamt
                // 01: c.srai -> srai rd, rd, shamt
                instr_o = {1'b0, instr_i[10], 4'b0, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], riscv::OpcodeOpImm};
              end

              2'b10: begin
                // c.andi -> andi rd, rd, imm
                instr_o = {{6{instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOpImm};
              end

              2'b11: begin
                unique case ({instr_i[12], instr_i[6:5]})
                  3'b000: begin
                    // c.sub -> sub rd', rd', rs2'
                    instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp};
                  end

                  3'b001: begin
                    // c.xor -> xor rd', rd', rs2'
                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], riscv::OpcodeOp};
                  end

                  3'b010: begin
                    // c.or -> or rd', rd', rs2'
                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], riscv::OpcodeOp};
                  end

                  3'b011: begin
                    // c.and -> and rd', rd', rs2'
                    instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOp};
                  end

                  3'b100: begin
                    if (riscv::IS_XLEN64) begin
                      // c.subw -> subw rd', rd', rs2'
                      instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32};
                    end else begin
                      illegal_instr_o = 1'b1;
                    end
                  end

                  3'b101: begin
                    if (riscv::IS_XLEN64) begin
                      // c.addw -> addw rd', rd', rs2'
                      instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32};
                    end else begin
                      illegal_instr_o = 1'b1;
                    end
                  end

                  3'b110: begin
                    if (CVA6Cfg.RVZCB) begin
                      // c.mul -> mul rd', rd', rs2'
                      instr_o = {6'b0, 1'b1, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp};
                    end else begin
                      instr_o         = instr_i;
                      illegal_instr_o = 1'b1;
                    end
                  end

                  3'b111: begin
                    if (CVA6Cfg.RVZCB) begin
                      unique case (instr_i[4:2])
                        3'b000: begin
                          // c.zext.b -> andi rd', rd', 0xff
                          instr_o = {4'b0, 8'hFF, 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOpImm};
                        end

                        3'b001: begin
                          if (CVA6Cfg.RVB) begin
                            // c.sext.b -> sext.b rd', rd'
                            instr_o = {7'h30, 5'h4, 2'b01, instr_i[9:7], 3'b001, 2'b01, instr_i[9:7], riscv::OpcodeOpImm};
                          end else illegal_instr_o = 1'b1;
                        end

                        3'b010: begin
                          if (CVA6Cfg.RVB) begin
                            // c.zext.h -> zext.h rd', rd'
                            if (riscv::IS_XLEN64) begin
                              instr_o = {7'h4, 5'h0, 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], riscv::OpcodeOp32};
                            end else begin
                              instr_o = {7'h4, 5'h0, 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], riscv::OpcodeOp};
                            end
                          end else illegal_instr_o = 1'b1;
                        end

                        3'b011: begin
                          if (CVA6Cfg.RVB) begin
                            // c.sext.h -> sext.h rd', rd'
                            instr_o = {7'h30, 5'h5, 2'b01, instr_i[9:7], 3'b001, 2'b01, instr_i[9:7], riscv::OpcodeOpImm};
                          end else illegal_instr_o = 1'b1;
                        end

                        3'b100: begin
                          if (CVA6Cfg.RVB) begin
                            // c.zext.w -> add.uw
                            if (riscv::IS_XLEN64) begin
                              instr_o = {7'h4, 5'h0, 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32};
                            end else begin
                              illegal_instr_o = 1'b1;
                            end
                          end else illegal_instr_o = 1'b1;
                        end

                        3'b101: begin
                          // c.not -> xori rd', rd', -1
                          instr_o = {12'hFFF, 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], riscv::OpcodeOpImm};
                        end

                        default: begin
                          instr_o         = instr_i;
                          illegal_instr_o = 1'b1;
                        end
                      endcase
                    end else begin
                      // Without Zcb this whole sub-space is reserved. Flag it
                      // here, as the 3'b110 arm does, instead of reporting a
                      // legal compressed instruction.
                      instr_o         = instr_i;
                      illegal_instr_o = 1'b1;
                    end
                  end
                endcase
              end
            endcase
          end

          riscv::OpcodeC1J: begin
            // 101: c.j -> jal x0, imm
            // rd is {4'b0, ~instr_i[15]}
            instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9{instr_i[12]}}, 4'b0, ~instr_i[15], riscv::OpcodeJal};
          end

          riscv::OpcodeC1Beqz, riscv::OpcodeC1Bnez: begin
            // 0: c.beqz -> beq rs1', x0, imm
            // 1: c.bnez -> bne rs1', x0, imm
            // instr_i[13] becomes the low bit of funct3
            instr_o = {{4{instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], riscv::OpcodeBranch};
          end
        endcase
      end

      // C2
      riscv::OpcodeC2: begin
        unique case (instr_i[15:13])
          riscv::OpcodeC2Slli: begin
            // c.slli -> slli rd, rd, shamt
            instr_o = {6'b0, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], riscv::OpcodeOpImm};
          end

          riscv::OpcodeC2Fldsp: begin
            if (CVA6Cfg.FpPresent) begin
              // c.fldsp -> fld rd, imm(x2)
              instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], riscv::OpcodeLoadFp};
            end else begin
              illegal_instr_o = 1'b1;
            end
          end

          riscv::OpcodeC2Lwsp: begin
            // c.lwsp -> lw rd, imm(x2)
            instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], riscv::OpcodeLoad};
            // rd == x0 is reserved
            if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
          end

          riscv::OpcodeC2Ldsp: begin
            // RV64
            //   c.ldsp -> ld rd, imm(x2)
            // RV32
            //   c.flwsp -> flw fprd, imm(x2)
            if (riscv::IS_XLEN64) begin
              instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], riscv::OpcodeLoad};
              // rd == x0 is reserved
              if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
            end else begin
              if (CVA6Cfg.FpPresent) begin
                instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], riscv::OpcodeLoadFp};
              end else begin
                illegal_instr_o = 1'b1;
              end
            end
          end

          riscv::OpcodeC2JalrMvAdd: begin
            if (instr_i[12] == 1'b0) begin
              // c.mv -> add rd/rs1, x0, rs2
              instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOp};

              if (instr_i[6:2] == 5'b0) begin
                // c.jr -> jalr x0, rd/rs1, 0
                instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, riscv::OpcodeJalr};
                // rs1 != 0
                illegal_instr_o = (instr_i[11:7] != '0) ? 1'b0 : 1'b1;
              end
            end else begin
              // c.add -> add rd, rd, rs2
              instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOp};

              if (instr_i[6:2] == 5'b0) begin
                if (instr_i[11:7] == 5'b0) begin
                  // c.ebreak -> ebreak
                  instr_o = {32'h00_10_00_73};
                end else begin
                  // c.jalr -> jalr x1, rs1, 0
                  instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, riscv::OpcodeJalr};
                end
              end
            end
          end

          riscv::OpcodeC2Fsdsp: begin
            if (CVA6Cfg.FpPresent) begin
              // c.fsdsp -> fsd rs2, imm(x2)
              instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStoreFp};
            end else begin
              illegal_instr_o = 1'b1;
            end
          end

          riscv::OpcodeC2Swsp: begin
            // c.swsp -> sw rs2, imm(x2)
            instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, riscv::OpcodeStore};
          end

          riscv::OpcodeC2Sdsp: begin
            // RV64
            //   c.sdsp -> sd rs2, imm(x2)
            // RV32
            //   c.fswsp -> fsw fprs2, imm(x2)
            if (riscv::IS_XLEN64) begin
              instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStore};
            end else begin
              if (CVA6Cfg.FpPresent) begin
                instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, riscv::OpcodeStoreFp};
              end else begin
                illegal_instr_o = 1'b1;
              end
            end
          end

          default: begin
            illegal_instr_o = 1'b1;
          end
        endcase
      end

      // normal instruction
      default: is_compressed_o = 1'b0;
    endcase

    // If the instruction was flagged illegal, discard any partial expansion
    // and hand the original fetch word through unchanged.
    if (illegal_instr_o) begin
      instr_o = instr_i;
    end
  end
endmodule

View File

@ -0,0 +1,194 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.05.2017
// Description: Flush controller
module controller
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
output logic set_pc_commit_o, // Set PC om PC Gen
output logic flush_if_o, // Flush the IF stage
output logic flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard
output logic flush_id_o, // Flush ID stage
output logic flush_ex_o, // Flush EX stage
output logic flush_bp_o, // Flush branch predictors
output logic flush_icache_o, // Flush ICache
output logic flush_dcache_o, // Flush DCache
input logic flush_dcache_ack_i, // Acknowledge the whole DCache Flush
output logic flush_tlb_o, // Flush TLBs
input logic halt_csr_i, // Halt request from CSR (WFI instruction)
input logic halt_acc_i, // Halt request from accelerator dispatcher
output logic halt_o, // Halt signal to commit stage
input logic eret_i, // Return from exception
input logic ex_valid_i, // We got an exception, flush the pipeline
input logic set_debug_pc_i, // set the debug pc from CSR
input bp_resolve_t resolved_branch_i, // We got a resolved branch, check if we need to flush the front-end
input logic flush_csr_i, // We got an instruction which altered the CSR, flush the pipeline
input logic fence_i_i, // fence.i in
input logic fence_i, // fence in
input logic sfence_vma_i, // We got an instruction to flush the TLBs and pipeline
input logic flush_commit_i, // Flush request from commit stage
input logic flush_acc_i // Flush request from accelerator
);
// active fence - high if we are currently flushing the dcache
logic fence_active_d, fence_active_q;
logic flush_dcache;
// ------------
// Flush CTRL
// ------------
// Flush control. Every output defaults to "no flush"; the request handlers
// below are evaluated in order, so a later handler's assignment to the same
// signal overrides an earlier one (see the exception handler at the end, which
// forces set_pc_commit_o back to 0).
always_comb begin : flush_ctrl
fence_active_d = fence_active_q;
set_pc_commit_o = 1'b0;
flush_if_o = 1'b0;
flush_unissued_instr_o = 1'b0;
flush_id_o = 1'b0;
flush_ex_o = 1'b0;
flush_dcache = 1'b0;
flush_icache_o = 1'b0;
flush_tlb_o = 1'b0;
flush_bp_o = 1'b0;
// ------------
// Mis-predict
// ------------
// flush on mispredict
if (resolved_branch_i.is_mispredict) begin
// flush only un-issued instructions
flush_unissued_instr_o = 1'b1;
// and if stage
flush_if_o = 1'b1;
end
// ---------------------------------
// FENCE
// ---------------------------------
if (fence_i) begin
// this can be seen as a CSR instruction with side-effect
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
// the data cache flush is only requested when the cache is write-back
// (DCACHE_TYPE == WB); for any other cache type the fence does not
// start a dcache flush
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
flush_dcache = 1'b1;
fence_active_d = 1'b1;
end
end
// ---------------------------------
// FENCE.I
// ---------------------------------
if (fence_i_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_icache_o = 1'b1;
// as for FENCE: the data cache flush is only requested for a
// write-back cache (DCACHE_TYPE == WB)
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
flush_dcache = 1'b1;
fence_active_d = 1'b1;
end
end
// Fence bookkeeping, only relevant for a write-back cache: once a fence has
// set fence_active_q, keep requesting the flush until the cache acknowledges
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
// wait for the acknowledge here
if (flush_dcache_ack_i && fence_active_q) begin
fence_active_d = 1'b0;
// keep the flush dcache signal high as long as we didn't get the acknowledge from the cache
end else if (fence_active_q) begin
flush_dcache = 1'b1;
end
end
// ---------------------------------
// SFENCE.VMA
// ---------------------------------
// only honoured when the configuration enables CVA6Cfg.RVS
if (CVA6Cfg.RVS && sfence_vma_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_tlb_o = 1'b1;
end
// Set PC to commit stage and flush pipeline
// (CSR side-effect or accelerator request; the commit-stage request is only
// honoured when CVA6Cfg.RVA is set)
if (flush_csr_i || flush_acc_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
end else if (CVA6Cfg.RVA && flush_commit_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
end
// ---------------------------------
// 1. Exception
// 2. Return from exception
// 3. Debug PC request (only when CVA6Cfg.DebugEn is set)
// ---------------------------------
if (ex_valid_i || eret_i || (CVA6Cfg.DebugEn && set_debug_pc_i)) begin
// don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal
// for the PC Gen stage but instead tells it to take the PC we gave it
// (this overrides any set_pc_commit_o = 1 assigned by the handlers above)
set_pc_commit_o = 1'b0;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
// this potentially reduces performance, but is needed
// to suppress speculative fetches to virtual memory from
// machine mode. TODO: remove when PMA checkers have been
// added to the system
flush_bp_o = 1'b1;
end
end
// ----------------------
// Halt Logic
// ----------------------
always_comb begin
  // The core is halted on request of the CSR file or the accelerator, and
  // for as long as a fence-triggered flush of a write-back data cache is
  // still in flight.
  halt_o = (fence_active_q && DCACHE_TYPE == int'(config_pkg::WB)) || halt_acc_i || halt_csr_i;
end
// ----------------------
// Registers
// ----------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (~rst_ni) begin
    // asynchronous, active-low reset: no flush requested, no fence pending
    flush_dcache_o <= 1'b0;
    fence_active_q <= 1'b0;
  end else begin
    // the dcache flush request leaves the module through a register,
    // as this signal might be timing critical
    flush_dcache_o <= flush_dcache;
    fence_active_q <= fence_active_d;
  end
end
endmodule

View File

@ -0,0 +1,76 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 05.05.2017
// Description: Buffer to hold CSR address, this acts like a functional unit
// to the scoreboard.
module csr_buffer
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic                clk_i,         // Clock
    input  logic                rst_ni,        // Asynchronous reset active low
    input  logic                flush_i,       // Drop the buffered (uncommitted) entry
    input  fu_data_t            fu_data_i,     // operand_a -> result, operand_b[11:0] -> CSR address
    output logic                csr_ready_o,   // FU is ready e.g. not busy
    input  logic                csr_valid_i,   // Input is valid
    output riscv::xlen_t        csr_result_o,  // operand_a passed straight through
    input  logic                csr_commit_i,  // commit the pending CSR OP to CSR file
    output logic         [11:0] csr_addr_o     // CSR address to commit stage
);

  // Single-entry buffer holding the address of the CSR that the commit
  // stage is going to need, plus a flag telling whether the entry is in use.
  struct packed {
    logic [11:0] csr_address;
    logic        valid;
  }
      pending_d, pending_q;

  // Scoreboard-facing outputs: the result is combinational, the address
  // comes from the buffered entry.
  assign csr_addr_o   = pending_q.csr_address;
  assign csr_result_o = fu_data_i.operand_a;

  // Next-state and ready logic
  always_comb begin : write
    pending_d = pending_q;

    // Not ready while an uncommitted CSR request is held, or one is arriving
    // right now, and no commit frees the buffer in this cycle; ready otherwise.
    if (!csr_commit_i && (csr_valid_i || pending_q.valid)) csr_ready_o = 1'b0;
    else csr_ready_o = 1'b1;

    // A valid request from the scoreboard captures the CSR address.
    if (csr_valid_i) begin
      pending_d.valid       = 1'b1;
      pending_d.csr_address = fu_data_i.operand_b[11:0];
    end

    // A commit without a simultaneous new request empties the buffer.
    if (!csr_valid_i && csr_commit_i) begin
      pending_d.valid = 1'b0;
    end

    // A flush always wins and empties the buffer.
    if (flush_i) begin
      pending_d.valid = 1'b0;
    end
  end

  // State register
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      pending_q <= '0;
    end else begin
      pending_q <= pending_d;
    end
  end
endmodule

File diff suppressed because it is too large Load Diff

1401
test/type_param/core/cva6.sv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,31 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Author: Nils Wistoff <nwistoff@iis.ee.ethz.ch>
// Module stub for the cva6_accel_first_pass_decoder. Replace this with your accelerator's
// first pass decoder.
// Placeholder first-pass decoder: it never recognises an instruction as an
// accelerator instruction and ties every output to a constant. All inputs are
// ignored.
module cva6_accel_first_pass_decoder
import ariane_pkg::*;
(
input logic [31:0] instruction_i, // instruction from IF
input riscv::xs_t fs_i, // floating point extension status
input riscv::xs_t vs_i, // vector extension status
output logic is_accel_o, // is an accelerator instruction
output scoreboard_entry_t instruction_o, // predecoded instruction
output logic illegal_instr_o, // is an illegal instruction
output logic is_control_flow_instr_o // is a control flow instruction
);
// Constant outputs: nothing is claimed for the accelerator, nothing is
// flagged illegal or as control flow, and the predecoded entry is all zeros.
assign is_accel_o = 1'b0;
assign instruction_o = '0;
assign illegal_instr_o = 1'b0;
assign is_control_flow_instr_o = 1'b0;
// $error is placed directly at module scope (outside any procedural block),
// so it is reported whenever this stub is instantiated in a design.
$error("cva6_accel_first_pass_decoder: instantiated non-functional module stub.\
Please replace this with your accelerator's first pass decoder \
(or unset ENABLE_ACCELERATOR).");
endmodule : cva6_accel_first_pass_decoder

View File

@ -0,0 +1,294 @@
// Copyright 2024 Thales DIS France SAS
//
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Original Author: Yannick Casamatta - Thales
// Date: 09/01/2024
// Builds one RVFI record per commit port (rvfi_o) from the probe bundle
// rvfi_probes_i. The module keeps two pieces of state of its own:
//   * issue_q - the instruction word most recently captured from fetch,
//   * mem_q   - a per-transaction-id shadow table holding the operands,
//               instruction word and load/store information of issued
//               instructions, read back at commit via commit_pointer.
module cva6_rvfi
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type rvfi_instr_t = logic,
parameter type rvfi_probes_t = logic
) (
input logic clk_i,
input logic rst_ni,
input rvfi_probes_t rvfi_probes_i,
output rvfi_instr_t [CVA6Cfg.NrCommitPorts-1:0] rvfi_o
);
// ------------------------------------------
// CVA6 configuration
// ------------------------------------------
// Extended config
localparam bit RVF = (riscv::IS_XLEN64 | riscv::IS_XLEN32) & CVA6Cfg.FpuEn;
localparam bit RVD = (riscv::IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn;
localparam bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8;
localparam bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec; // Are non-standard extensions present?
localparam int unsigned FLen = RVD ? 64 : // D ext.
RVF ? 32 : // F ext.
CVA6Cfg.XF16 ? 16 : // Xf16 ext.
CVA6Cfg.XF16ALT ? 16 : // Xf16alt ext.
CVA6Cfg.XF8 ? 8 : // Xf8 ext.
1; // Unused in case of no FP
// Transprecision floating-point extensions configuration
localparam bit RVFVec = RVF & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled
localparam bit XF16Vec = CVA6Cfg.XF16 & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled
localparam bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled
localparam bit XF8Vec = CVA6Cfg.XF8 & CVA6Cfg.XFVec & FLen>8; // FP8 vectors available if vectors and larger fmt enabled
localparam bit EnableAccelerator = CVA6Cfg.RVV; // Currently only used by V extension (Ara)
// one extra write-back port when CVXIF or the accelerator is enabled
localparam int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 5 : 4;
localparam NrRgprPorts = 2;
localparam bit NonIdemPotenceEn = CVA6Cfg.NrNonIdempotentRules && CVA6Cfg.NonIdempotentLength; // set when both NrNonIdempotentRules and NonIdempotentLength are non-zero
// Extended configuration: the user configuration plus the values derived
// above. NOTE(review): the fields are given positionally, so this list must
// follow the member order of config_pkg::cva6_cfg_t (declared elsewhere) -
// confirm whenever that struct changes.
localparam config_pkg::cva6_cfg_t CVA6ExtendCfg = {
CVA6Cfg.NrCommitPorts,
CVA6Cfg.AxiAddrWidth,
CVA6Cfg.AxiDataWidth,
CVA6Cfg.AxiIdWidth,
CVA6Cfg.AxiUserWidth,
CVA6Cfg.NrLoadBufEntries,
CVA6Cfg.FpuEn,
CVA6Cfg.XF16,
CVA6Cfg.XF16ALT,
CVA6Cfg.XF8,
CVA6Cfg.RVA,
CVA6Cfg.RVB,
CVA6Cfg.RVV,
CVA6Cfg.RVC,
CVA6Cfg.RVZCB,
CVA6Cfg.XFVec,
CVA6Cfg.CvxifEn,
CVA6Cfg.ZiCondExtEn,
// Extended
bit'(RVF),
bit'(RVD),
bit'(FpPresent),
bit'(NSX),
unsigned'(FLen),
bit'(RVFVec),
bit'(XF16Vec),
bit'(XF16ALTVec),
bit'(XF8Vec),
unsigned'(NrRgprPorts),
unsigned'(NrWbPorts),
bit'(EnableAccelerator),
CVA6Cfg.RVS,
CVA6Cfg.RVU,
CVA6Cfg.HaltAddress,
CVA6Cfg.ExceptionAddress,
CVA6Cfg.RASDepth,
CVA6Cfg.BTBEntries,
CVA6Cfg.BHTEntries,
CVA6Cfg.DmBaseAddress,
CVA6Cfg.NrPMPEntries,
CVA6Cfg.PMPCfgRstVal,
CVA6Cfg.PMPAddrRstVal,
CVA6Cfg.PMPEntryReadOnly,
CVA6Cfg.NOCType,
CVA6Cfg.NrNonIdempotentRules,
CVA6Cfg.NonIdempotentAddrBase,
CVA6Cfg.NonIdempotentLength,
CVA6Cfg.NrExecuteRegionRules,
CVA6Cfg.ExecuteRegionAddrBase,
CVA6Cfg.ExecuteRegionLength,
CVA6Cfg.NrCachedRegionRules,
CVA6Cfg.CachedRegionAddrBase,
CVA6Cfg.CachedRegionLength,
CVA6Cfg.MaxOutstandingStores,
CVA6Cfg.DebugEn,
NonIdemPotenceEn,
CVA6Cfg.AxiBurstWriteEn
};
// ------------------------------------------
// Probe unpacking: local copies of the fields of rvfi_probes_i
// ------------------------------------------
logic flush;
logic issue_instr_ack;
logic fetch_entry_valid;
logic [ 31:0] instruction;
logic is_compressed;
logic [ TRANS_ID_BITS-1:0] issue_pointer;
logic [CVA6ExtendCfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer;
logic flush_unissued_instr;
logic decoded_instr_valid;
logic decoded_instr_ack;
riscv::xlen_t rs1_forwarding;
riscv::xlen_t rs2_forwarding;
scoreboard_entry_t [CVA6ExtendCfg.NrCommitPorts-1:0] commit_instr;
exception_t ex_commit;
riscv::priv_lvl_t priv_lvl;
lsu_ctrl_t lsu_ctrl;
logic [ CVA6ExtendCfg.NrWbPorts-1:0][ riscv::XLEN-1:0] wbdata;
logic [CVA6ExtendCfg.NrCommitPorts-1:0] commit_ack;
logic [ riscv::PLEN-1:0] mem_paddr;
logic debug_mode;
logic [CVA6ExtendCfg.NrCommitPorts-1:0][ riscv::XLEN-1:0] wdata;
// values derived from lsu_ctrl
logic [ riscv::VLEN-1:0] lsu_addr;
logic [ (riscv::XLEN/8)-1:0] lsu_rmask;
logic [ (riscv::XLEN/8)-1:0] lsu_wmask;
logic [ TRANS_ID_BITS-1:0] lsu_addr_trans_id;
assign flush = rvfi_probes_i.flush;
assign issue_instr_ack = rvfi_probes_i.issue_instr_ack;
assign fetch_entry_valid = rvfi_probes_i.fetch_entry_valid;
assign instruction = rvfi_probes_i.instruction;
assign is_compressed = rvfi_probes_i.is_compressed;
assign issue_pointer = rvfi_probes_i.issue_pointer;
assign commit_pointer = rvfi_probes_i.commit_pointer;
assign flush_unissued_instr = rvfi_probes_i.flush_unissued_instr;
assign decoded_instr_valid = rvfi_probes_i.decoded_instr_valid;
assign decoded_instr_ack = rvfi_probes_i.decoded_instr_ack;
assign rs1_forwarding = rvfi_probes_i.rs1_forwarding;
assign rs2_forwarding = rvfi_probes_i.rs2_forwarding;
assign commit_instr = rvfi_probes_i.commit_instr;
assign ex_commit = rvfi_probes_i.ex_commit;
assign priv_lvl = rvfi_probes_i.priv_lvl;
assign lsu_ctrl = rvfi_probes_i.lsu_ctrl;
assign wbdata = rvfi_probes_i.wbdata;
assign commit_ack = rvfi_probes_i.commit_ack;
assign mem_paddr = rvfi_probes_i.mem_paddr;
assign debug_mode = rvfi_probes_i.debug_mode;
assign wdata = rvfi_probes_i.wdata;
// the byte enables become the read mask for a LOAD and the write mask for a
// STORE; for any other functional unit both masks are zero
assign lsu_addr = lsu_ctrl.vaddr;
assign lsu_rmask = lsu_ctrl.fu == LOAD ? lsu_ctrl.be : '0;
assign lsu_wmask = lsu_ctrl.fu == STORE ? lsu_ctrl.be : '0;
assign lsu_addr_trans_id = lsu_ctrl.trans_id;
//ID STAGE
// single-entry register holding the instruction word of the last fetch entry
typedef struct packed {
logic valid;
logic [31:0] instr;
} issue_struct_t;
issue_struct_t issue_n, issue_q;
always_comb begin
issue_n = issue_q;
// the held entry is released when the issue stage acknowledges it
if (issue_instr_ack) issue_n.valid = 1'b0;
// capture a new fetch entry when the register is free or being released;
// compressed instructions keep only their lower 16 bits (zero-extended)
if ((!issue_q.valid || issue_instr_ack) && fetch_entry_valid) begin
issue_n.valid = 1'b1;
issue_n.instr = (is_compressed) ? {{16{1'b0}}, instruction[15:0]} : instruction;
end
// a flush has the last word and invalidates the entry
if (flush) issue_n.valid = 1'b0;
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
issue_q <= '0;
end else begin
issue_q <= issue_n;
end
end
//ISSUE STAGE
// this is the FIFO struct of the issue queue
typedef struct packed {
riscv::xlen_t rs1_rdata;
riscv::xlen_t rs2_rdata;
logic [riscv::VLEN-1:0] lsu_addr;
logic [(riscv::XLEN/8)-1:0] lsu_rmask;
logic [(riscv::XLEN/8)-1:0] lsu_wmask;
riscv::xlen_t lsu_wdata;
logic [31:0] instr;
} sb_mem_t;
// shadow table with one entry per scoreboard slot, indexed by transaction id
sb_mem_t [NR_SB_ENTRIES-1:0] mem_q, mem_n;
always_comb begin : issue_fifo
mem_n = mem_q;
// a decoded instruction that is acknowledged (and not flushed) gets a fresh
// entry at issue_pointer: operands and instruction word are recorded, the
// load/store fields start out cleared
if (decoded_instr_valid && decoded_instr_ack && !flush_unissued_instr) begin
mem_n[issue_pointer] = '{
rs1_rdata: rs1_forwarding,
rs2_rdata: rs2_forwarding,
lsu_addr: '0,
lsu_rmask: '0,
lsu_wmask: '0,
lsu_wdata: '0,
instr: issue_q.instr
};
end
// load/store information is filled in afterwards, addressed by the
// transaction id carried in lsu_ctrl; this runs after the allocation above,
// so it wins if both touch the same entry in one cycle
if (lsu_rmask != 0) begin
mem_n[lsu_addr_trans_id].lsu_addr = lsu_addr;
mem_n[lsu_addr_trans_id].lsu_rmask = lsu_rmask;
end else if (lsu_wmask != 0) begin
mem_n[lsu_addr_trans_id].lsu_addr = lsu_addr;
mem_n[lsu_addr_trans_id].lsu_wmask = lsu_wmask;
// NOTE(review): store data is taken from write-back port 1 - presumably the
// store unit's port; confirm against the wbdata wiring in the core
mem_n[lsu_addr_trans_id].lsu_wdata = wbdata[1];
end
end
always_ff @(posedge clk_i or negedge rst_ni) begin : regs
if (!rst_ni) begin
mem_q <= '{default: sb_mem_t'(0)};
end else begin
mem_q <= mem_n;
end
end
//----------------------------------------------------------------------------------------------------------
// PACK
//----------------------------------------------------------------------------------------------------------
// assemble one RVFI record per commit port from the committing scoreboard
// entry and the shadow table entry selected by commit_pointer
always_comb begin
for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin
logic exception;
exception = commit_instr[i].valid && ex_commit.valid;
// a record is valid for an acknowledged commit without exception, and for
// an exception whose cause is one of the three environment-call causes
rvfi_o[i].valid = (commit_ack[i] && !ex_commit.valid) ||
(exception && (ex_commit.cause == riscv::ENV_CALL_MMODE ||
ex_commit.cause == riscv::ENV_CALL_SMODE ||
ex_commit.cause == riscv::ENV_CALL_UMODE));
rvfi_o[i].insn = mem_q[commit_pointer[i]].instr;
// when trap, the instruction is not executed
rvfi_o[i].trap = exception;
rvfi_o[i].cause = ex_commit.cause;
// debug mode (when enabled in the configuration) is reported as 2'b10,
// otherwise the current privilege level is reported
rvfi_o[i].mode = (CVA6ExtendCfg.DebugEn && debug_mode) ? 2'b10 : priv_lvl;
rvfi_o[i].ixl = riscv::XLEN == 64 ? 2 : 1;
rvfi_o[i].rs1_addr = commit_instr[i].rs1[4:0];
rvfi_o[i].rs2_addr = commit_instr[i].rs2[4:0];
rvfi_o[i].rd_addr = commit_instr[i].rd[4:0];
// destination data: the scoreboard result when is_rd_fpr() holds for the
// operation (and FP is present), the commit-stage wdata otherwise
rvfi_o[i].rd_wdata = (CVA6ExtendCfg.FpPresent && is_rd_fpr(commit_instr[i].op)) ?
commit_instr[i].result : wdata[i];
rvfi_o[i].pc_rdata = commit_instr[i].pc;
rvfi_o[i].mem_addr = mem_q[commit_pointer[i]].lsu_addr;
// So far, only write paddr is reported. TODO: read paddr
// (the same mem_paddr value is reported on every commit port)
rvfi_o[i].mem_paddr = mem_paddr;
rvfi_o[i].mem_wmask = mem_q[commit_pointer[i]].lsu_wmask;
rvfi_o[i].mem_wdata = mem_q[commit_pointer[i]].lsu_wdata;
rvfi_o[i].mem_rmask = mem_q[commit_pointer[i]].lsu_rmask;
rvfi_o[i].mem_rdata = commit_instr[i].result;
rvfi_o[i].rs1_rdata = mem_q[commit_pointer[i]].rs1_rdata;
rvfi_o[i].rs2_rdata = mem_q[commit_pointer[i]].rs2_rdata;
end
end
endmodule

View File

@ -0,0 +1,81 @@
// Copyright 2024 Thales DIS France SAS
//
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Original Author: Yannick Casamatta - Thales
// Date: 09/01/2024
// Purely combinational packer: gathers the individual probe signals of the
// core into the single rvfi_probes_o bundle. Fields of rvfi_probes_t that are
// not listed below are driven to zero.
module cva6_rvfi_probes
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter type rvfi_probes_t = logic
) (
    input logic                                                   flush_i,
    input logic                                                   issue_instr_ack_i,
    input logic                                                   fetch_entry_valid_i,
    input logic              [                  31:0]             instruction_i,
    input logic                                                   is_compressed_i,
    input logic              [     TRANS_ID_BITS-1:0]             issue_pointer_i,
    input logic              [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer_i,
    input logic                                                   flush_unissued_instr_i,
    input logic                                                   decoded_instr_valid_i,
    input logic                                                   decoded_instr_ack_i,
    input riscv::xlen_t                                           rs1_forwarding_i,
    input riscv::xlen_t                                           rs2_forwarding_i,
    input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0]          commit_instr_i,
    input exception_t                                             ex_commit_i,
    input riscv::priv_lvl_t                                       priv_lvl_i,
    input lsu_ctrl_t                                              lsu_ctrl_i,
    input logic              [    CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i,
    input logic              [CVA6Cfg.NrCommitPorts-1:0]          commit_ack_i,
    input logic              [       riscv::PLEN-1:0]             mem_paddr_i,
    input logic                                                   debug_mode_i,
    input logic              [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i,
    output rvfi_probes_t rvfi_probes_o
);

  always_comb begin
    // start from an all-zero bundle so that no field is left undriven
    rvfi_probes_o = '0;

    // pipeline control
    rvfi_probes_o.flush                = flush_i;
    rvfi_probes_o.flush_unissued_instr = flush_unissued_instr_i;
    rvfi_probes_o.debug_mode           = debug_mode_i;
    rvfi_probes_o.priv_lvl             = priv_lvl_i;

    // fetch / decode
    rvfi_probes_o.fetch_entry_valid    = fetch_entry_valid_i;
    rvfi_probes_o.instruction          = instruction_i;
    rvfi_probes_o.is_compressed        = is_compressed_i;
    rvfi_probes_o.decoded_instr_valid  = decoded_instr_valid_i;
    rvfi_probes_o.decoded_instr_ack    = decoded_instr_ack_i;

    // issue
    rvfi_probes_o.issue_instr_ack      = issue_instr_ack_i;
    rvfi_probes_o.issue_pointer        = issue_pointer_i;
    rvfi_probes_o.rs1_forwarding       = rs1_forwarding_i;
    rvfi_probes_o.rs2_forwarding       = rs2_forwarding_i;

    // execute / write-back
    rvfi_probes_o.lsu_ctrl             = lsu_ctrl_i;
    rvfi_probes_o.wbdata               = wbdata_i;
    rvfi_probes_o.mem_paddr            = mem_paddr_i;

    // commit
    rvfi_probes_o.commit_pointer       = commit_pointer_i;
    rvfi_probes_o.commit_instr         = commit_instr_i;
    rvfi_probes_o.commit_ack           = commit_ack_i;
    rvfi_probes_o.ex_commit            = ex_commit_i;
    rvfi_probes_o.wdata                = wdata_i;
  end
endmodule

View File

@ -0,0 +1,155 @@
// Copyright 2021 Thales DIS design services SAS
//
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
// Example coprocessor adds rs1,rs2(,rs3) together and gives back the result to the CPU via the CoreV-X-Interface.
// Coprocessor delays the sending of the result depending on result least significant bits.
// Example coprocessor for the CoreV-X-Interface.
//
// Accepted instructions (decoded by instr_decoder against
// cvxif_instr_pkg::CoproInstr) are queued in a FIFO. The result of the oldest
// queued instruction is rs[0] + rs[1] (+ rs[2] when X_NUM_RS == 3) and is
// presented once a free-running 4-bit counter equals the four least
// significant bits of that result, which delays the answer by a
// data-dependent number of cycles.
//
// The compressed interface is never ready and the memory interface is never
// used: both are driven to their idle values.
module cvxif_example_coprocessor
  import cvxif_pkg::*;
  import cvxif_instr_pkg::*;
(
    input  logic        clk_i,        // Clock
    input  logic        rst_ni,       // Asynchronous reset active low
    input  cvxif_req_t  cvxif_req_i,  // Request bundle from the core
    output cvxif_resp_t cvxif_resp_o  // Response bundle to the core
);

  //Compressed interface
  logic               x_compressed_valid_i;
  logic               x_compressed_ready_o;
  x_compressed_req_t  x_compressed_req_i;
  x_compressed_resp_t x_compressed_resp_o;
  //Issue interface
  logic               x_issue_valid_i;
  logic               x_issue_ready_o;
  x_issue_req_t       x_issue_req_i;
  x_issue_resp_t      x_issue_resp_o;
  //Commit interface
  logic               x_commit_valid_i;
  x_commit_t          x_commit_i;
  //Memory interface
  logic               x_mem_valid_o;
  logic               x_mem_ready_i;
  x_mem_req_t         x_mem_req_o;
  x_mem_resp_t        x_mem_resp_i;
  //Memory result interface
  logic               x_mem_result_valid_i;
  x_mem_result_t      x_mem_result_i;
  //Result interface
  logic               x_result_valid_o;
  logic               x_result_ready_i;
  x_result_t          x_result_o;

  // Unpack the request bundle
  assign x_compressed_valid_i            = cvxif_req_i.x_compressed_valid;
  assign x_compressed_req_i              = cvxif_req_i.x_compressed_req;
  assign x_issue_valid_i                 = cvxif_req_i.x_issue_valid;
  assign x_issue_req_i                   = cvxif_req_i.x_issue_req;
  assign x_commit_valid_i                = cvxif_req_i.x_commit_valid;
  assign x_commit_i                      = cvxif_req_i.x_commit;
  assign x_mem_ready_i                   = cvxif_req_i.x_mem_ready;
  assign x_mem_resp_i                    = cvxif_req_i.x_mem_resp;
  assign x_mem_result_valid_i            = cvxif_req_i.x_mem_result_valid;
  assign x_mem_result_i                  = cvxif_req_i.x_mem_result;
  assign x_result_ready_i                = cvxif_req_i.x_result_ready;

  // Pack the response bundle
  assign cvxif_resp_o.x_compressed_ready = x_compressed_ready_o;
  assign cvxif_resp_o.x_compressed_resp  = x_compressed_resp_o;
  assign cvxif_resp_o.x_issue_ready      = x_issue_ready_o;
  assign cvxif_resp_o.x_issue_resp       = x_issue_resp_o;
  assign cvxif_resp_o.x_mem_valid        = x_mem_valid_o;
  assign cvxif_resp_o.x_mem_req          = x_mem_req_o;
  assign cvxif_resp_o.x_result_valid     = x_result_valid_o;
  assign cvxif_resp_o.x_result           = x_result_o;

  //Compressed interface: not supported, never ready and nothing accepted
  assign x_compressed_ready_o            = '0;
  assign x_compressed_resp_o.instr       = '0;
  assign x_compressed_resp_o.accept      = '0;

  //Memory interface: this coprocessor issues no memory requests. The signals
  //are forwarded into cvxif_resp_o above, so they must be driven explicitly;
  //leaving them undriven would put X on the response bundle.
  assign x_mem_valid_o                   = 1'b0;
  assign x_mem_req_o                     = '0;

  // Combinational decode of the offloaded instruction word
  instr_decoder #(
      .NbInstr   (cvxif_instr_pkg::NbInstr),
      .CoproInstr(cvxif_instr_pkg::CoproInstr)
  ) instr_decoder_i (
      .clk_i         (clk_i),
      .x_issue_req_i (x_issue_req_i),
      .x_issue_resp_o(x_issue_resp_o)
  );

  // FIFO payload: the issue request together with the decoder's response
  typedef struct packed {
    x_issue_req_t  req;
    x_issue_resp_t resp;
  } x_issue_t;

  logic fifo_full, fifo_empty;
  logic x_issue_ready_q;
  logic instr_push, instr_pop;
  x_issue_t req_i;
  x_issue_t req_o;

  // Queue an instruction only when an issue transaction actually takes place
  // (valid and ready both high) and the decoder accepts it. The decoder output
  // depends on the instruction word alone, so without the handshake
  // qualification a stale or idle instruction word matching an opcode would be
  // queued as well.
  assign instr_push = x_issue_valid_i && x_issue_ready_o && x_issue_resp_o.accept;
  // An entry leaves the queue when it is killed at commit or its result is sent
  assign instr_pop = (x_commit_i.x_commit_kill && x_commit_valid_i) || x_result_valid_o;
  // New instructions can be taken as long as the queue is not full
  assign x_issue_ready_q = ~fifo_full;

  assign req_i.req = x_issue_req_i;
  assign req_i.resp = x_issue_resp_o;

  // The ready signal is registered, i.e. it follows fifo_full by one cycle
  always_ff @(posedge clk_i or negedge rst_ni) begin : regs
    if (!rst_ni) begin
      x_issue_ready_o <= 1;
    end else begin
      x_issue_ready_o <= x_issue_ready_q;
    end
  end

  fifo_v3 #(
      .FALL_THROUGH(1),         //data_o ready and pop in the same cycle
      .DATA_WIDTH  (64),
      .DEPTH       (8),
      .dtype       (x_issue_t)
  ) fifo_commit_i (
      .clk_i     (clk_i),
      .rst_ni    (rst_ni),
      .flush_i   (1'b0),
      .testmode_i(1'b0),
      .full_o    (fifo_full),
      .empty_o   (fifo_empty),
      .usage_o   (),
      .data_i    (req_i),
      .push_i    (instr_push),
      .data_o    (req_o),
      .pop_i     (instr_pop)
  );

  // Free-running up-counter, cleared whenever an instruction is committed
  // (commit valid and not killed). The load/down/data inputs are not used and
  // are tied to constants instead of being left floating.
  logic [3:0] c;
  counter #(
      .WIDTH(4)
  ) counter_i (
      .clk_i     (clk_i),
      .rst_ni    (rst_ni),
      .clear_i   (~x_commit_i.x_commit_kill && x_commit_valid_i),
      .en_i      (1'b1),
      .load_i    (1'b0),
      .down_i    (1'b0),
      .d_i       (4'd0),
      .q_o       (c),
      .overflow_o()
  );

  // Result generation for the oldest queued instruction
  always_comb begin
    x_result_o.data = req_o.req.rs[0] + req_o.req.rs[1] + (X_NUM_RS == 3 ? req_o.req.rs[2] : 0);
    // the result is released when the counter matches the result's low nibble
    x_result_valid_o = (c == x_result_o.data[3:0]) && ~fifo_empty ? 1 : 0;
    x_result_o.id = req_o.req.id;
    x_result_o.rd = req_o.req.instr[11:7];
    // write back only for instructions whose decoder response requested it
    x_result_o.we = req_o.resp.writeback & x_result_valid_o;
    x_result_o.exc = 0;
    x_result_o.exccode = 0;
  end
endmodule

View File

@ -0,0 +1,47 @@
// Copyright 2021 Thales DIS design services SAS
//
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
// Instruction table of the example coprocessor: each entry pairs a
// match pattern (instr) and the bits to compare (mask) with the issue
// response returned for a matching instruction.
package cvxif_instr_pkg;

  typedef struct packed {
    logic [31:0]              instr;  // expected value of the masked instruction bits
    logic [31:0]              mask;   // bits of the instruction word taking part in the match
    cvxif_pkg::x_issue_resp_t resp;   // issue response for a matching instruction
  } copro_issue_resp_t;

  // 2 Possible RISCV instructions for Coprocessor
  parameter int unsigned NbInstr = 2;
  parameter copro_issue_resp_t CoproInstr[NbInstr] = '{
      // custom1 opcode (7'b0101011), matched on the 7 opcode bits only,
      // accepted without register write-back
      '{
          instr: 32'h0000_002B,
          mask: 32'h0000_007F,
          resp : '{
              accept : 1'b1,
              writeback : 1'b0,
              dualwrite : 1'b0,
              dualread : 1'b0,
              loadstore : 1'b0,
              exc : 1'b0
          }
      },
      // custom2 opcode (7'b1011011), matched on the 7 opcode bits only,
      // accepted with register write-back
      '{
          instr: 32'h0000_005B,
          mask: 32'h0000_007F,
          resp : '{
              accept : 1'b1,
              writeback : 1'b1,
              dualwrite : 1'b0,
              dualread : 1'b0,
              loadstore : 1'b0,
              exc : 1'b0
          }
      }
  };

endpackage

View File

@ -0,0 +1,49 @@
// Copyright 2021 Thales DIS design services SAS
//
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
// Combinational decoder for offloaded instructions: the instruction word is
// compared against every entry of CoproInstr (after masking) and the issue
// response of the matching entry is returned; without a match the response
// is all zeros (in particular accept = 0).
module instr_decoder
  import cvxif_pkg::*;
#(
    parameter int                                 NbInstr             = 1,
    parameter cvxif_instr_pkg::copro_issue_resp_t CoproInstr[NbInstr] = {0}
) (
    input  logic          clk_i,          // only used to clock the assertion
    input  x_issue_req_t  x_issue_req_i,  // offloaded instruction
    output x_issue_resp_t x_issue_resp_o  // response of the matching table entry
);

  // One match flag per table entry
  logic [NbInstr-1:0] sel;

  for (genvar i = 0; i < NbInstr; i++) begin : gen_predecoder_selector
    assign sel[i] = (CoproInstr[i].instr == (x_issue_req_i.instr & CoproInstr[i].mask));
  end

  // Response selection. The table is walked in ascending order, so if several
  // entries match, the one with the highest index determines the response.
  always_comb begin
    x_issue_resp_o = '0;
    for (int unsigned idx = 0; idx < NbInstr; idx++) begin
      if (sel[idx]) begin
        x_issue_resp_o = CoproInstr[idx].resp;
      end
    end
  end

  // At most one table entry is expected to match a given instruction
  assert property (@(posedge clk_i) $onehot0(sel))
  else $warning("This offloaded instruction is valid for multiple coprocessor instructions !");

endmodule

View File

@ -0,0 +1,112 @@
// Copyright 2021 Thales DIS design services SAS
//
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Original Author: Guillaume CHAUVON (guillaume.chauvon@thalesgroup.com)
// Functional Unit for the logic of the CoreV-X-Interface
// Functional unit bridging the issue stage and the CoreV-X-Interface:
//   * forwards an offloaded instruction to the coprocessor (issue and commit
//     are sent together in the same cycle),
//   * hands the coprocessor's result to write-back,
//   * turns an instruction the coprocessor did not accept into an
//     illegal-instruction exception on the write-back port.
module cvxif_fu
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input fu_data_t fu_data_i,
input riscv::priv_lvl_t priv_lvl_i,
//from issue
input logic x_valid_i,
output logic x_ready_o,
input logic [ 31:0] x_off_instr_i,
//to writeback
output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
output exception_t x_exception_o,
output riscv::xlen_t x_result_o,
output logic x_valid_o,
output logic x_we_o,
//to coprocessor
output cvxif_pkg::cvxif_req_t cvxif_req_o,
input cvxif_pkg::cvxif_resp_t cvxif_resp_i
);
// NOTE(review): rs_valid is sized with this local X_NUM_RS (taken from
// ariane_pkg::NR_RGPR_PORTS) while the generate block below selects its value
// based on cvxif_pkg::X_NUM_RS - the two are presumably kept equal; confirm.
localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS;
// pending "instruction was not accepted" record (flag, transaction id, instruction word)
logic illegal_n, illegal_q;
logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q;
logic [31:0] illegal_instr_n, illegal_instr_q;
// all source operands are always flagged valid
logic [X_NUM_RS-1:0] rs_valid;
if (cvxif_pkg::X_NUM_RS == 3) begin : gen_third_operand
assign rs_valid = 3'b111;
end else begin : gen_no_third_operand
assign rs_valid = 2'b11;
end
// Request towards the coprocessor. The bundle is all zeros (except
// x_result_ready, which is always asserted) unless the issue stage presents a
// valid instruction.
always_comb begin
cvxif_req_o = '0;
cvxif_req_o.x_result_ready = 1'b1;
x_ready_o = cvxif_resp_i.x_issue_ready;
if (x_valid_i) begin
cvxif_req_o.x_issue_valid = x_valid_i;
cvxif_req_o.x_issue_req.instr = x_off_instr_i;
cvxif_req_o.x_issue_req.mode = priv_lvl_i;
cvxif_req_o.x_issue_req.id = fu_data_i.trans_id;
cvxif_req_o.x_issue_req.rs[0] = fu_data_i.operand_a;
cvxif_req_o.x_issue_req.rs[1] = fu_data_i.operand_b;
// the third operand, if the interface has one, is carried in the imm field
if (cvxif_pkg::X_NUM_RS == 3) begin
cvxif_req_o.x_issue_req.rs[2] = fu_data_i.imm;
end
cvxif_req_o.x_issue_req.rs_valid = rs_valid;
// the commit is sent in the same cycle as the issue and never kills
cvxif_req_o.x_commit_valid = x_valid_i;
cvxif_req_o.x_commit.id = fu_data_i.trans_id;
cvxif_req_o.x_commit.x_commit_kill = 1'b0;
end
end
// Write-back outputs and illegal-instruction bookkeeping. Statement order
// matters here: illegal_n is updated first and then read again further down.
always_comb begin
illegal_n = illegal_q;
illegal_id_n = illegal_id_q;
illegal_instr_n = illegal_instr_q;
// an issue transaction (valid and ready) that the coprocessor does not accept
// is recorded, unless a previous record is still pending
if (~cvxif_resp_i.x_issue_resp.accept && cvxif_req_o.x_issue_valid && cvxif_resp_i.x_issue_ready && ~illegal_n) begin
illegal_n = 1'b1;
illegal_id_n = cvxif_req_o.x_issue_req.id;
illegal_instr_n = cvxif_req_o.x_issue_req.instr;
end
// default: pass the coprocessor result through; all fields are zeroed while
// no result is valid
x_valid_o = cvxif_resp_i.x_result_valid; //Read result only when CVXIF is enabled
x_trans_id_o = x_valid_o ? cvxif_resp_i.x_result.id : '0;
x_result_o = x_valid_o ? cvxif_resp_i.x_result.data : '0;
x_exception_o.cause = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0;
x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0;
x_exception_o.tval = '0;
x_we_o = x_valid_o ? cvxif_resp_i.x_result.we : '0;
// a pending illegal-instruction record is reported on the write-back port in
// the first cycle in which the coprocessor delivers no result (possibly the
// very cycle in which it was recorded); while results are being delivered
// the record is kept in illegal_q
if (illegal_n) begin
if (~x_valid_o) begin
x_trans_id_o = illegal_id_n;
x_result_o = '0;
x_valid_o = 1'b1;
x_exception_o.cause = riscv::ILLEGAL_INSTR;
x_exception_o.valid = 1'b1;
x_exception_o.tval = illegal_instr_n;
x_we_o = '0;
illegal_n = '0; // Reset flag for illegal instr. illegal_id and illegal instr values are a don't care, no need to reset it.
end
end
end
// state registers of the illegal-instruction record
always_ff @(posedge clk_i, negedge rst_ni) begin
if (~rst_ni) begin
illegal_q <= 1'b0;
illegal_id_q <= '0;
illegal_instr_q <= '0;
end else begin
illegal_q <= illegal_n;
illegal_id_q <= illegal_id_n;
illegal_instr_q <= illegal_instr_n;
end
end
endmodule

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,413 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 19.04.2017
// Description: Instantiation of all functional units residing in the execute stage
module ex_stage
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ASID_WIDTH = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic debug_mode_i,
input logic [riscv::VLEN-1:0] rs1_forwarding_i,
input logic [riscv::VLEN-1:0] rs2_forwarding_i,
input fu_data_t fu_data_i,
input logic [riscv::VLEN-1:0] pc_i, // PC of current instruction
input logic is_compressed_instr_i, // we need to know if this was a compressed instruction
// in order to calculate the next PC on a mis-predict
// Fixed latency unit(s)
output riscv::xlen_t flu_result_o,
output logic [TRANS_ID_BITS-1:0] flu_trans_id_o, // ID of scoreboard entry at which to write back
output exception_t flu_exception_o,
output logic flu_ready_o, // FLU is ready
output logic flu_valid_o, // FLU result is valid
// Branches and Jumps
// ALU 1
input logic alu_valid_i, // Output is valid
// Branch Unit
input logic branch_valid_i, // we are using the branch unit
input branchpredict_sbe_t branch_predict_i,
output bp_resolve_t resolved_branch_o, // the branch engine uses the write back from the ALU
output logic resolve_branch_o, // to ID signaling that we resolved the branch
// CSR
input logic csr_valid_i,
output logic [11:0] csr_addr_o,
input logic csr_commit_i,
// MULT
input logic mult_valid_i, // Output is valid
// LSU
output logic lsu_ready_o, // FU is ready
input logic lsu_valid_i, // Input is valid
output logic load_valid_o,
output riscv::xlen_t load_result_o,
output logic [TRANS_ID_BITS-1:0] load_trans_id_o,
output exception_t load_exception_o,
output logic store_valid_o,
output riscv::xlen_t store_result_o,
output logic [TRANS_ID_BITS-1:0] store_trans_id_o,
output exception_t store_exception_o,
input logic lsu_commit_i,
output logic lsu_commit_ready_o, // commit queue is ready to accept another commit request
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
input logic stall_st_pending_i,
output logic no_st_pending_o,
input logic amo_valid_commit_i,
// FPU
output logic fpu_ready_o, // FU is ready
input logic fpu_valid_i, // Output is valid
input logic [1:0] fpu_fmt_i, // FP format
input logic [2:0] fpu_rm_i, // FP rm
input logic [2:0] fpu_frm_i, // FP frm csr
input logic [6:0] fpu_prec_i, // FP precision control
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output riscv::xlen_t fpu_result_o,
output logic fpu_valid_o,
output exception_t fpu_exception_o,
// CoreV-X-Interface
input logic x_valid_i,
output logic x_ready_o,
input logic [31:0] x_off_instr_i,
output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
output exception_t x_exception_o,
output riscv::xlen_t x_result_o,
output logic x_valid_o,
output logic x_we_o,
output cvxif_pkg::cvxif_req_t cvxif_req_o,
input cvxif_pkg::cvxif_resp_t cvxif_resp_i,
input logic acc_valid_i, // Output is valid
// Memory Management
input logic enable_translation_i,
input logic en_ld_st_translation_i,
input logic flush_tlb_i,
input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input logic mxr_i,
input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ ASID_WIDTH-1:0] asid_i,
// icache translation requests
input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o,
// interface to dcache
input dcache_req_o_t [2:0] dcache_req_ports_i,
output dcache_req_i_t [2:0] dcache_req_ports_o,
input logic dcache_wbuffer_empty_i,
input logic dcache_wbuffer_not_ni_i,
output amo_req_t amo_req_o, // request to cache subsytem
input amo_resp_t amo_resp_i, // response from cache subsystem
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
// PMPs
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
// RVFI
output lsu_ctrl_t rvfi_lsu_ctrl_o,
output [riscv::PLEN-1:0] rvfi_mem_paddr_o
);
// -------------------------
// Fixed Latency Units
// -------------------------
// all fixed latency units share a single issue port and a single write
// port into the scoreboard. At the moment those are:
// 1. ALU - all operations are single cycle
// 2. Branch unit: operation is single cycle, the ALU is needed
// for comparison
// 3. CSR: This is a small buffer which saves the address of the CSR.
// The value is then re-fetched once the instruction retires. The buffer
// is only a single entry deep, hence this operation will block all
// other operations once this buffer is full. This should not be a major
// concern though as CSRs are infrequent.
// 4. Multiplier/Divider: The multiplier has a fixed latency of 1 cycle.
// The issue logic will take care of not issuing
// another instruction if it will collide on the
// output port. Divisions are arbitrary in length
// they will simply block the issue of all other
// instructions.
logic current_instruction_is_sfence_vma;
// These two register store the rs1 and rs2 parameters in case of `SFENCE_VMA`
// instruction to be used for TLB flush in the next clock cycle.
logic [ASID_WIDTH-1:0] asid_to_be_flushed;
logic [riscv::VLEN-1:0] vaddr_to_be_flushed;
// from ALU to branch unit
logic alu_branch_res; // branch comparison result
riscv::xlen_t alu_result, csr_result, mult_result;
logic [riscv::VLEN-1:0] branch_result;
logic csr_ready, mult_ready;
logic [TRANS_ID_BITS-1:0] mult_trans_id;
logic mult_valid;
// 1. ALU (combinatorial)
// data silence operation
fu_data_t alu_data;
assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0;
alu #(
.CVA6Cfg(CVA6Cfg)
) alu_i (
.clk_i,
.rst_ni,
.fu_data_i (alu_data),
.result_o (alu_result),
.alu_branch_res_o(alu_branch_res)
);
// 2. Branch Unit (combinatorial)
// we don't silence the branch unit as this is already critical and we do
// not want to add another layer of logic
branch_unit #(
.CVA6Cfg(CVA6Cfg)
) branch_unit_i (
.clk_i,
.rst_ni,
.debug_mode_i,
.fu_data_i,
.pc_i,
.is_compressed_instr_i,
// any functional unit is valid, check that there is no accidental mis-predict
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) ,
.branch_valid_i,
.branch_comp_res_i(alu_branch_res),
.branch_result_o(branch_result),
.branch_predict_i,
.resolved_branch_o,
.resolve_branch_o,
.branch_exception_o(flu_exception_o)
);
// 3. CSR (sequential)
csr_buffer #(
.CVA6Cfg(CVA6Cfg)
) csr_buffer_i (
.clk_i,
.rst_ni,
.flush_i,
.fu_data_i,
.csr_valid_i,
.csr_ready_o (csr_ready),
.csr_result_o(csr_result),
.csr_commit_i,
.csr_addr_o
);
assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid;
// result MUX
always_comb begin
  // The write-back transaction ID follows the currently issued instruction,
  // except when the multiplier delivers its (delayed) result below.
  flu_trans_id_o = fu_data_i.trans_id;

  // Priority selection of the single FLU write-back value:
  // ALU first, then CSR buffer, then multiplier; branch result otherwise.
  if (alu_valid_i) begin
    flu_result_o = alu_result;
  end else if (csr_valid_i) begin
    flu_result_o = csr_result;
  end else if (mult_valid) begin
    flu_result_o   = mult_result;
    flu_trans_id_o = mult_trans_id;
  end else begin
    // nobody else claims the port: forward the branch result, zero-extended
    // from VLEN to XLEN bits
    flu_result_o = {{riscv::XLEN - riscv::VLEN{1'b0}}, branch_result};
  end
end
// ready flags for FLU
always_comb begin
flu_ready_o = csr_ready & mult_ready;
end
// 4. Multiplication (Sequential)
fu_data_t mult_data;
// input silencing of multiplier
assign mult_data = mult_valid_i ? fu_data_i : '0;
mult #(
.CVA6Cfg(CVA6Cfg)
) i_mult (
.clk_i,
.rst_ni,
.flush_i,
.mult_valid_i,
.fu_data_i (mult_data),
.result_o (mult_result),
.mult_valid_o (mult_valid),
.mult_ready_o (mult_ready),
.mult_trans_id_o(mult_trans_id)
);
// ----------------
// FPU
// ----------------
generate
if (CVA6Cfg.FpPresent) begin : fpu_gen
fu_data_t fpu_data;
assign fpu_data = fpu_valid_i ? fu_data_i : '0;
fpu_wrap #(
.CVA6Cfg(CVA6Cfg)
) fpu_i (
.clk_i,
.rst_ni,
.flush_i,
.fpu_valid_i,
.fpu_ready_o,
.fu_data_i(fpu_data),
.fpu_fmt_i,
.fpu_rm_i,
.fpu_frm_i,
.fpu_prec_i,
.fpu_trans_id_o,
.result_o (fpu_result_o),
.fpu_valid_o,
.fpu_exception_o
);
end else begin : no_fpu_gen
assign fpu_ready_o = '0;
assign fpu_trans_id_o = '0;
assign fpu_result_o = '0;
assign fpu_valid_o = '0;
assign fpu_exception_o = '0;
end
endgenerate
// ----------------
// Load-Store Unit
// ----------------
fu_data_t lsu_data;
assign lsu_data = lsu_valid_i ? fu_data_i : '0;
load_store_unit #(
.CVA6Cfg (CVA6Cfg),
.ASID_WIDTH(ASID_WIDTH)
) lsu_i (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.fu_data_i (lsu_data),
.lsu_ready_o,
.lsu_valid_i,
.load_trans_id_o,
.load_result_o,
.load_valid_o,
.load_exception_o,
.store_trans_id_o,
.store_result_o,
.store_valid_o,
.store_exception_o,
.commit_i (lsu_commit_i),
.commit_ready_o (lsu_commit_ready_o),
.commit_tran_id_i,
.enable_translation_i,
.en_ld_st_translation_i,
.icache_areq_i,
.icache_areq_o,
.priv_lvl_i,
.ld_st_priv_lvl_i,
.sum_i,
.mxr_i,
.satp_ppn_i,
.asid_i,
.asid_to_be_flushed_i (asid_to_be_flushed),
.vaddr_to_be_flushed_i(vaddr_to_be_flushed),
.flush_tlb_i,
.itlb_miss_o,
.dtlb_miss_o,
.dcache_req_ports_i,
.dcache_req_ports_o,
.dcache_wbuffer_empty_i,
.dcache_wbuffer_not_ni_i,
.amo_valid_commit_i,
.amo_req_o,
.amo_resp_i,
.pmpcfg_i,
.pmpaddr_i,
.rvfi_lsu_ctrl_o,
.rvfi_mem_paddr_o
);
// ----------------
// CoreV-X-Interface
// ----------------
// With CVA6Cfg.CvxifEn set, the CV-X-IF functional unit is instantiated and
// drives all x_* outputs as well as cvxif_req_o. Otherwise every one of those
// outputs is tied to zero so that none of them is left without a driver.
if (CVA6Cfg.CvxifEn) begin : gen_cvxif
  // NOTE(review): cvxif_data (fu_data_i silenced while x_valid_i is low) is
  // computed here but not consumed; cvxif_fu_i receives the raw fu_data_i.
  // Confirm against cvxif_fu whether the silenced copy was meant to be wired.
  fu_data_t cvxif_data;
  assign cvxif_data = x_valid_i ? fu_data_i : '0;
  cvxif_fu #(
      .CVA6Cfg(CVA6Cfg)
  ) cvxif_fu_i (
      .clk_i,
      .rst_ni,
      .fu_data_i,
      .priv_lvl_i(ld_st_priv_lvl_i),
      .x_valid_i,
      .x_ready_o,
      .x_off_instr_i,
      .x_trans_id_o,
      .x_exception_o,
      .x_result_o,
      .x_valid_o,
      .x_we_o,
      .cvxif_req_o,
      .cvxif_resp_i
  );
end else begin : gen_no_cvxif
  assign cvxif_req_o   = '0;
  // never ready: mirrors the fpu_ready_o tie-off used when no FPU is present
  assign x_ready_o     = '0;
  assign x_trans_id_o  = '0;
  assign x_exception_o = '0;
  assign x_result_o    = '0;
  assign x_valid_o     = '0;
  // no write-back can be requested without a coprocessor
  assign x_we_o        = '0;
end
// SFENCE.VMA bookkeeping, only built when CVA6Cfg.RVS is set.
// It produces asid_to_be_flushed / vaddr_to_be_flushed, which are handed to
// the load-store unit (lsu_i) for the TLB flush.
if (CVA6Cfg.RVS) begin
// Sticky flag: set when a valid CSR-port instruction carries the SFENCE_VMA
// operation; only reset or flush_i clears it again.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
current_instruction_is_sfence_vma <= 1'b0;
end else begin
if (flush_i) begin
current_instruction_is_sfence_vma <= 1'b0;
end else if ((fu_data_i.operation == SFENCE_VMA) && csr_valid_i) begin
current_instruction_is_sfence_vma <= 1'b1;
end
end
end
// This process stores the rs1 and rs2 parameters of a SFENCE_VMA instruction.
// Both registers sample the forwarding inputs every cycle and freeze as soon
// as an SFENCE_VMA is being issued (or the sticky flag above is set), so the
// values captured in the preceding cycle stay stable for the flush.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
asid_to_be_flushed <= '0;
vaddr_to_be_flushed <= '0;
// if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen
end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin
// rs1 supplies the virtual address, the low ASID_WIDTH bits of rs2 the ASID
vaddr_to_be_flushed <= rs1_forwarding_i;
asid_to_be_flushed <= rs2_forwarding_i[ASID_WIDTH-1:0];
end
end
end else begin
// No supervisor mode: the flag and both flush parameters are constant zero.
assign current_instruction_is_sfence_vma = 1'b0;
assign asid_to_be_flushed = '0;
assign vaddr_to_be_flushed = '0;
end
endmodule

View File

@ -0,0 +1,568 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Stefan Mach, ETH Zurich
// Date: 12.04.2018
// Description: Wrapper for the floating-point unit
module fpu_wrap
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic fpu_valid_i,
output logic fpu_ready_o,
input fu_data_t fu_data_i,
input logic [ 1:0] fpu_fmt_i,
input logic [ 2:0] fpu_rm_i,
input logic [ 2:0] fpu_frm_i,
input logic [ 6:0] fpu_prec_i,
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output logic [ CVA6Cfg.FLen-1:0] result_o,
output logic fpu_valid_o,
output exception_t fpu_exception_o
);
// this is a workaround
// otherwise compilation might issue an error if FLEN=0
enum logic {
READY,
STALL
}
state_q, state_d;
if (CVA6Cfg.FpPresent) begin : fpu_gen
logic [CVA6Cfg.FLen-1:0] operand_a_i;
logic [CVA6Cfg.FLen-1:0] operand_b_i;
logic [CVA6Cfg.FLen-1:0] operand_c_i;
assign operand_a_i = fu_data_i.operand_a[CVA6Cfg.FLen-1:0];
assign operand_b_i = fu_data_i.operand_b[CVA6Cfg.FLen-1:0];
assign operand_c_i = fu_data_i.imm[CVA6Cfg.FLen-1:0];
//-----------------------------------
// FPnew config from FPnew package
//-----------------------------------
localparam OPBITS = fpnew_pkg::OP_BITS;
localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
// Features (enabled formats, vectors etc.)
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: unsigned'(riscv::XLEN), // parameterized using XLEN
EnableVectors: CVA6Cfg.XFVec,
EnableNanBox: 1'b1,
FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT},
IntFmtMask: {
CVA6Cfg.XFVec && CVA6Cfg.XF8,
CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT),
1'b1,
1'b1
}
};
// Implementation (number of registers etc)
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs: '{ // FP32, FP64, FP16, FP8, FP16alt
'{
unsigned'(LAT_COMP_FP32),
unsigned'(LAT_COMP_FP64),
unsigned'(LAT_COMP_FP16),
unsigned'(LAT_COMP_FP8),
unsigned'(LAT_COMP_FP16ALT)
}, // ADDMUL
'{default: unsigned'(LAT_DIVSQRT)}, // DIVSQRT
'{default: unsigned'(LAT_NONCOMP)}, // NONCOMP
'{default: unsigned'(LAT_CONV)}
}, // CONV
UnitTypes: '{
'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}
}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
};
//-------------------------------------------------
// Inputs to the FPU and protocol inversion buffer
//-------------------------------------------------
logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a;
logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b;
logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c;
logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt;
logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt;
logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;
logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag;
logic fpu_in_ready, fpu_in_valid;
logic fpu_out_ready, fpu_out_valid;
logic [4:0] fpu_status;
// FSM to handle protocol inversion
logic hold_inputs;
logic use_hold;
//-----------------------------
// Translate inputs
//-----------------------------
// Combinational decode of one issued FP instruction into the FPnew request
// fields (*_d signals): operation, op modifier, source/destination/integer
// formats, rounding mode, vectorial flag, tag and the three operands.
// The block works in stages and later stages override earlier ones:
//   1. defaults (SGNJ, FP32, INT32, rm taken from the instruction),
//   2. rounding-mode fix-up (fall back to the frm CSR value),
//   3. destination format from fpu_fmt_i, source format = destination format,
//   4. per-operation overrides (fu_data_i.operation),
//   5. scalar FP16ALT ("AH") fix-up from rm[2] for ops that set check_ah,
//   6. scalar replication of operand B or C for vectorial ops.
always_comb begin : input_translation
automatic logic vec_replication; // control honoring of replication flag
automatic logic replicate_c; // replicate operand C instead of B (for ADD/SUB)
automatic logic check_ah; // Decide for AH from RM field encoding
// Default Values
operand_a_d = operand_a_i;
operand_b_d = operand_b_i; // immediates come through this port unless used as operand
operand_c_d = operand_c_i; // immediates come through this port unless used as operand
fpu_op_d = fpnew_pkg::SGNJ; // sign injection by default
fpu_op_mod_d = 1'b0;
fpu_dstfmt_d = fpnew_pkg::FP32;
fpu_ifmt_d = fpnew_pkg::INT32;
fpu_rm_d = fpu_rm_i;
fpu_vec_op_d = fu_data_i.fu == FPU_VEC;
fpu_tag_d = fu_data_i.trans_id;
vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field
replicate_c = 1'b0;
check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i
// Scalar Rounding Modes - some ops encode inside RM but use smaller range
// (any rm outside 000..100 selects the rounding mode held in fpu_frm_i)
if (!(fpu_rm_i inside {[3'b000 : 3'b100]})) fpu_rm_d = fpu_frm_i;
// Vectorial ops always consult FRM
if (fpu_vec_op_d) fpu_rm_d = fpu_frm_i;
// Formats
unique case (fpu_fmt_i)
// FP32
2'b00: fpu_dstfmt_d = fpnew_pkg::FP32;
// FP64 or FP16ALT (vectorial)
2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64;
// FP16 or FP16ALT (scalar)
2'b10: begin
if (!fpu_vec_op_d && fpu_rm_i == 3'b101) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
else fpu_dstfmt_d = fpnew_pkg::FP16;
end
// FP8
default: fpu_dstfmt_d = fpnew_pkg::FP8;
endcase
// By default, set src=dst
fpu_srcfmt_d = fpu_dstfmt_d;
// Operations (this can modify the rounding mode field and format!)
unique case (fu_data_i.operation)
// Addition
FADD: begin
fpu_op_d = fpnew_pkg::ADD;
replicate_c = 1'b1; // second operand is in C
end
// Subtraction is modified ADD
FSUB: begin
fpu_op_d = fpnew_pkg::ADD;
fpu_op_mod_d = 1'b1;
replicate_c = 1'b1; // second operand is in C
end
// Multiplication
FMUL: fpu_op_d = fpnew_pkg::MUL;
// Division
FDIV: fpu_op_d = fpnew_pkg::DIV;
// Min/Max - OP is encoded in rm (000-001)
FMIN_MAX: begin
fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
end
// Square Root
FSQRT: fpu_op_d = fpnew_pkg::SQRT;
// Fused Multiply Add
FMADD: fpu_op_d = fpnew_pkg::FMADD;
// Fused Multiply Subtract is modified FMADD
FMSUB: begin
fpu_op_d = fpnew_pkg::FMADD;
fpu_op_mod_d = 1'b1;
end
// Fused Negated Multiply Subtract
FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB;
// Fused Negated Multiply Add is modified FNMSUB
FNMADD: begin
fpu_op_d = fpnew_pkg::FNMSUB;
fpu_op_mod_d = 1'b1;
end
// Float to Int Cast - Op encoded in lowest two imm bits or rm
FCVT_F2I: begin
fpu_op_d = fpnew_pkg::F2I;
// Vectorial Ops encoded in R bit
if (fpu_vec_op_d) begin
fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op
unique case (fpu_fmt_i)
2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
endcase
// Scalar casts encoded in imm
end else begin
fpu_op_mod_d = operand_c_i[0];
if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
else fpu_ifmt_d = fpnew_pkg::INT32;
end
end
// Int to Float Cast - Op encoded in lowest two imm bits or rm
FCVT_I2F: begin
fpu_op_d = fpnew_pkg::I2F;
// Vectorial Ops encoded in R bit
if (fpu_vec_op_d) begin
fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op
unique case (fpu_fmt_i)
2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
endcase
// Scalar casts encoded in imm
end else begin
fpu_op_mod_d = operand_c_i[0];
if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
else fpu_ifmt_d = fpnew_pkg::INT32;
end
end
// Float to Float Cast - Source format encoded in lowest two/three imm bits
FCVT_F2F: begin
fpu_op_d = fpnew_pkg::F2F;
// Vectorial ops encoded in lowest two imm bits
if (fpu_vec_op_d) begin
vec_replication = 1'b0; // no replication for casts (not needed)
unique case (operand_c_i[1:0])
2'b00: fpu_srcfmt_d = fpnew_pkg::FP32;
2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
2'b10: fpu_srcfmt_d = fpnew_pkg::FP16;
2'b11: fpu_srcfmt_d = fpnew_pkg::FP8;
endcase
// Scalar ops encoded in lowest three imm bits
end else begin
// unlisted encodings keep the src=dst default assigned above
unique case (operand_c_i[2:0])
3'b000: fpu_srcfmt_d = fpnew_pkg::FP32;
3'b001: fpu_srcfmt_d = fpnew_pkg::FP64;
3'b010: fpu_srcfmt_d = fpnew_pkg::FP16;
3'b110: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
3'b011: fpu_srcfmt_d = fpnew_pkg::FP8;
default: ; // Do nothing
endcase
end
end
// Scalar Sign Injection - op encoded in rm (000-010)
FSGNJ: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
end
// Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding
FMV_F2X: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box
fpu_op_mod_d = 1'b1; // no NaN-Boxing
check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand
end
// Move from GPR to FPR - mapped to NOP since no recoding
FMV_X2F: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box
check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand
end
// Scalar Comparisons - op encoded in rm (000-010)
FCMP: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
end
// Classification
FCLASS: begin
fpu_op_d = fpnew_pkg::CLASSIFY;
fpu_rm_d = {
1'b0, fpu_rm_i[1:0]
}; // mask out AH encoding bit - CLASS doesn't care anyways
check_ah = 1'b1; // AH has RM MSB encoding
end
// Vectorial Minimum - set up scalar encoding in rm
VFMIN: begin
fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = 3'b000; // min
end
// Vectorial Maximum - set up scalar encoding in rm
VFMAX: begin
fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = 3'b001; // max
end
// Vectorial Sign Injection - set up scalar encoding in rm
VFSGNJ: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b000; // sgnj
end
// Vectorial Negated Sign Injection - set up scalar encoding in rm
VFSGNJN: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b001; // sgnjn
end
// Vectorial Xored Sign Injection - set up scalar encoding in rm
VFSGNJX: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b010; // sgnjx
end
// Vectorial Equals - set up scalar encoding in rm
VFEQ: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b010; // eq
end
// Vectorial Not Equals - set up scalar encoding in rm
VFNE: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b010; // eq
end
// Vectorial Less Than - set up scalar encoding in rm
VFLT: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b001; // lt
end
// Vectorial Greater or Equal - set up scalar encoding in rm
VFGE: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b001; // lt
end
// Vectorial Less or Equal - set up scalar encoding in rm
VFLE: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b000; // le
end
// Vectorial Greater Than - set up scalar encoding in rm
VFGT: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b000; // le
end
// Vectorial Convert-and-Pack from FP32, lower 4 entries
VFCPKAB_S: begin
fpu_op_d = fpnew_pkg::CPKAB;
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
end
// Vectorial Convert-and-Pack from FP32, upper 4 entries
VFCPKCD_S: begin
fpu_op_d = fpnew_pkg::CPKCD;
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
end
// Vectorial Convert-and-Pack from FP64, lower 4 entries
VFCPKAB_D: begin
fpu_op_d = fpnew_pkg::CPKAB;
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
end
// Vectorial Convert-and-Pack from FP64, upper 4 entries
VFCPKCD_D: begin
fpu_op_d = fpnew_pkg::CPKCD;
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
end
// No changes per default
default: ; //nothing
endcase
// Scalar AH encoding fixing
// (for ops that set check_ah, rm[2] of a scalar op selects FP16ALT as destination)
if (!fpu_vec_op_d && check_ah) if (fpu_rm_i[2]) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
// Replication
// A vectorial op with the replication bit still set gets one scalar element
// copied across the whole operand: operand C for ADD/SUB, operand B otherwise.
// The copy count depends on CVA6Cfg.RVD (twice as many copies when set).
if (fpu_vec_op_d && vec_replication) begin
if (replicate_c) begin
unique case (fpu_dstfmt_d)
fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
fpnew_pkg::FP8:
operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
default: ; // Do nothing
endcase // fpu_dstfmt_d
end else begin
unique case (fpu_dstfmt_d)
fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
fpnew_pkg::FP8:
operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
default: ; // Do nothing
endcase // fpu_dstfmt_d
end
end
end
//---------------------------------------------------------
// Upstream protocol inversion: InValid depends on InReady
//---------------------------------------------------------
always_comb begin : p_inputFSM
  // Idle defaults: keep the state, bypass the hold register, offer nothing
  // to the FPU and withhold the ready token from the issue stage.
  state_d      = state_q;
  use_hold     = 1'b0;
  hold_inputs  = 1'b0;
  fpu_in_valid = 1'b0;
  fpu_ready_o  = 1'b0;

  unique case (state_q)
    // Accepting instructions: the request is passed straight to the FPU.
    READY: begin
      fpu_in_valid = fpu_valid_i;
      if (fpu_valid_i & ~fpu_in_ready) begin
        // The FPU refuses the request: park it in the hold register and
        // stall the issue stage (fpu_ready_o keeps its default of 0).
        hold_inputs = 1'b1;
        state_d     = STALL;
      end else begin
        // Either no request or the FPU took it: stay ready.
        fpu_ready_o = 1'b1;
      end
    end
    // A parked request is replayed from the hold register until accepted.
    STALL: begin
      use_hold     = 1'b1;
      fpu_in_valid = 1'b1;
      if (fpu_in_ready) begin
        state_d     = READY;
        fpu_ready_o = 1'b1;
      end
    end
    // Any other state value: the defaults above apply.
    default: ;
  endcase

  // A flush always returns the FSM to READY, overriding the decisions above.
  if (flush_i) state_d = READY;
end
// Buffer register and FSM state holding
always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg
  if (~rst_ni) begin
    // FSM starts out accepting requests
    state_q      <= READY;
    // request descriptor
    fpu_tag_q    <= '0;
    fpu_vec_op_q <= '0;
    fpu_rm_q     <= '0;
    fpu_ifmt_q   <= '0;
    fpu_dstfmt_q <= '0;
    fpu_srcfmt_q <= '0;
    fpu_op_mod_q <= '0;
    fpu_op_q     <= '0;
    // operands
    operand_c_q  <= '0;
    operand_b_q  <= '0;
    operand_a_q  <= '0;
  end else begin
    // The hold register only captures a request when the FSM asks for it,
    // i.e. in the cycle a request is refused by the FPU.
    if (hold_inputs) begin
      fpu_tag_q    <= fpu_tag_d;
      fpu_vec_op_q <= fpu_vec_op_d;
      fpu_rm_q     <= fpu_rm_d;
      fpu_ifmt_q   <= fpu_ifmt_d;
      fpu_dstfmt_q <= fpu_dstfmt_d;
      fpu_srcfmt_q <= fpu_srcfmt_d;
      fpu_op_mod_q <= fpu_op_mod_d;
      fpu_op_q     <= fpu_op_d;
      operand_c_q  <= operand_c_d;
      operand_b_q  <= operand_b_d;
      operand_a_q  <= operand_a_d;
    end
    // The FSM state advances every cycle.
    state_q <= state_d;
  end
end
// Select FPU input data: from register if valid data in register, else directly from input
assign operand_a = use_hold ? operand_a_q : operand_a_d;
assign operand_b = use_hold ? operand_b_q : operand_b_d;
assign operand_c = use_hold ? operand_c_q : operand_c_d;
assign fpu_op = use_hold ? fpu_op_q : fpu_op_d;
assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d;
assign fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d;
assign fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d;
assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d;
assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d;
assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d;
assign fpu_tag = use_hold ? fpu_tag_q : fpu_tag_d;
// Consolidate operands
logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands;
assign fpu_operands[0] = operand_a;
assign fpu_operands[1] = operand_b;
assign fpu_operands[2] = operand_c;
//---------------
// FPU instance
//---------------
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation(FPU_IMPLEMENTATION),
.TagType (logic [TRANS_ID_BITS-1:0])
) i_fpnew_bulk (
.clk_i,
.rst_ni,
.operands_i (fpu_operands),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rm)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_srcfmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dstfmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_ifmt)),
.vectorial_op_i(fpu_vec_op),
.tag_i (fpu_tag),
.simd_mask_i (1'b1),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i,
.result_o,
.status_o (fpu_status),
.tag_o (fpu_trans_id_o),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
.busy_o ( /* unused */)
);
// Pack the FPU status flags into the low bits of the exception cause. The
// zero padding is sized from the cause field itself, so the assignment is
// width-correct for every configuration instead of assuming a 64-bit cause.
assign fpu_exception_o.cause = {
  {($bits(fpu_exception_o.cause) - $bits(fpu_status)) {1'b0}}, fpu_status
};
// tval is ignored in write-back; drive it to zero so it is not left undriven
assign fpu_exception_o.tval = '0;
// The FPU never raises a trap: the exception is always invalid
assign fpu_exception_o.valid = 1'b0;
// Downstream write port is dedicated to FPU and always ready
assign fpu_out_ready = 1'b1;
// Downstream valid from unit
assign fpu_valid_o = fpu_out_valid;
end
endmodule

View File

@ -0,0 +1,215 @@
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
// Copyright 2023 - Thales for additional contribution.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Migrated: Luis Vitorio Cargnini, IEEE
// Date: 09.06.2018
// FPGA optimization: Sebastien Jacq, Thales
// Date: 2023-01-30
// branch history table - 2 bit saturation counter
module bht #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg    = config_pkg::cva6_cfg_empty,
    // total number of counters; organised as NR_ENTRIES / INSTR_PER_FETCH rows
    parameter int unsigned           NR_ENTRIES = 1024
) (
    input logic clk_i,
    input logic rst_ni,
    // invalidates every entry (only the ASIC implementation below uses it)
    input logic flush_i,
    // table updates are suppressed in debug mode when CVA6Cfg.DebugEn is set
    input logic debug_mode_i,
    // virtual PC used for the prediction lookup
    input logic [riscv::VLEN-1:0] vpc_i,
    // training information: pc, valid and taken are read from it
    input ariane_pkg::bht_update_t bht_update_i,
    // we potentially need INSTR_PER_FETCH predictions/cycle
    output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o
);
  // the last bit is always zero, we don't need it for indexing
  localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
  // re-shape the branch history table
  localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
  // number of bits needed to index the row
  localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
  // width of the in-row index signal; kept at 1 bit without RVC so that the
  // signal never becomes zero-width
  localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
  // number of bits we should use for prediction
  localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
  // we are not interested in all bits of the address
  unread i_unread (.d_i(|vpc_i));

  struct packed {
    logic       valid;
    logic [1:0] saturation_counter;
  }
      bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
      bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];

  // row selected by the lookup PC / by the update PC
  logic [$clog2(NR_ROWS)-1:0] index, update_pc;
  // entry inside the row that the update targets
  logic [ROW_INDEX_BITS-1:0] update_row_index;

  assign index     = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
  assign update_pc = bht_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
  if (CVA6Cfg.RVC) begin : gen_update_row_index
    assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
  end else begin
    assign update_row_index = '0;
  end

  if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht  // ASIC TARGET
    logic [1:0] saturation_counter;

    // prediction assignment: all entries of the indexed row are output,
    // "taken" being the MSB of the 2-bit counter
    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output
      assign bht_prediction_o[i].valid = bht_q[index][i].valid;
      assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1;
    end

    always_comb begin : update_bht
      bht_d = bht_q;
      saturation_counter = bht_q[update_pc][update_row_index].saturation_counter;

      if ((bht_update_i.valid && CVA6Cfg.DebugEn && !debug_mode_i) || (bht_update_i.valid && !CVA6Cfg.DebugEn)) begin
        bht_d[update_pc][update_row_index].valid = 1'b1;

        if (saturation_counter == 2'b11) begin
          // we can safely decrease it
          if (!bht_update_i.taken)
            bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
          // then check if it saturated in the negative regime e.g.: branch not taken
        end else if (saturation_counter == 2'b00) begin
          // we can safely increase it
          if (bht_update_i.taken)
            bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
        end else begin  // otherwise we are not in any boundaries and can decrease or increase it
          if (bht_update_i.taken)
            bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
          else bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
        end
      end
    end

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        for (int unsigned i = 0; i < NR_ROWS; i++) begin
          for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
            bht_q[i][j] <= '0;
          end
        end
      end else begin
        // evict all entries; counters restart at 2'b10
        if (flush_i) begin
          for (int i = 0; i < NR_ROWS; i++) begin
            for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
              bht_q[i][j].valid <= 1'b0;
              bht_q[i][j].saturation_counter <= 2'b10;
            end
          end
        end else begin
          bht_q <= bht_d;
        end
      end
    end
  end else begin : gen_fpga_bht  //FPGA TARGETS
    // number of bits per word in the bram
    localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t);
    logic [                     ROW_INDEX_BITS-1:0] row_index;
    logic [        ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address;
    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata;
    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0;
    logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1;

    ariane_pkg::bht_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht;
    ariane_pkg::bht_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated;

    if (CVA6Cfg.RVC) begin : gen_row_index
      assign row_index = vpc_i[ROW_ADDR_BITS+OFFSET-1:OFFSET];
    end else begin
      assign row_index = '0;
    end

    // -------------------------
    // prediction assignment & update Branch History Table
    // -------------------------
    // One RAM per fetch slot. Read port 0 serves the prediction lookup, read
    // port 1 fetches the counter that is about to be updated, and the write
    // port stores the updated counter in the same cycle.
    always_comb begin : prediction_update_bht
      bht_ram_we             = '0;
      bht_ram_read_address_0 = '0;
      bht_ram_read_address_1 = '0;
      bht_ram_write_address  = '0;
      bht_ram_wdata          = '0;
      bht_updated            = '0;
      bht                    = '0;
      // Only the slot selected by row_index is looked up below; all other
      // slots must still get a value in this combinational process, otherwise
      // latches are inferred. They report "no valid prediction".
      bht_prediction_o       = '0;

      for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
        if (row_index == i) begin
          bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
          // stored word is {valid, saturation_counter[1:0]}; "taken" is the counter MSB
          bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2];
          bht_prediction_o[i].taken = bht_ram_rdata_0[i*BRAM_WORD_BITS+1];
        end
      end

      // Same update gating as the ASIC implementation: debug mode only blocks
      // updates when debug support is configured in.
      if (bht_update_i.valid && !(CVA6Cfg.DebugEn && debug_mode_i)) begin
        for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
          if (update_row_index == i) begin
            bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
            bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS+:2];

            if (bht[i].saturation_counter == 2'b11) begin
              // we can safely decrease it
              if (!bht_update_i.taken)
                bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
              else bht_updated[i].saturation_counter = 2'b11;
              // then check if it saturated in the negative regime e.g.: branch not taken
            end else if (bht[i].saturation_counter == 2'b00) begin
              // we can safely increase it
              if (bht_update_i.taken)
                bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
              else bht_updated[i].saturation_counter = 2'b00;
            end else begin  // otherwise we are not in any boundaries and can decrease or increase it
              if (bht_update_i.taken)
                bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
              else bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
            end

            bht_updated[i].valid = 1'b1;
            bht_ram_we[i] = 1'b1;
            bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
            //bht_ram_wdata[(i+1)*BRAM_WORD_BITS-1] = 1'b1; //valid
            bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
              bht_updated[i].valid, bht_updated[i].saturation_counter
            };
          end
        end
      end
    end

    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram
      AsyncThreePortRam #(
          .ADDR_WIDTH($clog2(NR_ROWS)),
          .DATA_DEPTH(NR_ROWS),
          .DATA_WIDTH(BRAM_WORD_BITS)
      ) i_bht_ram (
          .Clk_CI     (clk_i),
          .WrEn_SI    (bht_ram_we[i]),
          .WrAddr_DI  (bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrData_DI  (bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdAddr_DI_0(bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .RdAddr_DI_1(bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .RdData_DO_0(bht_ram_rdata_0[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdData_DO_1(bht_ram_rdata_1[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
      );
    end
  end
endmodule

View File

@ -0,0 +1,185 @@
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Migrated: Luis Vitorio Cargnini, IEEE
// Date: 09.06.2018
//
// Additional contributions by:
// Sebastien Jacq, Thales - sjthales on github.com
// Date: 2022-12-01
//
// Description: This module is an adaptation of the BTB (Branch Target Buffer)
// module for both FPGA and ASIC targets.
// On FPGA the prediction target address is stored in BRAM, while
// in the original module the target address is stored in D flip-flops.
// For FPGA, flushing is not supported because the frontend module's
// flushing signal is not connected.
//
// branch target buffer
// Branch target buffer (BTB).
//
// Stores, per (row, instruction slot), a valid bit and a predicted target
// address. The table is organised as NR_ROWS rows of INSTR_PER_FETCH entries:
// the row is selected by `index` (lookup) or `update_pc` (update), both taken
// from PC bits [PREDICTION_BITS-1 : ROW_ADDR_BITS+OFFSET]; the slot inside the
// row is selected by the PC bits just above OFFSET.
//
// Two implementations are generated depending on ariane_pkg::FPGA_EN:
//   * gen_fpga_btb: one SyncDpRam per instruction slot; port A is used for
//     updates, port B for prediction lookups. `flush_i` is not referenced in
//     this branch.
//   * gen_asic_btb: a flip-flop array; `flush_i` clears all valid bits.
//
// In both implementations updates are suppressed while `debug_mode_i` is set.
module btb #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int NR_ENTRIES = 8
) (
    input logic clk_i,  // Clock
    input logic rst_ni,  // Asynchronous reset active low
    input logic flush_i,  // flush the btb (only used by the ASIC implementation)
    input logic debug_mode_i,  // when set, btb_update_i is ignored
    input logic [riscv::VLEN-1:0] vpc_i,  // virtual PC from IF stage
    input ariane_pkg::btb_update_t btb_update_i,  // update btb with this information
    output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o  // prediction from btb
);
  // the last bit is always zero, we don't need it for indexing
  // (two bits are dropped when compressed instructions are disabled)
  localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
  // re-shape the branch target buffer: NR_ENTRIES are spread over
  // INSTR_PER_FETCH entries per row
  localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
  // number of bits needed to index the row
  localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
  // width of the slot selector; kept at one bit when RVC is disabled
  localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
  // number of bits we should use for prediction
  localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
  // prevent aliasing to degrade performance
  // NOTE(review): this constant is not referenced anywhere in this module
  localparam ANTIALIAS_BITS = 8;
  // number of bits per word in the bram
  localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t);

  // we are not interested in all bits of the address
  unread i_unread (.d_i(|vpc_i));

  // row selectors for lookup (index) and update (update_pc)
  logic [$clog2(NR_ROWS)-1:0] index, update_pc;
  // slot inside the row that is written on an update
  logic [ROW_INDEX_BITS-1:0] update_row_index;

  assign index     = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
  assign update_pc = btb_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];

  if (CVA6Cfg.RVC) begin : gen_update_row_index
    assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
  end else begin
    // without RVC only slot 0 is ever updated
    assign update_row_index = '0;
  end

  if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb  //FPGA TARGETS
    // prediction (read) port signals, one slice per instruction slot
    logic [                   ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
    logic [                   ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
    logic [    ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
    logic [    ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;
    // update (write) port signals, one slice per instruction slot
    logic [                   ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update;
    logic [                   ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update;
    logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
    logic [    ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;

    // The prediction port never writes. Tie its write data off exactly once
    // for the whole vector: a variable may only be driven by a single
    // continuous assignment, so this must not be repeated per generate
    // iteration.
    assign btb_ram_wdata_prediction = '0;

    // output matching prediction
    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
      // every slot RAM is read on every lookup, all with the same row index
      assign btb_ram_csel_prediction[i] = 1'b1;
      assign btb_ram_we_prediction[i] = 1'b0;
      assign btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
      // the raw RAM word is reinterpreted as a btb_prediction_t
      assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS];
    end

    // -------------------------
    // Update Branch Prediction
    // -------------------------
    // update on a mis-predict
    always_comb begin : update_branch_predict
      btb_ram_csel_update = '0;
      btb_ram_we_update = '0;
      btb_ram_addr_update = '0;
      btb_ram_wdata_update = '0;

      if (btb_update_i.valid && !debug_mode_i) begin
        for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
          // only the RAM of the slot addressed by the update PC is written
          if (update_row_index == i) begin
            btb_ram_csel_update[i] = 1'b1;
            btb_ram_we_update[i] = 1'b1;
            btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
            // word layout: MSB = 1'b1 (presumably the valid bit of
            // btb_prediction_t, which is declared elsewhere), followed by
            // the target address
            btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
              1'b1, btb_update_i.target_address
            };
          end
        end
      end
    end

    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram
      SyncDpRam #(
          .ADDR_WIDTH($clog2(NR_ROWS)),
          .DATA_DEPTH(NR_ROWS),
          .DATA_WIDTH(BRAM_WORD_BITS),
          .OUT_REGS  (0),
          .SIM_INIT  (1)
      ) i_btb_ram (
          .Clk_CI    (clk_i),
          .Rst_RBI   (rst_ni),
          //----------------------------
          // port A: update (write only, read data unused)
          .CSelA_SI  (btb_ram_csel_update[i]),
          .WrEnA_SI  (btb_ram_we_update[i]),
          .AddrA_DI  (btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrDataA_DI(btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdDataA_DO(),
          //-----------------------------
          // port B: prediction lookup (write enable tied low)
          .CSelB_SI  (btb_ram_csel_prediction[i]),
          .WrEnB_SI  (btb_ram_we_prediction[i]),
          .AddrB_DI  (btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrDataB_DI(btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdDataB_DO(btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
      );
    end

  end else begin : gen_asic_btb  // ASIC TARGET

    // storage for all branch target entries: [row][slot]
    // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
    ariane_pkg::btb_prediction_t
        btb_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
        btb_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];

    // output matching prediction: the whole row selected by `index`
    for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
      assign btb_prediction_o[i] = btb_q[index][i];  // workaround
    end

    // -------------------------
    // Update Branch Prediction
    // -------------------------
    // update on a mis-predict
    always_comb begin : update_branch_predict
      btb_d = btb_q;

      if (btb_update_i.valid && !debug_mode_i) begin
        btb_d[update_pc][update_row_index].valid = 1'b1;
        // the target address is simply updated
        btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
      end
    end

    // sequential process
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        // clear every entry (all fields zero) on reset
        for (int i = 0; i < NR_ROWS; i++) btb_q[i] <= '{default: 0};
      end else begin
        // evict all entries: only the valid bits are cleared, the other
        // fields keep their value. An update arriving in the same cycle as
        // the flush is dropped because btb_d is not registered.
        if (flush_i) begin
          for (int i = 0; i < NR_ROWS; i++) begin
            for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
              btb_q[i][j].valid <= 1'b0;
            end
          end
        end else begin
          btb_q <= btb_d;
        end
      end
    end
  end
endmodule

View File

@ -0,0 +1,516 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Description: Ariane Instruction Fetch Frontend
//
// This module interfaces with the instruction cache, handles control
// change request from the back-end and does branch prediction.
// Instruction fetch frontend.
//
// Issues fetch requests to the instruction cache, re-aligns the returned
// data into up to INSTR_PER_FETCH instructions, scans them for control flow
// (instr_scan), predicts taken control flow using the BHT, BTB and RAS, and
// pushes the resulting instruction stream into the instruction queue that
// feeds the back-end. The next PC is selected in the `npc_select` process.
module frontend
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input logic clk_i,  // Clock
    input logic rst_ni,  // Asynchronous reset active low
    input logic flush_i,  // flush request for PCGEN
    input logic flush_bp_i,  // flush branch prediction
    input logic halt_i,  // halt commit stage
    input logic debug_mode_i,
    // global input
    input logic [riscv::VLEN-1:0] boot_addr_i,
    // Set a new PC
    // mispredict
    input bp_resolve_t resolved_branch_i,  // from controller signaling a branch_predict -> update BTB
    // from commit, when flushing the whole pipeline
    input logic set_pc_commit_i,  // Take the PC from commit stage
    input logic [riscv::VLEN-1:0] pc_commit_i,  // PC of instruction in commit stage
    // CSR input
    input logic [riscv::VLEN-1:0] epc_i,  // exception PC which we need to return to
    input logic eret_i,  // return from exception
    input logic [riscv::VLEN-1:0] trap_vector_base_i,  // base of trap vector
    input logic ex_valid_i,  // exception is valid - from commit
    input logic set_debug_pc_i,  // jump to debug address
    // Instruction Fetch
    output icache_dreq_t icache_dreq_o,
    input icache_drsp_t icache_dreq_i,
    // instruction output port -> to processor back-end
    output fetch_entry_t fetch_entry_o,  // fetch entry containing all relevant data for the ID stage
    output logic fetch_entry_valid_o,  // instruction in IF is valid
    input logic fetch_entry_ready_i  // ID acknowledged this instruction
);
  // Instruction Cache Registers, from I$
  logic                            [                FETCH_WIDTH-1:0] icache_data_q;
  logic                                                              icache_valid_q;
  ariane_pkg::frontend_exception_t                                   icache_ex_valid_q;
  logic                            [                riscv::VLEN-1:0] icache_vaddr_q;
  logic                                                              instr_queue_ready;
  logic                            [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed;
  // upper-most branch-prediction from last cycle
  btb_prediction_t                                                   btb_q;
  bht_prediction_t                                                   bht_q;
  // instruction fetch is ready
  logic                                                              if_ready;
  logic [riscv::VLEN-1:0] npc_d, npc_q;  // next PC

  // indicates whether we come out of reset (then we need to load boot_addr_i)
  logic                                                   npc_rst_load_q;

  logic                                                   replay;
  logic [                                riscv::VLEN-1:0] replay_addr;

  // shift amount
  logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt;
  // address will always be 16 bit aligned, make this explicit here
  if (CVA6Cfg.RVC) begin : gen_shamt
    assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1];
  end else begin
    // no re-alignment shift without compressed instructions; the unsized '0
    // fills shamt regardless of its declared width
    assign shamt = '0;
  end

  // -----------------------
  // Ctrl Flow Speculation
  // -----------------------
  // RVI ctrl flow prediction
  logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump;
  logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm;
  // RVC branching
  logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call;
  logic            [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm;
  // re-aligned instruction and address (coming from cache - combinationally)
  logic            [INSTR_PER_FETCH-1:0][           31:0] instr;
  logic            [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr;
  logic            [INSTR_PER_FETCH-1:0]                  instruction_valid;
  // BHT, BTB and RAS prediction
  bht_prediction_t [INSTR_PER_FETCH-1:0]                  bht_prediction;
  btb_prediction_t [INSTR_PER_FETCH-1:0]                  btb_prediction;
  bht_prediction_t [INSTR_PER_FETCH-1:0]                  bht_prediction_shifted;
  btb_prediction_t [INSTR_PER_FETCH-1:0]                  btb_prediction_shifted;
  ras_t                                                   ras_predict;
  logic            [    riscv::VLEN-1:0]                  vpc_btb;

  // branch-predict update
  logic                                                   is_mispredict;
  logic ras_push, ras_pop;
  logic [                riscv::VLEN-1:0] ras_update;

  // Instruction FIFO
  logic [                riscv::VLEN-1:0] predict_address;
  cf_t  [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvi_cf;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf;

  logic                                   serving_unaligned;
  // Re-align instructions
  instr_realign #(
      .CVA6Cfg(CVA6Cfg)
  ) i_instr_realign (
      .clk_i              (clk_i),
      .rst_ni             (rst_ni),
      .flush_i            (icache_dreq_o.kill_s2),
      .valid_i            (icache_valid_q),
      .serving_unaligned_o(serving_unaligned),
      .address_i          (icache_vaddr_q),
      .data_i             (icache_data_q),
      .valid_o            (instruction_valid),
      .addr_o             (addr),
      .instr_o            (instr)
  );

  // --------------------
  // Branch Prediction
  // --------------------
  // select the right branch prediction result
  // in case we are serving an unaligned instruction in instr[0] we need to take
  // the prediction we saved from the previous fetch
  if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted
    assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2(
        INSTR_PER_FETCH
    ):1]];
    assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2(
        INSTR_PER_FETCH
    ):1]];

    // for all other predictions we can use the generated address to index
    // into the branch prediction data structures
    for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address
      assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
      assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
    end
  end else begin
    assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]];
    assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][1]];
  end

  // for the return address stack it doesn't matter as we have the
  // address of the call/return already
  logic bp_valid;

  logic [INSTR_PER_FETCH-1:0] is_branch;
  logic [INSTR_PER_FETCH-1:0] is_call;
  logic [INSTR_PER_FETCH-1:0] is_jump;
  logic [INSTR_PER_FETCH-1:0] is_return;
  logic [INSTR_PER_FETCH-1:0] is_jalr;

  // classify each valid instruction slot by control-flow kind
  for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
    // branch history table -> BHT
    assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]);
    // function calls -> RAS
    assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]);
    // function return -> RAS
    assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]);
    // unconditional jumps with known target -> immediately resolved
    assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]);
    // unconditional jumps with unknown target -> BTB
    assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
  end

  // taken/not taken
  always_comb begin
    taken_rvi_cf = '0;
    taken_rvc_cf = '0;
    predict_address = '0;

    for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF;

    ras_push = 1'b0;
    ras_pop = 1'b0;
    ras_update = '0;

    // lower most prediction gets precedence: the loop runs from the highest
    // slot down to slot 0, so assignments made for lower slots are the ones
    // that survive
    for (int i = INSTR_PER_FETCH - 1; i >= 0; i--) begin
      unique case ({
        is_branch[i], is_return[i], is_jump[i], is_jalr[i]
      })
        4'b0000: ;  // regular instruction e.g.: no branch
        // unconditional jump to register, we need the BTB to resolve this
        4'b0001: begin
          ras_pop  = 1'b0;
          ras_push = 1'b0;
          if (CVA6Cfg.BTBEntries && btb_prediction_shifted[i].valid) begin
            predict_address = btb_prediction_shifted[i].target_address;
            cf_type[i] = ariane_pkg::JumpR;
          end
        end
        // it's an unconditional jump to an immediate
        4'b0010: begin
          ras_pop = 1'b0;
          ras_push = 1'b0;
          taken_rvi_cf[i] = rvi_jump[i];
          taken_rvc_cf[i] = rvc_jump[i];
          cf_type[i] = ariane_pkg::Jump;
        end
        // return
        4'b0100: begin
          // make sure to only alter the RAS if we actually consumed the instruction
          ras_pop = ras_predict.valid & instr_queue_consumed[i];
          ras_push = 1'b0;
          predict_address = ras_predict.ra;
          cf_type[i] = ariane_pkg::Return;
        end
        // branch prediction
        4'b1000: begin
          ras_pop  = 1'b0;
          ras_push = 1'b0;
          // if we have a valid dynamic prediction use it
          if (bht_prediction_shifted[i].valid) begin
            taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken;
            taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken;
            // otherwise default to static prediction
          end else begin
            // set if immediate is negative - static prediction
            taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1];
            taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1];
          end
          if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin
            cf_type[i] = ariane_pkg::Branch;
          end
        end
        default: ;
        // default: $error("Decoded more than one control flow");
      endcase
      // if this instruction, in addition, is a call, save the resulting address
      // but only if we actually consumed the address
      if (is_call[i]) begin
        ras_push   = instr_queue_consumed[i];
        ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
      end
      // calculate the jump target address
      if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin
        predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
      end
    end
  end
  // or reduce struct
  always_comb begin
    bp_valid = 1'b0;
    // BP cannot be valid if we have a return instruction and the RAS is not giving a valid address
    // Check that we encountered a control flow and that for a return the RAS
    // contains a valid prediction.
    for (int i = 0; i < INSTR_PER_FETCH; i++)
    bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid));
  end
  assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;

  // Cache interface
  assign icache_dreq_o.req = instr_queue_ready;
  assign if_ready = icache_dreq_i.ready & instr_queue_ready;
  // We need to flush the cache pipeline if:
  // 1. We mispredicted
  // 2. Want to flush the whole processor front-end
  // 3. Need to replay an instruction because the fetch-fifo was full
  assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay;
  // if we have a valid branch-prediction we need to only kill the last cache request
  // also if we killed the first stage we also need to kill the second stage (inclusive flush)
  assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;

  // Update Control Flow Predictions
  bht_update_t bht_update;
  btb_update_t btb_update;

  // assert on branch, deassert when resolved
  logic speculative_q, speculative_d;
  assign speculative_d = (speculative_q && !resolved_branch_i.valid || |is_branch || |is_return || |is_jalr) && !flush_i;
  assign icache_dreq_o.spec = speculative_d;

  // the BHT is updated on every resolved conditional branch
  assign bht_update.valid = resolved_branch_i.valid
                                & (resolved_branch_i.cf_type == ariane_pkg::Branch);
  assign bht_update.pc = resolved_branch_i.pc;
  assign bht_update.taken = resolved_branch_i.is_taken;
  // only update mispredicted branches e.g. no returns from the RAS
  assign btb_update.valid = resolved_branch_i.valid
                                & resolved_branch_i.is_mispredict
                                & (resolved_branch_i.cf_type == ariane_pkg::JumpR);
  assign btb_update.pc = resolved_branch_i.pc;
  assign btb_update.target_address = resolved_branch_i.target_address;

  // -------------------
  // Next PC
  // -------------------
  // next PC (NPC) is selected by a chain of assignments where every later
  // assignment overrides the earlier ones, i.e. in order of increasing
  // precedence:
  // 0. Branch prediction (also redirects the current fetch address)
  // 1. Default assignment (next sequential fetch address)
  // 2. Replay instruction fetch
  // 3. Control flow change request (misprediction)
  // 4. Return from environment call
  // 5. Exception/Interrupt
  // 6. Pipeline Flush because of CSR side effects
  // 7. Debug
  // Mis-predict handling is a little bit different
  // select PC a.k.a PC Gen
  always_comb begin : npc_select
    automatic logic [riscv::VLEN-1:0] fetch_address;
    // check whether we come out of reset
    // this is a workaround. some tools have issues
    // having boot_addr_i in the asynchronous
    // reset assignment to npc_q, even though
    // boot_addr_i will be assigned a constant
    // on the top-level.
    if (npc_rst_load_q) begin
      npc_d         = boot_addr_i;
      fetch_address = boot_addr_i;
    end else begin
      fetch_address = npc_q;
      // keep stable by default
      npc_d         = npc_q;
    end
    // 0. Branch Prediction
    if (bp_valid) begin
      fetch_address = predict_address;
      npc_d = predict_address;
    end
    // 1. Default assignment
    // advance to the next 4-byte aligned address after the one being fetched
    if (if_ready) begin
      npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4;
    end
    // 2. Replay instruction fetch
    if (replay) begin
      npc_d = replay_addr;
    end
    // 3. Control flow change request
    if (is_mispredict) begin
      npc_d = resolved_branch_i.target_address;
    end
    // 4. Return from environment call
    if (eret_i) begin
      npc_d = epc_i;
    end
    // 5. Exception/Interrupt
    if (ex_valid_i) begin
      npc_d = trap_vector_base_i;
    end
    // 6. Pipeline Flush because of CSR side effects
    // On a pipeline flush start fetching from the next address
    // of the instruction in the commit stage
    // we either came here from a flush request of a CSR instruction or AMO,
    // so as CSR or AMO instructions do not exist in a compressed form
    // we can unconditionally do PC + 4 here
    // or if the commit stage is halted, just take the current pc of the
    // instruction in the commit stage
    // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
    if (set_pc_commit_i) begin
      npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN - 3{1'b0}}, 3'b100});
    end
    // 7. Debug
    // enter debug on a hard-coded base-address
    if (CVA6Cfg.DebugEn && set_debug_pc_i)
      npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0];
    // the address requested from the I$ this cycle
    icache_dreq_o.vaddr = fetch_address;
  end

  logic [FETCH_WIDTH-1:0] icache_data;
  // re-align the cache line: shift right by shamt * 16 bits
  assign icache_data = icache_dreq_i.data >> {shamt, 4'b0};

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      npc_rst_load_q    <= 1'b1;
      npc_q             <= '0;
      speculative_q     <= '0;
      icache_data_q     <= '0;
      icache_valid_q    <= 1'b0;
      icache_vaddr_q    <= 'b0;
      icache_ex_valid_q <= ariane_pkg::FE_NONE;
      btb_q             <= '0;
      bht_q             <= '0;
    end else begin
      npc_rst_load_q <= 1'b0;
      npc_q          <= npc_d;
      speculative_q  <= speculative_d;
      icache_valid_q <= icache_dreq_i.valid;
      // the data path registers are only updated on a valid I$ response
      if (icache_dreq_i.valid) begin
        icache_data_q  <= icache_data;
        icache_vaddr_q <= icache_dreq_i.vaddr;
        // Map the only three exceptions which can occur in the frontend to a two bit enum
        if (ariane_pkg::MMU_PRESENT && icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin
          icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT;
        end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin
          icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT;
        end else begin
          icache_ex_valid_q <= ariane_pkg::FE_NONE;
        end
        // save the uppermost prediction
        btb_q <= btb_prediction[INSTR_PER_FETCH-1];
        bht_q <= bht_prediction[INSTR_PER_FETCH-1];
      end
    end
  end

  // return address stack; tied off when configured with zero depth
  if (CVA6Cfg.RASDepth == 0) begin
    assign ras_predict = '0;
  end else begin : ras_gen
    ras #(
        .CVA6Cfg(CVA6Cfg),
        .DEPTH  (CVA6Cfg.RASDepth)
    ) i_ras (
        .clk_i,
        .rst_ni,
        .flush_i(flush_bp_i),
        .push_i (ras_push),
        .pop_i  (ras_pop),
        .data_i (ras_update),
        .data_o (ras_predict)
    );
  end

  //For FPGA, BTB is implemented in read synchronous BRAM
  //while for ASIC, BTB is implemented in D flip-flop
  //and can be read at the same cycle.
  assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;

  // branch target buffer; tied off when configured with zero entries
  if (CVA6Cfg.BTBEntries == 0) begin
    assign btb_prediction = '0;
  end else begin : btb_gen
    btb #(
        .CVA6Cfg   (CVA6Cfg),
        .NR_ENTRIES(CVA6Cfg.BTBEntries)
    ) i_btb (
        .clk_i,
        .rst_ni,
        .flush_i         (flush_bp_i),
        .debug_mode_i,
        .vpc_i           (vpc_btb),
        .btb_update_i    (btb_update),
        .btb_prediction_o(btb_prediction)
    );
  end

  // branch history table; tied off when configured with zero entries
  if (CVA6Cfg.BHTEntries == 0) begin
    assign bht_prediction = '0;
  end else begin : bht_gen
    bht #(
        .CVA6Cfg   (CVA6Cfg),
        .NR_ENTRIES(CVA6Cfg.BHTEntries)
    ) i_bht (
        .clk_i,
        .rst_ni,
        .flush_i         (flush_bp_i),
        .debug_mode_i,
        .vpc_i           (icache_vaddr_q),
        .bht_update_i    (bht_update),
        .bht_prediction_o(bht_prediction)
    );
  end

  // we need to inspect up to INSTR_PER_FETCH instructions for branches
  // and jumps
  for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan
    instr_scan #(
        .CVA6Cfg(CVA6Cfg)
    ) i_instr_scan (
        .instr_i     (instr[i]),
        .rvi_return_o(rvi_return[i]),
        .rvi_call_o  (rvi_call[i]),
        .rvi_branch_o(rvi_branch[i]),
        .rvi_jalr_o  (rvi_jalr[i]),
        .rvi_jump_o  (rvi_jump[i]),
        .rvi_imm_o   (rvi_imm[i]),
        .rvc_branch_o(rvc_branch[i]),
        .rvc_jump_o  (rvc_jump[i]),
        .rvc_jr_o    (rvc_jr[i]),
        .rvc_return_o(rvc_return[i]),
        .rvc_jalr_o  (rvc_jalr[i]),
        .rvc_call_o  (rvc_call[i]),
        .rvc_imm_o   (rvc_imm[i])
    );
  end

  instr_queue #(
      .CVA6Cfg(CVA6Cfg)
  ) i_instr_queue (
      .clk_i              (clk_i),
      .rst_ni             (rst_ni),
      .flush_i            (flush_i),
      .instr_i            (instr),                 // from re-aligner
      .addr_i             (addr),                  // from re-aligner
      .exception_i        (icache_ex_valid_q),     // from I$
      .exception_addr_i   (icache_vaddr_q),
      .predict_address_i  (predict_address),
      .cf_type_i          (cf_type),
      .valid_i            (instruction_valid),     // from re-aligner
      .consumed_o         (instr_queue_consumed),
      .ready_o            (instr_queue_ready),
      .replay_o           (replay),
      .replay_addr_o      (replay_addr),
      .fetch_entry_o      (fetch_entry_o),         // to back-end
      .fetch_entry_valid_o(fetch_entry_valid_o),   // to back-end
      .fetch_entry_ready_i(fetch_entry_ready_i)    // to back-end
  );

  // pragma translate_off
`ifndef VERILATOR
  // simulation-only sanity check of the configured fetch width
  initial begin
    assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64)
    else $fatal(1, "[frontend] fetch width not supported (must be 32 or 64)");
  end
`endif
  // pragma translate_on
endmodule

View File

@ -0,0 +1,459 @@
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 26.10.2018
// Description: Instruction Queue, separates instruction front-end from processor
// back-end.
//
// This is an optimized instruction queue which supports the handling of
// compressed instructions (16 bit instructions). Internally it is organized as
// FETCH_ENTRY x 32 bit queues which are filled in a consecutive manner. Two pointers
// point into (`idx_is_q` and `idx_ds_q`) the fill port and the read port. The read port
// is designed so that it will easily allow for multiple issue implementation.
// The input supports arbitrary power of two instruction fetch widths.
//
// The queue supports handling of branch prediction and will take care of
// only saving a valid instruction stream.
//
// Furthermore it contains a replay interface in case the instruction queue
// is already full. As instructions are in general easily replayed this should
// increase the efficiency as I$ misses are potentially hidden. This stands in
// contrast to pessimistic actions (early stalling) or credit based approaches.
// Credit based systems might be difficult to implement with the current system
// as we do not exactly know how much space we are going to need in the fifos
// as each instruction can take either one or two slots.
//
// So the consumed/valid interface degenerates to an `information` interface. If the
// upstream circuit keeps pushing, the queue will discard the information
// and start replaying from the point where it could last manage to accept instructions.
//
// The instruction front-end will stop issuing instructions as soon as the
// fifo is full. This will gate the logic if the processor is e.g.: halted
//
// TODO(zarubaf): The instruction queues can be reduced to 16 bit. Potentially
// the replay mechanism gets more complicated as it can be that a 32 bit instruction
// can not be pushed at once.
module instr_queue
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i,
output logic ready_o,
output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o,
// we've encountered an exception, at this point the only possible exceptions are page-table faults
input ariane_pkg::frontend_exception_t exception_i,
input logic [riscv::VLEN-1:0] exception_addr_i,
// branch predict
input logic [riscv::VLEN-1:0] predict_address_i,
input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i,
// replay instruction because one of the FIFO was already full
output logic replay_o,
output logic [riscv::VLEN-1:0] replay_addr_o, // address at which to replay this instruction
// to processor backend
output ariane_pkg::fetch_entry_t fetch_entry_o,
output logic fetch_entry_valid_o,
input logic fetch_entry_ready_i
);
typedef struct packed {
logic [31:0] instr; // instruction word
ariane_pkg::cf_t cf; // branch was taken
ariane_pkg::frontend_exception_t ex; // exception happened
logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception
} instr_data_t;
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] branch_index;
// instruction queues
logic [ariane_pkg::INSTR_PER_FETCH-1:0][$clog2(
ariane_pkg::FETCH_FIFO_DEPTH
)-1:0] instr_queue_usage;
instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty;
logic instr_overflow;
// address queue
logic [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage;
logic [ riscv::VLEN-1:0] address_out;
logic pop_address;
logic push_address;
logic full_address;
logic empty_address;
logic address_overflow;
// input stream counter
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q;
// Registers
// output FIFO select, one-hot
logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q;
logic [riscv::VLEN-1:0] pc_d, pc_q; // current PC
logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush
logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask;
logic branch_empty;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken;
// shift amount, e.g.: instructions we want to retire
logic [ariane_pkg::LOG2_INSTR_PER_FETCH:0] popcount;
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] shamt;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid;
logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] consumed_extended;
// FIFO mask
logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] fifo_pos_extended;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_pos;
logic [ariane_pkg::INSTR_PER_FETCH*2-1:0][31:0] instr;
ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH*2-1:0] cf;
// replay interface
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_overflow_fifo;
assign ready_o = ~(|instr_queue_full) & ~full_address;
if (ariane_pkg::RVC) begin : gen_multiple_instr_per_fetch_with_C
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken
assign taken[i] = cf_type_i[i] != ariane_pkg::NoCF;
end
// calculate a branch mask, e.g.: get the first taken branch
lzc #(
.WIDTH(ariane_pkg::INSTR_PER_FETCH),
.MODE (0) // count trailing zeros
) i_lzc_branch_index (
.in_i (taken), // we want to count trailing zeros
.cnt_o (branch_index), // first branch on branch_index
.empty_o(branch_empty)
);
// the first index is for sure valid
// for example (64 bit fetch):
// taken mask: 0 1 1 0
// leading zero count = 1
// 0 0 0 1, 1 1 1 << 1 = 0 0 1 1, 1 1 0
// take the upper 4 bits: 0 0 1 1
assign branch_mask_extended = {{{ariane_pkg::INSTR_PER_FETCH-1}{1'b0}}, {{ariane_pkg::INSTR_PER_FETCH}{1'b1}}} << branch_index;
assign branch_mask = branch_mask_extended[ariane_pkg::INSTR_PER_FETCH * 2 - 2:ariane_pkg::INSTR_PER_FETCH - 1];
// mask with taken branches to get the actual amount of instructions we want to push
assign valid = valid_i & branch_mask;
// rotate right again
assign consumed_extended = {push_instr_fifo, push_instr_fifo} >> idx_is_q;
assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0];
// count the numbers of valid instructions we've pushed from this package
popcount #(
.INPUT_WIDTH(ariane_pkg::INSTR_PER_FETCH)
) i_popcount (
.data_i (push_instr_fifo),
.popcount_o(popcount)
);
assign shamt = popcount[$bits(shamt)-1:0];
// save the shift amount for next cycle
assign idx_is_d = idx_is_q + shamt;
// ----------------------
// Input interface
// ----------------------
// rotate left by the current position
assign fifo_pos_extended = {valid, valid} << idx_is_q;
// we just care about the upper bits
assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH];
// the fifo_position signal can directly be used to guide the push signal of each FIFO
// make sure it is not full
assign push_instr = fifo_pos & ~instr_queue_full;
// duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input
assign instr[i] = instr_i[i];
assign instr[i+ariane_pkg::INSTR_PER_FETCH] = instr_i[i];
assign cf[i] = cf_type_i[i];
assign cf[i+ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i];
end
// shift the inputs
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_fifo_input_select
/* verilator lint_off WIDTH */
assign instr_data_in[i].instr = instr[i+idx_is_q];
assign instr_data_in[i].cf = cf[i+idx_is_q];
assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet
assign instr_data_in[i].ex_vaddr = exception_addr_i;
/* verilator lint_on WIDTH */
end
end else begin : gen_multiple_instr_per_fetch_without_C
assign taken = '0;
assign branch_empty = '0;
assign branch_index = '0;
assign branch_mask_extended = '0;
assign branch_mask = '0;
assign consumed_extended = '0;
assign fifo_pos_extended = '0;
assign fifo_pos = '0;
assign instr = '0;
assign popcount = '0;
assign shamt = '0;
assign valid = '0;
assign consumed_o = push_instr_fifo[0];
// ----------------------
// Input interface
// ----------------------
assign push_instr = valid_i & ~instr_queue_full;
/* verilator lint_off WIDTH */
assign instr_data_in[0].instr = instr_i[0];
assign instr_data_in[0].cf = cf_type_i[0];
assign instr_data_in[0].ex = exception_i; // exceptions hold for the whole fetch packet
assign instr_data_in[0].ex_vaddr = exception_addr_i;
/* verilator lint_on WIDTH */
end
// ----------------------
// Replay Logic
// ----------------------
// We need to replay an instruction fetch iff:
// 1. One of the instruction data FIFOs was full and we needed it
// (e.g.: we pushed and it was full)
// 2. The address/branch predict FIFO was full
// if one of the FIFOs was full we need to replay the faulting instruction
if (ariane_pkg::RVC == 1'b1) begin : gen_instr_overflow_fifo_with_C
assign instr_overflow_fifo = instr_queue_full & fifo_pos;
end else begin : gen_instr_overflow_fifo_without_C
assign instr_overflow_fifo = instr_queue_full & valid_i;
end
assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed
assign address_overflow = full_address & push_address;
assign replay_o = instr_overflow | address_overflow;
if (ariane_pkg::RVC) begin : gen_replay_addr_o_with_c
// select the address, in the case of an address fifo overflow just
// use the base of this package
// if we successfully pushed some instructions we can output the next instruction
// which we didn't manage to push
assign replay_addr_o = (address_overflow) ? addr_i[0] : addr_i[shamt];
end else begin : gen_replay_addr_o_without_C
assign replay_addr_o = addr_i[0];
end
// ----------------------
// Downstream interface
// ----------------------
// as long as there is at least one queue which can take the value we have a valid instruction
assign fetch_entry_valid_o = ~(&instr_queue_empty);
if (ariane_pkg::RVC) begin : gen_downstream_itf_with_c
always_comb begin
idx_ds_d = idx_ds_q;
pop_instr = '0;
// assemble fetch entry
fetch_entry_o.instruction = '0;
fetch_entry_o.address = pc_q;
fetch_entry_o.ex.valid = 1'b0;
fetch_entry_o.ex.cause = '0;
fetch_entry_o.ex.tval = '0;
fetch_entry_o.branch_predict.predict_address = address_out;
fetch_entry_o.branch_predict.cf = ariane_pkg::NoCF;
// output mux select
for (int unsigned i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
if (idx_ds_q[i]) begin
if (instr_data_out[i].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin
fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT;
end else begin
fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
end
fetch_entry_o.instruction = instr_data_out[i].instr;
fetch_entry_o.ex.valid = instr_data_out[i].ex != ariane_pkg::FE_NONE;
fetch_entry_o.ex.tval = {
{(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[i].ex_vaddr
};
fetch_entry_o.branch_predict.cf = instr_data_out[i].cf;
pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i;
end
end
// rotate the pointer left
if (fetch_entry_ready_i) begin
idx_ds_d = {
idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1]
};
end
end
end else begin : gen_downstream_itf_without_c
always_comb begin
idx_ds_d = '0;
idx_is_d = '0;
fetch_entry_o.instruction = instr_data_out[0].instr;
fetch_entry_o.address = pc_q;
fetch_entry_o.ex.valid = instr_data_out[0].ex != ariane_pkg::FE_NONE;
if (instr_data_out[0].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin
fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT;
end else begin
fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
end
fetch_entry_o.ex.tval = {{64 - riscv::VLEN{1'b0}}, instr_data_out[0].ex_vaddr};
fetch_entry_o.branch_predict.predict_address = address_out;
fetch_entry_o.branch_predict.cf = instr_data_out[0].cf;
pop_instr[0] = fetch_entry_valid_o & fetch_entry_ready_i;
end
end
// TODO(zarubaf): This needs to change for dual-issue
// if the handshaking is successful and we had a prediction pop one address entry
assign pop_address = ((fetch_entry_o.branch_predict.cf != ariane_pkg::NoCF) & |pop_instr);
// ----------------------
// Calculate (Next) PC
// ----------------------
always_comb begin
pc_d = pc_q;
reset_address_d = flush_i ? 1'b1 : reset_address_q;
if (fetch_entry_ready_i) begin
// TODO(zarubaf): This needs to change for a dual issue implementation
// advance the PC
if (ariane_pkg::RVC == 1'b1) begin : gen_pc_with_c_extension
pc_d = pc_q + ((fetch_entry_o.instruction[1:0] != 2'b11) ? 'd2 : 'd4);
end else begin : gen_pc_without_c_extension
pc_d = pc_q + 'd4;
end
end
if (pop_address) pc_d = address_out;
// we previously flushed so we need to reset the address
if (valid_i[0] && reset_address_q) begin
// this is the base of the first instruction
pc_d = addr_i[0];
reset_address_d = 1'b0;
end
end
// FIFOs
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_instr_fifo
// Make sure we don't save any instructions if we couldn't save the address
assign push_instr_fifo[i] = push_instr[i] & ~address_overflow;
fifo_v3 #(
.DEPTH(ariane_pkg::FETCH_FIFO_DEPTH),
.dtype(instr_data_t)
) i_fifo_instr_data (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_i),
.testmode_i(1'b0),
.full_o (instr_queue_full[i]),
.empty_o (instr_queue_empty[i]),
.usage_o (instr_queue_usage[i]),
.data_i (instr_data_in[i]),
.push_i (push_instr_fifo[i]),
.data_o (instr_data_out[i]),
.pop_i (pop_instr[i])
);
end
// OR-reduce and check whether we are retiring a taken branch (it might be that the corresponding
// FIFO is full).
always_comb begin
push_address = 1'b0;
// check if we are pushing a ctrl flow change, if so save the address
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
push_address |= push_instr[i] & (instr_data_in[i].cf != ariane_pkg::NoCF);
end
end
fifo_v3 #(
.DEPTH (ariane_pkg::FETCH_FIFO_DEPTH), // TODO(zarubaf): Fork out to separate param
.DATA_WIDTH(riscv::VLEN)
) i_fifo_address (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_i),
.testmode_i(1'b0),
.full_o (full_address),
.empty_o (empty_address),
.usage_o (address_queue_usage),
.data_i (predict_address_i),
.push_i (push_address & ~full_address),
.data_o (address_out),
.pop_i (pop_address)
);
unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage}));
unread i_unread_branch_mask (.d_i(|branch_mask_extended));
unread i_unread_lzc (.d_i(|{branch_empty}));
unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals
unread i_unread_instr_fifo (.d_i(|instr_queue_usage));
if (ariane_pkg::RVC) begin : gen_pc_q_with_c
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
idx_ds_q <= 'b1;
idx_is_q <= '0;
pc_q <= '0;
reset_address_q <= 1'b1;
end else begin
pc_q <= pc_d;
reset_address_q <= reset_address_d;
if (flush_i) begin
// one-hot encoded
idx_ds_q <= 'b1;
// binary encoded
idx_is_q <= '0;
reset_address_q <= 1'b1;
end else begin
idx_ds_q <= idx_ds_d;
idx_is_q <= idx_is_d;
end
end
end
end else begin : gen_pc_q_without_C
assign idx_ds_q = '0;
assign idx_is_q = '0;
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
pc_q <= '0;
reset_address_q <= 1'b1;
end else begin
pc_q <= pc_d;
reset_address_q <= reset_address_d;
if (flush_i) begin
reset_address_q <= 1'b1;
end
end
end
end
// pragma translate_off
`ifndef VERILATOR
replay_address_fifo :
assert property (@(posedge clk_i) disable iff (!rst_ni) replay_o |-> !i_fifo_address.push_i)
else $fatal(1, "[instr_queue] Pushing address although replay asserted");
output_select_onehot :
assert property (@(posedge clk_i) $onehot0(idx_ds_q))
else begin
$error("Output select should be one-hot encoded");
$stop();
end
`endif
// pragma translate_on
endmodule

View File

@ -0,0 +1,83 @@
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Migrated: Luis Vitorio Cargnini, IEEE
// Date: 09.06.2018
// ------------------------------
// Instruction Scanner
// ------------------------------
// Instruction scanner.
//
// Purely combinational pre-decoder: looks at a single instruction word and
// flags which kind of control-flow instruction it encodes, both for the
// 32-bit encodings (rvi_* outputs) and for the 16-bit compressed encodings
// (rvc_* outputs). It also extracts the matching PC-relative immediates.
// There is no state and no clock; every output is a function of instr_i only.
module instr_scan #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic [31:0]            instr_i,       // expect aligned instruction, compressed or not
    output logic                   rvi_return_o,  // JALR with rs1 in {x1, x5} and rd != rs1
    output logic                   rvi_call_o,    // JAL/JALR with rd in {x1, x5}
    output logic                   rvi_branch_o,  // opcode is BRANCH
    output logic                   rvi_jalr_o,    // opcode is JALR
    output logic                   rvi_jump_o,    // opcode is JAL, or the word matches the xRET pattern
    output logic [riscv::VLEN-1:0] rvi_imm_o,     // UJ or SB immediate (zero for xRET)
    output logic                   rvc_branch_o,  // C.BEQZ / C.BNEZ
    output logic                   rvc_jump_o,    // C.J (and C.JAL when XLEN == 32)
    output logic                   rvc_jr_o,      // C.JR
    output logic                   rvc_return_o,  // C.JR with rs1 in {x1, x5}
    output logic                   rvc_jalr_o,    // C.JALR
    output logic                   rvc_call_o,    // C.JALR (and C.JAL when XLEN == 32)
    output logic [riscv::VLEN-1:0] rvc_imm_o      // CB or CJ immediate
);

  // ---------------------------------------------------------------
  // Raw instruction fields, named once and reused below
  // ---------------------------------------------------------------
  logic [6:0] opcode;       // instr_i[6:0], major opcode of a 32-bit instruction
  logic [4:0] field_19_15;  // instr_i[19:15], rs1 of a 32-bit instruction
  logic [4:0] field_11_7;   // instr_i[11:7], rd (32-bit) or the register field of C.JR/C.JALR
  logic [2:0] c_funct3;     // instr_i[15:13], funct3 of a compressed instruction
  logic [1:0] quadrant;     // instr_i[1:0], compressed quadrant (2'b11 means 32-bit)

  assign opcode      = instr_i[6:0];
  assign field_19_15 = instr_i[19:15];
  assign field_11_7  = instr_i[11:7];
  assign c_funct3    = instr_i[15:13];
  assign quadrant    = instr_i[1:0];

  // ---------------------------------------------------------------
  // Shared helper predicates
  // ---------------------------------------------------------------
  logic is_rvc;               // instruction is 16 bit wide
  logic in_quadrant_c1;       // compressed and located in quadrant C1
  logic in_quadrant_c2;       // compressed and located in quadrant C2
  logic field_19_15_is_link;  // instr_i[19:15] names x1 or x5
  logic field_11_7_is_link;   // instr_i[11:7] names x1 or x5
  logic rv32_rvc_jal;         // C.JAL, only decoded when XLEN == 32
  logic is_xret;              // instr_i[31:30] == 0 and instr_i[28:0] matches; bit 29 is don't care
  logic is_jal_r;             // C.JR / C.JALR common part (bit 12 tells them apart)

  assign is_rvc              = (quadrant != 2'b11);
  assign in_quadrant_c1      = is_rvc & (quadrant == riscv::OpcodeC1);
  assign in_quadrant_c2      = is_rvc & (quadrant == riscv::OpcodeC2);
  assign field_19_15_is_link = (field_19_15 == 5'd1) | (field_19_15 == 5'd5);
  assign field_11_7_is_link  = (field_11_7 == 5'd1) | (field_11_7 == 5'd5);

  assign rv32_rvc_jal = (riscv::XLEN == 32) & ((c_funct3 == riscv::OpcodeC1Jal) & in_quadrant_c1);

  assign is_xret = logic'(instr_i[31:30] == 2'b00)
                 & logic'(instr_i[28:0] == 29'b10000001000000000000001110011);

  assign is_jal_r = (c_funct3 == riscv::OpcodeC2JalrMvAdd)
                  & (instr_i[6:2] == 5'b00000)
                  & in_quadrant_c2;

  // ---------------------------------------------------------------
  // 32-bit (RVI) classification
  // ---------------------------------------------------------------
  assign rvi_branch_o = (opcode == riscv::OpcodeBranch);
  assign rvi_jalr_o   = (opcode == riscv::OpcodeJalr);
  assign rvi_jump_o   = logic'(opcode == riscv::OpcodeJal) | is_xret;

  // check that rs1 is either x1 or x5 and that rd is not rs1
  assign rvi_return_o = rvi_jalr_o & field_19_15_is_link & (field_19_15 != field_11_7);

  // Opcode is JAL[R] and destination register is either x1 or x5
  assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & field_11_7_is_link;

  // differentiates between JAL and BRANCH opcode (instr_i[3] selects), JALR comes from BHT
  assign rvi_imm_o = is_xret ? '0
                   : (instr_i[3] ? ariane_pkg::uj_imm(instr_i) : ariane_pkg::sb_imm(instr_i));

  // ---------------------------------------------------------------
  // 16-bit (RVC) classification
  // ---------------------------------------------------------------
  // opcode JAL
  assign rvc_jump_o = ((c_funct3 == riscv::OpcodeC1J) & in_quadrant_c1) | rv32_rvc_jal;

  // bit 12 clear: C.JR, always links to register 0
  assign rvc_jr_o = is_jal_r & ~instr_i[12];
  // bit 12 set: C.JALR, always links to register 1
  assign rvc_jalr_o = is_jal_r & instr_i[12];
  assign rvc_call_o = rvc_jalr_o | rv32_rvc_jal;

  assign rvc_branch_o = ((c_funct3 == riscv::OpcodeC1Beqz) | (c_funct3 == riscv::OpcodeC1Bnez))
                      & in_quadrant_c1;

  // check that rs1 is x1 or x5
  assign rvc_return_o = field_11_7_is_link & rvc_jr_o;

  // ---------------------------------------------------------------
  // Compressed immediates, sign-extended from instr_i[12] to VLEN bits
  // ---------------------------------------------------------------
  logic [riscv::VLEN-1:0] cb_imm;  // 8 payload bits + (VLEN - 8) sign bits
  logic [riscv::VLEN-1:0] cj_imm;  // 11 payload bits + (VLEN - 11) sign bits

  assign cb_imm = {
    {(riscv::VLEN - 8) {instr_i[12]}},
    instr_i[6:5],
    instr_i[2],
    instr_i[11:10],
    instr_i[4:3],
    1'b0
  };

  assign cj_imm = {
    {(riscv::VLEN - 11) {instr_i[12]}},
    instr_i[8],
    instr_i[10:9],
    instr_i[6],
    instr_i[7],
    instr_i[2],
    instr_i[11],
    instr_i[5:3],
    1'b0
  };

  // differentiates between JAL and BRANCH opcode (instr_i[14] selects), JALR comes from BHT
  assign rvc_imm_o = instr_i[14] ? cb_imm : cj_imm;

endmodule

View File

@ -0,0 +1,71 @@
//Copyright (C) 2018 to present,
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Migrated: Luis Vitorio Cargnini, IEEE
// Date: 09.06.2018
// return address stack
// Return address stack.
//
// A small shift-register stack of DEPTH entries. Entry 0 is the top of the
// stack and is always visible on data_o. The next state is chosen with the
// following priority:
//   1. flush_i          : clear every entry
//   2. pop_i && push_i  : keep the stack as is, only overwrite the top entry
//   3. pop_i            : shift entries towards index 0, invalidate the last one
//   4. push_i           : shift entries away from index 0, write the new top
//   5. otherwise        : hold
// The part-selects below require DEPTH >= 2.
module ras #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned           DEPTH   = 2
) (
    input  logic                         clk_i,
    input  logic                         rst_ni,   // asynchronous reset, active low
    input  logic                         flush_i,  // clear the whole stack
    input  logic                         push_i,   // push data_i as new top entry
    input  logic                         pop_i,    // drop the current top entry
    input  logic [riscv::VLEN-1:0]       data_i,   // return address to push
    output ariane_pkg::ras_t             data_o    // current top entry (address + valid)
);

  ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q;

  // the top of the stack is permanently exposed
  assign data_o = stack_q[0];

  // next-state logic
  always_comb begin
    // default: keep the current contents
    stack_d = stack_q;

    if (flush_i) begin
      stack_d = '0;
    end else if (pop_i && push_i) begin
      // simultaneous pop and push: leave everything untouched and just
      // replace the top of the stack with the latest value
      stack_d[0].valid = 1'b1;
      stack_d[0].ra    = data_i;
    end else if (pop_i) begin
      stack_d[DEPTH-2:0]     = stack_q[DEPTH-1:1];
      // we popped the value so invalidate the end of the stack
      stack_d[DEPTH-1].ra    = 'b0;
      stack_d[DEPTH-1].valid = 1'b0;
    end else if (push_i) begin
      stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0];
      // mark the new return address as valid
      stack_d[0].valid   = 1'b1;
      stack_d[0].ra      = data_i;
    end
  end

  // state register
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      stack_q <= '0;
    end else begin
      stack_q <= stack_d;
    end
  end

endmodule

View File

@ -0,0 +1,143 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 15.04.2017
// Description: Instruction decode, contains the logic for decode,
// issue and read operands.
// Instruction decode stage.
//
// Data path, front to back:
//   1. (only if CVA6Cfg.RVC) expand a compressed instruction with compressed_decoder
//   2. decode the (expanded) instruction into a scoreboard entry with decoder
//   3. hold the decoded entry in a single pipeline register (issue_q) until the
//      issue side acknowledges it
// The fetch side is handshaked with fetch_entry_valid_i / fetch_entry_ready_o,
// the issue side with issue_entry_valid_o / issue_instr_ack_i.
module id_stage #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input  logic                          clk_i,
    input  logic                          rst_ni,
    input  logic                          flush_i,
    input  logic                          debug_req_i,
    // from IF
    input  ariane_pkg::fetch_entry_t      fetch_entry_i,
    input  logic                          fetch_entry_valid_i,
    output logic                          fetch_entry_ready_o,   // acknowledge the instruction (fetch entry)
    // towards the issue side
    output ariane_pkg::scoreboard_entry_t issue_entry_o,         // a decoded instruction
    output logic                          issue_entry_valid_o,   // issue entry is valid
    output logic                          is_ctrl_flow_o,        // the instruction we issue is a ctrl flow instructions
    input  logic                          issue_instr_ack_i,     // issue stage acknowledged sampling of instructions
    output logic                          rvfi_is_compressed_o,
    // from CSR file
    input  riscv::priv_lvl_t              priv_lvl_i,            // current privilege level
    input  riscv::xs_t                    fs_i,                  // floating point extension status
    input  logic [2:0]                    frm_i,                 // floating-point dynamic rounding mode
    input  riscv::xs_t                    vs_i,                  // vector extension status
    input  logic [1:0]                    irq_i,
    input  ariane_pkg::irq_ctrl_t         irq_ctrl_i,
    input  logic                          debug_mode_i,          // we are in debug mode
    input  logic                          tvm_i,
    input  logic                          tw_i,
    input  logic                          tsr_i
);

  // ID/ISSUE register stage
  typedef struct packed {
    logic                          valid;
    ariane_pkg::scoreboard_entry_t sbe;
    logic                          is_ctrl_flow;
  } issue_struct_t;

  issue_struct_t                 issue_n, issue_q;

  logic                          is_control_flow_instr;
  ariane_pkg::scoreboard_entry_t decoded_instruction;

  logic                          is_illegal;
  logic [31:0]                   instruction;
  logic                          is_compressed;

  // ---------------------------------------------------------
  // 1. Check if they are compressed and expand in case they are
  // ---------------------------------------------------------
  // The generate branches stay unlabeled so the hierarchical path of
  // compressed_decoder_i is the same as before.
  if (CVA6Cfg.RVC) begin
    compressed_decoder #(
        .CVA6Cfg(CVA6Cfg)
    ) compressed_decoder_i (
        .instr_i        (fetch_entry_i.instruction),
        .instr_o        (instruction),
        .illegal_instr_o(is_illegal),
        .is_compressed_o(is_compressed)
    );
  end else begin
    // no C extension: pass the fetched word through unchanged
    assign instruction   = fetch_entry_i.instruction;
    assign is_illegal    = '0;
    assign is_compressed = '0;
  end

  assign rvfi_is_compressed_o = is_compressed;

  // ---------------------------------------------------------
  // 2. Decode and emit instruction to issue stage
  // ---------------------------------------------------------
  decoder #(
      .CVA6Cfg(CVA6Cfg)
  ) decoder_i (
      .debug_req_i            (debug_req_i),
      .irq_ctrl_i             (irq_ctrl_i),
      .irq_i                  (irq_i),
      .pc_i                   (fetch_entry_i.address),
      .is_compressed_i        (is_compressed),
      .is_illegal_i           (is_illegal),
      .instruction_i          (instruction),
      .compressed_instr_i     (fetch_entry_i.instruction[15:0]),
      .branch_predict_i       (fetch_entry_i.branch_predict),
      .ex_i                   (fetch_entry_i.ex),
      .priv_lvl_i             (priv_lvl_i),
      .debug_mode_i           (debug_mode_i),
      .fs_i                   (fs_i),
      .frm_i                  (frm_i),
      .vs_i                   (vs_i),
      .tvm_i                  (tvm_i),
      .tw_i                   (tw_i),
      .tsr_i                  (tsr_i),
      .instruction_o          (decoded_instruction),
      .is_control_flow_instr_o(is_control_flow_instr)
  );

  // ------------------
  // Pipeline Register
  // ------------------
  // outputs are driven straight from the register
  assign issue_entry_valid_o = issue_q.valid;
  assign issue_entry_o       = issue_q.sbe;
  assign is_ctrl_flow_o      = issue_q.is_ctrl_flow;

  always_comb begin
    // defaults: do not accept a fetch entry, keep the register contents
    fetch_entry_ready_o = 1'b0;
    issue_n             = issue_q;

    // Clear the valid flag if issue has acknowledged the instruction
    if (issue_instr_ack_i) begin
      issue_n.valid = 1'b0;
    end

    // Take a new fetch entry when one is offered and the register is either
    // empty or being drained by the issue stage in this very cycle.
    if (fetch_entry_valid_i && (issue_instr_ack_i || !issue_q.valid)) begin
      fetch_entry_ready_o = 1'b1;
      issue_n = '{
          valid: 1'b1,
          sbe: decoded_instruction,
          is_ctrl_flow: is_control_flow_instr
      };
    end

    // invalidate the pipeline register on a flush (has the last word)
    if (flush_i) begin
      issue_n.valid = 1'b0;
    end
  end

  // -------------------------
  // Registers (ID <-> Issue)
  // -------------------------
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      issue_q <= '0;
    end else begin
      issue_q <= issue_n;
    end
  end

endmodule

View File

@ -0,0 +1,47 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Authors: Matheus Cavalcante <matheusd@iis.ee.ethz.ch>
// Nils Wistoff <nwistoff@iis.ee.ethz.ch>
// Package defining the accelerator interface as used by Ara + CVA6
// Type definitions for the accelerator interface: one packed struct per
// bundle. Field order defines the packed bit layout, so it must not be
// rearranged.
package acc_pkg;
// ----------------------
// Accelerator Interface
// ----------------------
// Request-side bundle.
// NOTE(review): presumably driven by the core towards the accelerator - confirm
// against the module that instantiates it.
typedef struct packed {
// valid flag of the request (presumably pairs with accelerator_resp_t.req_ready - confirm)
logic req_valid;
// ready flag for the response (presumably pairs with accelerator_resp_t.resp_valid - confirm)
logic resp_ready;
// instruction word, operand values and rounding mode carried with the request
riscv::instruction_t insn;
riscv::xlen_t rs1;
riscv::xlen_t rs2;
fpnew_pkg::roundmode_e frm;
// transaction identifier, TRANS_ID_BITS wide
logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id;
logic store_pending;
// Invalidation interface
logic acc_cons_en;
logic inval_ready;
} accelerator_req_t;
// Response-side bundle.
// NOTE(review): presumably driven by the accelerator towards the core - confirm
// against the module that instantiates it.
typedef struct packed {
// ready flag for the request (presumably pairs with accelerator_req_t.req_valid - confirm)
logic req_ready;
// valid flag of the response (presumably pairs with accelerator_req_t.resp_ready - confirm)
logic resp_valid;
// XLEN-wide result value
riscv::xlen_t result;
// transaction identifier, same width as in accelerator_req_t
logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id;
logic error;
// Metadata
logic store_pending;
logic store_complete;
logic load_complete;
// 5-bit flags field plus its qualifier (presumably FP exception flags - confirm)
logic [4:0] fflags;
logic fflags_valid;
// Invalidation interface
logic inval_valid;
// NOTE(review): address is a fixed 64 bit here, independent of riscv::XLEN/PLEN
logic [63:0] inval_addr;
} accelerator_resp_t;
endpackage

View File

@ -0,0 +1,994 @@
/* Copyright 2018 ETH Zurich and University of Bologna.
* Copyright and related rights are licensed under the Solderpad Hardware
* License, Version 0.51 (the License); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
* or agreed to in writing, software, hardware and materials distributed under
* this License is distributed on an AS IS BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* File: ariane_pkg.sv
* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
* Date: 8.4.2017
*
* Description: Contains all the necessary defines for Ariane
* in one package.
*/
// this is needed to propagate the
// configuration in case Ariane is
// instantiated in OpenPiton
`ifdef PITON_ARIANE
`include "l15.tmp.h"
`endif
/// This package contains `functions` and global defines for CVA6.
/// *Note*: There are some parameters here as well which will eventually be
/// moved out to favour a fully parameterizable core.
package ariane_pkg;
// TODO: Slowly move those parameters to the new system.
localparam NR_SB_ENTRIES = cva6_config_pkg::CVA6ConfigNrScoreboardEntries; // number of scoreboard entries
localparam TRANS_ID_BITS = $clog2(
NR_SB_ENTRIES
); // depending on the number of scoreboard entries we need that many bits
// to uniquely identify the entry in the scoreboard
localparam ASID_WIDTH = (riscv::XLEN == 64) ? 16 : 1;
localparam BITS_SATURATION_COUNTER = 2;
localparam ISSUE_WIDTH = 1;
// depth of store-buffers, this needs to be a power of two
localparam logic [2:0] DEPTH_SPEC = 'd4;
localparam int unsigned DCACHE_TYPE = int'(cva6_config_pkg::CVA6ConfigDcacheType);
// if DCACHE_TYPE = cva6_config_pkg::WT
// we can use a small commit queue since we have a write buffer in the dcache
// we could in principle do without the commit queue in this case, but the timing degrades if we do that due
// to longer paths into the commit stage
// if DCACHE_TYPE = cva6_config_pkg::WB
// allocate more space for the commit buffer to be on the safe side, this needs to be a power of two
localparam logic [2:0] DEPTH_COMMIT = 'd4;
localparam bit FPGA_EN = cva6_config_pkg::CVA6ConfigFPGAEn; // Is FPGA optimization of CV32A6
localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn; // Is C extension configuration
// Transprecision float unit
localparam int unsigned LAT_COMP_FP32 = 'd2;
localparam int unsigned LAT_COMP_FP64 = 'd3;
localparam int unsigned LAT_COMP_FP16 = 'd1;
localparam int unsigned LAT_COMP_FP16ALT = 'd1;
localparam int unsigned LAT_COMP_FP8 = 'd1;
localparam int unsigned LAT_DIVSQRT = 'd2;
localparam int unsigned LAT_NONCOMP = 'd1;
localparam int unsigned LAT_CONV = 'd2;
localparam riscv::xlen_t OPENHWGROUP_MVENDORID = {{riscv::XLEN - 32{1'b0}}, 32'h0602};
localparam riscv::xlen_t ARIANE_MARCHID = {{riscv::XLEN - 32{1'b0}}, 32'd3};
// 32 registers
localparam REG_ADDR_SIZE = 5;
// Read ports for general purpose register files
localparam NR_RGPR_PORTS = 2;
// static debug hartinfo
// debug causes
localparam logic [2:0] CauseBreakpoint = 3'h1;
localparam logic [2:0] CauseTrigger = 3'h2;
localparam logic [2:0] CauseRequest = 3'h3;
localparam logic [2:0] CauseSingleStep = 3'h4;
// amount of data count registers implemented
localparam logic [3:0] DataCount = 4'h2;
// address where data0-15 is shadowed or if shadowed in a CSR
// address of the first CSR used for shadowing the data
localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here
typedef struct packed {
logic [31:24] zero1;
logic [23:20] nscratch;
logic [19:17] zero0;
logic dataaccess;
logic [15:12] datasize;
logic [11:0] dataaddr;
} hartinfo_t;
localparam hartinfo_t DebugHartInfo = '{
zero1: '0,
nscratch: 2, // Debug module needs at least two scratch regs
zero0: '0,
dataaccess: 1'b1, // data registers are memory mapped in the debugger
datasize: DataCount,
dataaddr: DataAddr
};
// enables a commit log which matches Spike's commit log format for easier trace comparison
localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1;
// ------------- Dangerous -------------
// if set to zero a flush will not invalidate the cache-lines, in a single core environment
// where coherence is not necessary this can improve performance. This needs to be switched on
// when more than one core is in a system
localparam logic INVALIDATE_ON_FLUSH = 1'b1;
`ifdef SPIKE_TANDEM
// Spike still places 0 in TVAL for ENV_CALL_* exceptions.
// This may eventually go away when Spike starts to handle TVAL for *all* exceptions.
localparam bit ZERO_TVAL = 1'b1;
`else
localparam bit ZERO_TVAL = 1'b0;
`endif
// read mask for SSTATUS over MSTATUS
localparam logic [63:0] SMODE_STATUS_READ_MASK = riscv::SSTATUS_UIE
| riscv::SSTATUS_SIE
| riscv::SSTATUS_SPIE
| riscv::SSTATUS_SPP
| riscv::SSTATUS_FS
| riscv::SSTATUS_XS
| riscv::SSTATUS_SUM
| riscv::SSTATUS_MXR
| riscv::SSTATUS_UPIE
| riscv::SSTATUS_SPIE
| riscv::SSTATUS_UXL
| riscv::SSTATUS_SD;
localparam logic [63:0] SMODE_STATUS_WRITE_MASK = riscv::SSTATUS_SIE
| riscv::SSTATUS_SPIE
| riscv::SSTATUS_SPP
| riscv::SSTATUS_FS
| riscv::SSTATUS_SUM
| riscv::SSTATUS_MXR;
// ---------------
// AXI
// ---------------
localparam FETCH_USER_WIDTH = cva6_config_pkg::CVA6ConfigFetchUserWidth;
localparam DATA_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth;
localparam AXI_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn | cva6_config_pkg::CVA6ConfigFetchUserEn;
localparam AXI_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth;
localparam DATA_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn;
localparam FETCH_USER_EN = cva6_config_pkg::CVA6ConfigFetchUserEn;
typedef enum logic {
SINGLE_REQ,
CACHE_LINE_REQ
} ad_req_t;
// ---------------
// Fetch Stage
// ---------------
// leave as is (fails with >8 entries and wider fetch width)
localparam int unsigned FETCH_FIFO_DEPTH = 4;
localparam int unsigned FETCH_WIDTH = 32;
// maximum instructions we can fetch on one request (we support compressed instructions)
localparam int unsigned INSTR_PER_FETCH = RVC == 1'b1 ? (FETCH_WIDTH / 16) : 1;
localparam int unsigned LOG2_INSTR_PER_FETCH = RVC == 1'b1 ? $clog2(INSTR_PER_FETCH) : 1;
// Only use struct when signals have same direction
// exception
typedef struct packed {
riscv::xlen_t cause; // cause of exception
riscv::xlen_t tval; // additional information of causing exception (e.g.: instruction causing it),
// address of LD/ST fault
logic valid;
} exception_t;
typedef enum logic [2:0] {
NoCF, // No control flow prediction
Branch, // Branch
Jump, // Jump to address from immediate
JumpR, // Jump to address from registers
Return // Return Address Prediction
} cf_t;
// branch-predict
// this is the struct we get back from ex stage and we will use it to update
// all the necessary data structures
// bp_resolve_t
typedef struct packed {
logic valid; // prediction with all its values is valid
logic [riscv::VLEN-1:0] pc; // PC of predict or mis-predict
logic [riscv::VLEN-1:0] target_address; // target address at which to jump, or not
logic is_mispredict; // set if this was a mis-predict
logic is_taken; // branch is taken
cf_t cf_type; // Type of control flow change
} bp_resolve_t;
// branchpredict scoreboard entry
// this is the struct which we will inject into the pipeline to guide the various
// units towards the correct branch decision and resolve
typedef struct packed {
cf_t cf; // type of control flow prediction
logic [riscv::VLEN-1:0] predict_address; // target address at which to jump, or not
} branchpredict_sbe_t;
typedef struct packed {
logic valid;
logic [riscv::VLEN-1:0] pc; // update at PC
logic [riscv::VLEN-1:0] target_address;
} btb_update_t;
typedef struct packed {
logic valid;
logic [riscv::VLEN-1:0] target_address;
} btb_prediction_t;
typedef struct packed {
logic valid;
logic [riscv::VLEN-1:0] ra;
} ras_t;
typedef struct packed {
logic valid;
logic [riscv::VLEN-1:0] pc; // update at PC
logic taken;
} bht_update_t;
typedef struct packed {
logic valid;
logic taken;
} bht_prediction_t;
typedef struct packed {
logic valid;
logic [1:0] saturation_counter;
} bht_t;
typedef enum logic [3:0] {
NONE, // 0
LOAD, // 1
STORE, // 2
ALU, // 3
CTRL_FLOW, // 4
MULT, // 5
CSR, // 6
FPU, // 7
FPU_VEC, // 8
CVXIF, // 9
ACCEL // 10
} fu_t;
localparam EXC_OFF_RST = 8'h80;
localparam SupervisorIrq = 1;
localparam MachineIrq = 0;
// All information needed to determine whether we need to associate an interrupt
// with the corresponding instruction or not.
typedef struct packed {
riscv::xlen_t mie;
riscv::xlen_t mip;
riscv::xlen_t mideleg;
logic sie;
logic global_enable;
} irq_ctrl_t;
// ---------------
// Cache config
// ---------------
// for usage in OpenPiton we have to propagate the openpiton L15 configuration from l15.h
// With PITON_ARIANE defined, the cache geometry comes from the CONFIG_L1* / L15_* macros
// (each macro gets the fallback value below when it is not already defined).
// Otherwise the geometry is read from cva6_config_pkg.
`ifdef PITON_ARIANE
// Fallback values for the I$ macros
`ifndef CONFIG_L1I_CACHELINE_WIDTH
`define CONFIG_L1I_CACHELINE_WIDTH 128
`endif
`ifndef CONFIG_L1I_ASSOCIATIVITY
`define CONFIG_L1I_ASSOCIATIVITY 4
`endif
`ifndef CONFIG_L1I_SIZE
`define CONFIG_L1I_SIZE 16*1024
`endif
// Fallback values for the D$ macros
`ifndef CONFIG_L1D_CACHELINE_WIDTH
`define CONFIG_L1D_CACHELINE_WIDTH 128
`endif
`ifndef CONFIG_L1D_ASSOCIATIVITY
`define CONFIG_L1D_ASSOCIATIVITY 8
`endif
`ifndef CONFIG_L1D_SIZE
`define CONFIG_L1D_SIZE 32*1024
`endif
// Fallback value for the thread id width used for MEM_TID_WIDTH below
`ifndef L15_THREADID_WIDTH
`define L15_THREADID_WIDTH 3
`endif
// I$
localparam int unsigned ICACHE_LINE_WIDTH = `CONFIG_L1I_CACHELINE_WIDTH;
localparam int unsigned ICACHE_SET_ASSOC = `CONFIG_L1I_ASSOCIATIVITY;
// index width = log2(cache size / number of ways)
localparam int unsigned ICACHE_INDEX_WIDTH = $clog2(`CONFIG_L1I_SIZE / ICACHE_SET_ASSOC);
// tag width = physical address width minus the index width
localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH;
localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : 128; // in bit
// D$
localparam int unsigned DCACHE_LINE_WIDTH = `CONFIG_L1D_CACHELINE_WIDTH;
localparam int unsigned DCACHE_SET_ASSOC = `CONFIG_L1D_ASSOCIATIVITY;
// index width = log2(cache size / number of ways)
localparam int unsigned DCACHE_INDEX_WIDTH = $clog2(`CONFIG_L1D_SIZE / DCACHE_SET_ASSOC);
// tag width = physical address width minus the index width
localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH;
localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : 128; // in bit
localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH;
localparam int unsigned MEM_TID_WIDTH = `L15_THREADID_WIDTH;
`else
// I$
localparam int unsigned CONFIG_L1I_SIZE = cva6_config_pkg::CVA6ConfigIcacheByteSize; // in byte
localparam int unsigned ICACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigIcacheSetAssoc; // number of ways
localparam int unsigned ICACHE_INDEX_WIDTH = $clog2(
CONFIG_L1I_SIZE / ICACHE_SET_ASSOC
); // in bit, contains also offset width
localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH; // in bit
localparam int unsigned ICACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit
localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit
// D$
localparam int unsigned CONFIG_L1D_SIZE = cva6_config_pkg::CVA6ConfigDcacheByteSize; // in byte
localparam int unsigned DCACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigDcacheSetAssoc; // number of ways
localparam int unsigned DCACHE_INDEX_WIDTH = $clog2(
CONFIG_L1D_SIZE / DCACHE_SET_ASSOC
); // in bit, contains also offset width
localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH; // in bit
localparam int unsigned DCACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit
localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit
localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH;
localparam int unsigned MEM_TID_WIDTH = cva6_config_pkg::CVA6ConfigMemTidWidth;
`endif
// The two parameters below are common to both configurations
localparam int unsigned DCACHE_TID_WIDTH = cva6_config_pkg::CVA6ConfigDcacheIdWidth;
localparam int unsigned WT_DCACHE_WBUF_DEPTH = cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth;
// ---------------
// EX Stage
// ---------------
// Operation selector, encoded in 8 bits. No explicit values are given, so the
// members are numbered in declaration order. The classification functions in
// this package (is_rs1_fpr, is_rs2_fpr, is_imm_fpr, is_rd_fpr, is_amo) match
// on ranges such as [FMUL : FNMADD] or [AMO_LRW : AMO_MINDU], so the order of
// the members below is significant: do not reorder or insert entries without
// re-checking those ranges.
typedef enum logic [7:0] { // basic ALU op
ADD,
SUB,
ADDW,
SUBW,
// logic operations
XORL,
ORL,
ANDL,
// shifts
SRA,
SRL,
SLL,
SRLW,
SLLW,
SRAW,
// comparisons
LTS,
LTU,
GES,
GEU,
EQ,
NE,
// jumps
JALR,
BRANCH,
// set lower than operations
SLTS,
SLTU,
// CSR functions
MRET,
SRET,
DRET,
ECALL,
WFI,
FENCE,
FENCE_I,
SFENCE_VMA,
CSR_WRITE,
CSR_READ,
CSR_SET,
CSR_CLEAR,
// LSU functions
LD,
SD,
LW,
LWU,
SW,
LH,
LHU,
SH,
LB,
SB,
LBU,
// Atomic Memory Operations
AMO_LRW,
AMO_LRD,
AMO_SCW,
AMO_SCD,
AMO_SWAPW,
AMO_ADDW,
AMO_ANDW,
AMO_ORW,
AMO_XORW,
AMO_MAXW,
AMO_MAXWU,
AMO_MINW,
AMO_MINWU,
AMO_SWAPD,
AMO_ADDD,
AMO_ANDD,
AMO_ORD,
AMO_XORD,
AMO_MAXD,
AMO_MAXDU,
AMO_MIND,
AMO_MINDU,
// Multiplications
MUL,
MULH,
MULHU,
MULHSU,
MULW,
// Divisions
DIV,
DIVU,
DIVW,
DIVUW,
REM,
REMU,
REMW,
REMUW,
// Floating-Point Load and Store Instructions
FLD,
FLW,
FLH,
FLB,
FSD,
FSW,
FSH,
FSB,
// Floating-Point Computational Instructions
FADD,
FSUB,
FMUL,
FDIV,
FMIN_MAX,
FSQRT,
FMADD,
FMSUB,
FNMSUB,
FNMADD,
// Floating-Point Conversion and Move Instructions
FCVT_F2I,
FCVT_I2F,
FCVT_F2F,
FSGNJ,
FMV_F2X,
FMV_X2F,
// Floating-Point Compare Instructions
FCMP,
// Floating-Point Classify Instruction
FCLASS,
// Vectorial Floating-Point Instructions that don't directly map onto the scalar ones
VFMIN,
VFMAX,
VFSGNJ,
VFSGNJN,
VFSGNJX,
VFEQ,
VFNE,
VFLT,
VFGE,
VFLE,
VFGT,
VFCPKAB_S,
VFCPKCD_S,
VFCPKAB_D,
VFCPKCD_D,
// Offload Instructions to be directed into cv_x_if
OFFLOAD,
// Or-Combine and REV8
ORCB,
REV8,
// Bitwise Rotation
ROL,
ROLW,
ROR,
RORI,
RORIW,
RORW,
// Sign and Zero Extend
SEXTB,
SEXTH,
ZEXTH,
// Count population
CPOP,
CPOPW,
// Count Leading/Trailing Zeros
CLZ,
CLZW,
CTZ,
CTZW,
// Carry less multiplication Op's
CLMUL,
CLMULH,
CLMULR,
// Single bit instructions Op's
BCLR,
BCLRI,
BEXT,
BEXTI,
BINV,
BINVI,
BSET,
BSETI,
// Integer minimum/maximum
MAX,
MAXU,
MIN,
MINU,
// Shift with Add Unsigned Word and Unsigned Word Op's (Bitmanip)
SH1ADDUW,
SH2ADDUW,
SH3ADDUW,
ADDUW,
SLLIUW,
// Shift with Add (Bitmanip)
SH1ADD,
SH2ADD,
SH3ADD,
// Bitmanip Logical with negate op (Bitmanip)
ANDN,
ORN,
XNOR,
// Accelerator operations
ACCEL_OP,
ACCEL_OP_FS1,
ACCEL_OP_FD,
ACCEL_OP_LOAD,
ACCEL_OP_STORE,
// Zicond instruction
CZERO_EQZ,
CZERO_NEZ
} fu_op;
// Packed bundle of a functional-unit selector (fu_t), an operation (fu_op),
// three riscv::xlen_t values (operand_a, operand_b, imm) and a transaction id
// of TRANS_ID_BITS bits.
typedef struct packed {
fu_t fu;
fu_op operation;
riscv::xlen_t operand_a;
riscv::xlen_t operand_b;
riscv::xlen_t imm;
logic [TRANS_ID_BITS-1:0] trans_id;
} fu_data_t;
// Returns 1'b1 when `op` is one of the six comparison operations
// EQ, NE, LTS, LTU, GES or GEU, and 1'b0 for every other operation.
function automatic logic op_is_branch(input fu_op op);
  logic branch_cmp;
  unique case (op) inside
    EQ, NE, LTS, LTU, GES, GEU: branch_cmp = 1'b1;
    default:                    branch_cmp = 1'b0;  // all other ops
  endcase
  return branch_cmp;
endfunction
// -------------------------------
// Extract Src/Dst FP Reg from Op
// -------------------------------
// function used in instr_trace svh
// is_rs1_fpr function is kept to allow cva6 compilation with instr_trace feature
// Returns 1'b1 for the operations listed in the case item below, 1'b0 otherwise.
function automatic logic is_rs1_fpr(input fu_op op);
  logic rs1_is_fp;
  unique case (op) inside
    [FMUL : FNMADD],      // Computational Operations (except ADD/SUB)
    FCVT_F2I,             // Float-Int Casts
    FCVT_F2F,             // Float-Float Casts
    FSGNJ,                // Sign Injections
    FMV_F2X,              // FPR-GPR Moves
    FCMP,                 // Comparisons
    FCLASS,               // Classifications
    [VFMIN : VFCPKCD_D],  // Additional Vectorial FP ops
    ACCEL_OP_FS1:         // Accelerator instructions
    rs1_is_fp = 1'b1;
    default: rs1_is_fp = 1'b0;  // all other ops
  endcase
  return rs1_is_fp;
endfunction
// function used in instr_trace svh
// is_rs2_fpr function is kept to allow cva6 compilation with instr_trace feature
// Returns 1'b1 for the operations listed in the case item below, 1'b0 otherwise.
function automatic logic is_rs2_fpr(input fu_op op);
  logic rs2_is_fp;
  unique case (op) inside
    [FSD : FSB],          // FP Stores
    [FADD : FMIN_MAX],    // Computational Operations (no sqrt)
    [FMADD : FNMADD],     // Fused Computational Operations
    FCVT_F2F,             // Vectorial F2F Conversions require target
    [FSGNJ : FMV_F2X],    // Sign Injections and moves mapped to SGNJ
    FCMP,                 // Comparisons
    [VFMIN : VFCPKCD_D]:  // Additional Vectorial FP ops
    rs2_is_fp = 1'b1;
    default: rs2_is_fp = 1'b0;  // all other ops
  endcase
  return rs2_is_fp;
endfunction
// function used in instr_trace svh
// is_imm_fpr function is kept to allow cva6 compilation with instr_trace feature
// ternary operations encode the rs3 address in the imm field, also add/sub
// Returns 1'b1 for the operations listed in the case item below, 1'b0 otherwise.
function automatic logic is_imm_fpr(input fu_op op);
  logic imm_is_fp;
  unique case (op) inside
    [FADD : FSUB],            // ADD/SUB need inputs as Operand B/C
    [FMADD : FNMADD],         // Fused Computational Operations
    [VFCPKAB_S : VFCPKCD_D]:  // Vectorial FP cast and pack ops
    imm_is_fp = 1'b1;
    default: imm_is_fp = 1'b0;  // all other ops
  endcase
  return imm_is_fp;
endfunction
// function used in instr_trace svh
// is_rd_fpr function is kept to allow cva6 compilation with instr_trace feature
// Returns 1'b1 for the operations listed in the case item below, 1'b0 otherwise.
function automatic logic is_rd_fpr(input fu_op op);
  logic rd_is_fp;
  unique case (op) inside
    [FLD : FLB],              // FP Loads
    [FADD : FNMADD],          // Computational Operations
    FCVT_I2F,                 // Int-Float Casts
    FCVT_F2F,                 // Float-Float Casts
    FSGNJ,                    // Sign Injections
    FMV_X2F,                  // GPR-FPR Moves
    [VFMIN : VFSGNJX],        // Vectorial MIN/MAX and SGNJ
    [VFCPKAB_S : VFCPKCD_D],  // Vectorial FP cast and pack ops
    ACCEL_OP_FD:              // Accelerator instructions
    rd_is_fp = 1'b1;
    default: rd_is_fp = 1'b0;  // all other ops
  endcase
  return rd_is_fp;
endfunction
// Returns 1'b1 when `op` lies in the fu_op range AMO_LRW .. AMO_MINDU
// (both ends included), 1'b0 otherwise.
function automatic logic is_amo(fu_op op);
  logic amo_hit;
  case (op) inside
    [AMO_LRW : AMO_MINDU]: amo_hit = 1'b1;
    default:               amo_hit = 1'b0;
  endcase
  return amo_hit;
endfunction
// Packed record combining a valid flag, a VLEN-wide virtual address, an
// overflow flag, an xlen_t data word, a mask with one bit per byte of an
// XLEN-bit word (XLEN/8 bits), the functional unit / operation and a
// transaction id.
// NOTE(review): presumably the control word carried through the load/store unit -- confirm.
typedef struct packed {
logic valid;
logic [riscv::VLEN-1:0] vaddr;
logic overflow;
riscv::xlen_t data;
logic [(riscv::XLEN/8)-1:0] be;
fu_t fu;
fu_op operation;
logic [TRANS_ID_BITS-1:0] trans_id;
} lsu_ctrl_t;
// ---------------
// IF/ID Stage
// ---------------
// store the decompressed instruction
// One entry holds the instruction's address, its 32-bit instruction word, the
// branch prediction information and any exception recorded earlier.
typedef struct packed {
logic [riscv::VLEN-1:0] address; // the address of the instruction below
logic [31:0] instruction; // instruction word
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions
} fetch_entry_t;
// ---------------
// ID/EX/WB Stage
// ---------------
// Value taken from cva6_config_pkg::CVA6ConfigRvfiTrace.
// NOTE(review): presumably enables the RVFI trace support -- confirm against its users.
localparam RVFI = cva6_config_pkg::CVA6ConfigRvfiTrace;
// Scoreboard entry: everything recorded about one instruction (PC, transaction
// id, functional unit and operation, register addresses, result/immediate,
// status flags, exception and branch prediction data).
typedef struct packed {
logic [riscv::VLEN-1:0] pc; // PC of instruction
logic [TRANS_ID_BITS-1:0] trans_id; // this can potentially be simplified, we could index the scoreboard entry
// with the transaction id in any case make the width more generic
fu_t fu; // functional unit to use
fu_op op; // operation to perform in each functional unit
logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1
logic [REG_ADDR_SIZE-1:0] rs2; // register source address 2
logic [REG_ADDR_SIZE-1:0] rd; // register destination address
riscv::xlen_t result; // for unfinished instructions this field also holds the immediate,
// for unfinished floating-point that are partly encoded in rs2, this field also holds rs2
// for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB)
// this field holds the address of the third operand from the floating-point register file
logic valid; // is the result valid
logic use_imm; // should we use the immediate as operand b?
logic use_zimm; // use zimm as operand a
logic use_pc; // set if we need to use the PC as operand a, PC from exception
exception_t ex; // exception has occurred
branchpredict_sbe_t bp; // branch predict scoreboard data structure
logic is_compressed; // signals a compressed instruction, we need this information at the commit stage if
// we want to jump accordingly e.g.: +4, +2
logic vfp; // is this a vector floating-point instruction?
} scoreboard_entry_t;
// ---------------
// MMU instantiation
// ---------------
// All three values are taken from cva6_config_pkg.
localparam bit MMU_PRESENT = cva6_config_pkg::CVA6ConfigMmuPresent;
localparam int unsigned INSTR_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigInstrTlbEntries;
localparam int unsigned DATA_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigDataTlbEntries;
// -------------------
// Performance counter
// -------------------
// Enable flag, taken from cva6_config_pkg.
localparam bit PERF_COUNTER_EN = cva6_config_pkg::CVA6ConfigPerfCounterEn;
// NOTE(review): presumably the number of implemented mhpmcounter registers -- confirm.
localparam int unsigned MHPMCounterNum = 6;
// --------------------
// Atomics
// --------------------
// 4-bit encoding of an atomic memory operation; this is the type of the
// amo_op field in amo_req_t. Values 4'b1110 and 4'b1111 are not assigned.
typedef enum logic [3:0] {
AMO_NONE = 4'b0000,
AMO_LR = 4'b0001,
AMO_SC = 4'b0010,
AMO_SWAP = 4'b0011,
AMO_ADD = 4'b0100,
AMO_AND = 4'b0101,
AMO_OR = 4'b0110,
AMO_XOR = 4'b0111,
AMO_MAX = 4'b1000,
AMO_MAXU = 4'b1001,
AMO_MIN = 4'b1010,
AMO_MINU = 4'b1011,
AMO_CAS1 = 4'b1100, // unused, not part of riscv spec, but provided in OpenPiton
AMO_CAS2 = 4'b1101 // unused, not part of riscv spec, but provided in OpenPiton
} amo_t;
// TLB update record with a 27-bit VPN, an ASID of ASID_WIDTH bits and a
// riscv::pte_t payload.
typedef struct packed {
logic valid; // valid flag
logic is_2M; // NOTE(review): presumably marks a 2 MiB page -- confirm
logic is_1G; // NOTE(review): presumably marks a 1 GiB page -- confirm
logic [27-1:0] vpn; // VPN (39bits) = 27bits + 12bits offset
logic [ASID_WIDTH-1:0] asid;
riscv::pte_t content;
} tlb_update_t;
// Bits required for representation of physical address space as 4K pages
// (e.g. 27*4K == 39bit address space).
// NOTE(review): the example above (27 bits) does not obviously correspond to
// the value 38 used here -- confirm the intended width.
localparam PPN4K_WIDTH = 38;
// TLB update record for the Sv32 MMU: 20-bit VPN, 9-bit ASID and a
// riscv::pte_sv32_t payload.
typedef struct packed {
logic valid; // valid flag
logic is_4M; // NOTE(review): presumably marks a 4 MiB page -- confirm
logic [20-1:0] vpn; //VPN (32bits) = 20bits + 12bits offset
logic [9-1:0] asid; //ASID length = 9 for Sv32 mmu
riscv::pte_sv32_t content;
} tlb_update_sv32_t;
// 2-bit exception kind: none, instruction access fault or instruction page
// fault. Members are numbered in declaration order (FE_NONE is 0); the
// encoding 2'b11 is not assigned.
typedef enum logic [1:0] {
FE_NONE,
FE_INSTR_ACCESS_FAULT,
FE_INSTR_PAGE_FAULT
} frontend_exception_t;
// ----------------------
// cache request ports
// ----------------------
// I$ address translation requests
// Carries the translated (physical, PLEN-wide) fetch address together with
// its valid flag and any exception raised during the fetch.
typedef struct packed {
logic fetch_valid; // address translation valid
logic [riscv::PLEN-1:0] fetch_paddr; // physical address in
exception_t fetch_exception; // exception occurred during fetch
} icache_areq_t;
// Carries the virtual (VLEN-wide) fetch address together with the request flag.
typedef struct packed {
logic fetch_req; // address translation request
logic [riscv::VLEN-1:0] fetch_vaddr; // virtual address out
} icache_arsp_t;
// I$ data requests
// Request record: request flag, two kill flags, a speculative marker and the
// VLEN-wide virtual address.
typedef struct packed {
logic req; // we request a new word
logic kill_s1; // kill the current request
logic kill_s2; // kill the last request
logic spec; // request is speculative
logic [riscv::VLEN-1:0] vaddr; // 1st cycle: 12 bit index is taken for lookup
} icache_dreq_t;
// Response record: ready/valid flags, FETCH_WIDTH bits of data,
// FETCH_USER_WIDTH user bits, the virtual address and an exception record.
typedef struct packed {
logic ready; // icache is ready
logic valid; // signals a valid read
logic [FETCH_WIDTH-1:0] data; // 2+ cycle out: tag
logic [FETCH_USER_WIDTH-1:0] user; // User bits
logic [riscv::VLEN-1:0] vaddr; // virtual address out
exception_t ex; // we've encountered an exception
} icache_drsp_t;
// AMO request going to cache. This request is unconditionally valid as soon
// as req goes high.
// Furthermore, those signals are kept stable until the response indicates
// completion by asserting ack.
// Both operands are fixed at 64 bits regardless of riscv::XLEN.
typedef struct packed {
logic req; // this request is valid
amo_t amo_op; // atomic memory operation to perform
logic [1:0] size; // 2'b10 --> word operation, 2'b11 --> double word operation
logic [63:0] operand_a; // address
logic [63:0] operand_b; // data as laid out in the register
} amo_req_t;
// AMO response coming from cache.
// The result is fixed at 64 bits regardless of riscv::XLEN.
typedef struct packed {
logic ack; // response is valid
logic [63:0] result; // sign-extended, result
} amo_resp_t;
// D$ data requests
// The address is split into an index part (DCACHE_INDEX_WIDTH bits) and a
// tag part (DCACHE_TAG_WIDTH bits). data_be has one bit per byte of an
// XLEN-bit word (XLEN/8 bits).
// NOTE(review): field roles below are inferred from their names -- confirm
// against the cache implementation before relying on them.
typedef struct packed {
logic [DCACHE_INDEX_WIDTH-1:0] address_index;
logic [DCACHE_TAG_WIDTH-1:0] address_tag;
riscv::xlen_t data_wdata;
logic [DCACHE_USER_WIDTH-1:0] data_wuser;
logic data_req;
logic data_we;
logic [(riscv::XLEN/8)-1:0] data_be;
logic [1:0] data_size;
logic [DCACHE_TID_WIDTH-1:0] data_id;
logic kill_req;
logic tag_valid;
} dcache_req_i_t;
// Counterpart of dcache_req_i_t: grant and read-valid flags, a read id of
// DCACHE_TID_WIDTH bits, an xlen_t read data word and DCACHE_USER_WIDTH
// user bits.
typedef struct packed {
logic data_gnt;
logic data_rvalid;
logic [DCACHE_TID_WIDTH-1:0] data_rid;
riscv::xlen_t data_rdata;
logic [DCACHE_USER_WIDTH-1:0] data_ruser;
} dcache_req_o_t;
// ----------------------
// Arithmetic Functions
// ----------------------
// Sign-extends a 32-bit operand: bit 31 is replicated (riscv::XLEN - 32)
// times in front of the unchanged 32 input bits.
function automatic riscv::xlen_t sext32(logic [31:0] operand);
  logic sign_bit;
  sign_bit = operand[31];
  return {{(riscv::XLEN - 32) {sign_bit}}, operand};
endfunction
// ----------------------
// Immediate functions
// ----------------------
// Assembles a 21-bit offset from instruction bits {31, 19:12, 20, 30:21} with
// a constant zero as bit 0, then sign-extends it to riscv::VLEN bits.
function automatic logic [riscv::VLEN-1:0] uj_imm(logic [31:0] instruction_i);
  logic [20:0] offset;
  offset = {
    instruction_i[31], instruction_i[19:12], instruction_i[20], instruction_i[30:21], 1'b0
  };
  return {{(riscv::VLEN - 21) {offset[20]}}, offset};
endfunction
// Takes instruction bits [31:20] as a 12-bit immediate and sign-extends it
// (using instruction bit 31) to riscv::VLEN bits.
function automatic logic [riscv::VLEN-1:0] i_imm(logic [31:0] instruction_i);
  logic [11:0] imm12;
  imm12 = instruction_i[31:20];
  return {{(riscv::VLEN - 12) {instruction_i[31]}}, imm12};
endfunction
// Assembles a 13-bit offset from instruction bits {31, 7, 30:25, 11:8} with a
// constant zero as bit 0, then sign-extends it to riscv::VLEN bits.
function automatic logic [riscv::VLEN-1:0] sb_imm(logic [31:0] instruction_i);
  logic [12:0] offset;
  offset = {
    instruction_i[31], instruction_i[7], instruction_i[30:25], instruction_i[11:8], 1'b0
  };
  return {{(riscv::VLEN - 13) {offset[12]}}, offset};
endfunction
// ----------------------
// LSU Functions
// ----------------------
// align data to address e.g.: shift data to be naturally 64
// The data word is rotated left by a whole number of bytes selected by the
// address: selectors 0..3 rotate within the low riscv::XLEN bits, selectors
// 4..7 rotate the full 64-bit word. The low riscv::XLEN bits are returned.
function automatic riscv::xlen_t data_align(logic [2:0] addr, logic [63:0] data);
  logic [ 2:0] byte_sel;
  logic [63:0] rotated;

  // addr[2] is forced to 1'b0 when XLEN is 32 bits, so selectors 4..7 are
  // only reachable when riscv::IS_XLEN64 is set
  byte_sel = {(addr[2] && riscv::IS_XLEN64), addr[1:0]};
  rotated  = '0;

  case (byte_sel)
    3'b000: rotated[riscv::XLEN-1:0] = data[riscv::XLEN-1:0];
    3'b001: rotated[riscv::XLEN-1:0] = {data[riscv::XLEN-9:0], data[riscv::XLEN-1-:8]};
    3'b010: rotated[riscv::XLEN-1:0] = {data[riscv::XLEN-17:0], data[riscv::XLEN-1-:16]};
    3'b011: rotated[riscv::XLEN-1:0] = {data[riscv::XLEN-25:0], data[riscv::XLEN-1-:24]};
    3'b100: rotated = {data[31:0], data[63-:32]};
    3'b101: rotated = {data[23:0], data[63-:40]};
    3'b110: rotated = {data[15:0], data[63-:48]};
    3'b111: rotated = {data[7:0], data[63-:56]};
  endcase

  return rotated[riscv::XLEN-1:0];
endfunction
// generate byte enable mask
// size selects the number of contiguous enable bits (2'b00: 1, 2'b01: 2,
// 2'b10: 4, 2'b11: all 8) and addr the position of the lowest one.
// Combinations where the run of bits would not fit into the 8-bit mask
// (addr > 4 for 2'b10, addr == 7 for 2'b01) yield 8'b0; for 2'b11 addr is
// ignored.
function automatic logic [7:0] be_gen(logic [2:0] addr, logic [1:0] size);
  logic [7:0] mask;
  mask = 8'b0;  // result when no (size, addr) combination below matches
  case (size)
    2'b00: begin
      case (addr)
        3'b000: mask = 8'b0000_0001;
        3'b001: mask = 8'b0000_0010;
        3'b010: mask = 8'b0000_0100;
        3'b011: mask = 8'b0000_1000;
        3'b100: mask = 8'b0001_0000;
        3'b101: mask = 8'b0010_0000;
        3'b110: mask = 8'b0100_0000;
        3'b111: mask = 8'b1000_0000;
      endcase
    end
    2'b01: begin
      case (addr)
        3'b000: mask = 8'b0000_0011;
        3'b001: mask = 8'b0000_0110;
        3'b010: mask = 8'b0000_1100;
        3'b011: mask = 8'b0001_1000;
        3'b100: mask = 8'b0011_0000;
        3'b101: mask = 8'b0110_0000;
        3'b110: mask = 8'b1100_0000;
        default: ;  // mask stays zero
      endcase
    end
    2'b10: begin
      case (addr)
        3'b000: mask = 8'b0000_1111;
        3'b001: mask = 8'b0001_1110;
        3'b010: mask = 8'b0011_1100;
        3'b011: mask = 8'b0111_1000;
        3'b100: mask = 8'b1111_0000;
        default: ;  // mask stays zero
      endcase
    end
    2'b11: mask = 8'b1111_1111;
  endcase
  return mask;
endfunction
// 4-bit variant of be_gen: size selects the number of contiguous enable bits
// (2'b00: 1, 2'b01: 2, 2'b10: all 4) and addr the position of the lowest one.
// size 2'b11, and size 2'b01 with addr 2'b11, yield 4'b0; for 2'b10 addr is
// ignored.
function automatic logic [3:0] be_gen_32(logic [1:0] addr, logic [1:0] size);
  logic [3:0] mask;
  mask = 4'b0;  // result when no (size, addr) combination below matches
  case (size)
    2'b00: begin
      case (addr)
        2'b00: mask = 4'b0001;
        2'b01: mask = 4'b0010;
        2'b10: mask = 4'b0100;
        2'b11: mask = 4'b1000;
      endcase
    end
    2'b01: begin
      case (addr)
        2'b00: mask = 4'b0011;
        2'b01: mask = 4'b0110;
        2'b10: mask = 4'b1100;
        default: ;  // mask stays zero
      endcase
    end
    2'b10: mask = 4'b1111;
    default: mask = 4'b0;
  endcase
  return mask;
endfunction
// ----------------------
// Extract Bytes from Op
// ----------------------
// Maps a load/store/AMO operation to a 2-bit size code:
//   2'b00 for the B variants, 2'b01 for the H variants,
//   2'b10 for the W variants, 2'b11 for the D variants.
// Any operation not listed also yields 2'b11.
function automatic logic [1:0] extract_transfer_size(fu_op op);
  logic [1:0] size_code;
  case (op)
    LB, LBU, SB, FLB, FSB: size_code = 2'b00;
    LH, LHU, SH, FLH, FSH: size_code = 2'b01;
    LW, LWU, SW, FLW, FSW,
    AMO_LRW, AMO_SCW, AMO_SWAPW,
    AMO_ADDW, AMO_ANDW, AMO_ORW, AMO_XORW,
    AMO_MAXW, AMO_MAXWU, AMO_MINW, AMO_MINWU:
    size_code = 2'b10;
    LD, SD, FLD, FSD,
    AMO_LRD, AMO_SCD, AMO_SWAPD,
    AMO_ADDD, AMO_ANDD, AMO_ORD, AMO_XORD,
    AMO_MAXD, AMO_MAXDU, AMO_MIND, AMO_MINDU:
    size_code = 2'b11;
    default: size_code = 2'b11;
  endcase
  return size_code;
endfunction
endpackage

Some files were not shown because too many files have changed in this diff Show More